diff -Nru llvm-toolchain-13-13.0.0~+rc4/clang/CMakeLists.txt.orig llvm-toolchain-13-13.0.0/clang/CMakeLists.txt.orig --- llvm-toolchain-13-13.0.0~+rc4/clang/CMakeLists.txt.orig 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/clang/CMakeLists.txt.orig 2021-09-20 09:59:03.000000000 +0000 @@ -0,0 +1,904 @@ +cmake_minimum_required(VERSION 3.13.4) + +# If we are not building as a part of LLVM, build Clang as an +# standalone project, using LLVM as an external library: +if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) + project(Clang) + + set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to") + set(CMAKE_CXX_STANDARD_REQUIRED YES) + set(CMAKE_CXX_EXTENSIONS NO) + + # Rely on llvm-config. + set(CONFIG_OUTPUT) + if(LLVM_CONFIG) + set (LLVM_CONFIG_FOUND 1) + message(STATUS "Found LLVM_CONFIG as ${LLVM_CONFIG}") + message(DEPRECATION "Using llvm-config to detect the LLVM installation is \ + deprecated. The installed cmake files should be used \ + instead. CMake should be able to detect your LLVM install \ + automatically, but you can also use LLVM_DIR to specify \ + the path containing LLVMConfig.cmake.") + set(CONFIG_COMMAND ${LLVM_CONFIG} + "--assertion-mode" + "--bindir" + "--libdir" + "--includedir" + "--prefix" + "--src-root" + "--cmakedir") + execute_process( + COMMAND ${CONFIG_COMMAND} + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE CONFIG_OUTPUT + ) + if(NOT HAD_ERROR) + string(REGEX REPLACE + "[ \t]*[\r\n]+[ \t]*" ";" + CONFIG_OUTPUT ${CONFIG_OUTPUT}) + else() + string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}") + message(STATUS "${CONFIG_COMMAND_STR}") + message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}") + endif() + + list(GET CONFIG_OUTPUT 0 ENABLE_ASSERTIONS) + list(GET CONFIG_OUTPUT 1 TOOLS_BINARY_DIR) + list(GET CONFIG_OUTPUT 2 LIBRARY_DIR) + list(GET CONFIG_OUTPUT 3 INCLUDE_DIR) + list(GET CONFIG_OUTPUT 4 LLVM_OBJ_ROOT) + list(GET CONFIG_OUTPUT 5 MAIN_SRC_DIR) + list(GET CONFIG_OUTPUT 6 
LLVM_CONFIG_CMAKE_DIR) + + # Normalize LLVM_CMAKE_DIR. --cmakedir might contain backslashes. + # CMake assumes slashes as PATH. + file(TO_CMAKE_PATH ${LLVM_CONFIG_CMAKE_DIR} LLVM_CMAKE_DIR) + endif() + + + if(NOT MSVC_IDE) + set(LLVM_ENABLE_ASSERTIONS ${ENABLE_ASSERTIONS} + CACHE BOOL "Enable assertions") + # Assertions should follow llvm-config's. + mark_as_advanced(LLVM_ENABLE_ASSERTIONS) + endif() + + find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_DIR}") + list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR}) + + # We can't check LLVM_CONFIG here, because find_package(LLVM ...) also sets + # LLVM_CONFIG. + if (NOT LLVM_CONFIG_FOUND) + # Pull values from LLVMConfig.cmake. We can drop this once the llvm-config + # path is removed. + set(TOOLS_BINARY_DIR ${LLVM_TOOLS_BINARY_DIR}) + set(LIBRARY_DIR ${LLVM_LIBRARY_DIR}) + set(INCLUDE_DIR ${LLVM_INCLUDE_DIR}) + set(LLVM_OBJ_DIR ${LLVM_BINARY_DIR}) + endif() + + set(LLVM_TOOLS_BINARY_DIR ${TOOLS_BINARY_DIR} CACHE PATH "Path to llvm/bin") + set(LLVM_LIBRARY_DIR ${LIBRARY_DIR} CACHE PATH "Path to llvm/lib") + set(LLVM_MAIN_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include") + set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree") + set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree") + + find_program(LLVM_TABLEGEN_EXE "llvm-tblgen" ${LLVM_TOOLS_BINARY_DIR} + NO_DEFAULT_PATH) + + # They are used as destination of target generators. + set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) + set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX}) + if(WIN32 OR CYGWIN) + # DLL platform -- put DLLs into bin. + set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) + else() + set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + endif() + + option(LLVM_INSTALL_TOOLCHAIN_ONLY + "Only include toolchain files in the 'install' target." 
OFF) + + option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN + "Set to ON to force using an old, unsupported host toolchain." OFF) + option(CLANG_ENABLE_BOOTSTRAP "Generate the clang bootstrap target" OFF) + option(LLVM_ENABLE_LIBXML2 "Use libxml2 if available." ON) + + include(AddLLVM) + include(TableGen) + include(HandleLLVMOptions) + include(VersionFromVCS) + include(GetErrcMessages) + include(LLVMDistributionSupport) + + set(PACKAGE_VERSION "${LLVM_PACKAGE_VERSION}") + set(BUG_REPORT_URL "${LLVM_PACKAGE_BUGREPORT}" CACHE STRING + "Default URL where bug reports are to be submitted.") + + if (NOT DEFINED LLVM_INCLUDE_TESTS) + set(LLVM_INCLUDE_TESTS ON) + endif() + + include_directories("${LLVM_BINARY_DIR}/include" "${LLVM_MAIN_INCLUDE_DIR}") + link_directories("${LLVM_LIBRARY_DIR}") + + set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin ) + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) + + if(LLVM_INCLUDE_TESTS) + find_package(Python3 ${LLVM_MINIMUM_PYTHON_VERSION} REQUIRED + COMPONENTS Interpreter) + + # Check prebuilt llvm/utils. 
+ if(EXISTS ${LLVM_TOOLS_BINARY_DIR}/FileCheck${CMAKE_EXECUTABLE_SUFFIX} + AND EXISTS ${LLVM_TOOLS_BINARY_DIR}/count${CMAKE_EXECUTABLE_SUFFIX} + AND EXISTS ${LLVM_TOOLS_BINARY_DIR}/not${CMAKE_EXECUTABLE_SUFFIX}) + set(LLVM_UTILS_PROVIDED ON) + endif() + + if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) + # Note: path not really used, except for checking if lit was found + set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py) + if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/llvm-lit) + add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit utils/llvm-lit) + endif() + if(NOT LLVM_UTILS_PROVIDED) + add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/FileCheck utils/FileCheck) + add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/count utils/count) + add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/not utils/not) + set(LLVM_UTILS_PROVIDED ON) + set(CLANG_TEST_DEPS FileCheck count not) + endif() + set(UNITTEST_DIR ${LLVM_MAIN_SRC_DIR}/utils/unittest) + if(EXISTS ${UNITTEST_DIR}/googletest/include/gtest/gtest.h + AND NOT EXISTS ${LLVM_LIBRARY_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}gtest${CMAKE_STATIC_LIBRARY_SUFFIX} + AND EXISTS ${UNITTEST_DIR}/CMakeLists.txt) + add_subdirectory(${UNITTEST_DIR} utils/unittest) + endif() + else() + # Seek installed Lit. + find_program(LLVM_LIT + NAMES llvm-lit lit.py lit + PATHS "${LLVM_MAIN_SRC_DIR}/utils/lit" + DOC "Path to lit.py") + endif() + + if(LLVM_LIT) + # Define the default arguments to use with 'lit', and an option for the user + # to override. + set(LIT_ARGS_DEFAULT "-sv") + if (MSVC OR XCODE) + set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") + endif() + set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") + + get_errc_messages(LLVM_LIT_ERRC_MESSAGES) + + # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. 
+ if( WIN32 AND NOT CYGWIN ) + set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") + endif() + else() + set(LLVM_INCLUDE_TESTS OFF) + endif() + endif() + + set( CLANG_BUILT_STANDALONE 1 ) + set(BACKEND_PACKAGE_STRING "LLVM ${LLVM_PACKAGE_VERSION}") +else() + set(BACKEND_PACKAGE_STRING "${PACKAGE_STRING}") +endif() + +# Make sure that our source directory is on the current cmake module path so that +# we can include cmake files from this directory. +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") + +if(LLVM_ENABLE_LIBXML2) + # Don't look for libxml if we're using MSan, since uninstrumented third party + # code may call MSan interceptors like strlen, leading to false positives. + if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") + set (LIBXML2_FOUND 0) + find_package(LibXml2 2.5.3 QUIET) + if (LIBXML2_FOUND) + set(CLANG_HAVE_LIBXML 1) + endif() + endif() +endif() + +include(CheckIncludeFile) +check_include_file(sys/resource.h CLANG_HAVE_RLIMITS) + +set(CLANG_RESOURCE_DIR "" CACHE STRING + "Relative directory from the Clang binary to its resource files.") + +set(C_INCLUDE_DIRS "" CACHE STRING + "Colon separated list of directories clang will search for headers.") + +set(GCC_INSTALL_PREFIX "" CACHE PATH "Directory where gcc is installed." ) +set(DEFAULT_SYSROOT "" CACHE STRING + "Default to all compiler invocations for --sysroot=." ) + +set(ENABLE_LINKER_BUILD_ID OFF CACHE BOOL "pass --build-id to ld") + +set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL + "enable x86 relax relocations by default") + +set(CLANG_SPAWN_CC1 OFF CACHE BOOL + "Whether clang should use a new process for the CC1 invocation") + +# TODO: verify the values against LangStandards.def? 
+set(CLANG_DEFAULT_STD_C "" CACHE STRING + "Default standard to use for C/ObjC code (IDENT from LangStandards.def, empty for platform default)") +set(CLANG_DEFAULT_STD_CXX "" CACHE STRING + "Default standard to use for C++/ObjC++ code (IDENT from LangStandards.def, empty for platform default)") + +set(CLANG_DEFAULT_LINKER "" CACHE STRING + "Default linker to use (linker name or absolute path, empty for platform default)") + +set(CLANG_DEFAULT_CXX_STDLIB "" CACHE STRING + "Default C++ stdlib to use (\"libstdc++\" or \"libc++\", empty for platform default") +if (NOT(CLANG_DEFAULT_CXX_STDLIB STREQUAL "" OR + CLANG_DEFAULT_CXX_STDLIB STREQUAL "libstdc++" OR + CLANG_DEFAULT_CXX_STDLIB STREQUAL "libc++")) + message(WARNING "Resetting default C++ stdlib to use platform default") + set(CLANG_DEFAULT_CXX_STDLIB "" CACHE STRING + "Default C++ stdlib to use (\"libstdc++\" or \"libc++\", empty for platform default" FORCE) +endif() + +set(CLANG_DEFAULT_RTLIB "" CACHE STRING + "Default runtime library to use (\"libgcc\" or \"compiler-rt\", empty for platform default)") +if (NOT(CLANG_DEFAULT_RTLIB STREQUAL "" OR + CLANG_DEFAULT_RTLIB STREQUAL "libgcc" OR + CLANG_DEFAULT_RTLIB STREQUAL "compiler-rt")) + message(WARNING "Resetting default rtlib to use platform default") + set(CLANG_DEFAULT_RTLIB "" CACHE STRING + "Default runtime library to use (\"libgcc\" or \"compiler-rt\", empty for platform default)" FORCE) +endif() + +set(CLANG_DEFAULT_UNWINDLIB "" CACHE STRING + "Default unwind library to use (\"none\" \"libgcc\" or \"libunwind\", empty to match runtime library.)") +if (CLANG_DEFAULT_UNWINDLIB STREQUAL "") + if (CLANG_DEFAULT_RTLIB STREQUAL "libgcc") + set (CLANG_DEFAULT_UNWINDLIB "libgcc" CACHE STRING "" FORCE) + endif() +endif() + +if (NOT(CLANG_DEFAULT_UNWINDLIB STREQUAL "" OR + CLANG_DEFAULT_UNWINDLIB STREQUAL "none" OR + CLANG_DEFAULT_UNWINDLIB STREQUAL "libgcc" OR + CLANG_DEFAULT_UNWINDLIB STREQUAL "libunwind")) + message(WARNING "Resetting default unwindlib to use 
platform default") + set(CLANG_DEFAULT_UNWINDLIB "" CACHE STRING + "Default unwind library to use (\"none\" \"libgcc\" or \"libunwind\", empty to match runtime library.)" FORCE) +endif() + +set(CLANG_DEFAULT_OBJCOPY "objcopy" CACHE STRING + "Default objcopy executable to use.") + +set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING + "Default OpenMP runtime used by -fopenmp.") + +# OpenMP offloading requires at least sm_35 because we use shuffle instructions +# to generate efficient code for reductions and the atomicMax instruction on +# 64-bit integers in the implementation of conditional lastprivate. +set(CUDA_ARCH_FLAGS "sm_35") + +# Try to find the highest Nvidia GPU architecture the system supports +if (NOT DEFINED CLANG_OPENMP_NVPTX_DEFAULT_ARCH) + find_package(CUDA QUIET) + if (CUDA_FOUND) + cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS) + endif() +else() + set(CUDA_ARCH_FLAGS ${CLANG_OPENMP_NVPTX_DEFAULT_ARCH}) +endif() + +string(REGEX MATCH "sm_([0-9]+)" CUDA_ARCH_MATCH ${CUDA_ARCH_FLAGS}) +if (NOT DEFINED CUDA_ARCH_MATCH OR "${CMAKE_MATCH_1}" LESS 35) + set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH "sm_35" CACHE STRING + "Default architecture for OpenMP offloading to Nvidia GPUs." 
FORCE) + message(WARNING "Resetting default architecture for OpenMP offloading to Nvidia GPUs to sm_35") +else() + set(CLANG_OPENMP_NVPTX_DEFAULT_ARCH ${CUDA_ARCH_MATCH} CACHE STRING + "Default architecture for OpenMP offloading to Nvidia GPUs.") +endif() + +set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch") + +set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING + "Vendor-specific text for showing with version information.") + +set(CLANG_REPOSITORY_STRING "" CACHE STRING + "Vendor-specific text for showing the repository the source is taken from.") + +if(CLANG_REPOSITORY_STRING) + add_definitions(-DCLANG_REPOSITORY_STRING="${CLANG_REPOSITORY_STRING}") +endif() + +set(CLANG_VENDOR_UTI "org.llvm.clang" CACHE STRING + "Vendor-specific uti.") + +set(CLANG_PYTHON_BINDINGS_VERSIONS "" CACHE STRING + "Python versions to install libclang python bindings for") + +set(CLANG_LINK_CLANG_DYLIB ${LLVM_LINK_LLVM_DYLIB} CACHE BOOL + "Link tools against libclang-cpp.so") + +if (NOT LLVM_LINK_LLVM_DYLIB AND CLANG_LINK_CLANG_DYLIB) + message(FATAL_ERROR "Cannot set CLANG_LINK_CLANG_DYLIB=ON when " + "LLVM_LINK_LLVM_DYLIB=OFF") +endif() + +# The libdir suffix must exactly match whatever LLVM's configuration used. +set(CLANG_LIBDIR_SUFFIX "${LLVM_LIBDIR_SUFFIX}") + +set(CLANG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(CLANG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE ) + message(FATAL_ERROR "In-source builds are not allowed. " +"Please create a directory and run cmake " +"from there, passing the path to this source directory as the last argument. " +"This process created the file `CMakeCache.txt' and the directory " +"`CMakeFiles'. Please delete them.") +endif() + +# If CLANG_VERSION_* is specified, use it, if not use LLVM_VERSION_*. 
+if(NOT DEFINED CLANG_VERSION_MAJOR) + set(CLANG_VERSION_MAJOR ${LLVM_VERSION_MAJOR}) +endif() +if(NOT DEFINED CLANG_VERSION_MINOR) + set(CLANG_VERSION_MINOR ${LLVM_VERSION_MINOR}) +endif() +if(NOT DEFINED CLANG_VERSION_PATCHLEVEL) + set(CLANG_VERSION_PATCHLEVEL ${LLVM_VERSION_PATCH}) +endif() +# Unlike PACKAGE_VERSION, CLANG_VERSION does not include LLVM_VERSION_SUFFIX. +set(CLANG_VERSION "${CLANG_VERSION_MAJOR}.${CLANG_VERSION_MINOR}.${CLANG_VERSION_PATCHLEVEL}") +message(STATUS "Clang version: ${CLANG_VERSION}") + +# Configure the Version.inc file. +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/include/clang/Basic/Version.inc.in + ${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc) + +# Add appropriate flags for GCC +if (LLVM_COMPILER_IS_GCC_COMPATIBLE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual") + if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") + endif () + + # Enable -pedantic for Clang even if it's not enabled for LLVM. + if (NOT LLVM_ENABLE_PEDANTIC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long") + endif () + + check_cxx_compiler_flag("-Werror -Wnested-anon-types" CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG) + if( CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-nested-anon-types" ) + endif() +endif () + +# Determine HOST_LINK_VERSION on Darwin. 
+set(HOST_LINK_VERSION) +if (APPLE) + set(LD_V_OUTPUT) + execute_process( + COMMAND sh -c "${CMAKE_LINKER} -v 2>&1 | head -1" + RESULT_VARIABLE HAD_ERROR + OUTPUT_VARIABLE LD_V_OUTPUT + ) + if (HAD_ERROR) + message(FATAL_ERROR "${CMAKE_LINKER} failed with status ${HAD_ERROR}") + endif() + if ("${LD_V_OUTPUT}" MATCHES ".*ld64-([0-9.]+).*") + string(REGEX REPLACE ".*ld64-([0-9.]+).*" "\\1" HOST_LINK_VERSION ${LD_V_OUTPUT}) + elseif ("${LD_V_OUTPUT}" MATCHES "[^0-9]*([0-9.]+).*") + string(REGEX REPLACE "[^0-9]*([0-9.]+).*" "\\1" HOST_LINK_VERSION ${LD_V_OUTPUT}) + endif() + message(STATUS "Host linker version: ${HOST_LINK_VERSION}") +endif() + +include(CMakeParseArguments) +include(AddClang) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +include_directories(BEFORE + ${CMAKE_CURRENT_BINARY_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/include + ) + +if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + install(DIRECTORY include/clang include/clang-c + DESTINATION include + COMPONENT clang-headers + FILES_MATCHING + PATTERN "*.def" + PATTERN "*.h" + PATTERN "config.h" EXCLUDE + ) + + install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/clang + DESTINATION include + COMPONENT clang-headers + FILES_MATCHING + PATTERN "CMakeFiles" EXCLUDE + PATTERN "*.inc" + PATTERN "*.h" + ) + + # Installing the headers needs to depend on generating any public + # tablegen'd headers. 
+ add_custom_target(clang-headers DEPENDS clang-tablegen-targets) + set_target_properties(clang-headers PROPERTIES FOLDER "Misc") + if(NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-clang-headers + DEPENDS clang-headers + COMPONENT clang-headers) + endif() + + add_custom_target(bash-autocomplete DEPENDS utils/bash-autocomplete.sh) + install(PROGRAMS utils/bash-autocomplete.sh + DESTINATION share/clang + COMPONENT bash-autocomplete) + if(NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-bash-autocomplete + DEPENDS bash-autocomplete + COMPONENT bash-autocomplete) + endif() +endif() + +add_definitions( -D_GNU_SOURCE ) + +option(CLANG_BUILD_TOOLS + "Build the Clang tools. If OFF, just generate build targets." ON) + +option(CLANG_ENABLE_ARCMT "Build ARCMT." ON) +option(CLANG_ENABLE_STATIC_ANALYZER + "Include static analyzer in clang binary." ON) + +option(CLANG_ENABLE_PROTO_FUZZER "Build Clang protobuf fuzzer." OFF) + +option(CLANG_ROUND_TRIP_CC1_ARGS + "Round-trip command line arguments in -cc1." ${LLVM_ENABLE_ASSERTIONS}) + +if(NOT CLANG_ENABLE_STATIC_ANALYZER AND CLANG_ENABLE_ARCMT) + message(FATAL_ERROR "Cannot disable static analyzer while enabling ARCMT or Z3") +endif() + +if(CLANG_ENABLE_ARCMT) + set(CLANG_ENABLE_OBJC_REWRITER ON) +endif() + +if (CLANG_ROUND_TRIP_CC1_ARGS) + add_definitions(-DCLANG_ROUND_TRIP_CC1_ARGS=ON) +endif() + +# Clang version information +set(CLANG_EXECUTABLE_VERSION + "${CLANG_VERSION_MAJOR}" CACHE STRING + "Major version number that will be appended to the clang executable name") +set(LIBCLANG_LIBRARY_VERSION + "${CLANG_VERSION_MAJOR}" CACHE STRING + "Major version number that will be appended to the libclang library") +mark_as_advanced(CLANG_EXECUTABLE_VERSION LIBCLANG_LIBRARY_VERSION) + +option(CLANG_INCLUDE_TESTS + "Generate build targets for the Clang unit tests." + ${LLVM_INCLUDE_TESTS}) + +add_subdirectory(utils/TableGen) + +add_subdirectory(include) + +# All targets below may depend on all tablegen'd files. 
+get_property(CLANG_TABLEGEN_TARGETS GLOBAL PROPERTY CLANG_TABLEGEN_TARGETS) +add_custom_target(clang-tablegen-targets + DEPENDS + omp_gen + ${CLANG_TABLEGEN_TARGETS}) +set_target_properties(clang-tablegen-targets PROPERTIES FOLDER "Misc") +list(APPEND LLVM_COMMON_DEPENDS clang-tablegen-targets) + +# Force target to be built as soon as possible. Clang modules builds depend +# header-wise on it as they ship all headers from the umbrella folders. Building +# an entire module might include header, which depends on intrinsics_gen. +if(LLVM_ENABLE_MODULES) + list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen) +endif() + +add_subdirectory(lib) +add_subdirectory(tools) +add_subdirectory(runtime) + +option(CLANG_BUILD_EXAMPLES "Build CLANG example programs by default." OFF) +add_subdirectory(examples) + +if(APPLE) + # this line is needed as a cleanup to ensure that any CMakeCaches with the old + # default value get updated to the new default. + if(CLANG_ORDER_FILE STREQUAL "") + unset(CLANG_ORDER_FILE CACHE) + unset(CLANG_ORDER_FILE) + endif() + + + set(CLANG_ORDER_FILE ${CMAKE_CURRENT_BINARY_DIR}/clang.order CACHE FILEPATH + "Order file to use when compiling clang in order to improve startup time (Darwin Only - requires ld64).") + + if(NOT EXISTS ${CLANG_ORDER_FILE}) + string(FIND "${CLANG_ORDER_FILE}" "${CMAKE_CURRENT_BINARY_DIR}" PATH_START) + if(PATH_START EQUAL 0) + file(WRITE ${CLANG_ORDER_FILE} "\n") + else() + message(FATAL_ERROR "Specified order file '${CLANG_ORDER_FILE}' does not exist.") + endif() + endif() +endif() + + +if( CLANG_INCLUDE_TESTS ) + if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include/gtest/gtest.h) + add_subdirectory(unittests) + list(APPEND CLANG_TEST_DEPS ClangUnitTests) + list(APPEND CLANG_TEST_PARAMS + clang_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/test/Unit/lit.site.cfg + ) + endif() + add_subdirectory(test) + add_subdirectory(bindings/python/tests) + + if(CLANG_BUILT_STANDALONE) + # Add a global check rule now that all 
subdirectories have been traversed + # and we know the total set of lit testsuites. + get_property(LLVM_LIT_TESTSUITES GLOBAL PROPERTY LLVM_LIT_TESTSUITES) + get_property(LLVM_LIT_PARAMS GLOBAL PROPERTY LLVM_LIT_PARAMS) + get_property(LLVM_LIT_DEPENDS GLOBAL PROPERTY LLVM_LIT_DEPENDS) + get_property(LLVM_LIT_EXTRA_ARGS GLOBAL PROPERTY LLVM_LIT_EXTRA_ARGS) + get_property(LLVM_ADDITIONAL_TEST_TARGETS + GLOBAL PROPERTY LLVM_ADDITIONAL_TEST_TARGETS) + add_lit_target(check-all + "Running all regression tests" + ${LLVM_LIT_TESTSUITES} + PARAMS ${LLVM_LIT_PARAMS} + DEPENDS ${LLVM_LIT_DEPENDS} ${LLVM_ADDITIONAL_TEST_TARGETS} + ARGS ${LLVM_LIT_EXTRA_ARGS} + ) + endif() + add_subdirectory(utils/perf-training) +endif() + +option(CLANG_INCLUDE_DOCS "Generate build targets for the Clang docs." + ${LLVM_INCLUDE_DOCS}) +if( CLANG_INCLUDE_DOCS ) + add_subdirectory(docs) +endif() + +# Custom target to install all clang libraries. +add_custom_target(clang-libraries) +set_target_properties(clang-libraries PROPERTIES FOLDER "Misc") + +if(NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-clang-libraries + DEPENDS clang-libraries + COMPONENT clang-libraries) +endif() + +get_property(CLANG_LIBS GLOBAL PROPERTY CLANG_LIBS) +if(CLANG_LIBS) + list(REMOVE_DUPLICATES CLANG_LIBS) + foreach(lib ${CLANG_LIBS}) + add_dependencies(clang-libraries ${lib}) + if(NOT LLVM_ENABLE_IDE) + add_dependencies(install-clang-libraries install-${lib}) + add_dependencies(install-clang-libraries-stripped install-${lib}-stripped) + endif() + endforeach() +endif() + +add_subdirectory(cmake/modules) + +if(CLANG_STAGE) + message(STATUS "Setting current clang stage to: ${CLANG_STAGE}") +endif() + +if (CLANG_ENABLE_BOOTSTRAP) + include(ExternalProject) + + add_custom_target(clang-bootstrap-deps DEPENDS clang) + + if(NOT CLANG_STAGE) + set(CLANG_STAGE stage1) + endif() + + string(REGEX MATCH "stage([0-9]*)" MATCHED_STAGE "${CLANG_STAGE}") + if(MATCHED_STAGE) + if(NOT LLVM_BUILD_INSTRUMENTED) + math(EXPR STAGE_NUM 
"${CMAKE_MATCH_1} + 1") + set(NEXT_CLANG_STAGE stage${STAGE_NUM}) + else() + set(NEXT_CLANG_STAGE stage${CMAKE_MATCH_1}) + endif() + else() + set(NEXT_CLANG_STAGE bootstrap) + endif() + + if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED) + set(NEXT_CLANG_STAGE ${NEXT_CLANG_STAGE}-instrumented) + endif() + message(STATUS "Setting next clang stage to: ${NEXT_CLANG_STAGE}") + + + set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${NEXT_CLANG_STAGE}-stamps/) + set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${NEXT_CLANG_STAGE}-bins/) + + if(BOOTSTRAP_LLVM_ENABLE_LLD) + # adding lld to clang-bootstrap-deps without having it enabled in + # LLVM_ENABLE_PROJECTS just generates a cryptic error message. + if (NOT "lld" IN_LIST LLVM_ENABLE_PROJECTS) + message(FATAL_ERROR "LLD is enabled in the boostrap build, but lld is not in LLVM_ENABLE_PROJECTS") + endif() + add_dependencies(clang-bootstrap-deps lld) + endif() + + # If the next stage is LTO we need to depend on LTO and possibly lld or LLVMgold + if(BOOTSTRAP_LLVM_ENABLE_LTO OR LLVM_ENABLE_LTO AND NOT LLVM_BUILD_INSTRUMENTED) + if(APPLE) + add_dependencies(clang-bootstrap-deps LTO) + # on Darwin we need to set DARWIN_LTO_LIBRARY so that -flto will work + # using the just-built compiler, and we need to override DYLD_LIBRARY_PATH + # so that the host object file tools will use the just-built libLTO. + # However if System Integrity Protection is enabled the DYLD variables + # will be scrubbed from the environment of any base system commands. This + # includes /bin/sh, which ninja uses when executing build commands. To + # work around the envar being filtered away we pass it in as a CMake + # variable, and have LLVM's CMake append the envar to the archiver calls. 
+ set(LTO_LIBRARY -DDARWIN_LTO_LIBRARY=${LLVM_SHLIB_OUTPUT_INTDIR}/libLTO.dylib + -DDYLD_LIBRARY_PATH=${LLVM_LIBRARY_OUTPUT_INTDIR}) + elseif(NOT WIN32) + add_dependencies(clang-bootstrap-deps llvm-ar llvm-ranlib) + if(NOT BOOTSTRAP_LLVM_ENABLE_LLD AND LLVM_BINUTILS_INCDIR) + add_dependencies(clang-bootstrap-deps LLVMgold) + endif() + set(${CLANG_STAGE}_AR -DCMAKE_AR=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ar) + set(${CLANG_STAGE}_RANLIB -DCMAKE_RANLIB=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ranlib) + endif() + endif() + + if(CLANG_BOOTSTRAP_EXTRA_DEPS) + add_dependencies(clang-bootstrap-deps ${CLANG_BOOTSTRAP_EXTRA_DEPS}) + endif() + + add_custom_target(${NEXT_CLANG_STAGE}-clear + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${NEXT_CLANG_STAGE}-cleared + ) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${NEXT_CLANG_STAGE}-cleared + DEPENDS clang-bootstrap-deps + COMMAND ${CMAKE_COMMAND} -E remove_directory ${BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory ${BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E remove_directory ${STAMP_DIR} + COMMAND ${CMAKE_COMMAND} -E make_directory ${STAMP_DIR} + COMMENT "Clobberring ${NEXT_CLANG_STAGE} build and stamp directories" + ) + + if(CMAKE_VERBOSE_MAKEFILE) + set(verbose -DCMAKE_VERBOSE_MAKEFILE=On) + endif() + + set(_BOOTSTRAP_DEFAULT_PASSTHROUGH + PACKAGE_VERSION + PACKAGE_VENDOR + LLVM_VERSION_MAJOR + LLVM_VERSION_MINOR + LLVM_VERSION_PATCH + CLANG_VERSION_MAJOR + CLANG_VERSION_MINOR + CLANG_VERSION_PATCHLEVEL + CLANG_VENDOR + LLVM_VERSION_SUFFIX + LLVM_BINUTILS_INCDIR + CLANG_REPOSITORY_STRING + CMAKE_C_COMPILER_LAUNCHER + CMAKE_CXX_COMPILER_LAUNCHER + CMAKE_MAKE_PROGRAM + CMAKE_OSX_ARCHITECTURES + LLVM_ENABLE_PROJECTS + LLVM_ENABLE_RUNTIMES) + + # We don't need to depend on compiler-rt/libcxx if we're building instrumented + # because the next stage will use the same compiler used to build this stage. 
+ if(NOT LLVM_BUILD_INSTRUMENTED) + if(TARGET compiler-rt) + add_dependencies(clang-bootstrap-deps compiler-rt) + endif() + if(TARGET cxx-headers) + add_dependencies(clang-bootstrap-deps cxx-headers) + endif() + endif() + + set(C_COMPILER "clang") + set(CXX_COMPILER "clang++") + if(WIN32) + set(C_COMPILER "clang-cl.exe") + set(CXX_COMPILER "clang-cl.exe") + endif() + + set(COMPILER_OPTIONS + -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${CXX_COMPILER} + -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${C_COMPILER} + -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/${C_COMPILER} + -DCMAKE_ASM_COMPILER_ID=Clang) + + # cmake requires CMAKE_LINKER to be specified if the compiler is MSVC-like, + # otherwise it defaults the linker to be link.exe. + if(BOOTSTRAP_LLVM_ENABLE_LLD) + if((WIN32 AND NOT BOOTSTRAP_CMAKE_SYSTEM_NAME) OR BOOTSTRAP_CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(${CLANG_STAGE}_LINKER -DCMAKE_LINKER=${LLVM_RUNTIME_OUTPUT_INTDIR}/lld-link${CMAKE_EXECUTABLE_SUFFIX}) + endif() + endif() + + if(BOOTSTRAP_CMAKE_SYSTEM_NAME) + set(${CLANG_STAGE}_CONFIG -DLLVM_CONFIG_PATH=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-config) + set(${CLANG_STAGE}_TABLEGEN + -DLLVM_TABLEGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-tblgen + -DCLANG_TABLEGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-tblgen) + if(BOOTSTRAP_CMAKE_SYSTEM_NAME STREQUAL "Linux") + if(BOOTSTRAP_LLVM_ENABLE_LLD) + set(${CLANG_STAGE}_LINKER -DCMAKE_LINKER=${LLVM_RUNTIME_OUTPUT_INTDIR}/ld.lld) + endif() + if(NOT BOOTSTRAP_LLVM_ENABLE_LTO) + add_dependencies(clang-bootstrap-deps llvm-ar llvm-ranlib) + set(${CLANG_STAGE}_AR -DCMAKE_AR=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ar) + set(${CLANG_STAGE}_RANLIB -DCMAKE_RANLIB=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ranlib) + endif() + add_dependencies(clang-bootstrap-deps llvm-objcopy llvm-strip) + set(${CLANG_STAGE}_OBJCOPY -DCMAKE_OBJCOPY=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-objcopy) + set(${CLANG_STAGE}_STRIP -DCMAKE_STRIP=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-strip) + endif() + endif() + 
+ if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED) + add_dependencies(clang-bootstrap-deps llvm-profdata) + set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata) + endif() + + if(LLVM_BUILD_INSTRUMENTED) + add_dependencies(clang-bootstrap-deps generate-profdata) + set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata) + # Use the current tools for LTO instead of the instrumented ones + list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH + CMAKE_CXX_COMPILER + CMAKE_C_COMPILER + CMAKE_ASM_COMPILER + CMAKE_AR + CMAKE_RANLIB + DARWIN_LTO_LIBRARY + DYLD_LIBRARY_PATH) + + set(COMPILER_OPTIONS) + set(LTO_LIBRARY) + set(LTO_AR) + set(LTO_RANLIB) + endif() + + # Find all variables that start with BOOTSTRAP_ and populate a variable with + # them. + get_cmake_property(variableNames VARIABLES) + foreach(variableName ${variableNames}) + if(variableName MATCHES "^BOOTSTRAP_") + string(SUBSTRING ${variableName} 10 -1 varName) + string(REPLACE ";" "|" value "${${variableName}}") + list(APPEND PASSTHROUGH_VARIABLES + -D${varName}=${value}) + endif() + if(${variableName} AND variableName MATCHES "LLVM_EXTERNAL_.*_SOURCE_DIR") + list(APPEND PASSTHROUGH_VARIABLES + -D${variableName}=${${variableName}}) + endif() + endforeach() + + # Populate the passthrough variables + foreach(variableName ${CLANG_BOOTSTRAP_PASSTHROUGH} ${_BOOTSTRAP_DEFAULT_PASSTHROUGH}) + if(DEFINED ${variableName}) + if("${${variableName}}" STREQUAL "") + set(value "") + else() + string(REPLACE ";" "|" value "${${variableName}}") + endif() + list(APPEND PASSTHROUGH_VARIABLES + -D${variableName}=${value}) + endif() + endforeach() + + ExternalProject_Add(${NEXT_CLANG_STAGE} + DEPENDS clang-bootstrap-deps + PREFIX ${NEXT_CLANG_STAGE} + SOURCE_DIR ${CMAKE_SOURCE_DIR} + STAMP_DIR ${STAMP_DIR} + BINARY_DIR ${BINARY_DIR} + EXCLUDE_FROM_ALL 1 + CMAKE_ARGS + # We shouldn't need to set this here, but INSTALL_DIR doesn't + # seem to work, so instead I'm passing this through + 
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} + ${PASSTHROUGH_VARIABLES} + ${CLANG_BOOTSTRAP_CMAKE_ARGS} + -DCLANG_STAGE=${NEXT_CLANG_STAGE} + ${COMPILER_OPTIONS} + ${${CLANG_STAGE}_CONFIG} + ${${CLANG_STAGE}_TABLEGEN} + ${LTO_LIBRARY} ${verbose} ${PGO_OPT} + ${${CLANG_STAGE}_LINKER} + ${${CLANG_STAGE}_AR} + ${${CLANG_STAGE}_RANLIB} + ${${CLANG_STAGE}_OBJCOPY} + ${${CLANG_STAGE}_STRIP} + INSTALL_COMMAND "" + STEP_TARGETS configure build + USES_TERMINAL_CONFIGURE 1 + USES_TERMINAL_BUILD 1 + USES_TERMINAL_INSTALL 1 + LIST_SEPARATOR | + ) + + # exclude really-install from main target + set_target_properties(${NEXT_CLANG_STAGE} PROPERTIES _EP_really-install_EXCLUDE_FROM_MAIN On) + ExternalProject_Add_Step(${NEXT_CLANG_STAGE} really-install + COMMAND ${CMAKE_COMMAND} --build --target install + COMMENT "Performing install step for '${NEXT_CLANG_STAGE}'" + DEPENDEES build + USES_TERMINAL 1 + ) + ExternalProject_Add_StepTargets(${NEXT_CLANG_STAGE} really-install) + add_custom_target(${NEXT_CLANG_STAGE}-install DEPENDS ${NEXT_CLANG_STAGE}-really-install) + + if(NOT CLANG_BOOTSTRAP_TARGETS) + set(CLANG_BOOTSTRAP_TARGETS check-llvm check-clang check-all) + endif() + foreach(target ${CLANG_BOOTSTRAP_TARGETS}) + # Install targets have side effects, so we always want to execute them. + # "install" is reserved by CMake and can't be used as a step name for + # ExternalProject_Add_Step, so we can match against "^install-" instead of + # "^install" to get a tighter match. CMake's installation scripts already + # skip up-to-date files, so there's no behavior change if you install to the + # same destination multiple times. 
+ if(target MATCHES "^install-") + set(step_always ON) + else() + set(step_always OFF) + endif() + + ExternalProject_Add_Step(${NEXT_CLANG_STAGE} ${target} + COMMAND ${CMAKE_COMMAND} --build --target ${target} + COMMENT "Performing ${target} for '${NEXT_CLANG_STAGE}'" + DEPENDEES configure + ALWAYS ${step_always} + EXCLUDE_FROM_MAIN ON + USES_TERMINAL 1 + ) + + if(target MATCHES "^stage[0-9]*") + add_custom_target(${target} DEPENDS ${NEXT_CLANG_STAGE}-${target}) + endif() + + ExternalProject_Add_StepTargets(${NEXT_CLANG_STAGE} ${target}) + endforeach() +endif() + +if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION) + add_subdirectory(utils/ClangVisualizers) +endif() +add_subdirectory(utils/hmaptool) + +if(CLANG_BUILT_STANDALONE) + llvm_distribution_add_targets() + process_llvm_pass_plugins() +endif() + +configure_file( + ${CLANG_SOURCE_DIR}/include/clang/Config/config.h.cmake + ${CLANG_BINARY_DIR}/include/clang/Config/config.h) diff -Nru llvm-toolchain-13-13.0.0~+rc4/debian/changelog llvm-toolchain-13-13.0.0/debian/changelog --- llvm-toolchain-13-13.0.0~+rc4/debian/changelog 2021-09-27 15:11:32.000000000 +0000 +++ llvm-toolchain-13-13.0.0/debian/changelog 2021-10-03 12:12:53.000000000 +0000 @@ -1,10 +1,14 @@ -llvm-toolchain-13 (1:13.0.0~+rc4-1ubuntu1) impish; urgency=low +llvm-toolchain-13 (1:13.0.0-1~oibaf~f) focal; urgency=medium - * Merge from Debian unstable. Remaining changes: - - Build using GCC 10 on armhf. No idea about the build failure with GCC 11 - yet. + * No-change backport to focal - -- Gianfranco Costamagna Mon, 27 Sep 2021 17:11:32 +0200 + -- Fabio Pedretti Sun, 03 Oct 2021 14:12:53 +0200 + +llvm-toolchain-13 (1:13.0.0-1) unstable; urgency=medium + + * New upstream release + + -- Sylvestre Ledru Fri, 01 Oct 2021 09:08:08 +0200 llvm-toolchain-13 (1:13.0.0~+rc4-1) unstable; urgency=medium @@ -19,13 +23,6 @@ -- Sylvestre Ledru Sat, 18 Sep 2021 19:30:47 +0200 -llvm-toolchain-13 (1:13.0.0~+rc3-1ubuntu2) impish; urgency=medium - - * Build using GCC 10 on armhf. 
No idea about the build failure with GCC 11 - yet. - - -- Matthias Klose Thu, 16 Sep 2021 19:41:26 +0200 - llvm-toolchain-13 (1:13.0.0~+rc3-1) unstable; urgency=medium * New testing release @@ -286,7 +283,7 @@ llvm-toolchain-12 (1:12.0.1-1) unstable; urgency=medium - * New uptsream release + * New upstream release -- Sylvestre Ledru Fri, 09 Jul 2021 09:13:12 +0200 @@ -571,12 +568,35 @@ -- Sylvestre Ledru Wed, 08 Jul 2020 15:19:14 +0200 -llvm-toolchain-11 (1:11.1.0-1~exp3) UNRELEASED; urgency=medium +llvm-toolchain-11 (1:11.1.0-3) unstable; urgency=medium + + * Add two patches from ubuntu to fix lto and gcc-11 build failures + * Use minimum version for cmake-test + * Don't require libclang-11-dev on cmake-test + * Add new llvm-11-linker-tools (from Ubuntu) + * Merge new fixes from branch=9 + * clang-11: Drop Recommends: libomp-11-dev + + -- Gianfranco Costamagna Mon, 27 Sep 2021 15:50:46 +0200 + +llvm-toolchain-11 (1:11.1.0-2) unstable; urgency=medium + + * Ajust the cmake test to unbreak autopkgtest + -- Sylvestre Ledru Sun, 26 Sep 2021 22:02:43 +0200 + +llvm-toolchain-11 (1:11.1.0-1) unstable; urgency=medium + + [ Sylvestre Ledru ] + * Upload to unstable + * bump autopkgtest cmake version (Closes: #994501) + Thanks to Timo Röhling for the patch + + [ Gianfranco Costamagna ] * Cherry-pick upstream commit to fix a libgl1-mesa-dri texture failure. (Closes: #989545) - -- Gianfranco Costamagna Wed, 01 Sep 2021 12:24:34 +0200 + -- Sylvestre Ledru Fri, 24 Sep 2021 12:54:55 +0200 llvm-toolchain-11 (1:11.1.0-1~exp2) experimental; urgency=medium @@ -658,6 +678,41 @@ llvm-toolchain-11 (1:11.0.0-5) unstable; urgency=medium +llvm-toolchain-9 (1:9.0.1-19) unstable; urgency=medium + + * Add two patches from ubuntu to fix lto and gcc-11 build failures + + -- Gianfranco Costamagna Mon, 27 Sep 2021 15:39:19 +0200 + +llvm-toolchain-9 (1:9.0.1-18) unstable; urgency=medium + + [ Gianfranco Costamagna ] + * Cherry-pick nmu again. 
(version 16.1) + * bump minimum cmake version on qualify-clang.sh too + * simplify cmake test without minor version + [ Andreas Beckmann ] + * clang-9: Drop Recommends: libomp-9-dev which is not co-installable with + libomp-11-dev (and libomp-dev) for smoother upgrades of libomp-dev from + buster to bullseye. (Closes: #990452) + + -- Gianfranco Costamagna Mon, 27 Sep 2021 15:02:13 +0200 + +llvm-toolchain-9 (1:9.0.1-17) unstable; urgency=medium + + * Remove an old breaks/replaces for 9 which was causing + some issues on Debian buster. Thanks to Julien Wajsberg + for the bug report + * bump autopkgtest cmake version (Closes: #994501) + Thanks to Timo Röhling for the patch + * cherry pick 68d5235cb58f988c71b403334cd9482d663841ab to build + with newer version of the kernel + + -- Sylvestre Ledru Sat, 25 Sep 2021 16:00:36 +0200 + +llvm-toolchain-9 (1:9.0.1-16) unstable; urgency=medium + + * Only enable libomp-9-doc on supported archs + [ Adrian Bunk ] * Fix the fuzzer build on i386. @@ -1451,9 +1506,9 @@ llvm-toolchain-9 (1:9.0.1-12) unstable; urgency=medium + [ Jessica Clarke ] * Cherry-pick upstream patch D74453 to fix atomic compare-and-swap on riscv64. - riscv64. 
[ William Grant ] * debian/patches/riscv64-multilib-empty.patch: Adjust riscv64 GCC detector @@ -2273,38 +2328,6 @@ -- Sylvestre Ledru Wed, 02 Jan 2019 12:37:45 +0100 -llvm-toolchain-snapshot (1:8~svn346586-1~exp1) experimental; urgency=medium - - * New snapshot release - * Also install libOptRemarks.so - * Merge all the recent 7 changes into snapshot - (Closes: #913058) the co install issue with libc++ & openmp - - -- Sylvestre Ledru Sat, 10 Nov 2018 15:14:15 +0100 - -llvm-toolchain-snapshot (1:8~svn345569-1~exp1) experimental; urgency=medium - - [ Sylvestre Ledru ] - * Remove bat files https://bugs.llvm.org/show_bug.cgi?id=30755 - * Fix the autopkgtest script (no gcc in the test) - * remove dep from lld to llvm-8-dev because lld - doesn't use LLVM LTO - * remove old Replaces/Breaks - * Standards-Version: 4.2.1 - * Backport a fix to improve scan-build code error. - Thanks to Roman Lebedev for the fix(Closes: #909662) - * Remove bat files https://bugs.llvm.org/show_bug.cgi?id=30755 - * Install bash-completion for clang - * Disable ocaml on armel - - [ Gianfranco Costamagna ] - * Take option two in bug #877567 to fix FTBFS on mips and mipsel - - [ Sylvestre Ledru ] - * New snapshot release - - -- Sylvestre Ledru Tue, 30 Oct 2018 08:31:14 +0100 - llvm-toolchain-7 (1:7.0.1-4) unstable; urgency=medium * New snapshot release diff -Nru llvm-toolchain-13-13.0.0~+rc4/debian/control llvm-toolchain-13-13.0.0/debian/control --- llvm-toolchain-13-13.0.0~+rc4/debian/control 2021-09-25 22:31:06.000000000 +0000 +++ llvm-toolchain-13-13.0.0/debian/control 2021-10-01 06:37:38.000000000 +0000 @@ -14,7 +14,6 @@ libjsoncpp-dev, pkg-config, lcov, procps, help2man, zlib1g-dev, g++-multilib [amd64 i386 kfreebsd-amd64 mips mips64 mips64el mipsel powerpc ppc64 s390 s390x sparc sparc64 x32], - g++-10 [armhf], libjs-mathjax, python3-recommonmark, doxygen, gfortran, ocaml-nox [amd64 arm64 armhf ppc64el riscv64 s390x], @@ -39,7 +38,8 @@ ${dep:devlibs-objc}, libclang-common-13-dev (= 
${binary:Version}), libclang1-13 (= ${binary:Version}), libc6-dev, binutils Provides: c-compiler, objc-compiler, c++-compiler -Recommends: llvm-13-dev, python3, libomp-13-dev +Recommends: llvm-13-dev, python3 +# libomp-13-dev Suggests: clang-13-doc Breaks: llvm-13-dev (<< 1:13~++20210129063721) Replaces: llvm-13-dev (<< 1:13~++20210129063721) diff -Nru llvm-toolchain-13-13.0.0~+rc4/debian/qualify-clang.sh llvm-toolchain-13-13.0.0/debian/qualify-clang.sh --- llvm-toolchain-13-13.0.0~+rc4/debian/qualify-clang.sh 2021-09-25 22:31:07.000000000 +0000 +++ llvm-toolchain-13-13.0.0/debian/qualify-clang.sh 2021-10-01 06:37:38.000000000 +0000 @@ -89,7 +89,7 @@ rm -rf cmaketest && mkdir cmaketest cat > cmaketest/CMakeLists.txt < cmaketest/CMakeLists.txt < cmaketest/CMakeLists.txt < cmaketest/CMakeLists.txt < CMakeLists.txt -cmake_minimum_required(VERSION 2.6.2) +cmake_minimum_required(VERSION 3.7) project(cmake-test) find_package(LLVM $LLVM_VERSION REQUIRED COMPONENTS diff -Nru llvm-toolchain-13-13.0.0~+rc4/debian/tests/control llvm-toolchain-13-13.0.0/debian/tests/control --- llvm-toolchain-13-13.0.0~+rc4/debian/tests/control 2021-09-25 22:31:07.000000000 +0000 +++ llvm-toolchain-13-13.0.0/debian/tests/control 2021-10-01 06:37:38.000000000 +0000 @@ -10,7 +10,7 @@ Restrictions: allow-stderr Tests: cmake-test -Depends: gcc, build-essential, cmake, llvm-13-dev, libclang-common-13-dev +Depends: gcc, build-essential, cmake, llvm-13-dev Test-Command: python3 -c "import lldb; print(lldb.__file__); print(lldb)" Depends: python3-lldb-13 diff -Nru llvm-toolchain-13-13.0.0~+rc4/integration-test-suite/.github/workflows/fedora.yml llvm-toolchain-13-13.0.0/integration-test-suite/.github/workflows/fedora.yml --- llvm-toolchain-13-13.0.0~+rc4/integration-test-suite/.github/workflows/fedora.yml 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/integration-test-suite/.github/workflows/fedora.yml 2021-10-01 07:04:05.000000000 +0000 @@ -0,0 +1,23 @@ +name: Build on fedora +on: + 
push: + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + container: + image: "fedora:34" + steps: + - uses: actions/checkout@v2 + - name: Install Dependencies + run: | + sudo dnf install -y llvm-devel clang-devel cmake make python3-lit lld clang-tools-extra gcc gcc-c++ libcxx-devel compiler-rt libstdc++-devel \ + glibc-static libstdc++-static + - name: Run the testsuite + run: | + mkdir build && cd build + cmake .. -DENABLE_COMPILER_RT=ON -DENABLE_LIBCXX=ON + cmake --build . --target check -v diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/build.ninja llvm-toolchain-13-13.0.0/libcxxabi/build/build.ninja --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/build.ninja 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/build.ninja 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,870 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Ninja" Generator, CMake Version 3.18 + +# This file contains all the build statements describing the +# compilation DAG. + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# +# Which is the root file. +# ============================================================================= + +# ============================================================================= +# Project: libcxxabi +# Configurations: RelWithDebInfo +# ============================================================================= + +############################################# +# Minimal version of Ninja required by this file + +ninja_required_version = 1.5 + + +############################################# +# Set configuration variable for custom commands. + +CONFIGURATION = RelWithDebInfo +# ============================================================================= +# Include auxiliary files. + + +############################################# +# Include rules file. 
+ +include CMakeFiles/rules.ninja + + +############################################# +# Utility command for install/strip + +build CMakeFiles/install/strip.util: CUSTOM_COMMAND all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build && /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build install/strip: phony CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build CMakeFiles/install/local.util: CUSTOM_COMMAND all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build && /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build install/local: phony CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build CMakeFiles/install.util: CUSTOM_COMMAND all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build && /usr/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build install: phony CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build && /usr/bin/cmake --regenerate-during-build -S/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi -B/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build rebuild_cache: phony CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build && /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build edit_cache: phony CMakeFiles/edit_cache.util + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/local + +build src/CMakeFiles/install/local.util: CUSTOM_COMMAND src/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src && /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build src/install/local: phony src/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build src/CMakeFiles/install.util: CUSTOM_COMMAND src/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src && /usr/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build src/install: phony src/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build src/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build src/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src && /usr/bin/cmake --regenerate-during-build -S/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi -B/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build src/rebuild_cache: phony src/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build src/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src && /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... 
+ restat = 1 + +build src/edit_cache: phony src/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install-cxxabi-stripped + +build src/install-cxxabi-stripped: phony src/CMakeFiles/install-cxxabi-stripped src/cxxabi + + +############################################# +# Utility command for install-cxxabi + +build src/install-cxxabi: phony src/CMakeFiles/install-cxxabi src/cxxabi + + +############################################# +# Utility command for cxxabi + +build src/cxxabi: phony src/CMakeFiles/cxxabi lib/libc++abi.a lib/libc++abi.so + +# ============================================================================= +# Object build statements for STATIC_LIBRARY target cxxabi_static + + +############################################# +# Order-only phony target for cxxabi_static + +build cmake_object_order_depends_target_cxxabi_static: phony || src/CMakeFiles/cxxabi_static.dir + +build src/CMakeFiles/cxxabi_static.dir/cxa_aux_runtime.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_aux_runtime.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_aux_runtime.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_default_handlers.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_default_handlers.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_default_handlers.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_demangle.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_demangle.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_demangle.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_exception_storage.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_exception_storage.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_exception_storage.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_guard.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_guard.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_guard.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_handlers.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_handlers.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_handlers.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_vector.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_vector.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_vector.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_virtual.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_virtual.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_virtual.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/stdlib_exception.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/stdlib_exception.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/stdlib_exception.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/stdlib_stdexcept.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/stdlib_stdexcept.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/stdlib_stdexcept.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/stdlib_typeinfo.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/stdlib_typeinfo.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/stdlib_typeinfo.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/abort_message.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/abort_message.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/abort_message.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/fallback_malloc.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/fallback_malloc.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/fallback_malloc.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/private_typeinfo.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/private_typeinfo.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/private_typeinfo.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/stdlib_new_delete.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/stdlib_new_delete.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/stdlib_new_delete.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_exception.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_exception.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_exception.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_personality.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_personality.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_personality.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_static.dir/cxa_thread_atexit.cpp.o: CXX_COMPILER__cxxabi_static_RelWithDebInfo ../src/cxa_thread_atexit.cpp || cmake_object_order_depends_target_cxxabi_static + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_static.dir/cxa_thread_atexit.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_static.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_PDB = lib/libc++abi.pdb + + +# ============================================================================= +# Link build statements for STATIC_LIBRARY target cxxabi_static + + +############################################# +# Link the static library lib/libc++abi.a + +build lib/libc++abi.a: CXX_STATIC_LIBRARY_LINKER__cxxabi_static_RelWithDebInfo src/CMakeFiles/cxxabi_static.dir/cxa_aux_runtime.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_default_handlers.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_demangle.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_exception_storage.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_guard.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_handlers.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_vector.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_virtual.cpp.o src/CMakeFiles/cxxabi_static.dir/stdlib_exception.cpp.o src/CMakeFiles/cxxabi_static.dir/stdlib_stdexcept.cpp.o 
src/CMakeFiles/cxxabi_static.dir/stdlib_typeinfo.cpp.o src/CMakeFiles/cxxabi_static.dir/abort_message.cpp.o src/CMakeFiles/cxxabi_static.dir/fallback_malloc.cpp.o src/CMakeFiles/cxxabi_static.dir/private_typeinfo.cpp.o src/CMakeFiles/cxxabi_static.dir/stdlib_new_delete.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_exception.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_personality.cpp.o src/CMakeFiles/cxxabi_static.dir/cxa_thread_atexit.cpp.o + LANGUAGE_COMPILE_FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG + OBJECT_DIR = src/CMakeFiles/cxxabi_static.dir + POST_BUILD = : + PRE_LINK = : + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_static.dir/cxxabi_static.pdb + TARGET_FILE = lib/libc++abi.a + TARGET_PDB = lib/libc++abi.pdb + + +############################################# +# Utility command for install/strip + +build src/CMakeFiles/install/strip.util: CUSTOM_COMMAND src/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src && /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build src/install/strip: phony src/CMakeFiles/install/strip.util + +# ============================================================================= +# Object build statements for SHARED_LIBRARY target cxxabi_shared + + +############################################# +# Order-only phony target for cxxabi_shared + +build cmake_object_order_depends_target_cxxabi_shared: phony || src/CMakeFiles/cxxabi_shared.dir + +build src/CMakeFiles/cxxabi_shared.dir/cxa_aux_runtime.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_aux_runtime.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_aux_runtime.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_default_handlers.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_default_handlers.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_default_handlers.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_demangle.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_demangle.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_demangle.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_exception_storage.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_exception_storage.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_exception_storage.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_guard.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_guard.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_guard.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_handlers.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_handlers.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_handlers.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_vector.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_vector.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_vector.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_virtual.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_virtual.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_virtual.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/stdlib_exception.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/stdlib_exception.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/stdlib_exception.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/stdlib_stdexcept.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/stdlib_stdexcept.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/stdlib_stdexcept.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/stdlib_typeinfo.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/stdlib_typeinfo.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/stdlib_typeinfo.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/abort_message.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/abort_message.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/abort_message.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/fallback_malloc.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/fallback_malloc.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/fallback_malloc.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/private_typeinfo.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/private_typeinfo.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/private_typeinfo.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/stdlib_new_delete.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/stdlib_new_delete.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/stdlib_new_delete.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_exception.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_exception.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_exception.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_personality.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_personality.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_personality.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + +build src/CMakeFiles/cxxabi_shared.dir/cxa_thread_atexit.cpp.o: CXX_COMPILER__cxxabi_shared_RelWithDebInfo ../src/cxa_thread_atexit.cpp || cmake_object_order_depends_target_cxxabi_shared + DEFINES = -DHAVE___CXA_THREAD_ATEXIT_IMPL -DLIBCXXABI_USE_LLVM_UNWINDER -D_LIBCPP_BUILDING_LIBRARY -D_LIBCPP_DISABLE_EXTERN_TEMPLATE -D_LIBCXXABI_BUILDING_LIBRARY -D_LIBCXXABI_LINK_PTHREAD_LIB + DEP_FILE = src/CMakeFiles/cxxabi_shared.dir/cxa_thread_atexit.cpp.o.d + FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG -fPIC --target=x86_64-pc-linux-gnu -nostdinc++ -Werror=return-type -W -Wall -Wchar-subscripts -Wconversion -Wmismatched-tags -Wmissing-braces -Wnewline-eof -Wunused-function -Wshadow -Wshorten-64-to-32 -Wsign-compare -Wsign-conversion -Wstrict-aliasing=2 -Wstrict-overflow=4 -Wunused-parameter -Wunused-variable -Wwrite-strings -Wundef -Wno-suggest-override -Wno-error -pedantic -fstrict-aliasing -funwind-tables -D_DEBUG -I /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1 -std=c++20 + INCLUDES = -I../include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include -I/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + OBJECT_FILE_DIR = src/CMakeFiles/cxxabi_shared.dir + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_PDB = lib/libc++abi.pdb + + +# ============================================================================= +# Link build statements for SHARED_LIBRARY target cxxabi_shared + + +############################################# +# Link the shared library lib/libc++abi.so.1.0 + +build lib/libc++abi.so.1.0: CXX_SHARED_LIBRARY_LINKER__cxxabi_shared_RelWithDebInfo src/CMakeFiles/cxxabi_shared.dir/cxa_aux_runtime.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_default_handlers.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_demangle.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_exception_storage.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_guard.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_handlers.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_vector.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_virtual.cpp.o src/CMakeFiles/cxxabi_shared.dir/stdlib_exception.cpp.o src/CMakeFiles/cxxabi_shared.dir/stdlib_stdexcept.cpp.o 
src/CMakeFiles/cxxabi_shared.dir/stdlib_typeinfo.cpp.o src/CMakeFiles/cxxabi_shared.dir/abort_message.cpp.o src/CMakeFiles/cxxabi_shared.dir/fallback_malloc.cpp.o src/CMakeFiles/cxxabi_shared.dir/private_typeinfo.cpp.o src/CMakeFiles/cxxabi_shared.dir/stdlib_new_delete.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_exception.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_personality.cpp.o src/CMakeFiles/cxxabi_shared.dir/cxa_thread_atexit.cpp.o | /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/lib/linux/libclang_rt.builtins-x86_64.a + LANGUAGE_COMPILE_FLAGS = -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -O2 -g -DNDEBUG + LINK_FLAGS = --target=x86_64-pc-linux-gnu -nostdlib++ + LINK_LIBRARIES = /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/lib/linux/libclang_rt.builtins-x86_64.a -lunwind -lpthread -lc + OBJECT_DIR = src/CMakeFiles/cxxabi_shared.dir + POST_BUILD = : + PRE_LINK = : + SONAME = libc++abi.so.1 + SONAME_FLAG = -Wl,-soname, + TARGET_COMPILE_PDB = src/CMakeFiles/cxxabi_shared.dir/ + TARGET_FILE = lib/libc++abi.so.1.0 + TARGET_PDB = lib/libc++abi.pdb + + +############################################# +# Create library symlink lib/libc++abi.so + +build lib/libc++abi.so.1 lib/libc++abi.so: CMAKE_SYMLINK_LIBRARY lib/libc++abi.so.1.0 + POST_BUILD = : + + +############################################# +# Custom command for src/CMakeFiles/install-cxxabi-stripped + +build src/CMakeFiles/install-cxxabi-stripped: CUSTOM_COMMAND || lib/libc++abi.a lib/libc++abi.so src/cxxabi + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src && /usr/bin/cmake -DCMAKE_INSTALL_COMPONENT=cxxabi 
-DCMAKE_INSTALL_DO_STRIP=1 -P /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/cmake_install.cmake + + +############################################# +# Custom command for src/CMakeFiles/install-cxxabi + +build src/CMakeFiles/install-cxxabi: CUSTOM_COMMAND || lib/libc++abi.a lib/libc++abi.so src/cxxabi + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src && /usr/bin/cmake -DCMAKE_INSTALL_COMPONENT=cxxabi -P /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/cmake_install.cmake + + +############################################# +# Phony custom command for src/CMakeFiles/cxxabi + +build src/CMakeFiles/cxxabi: phony lib/libc++abi.so.1.0 lib/libc++abi.a || lib/libc++abi.a lib/libc++abi.so + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for install/strip + +build test/CMakeFiles/install/strip.util: CUSTOM_COMMAND test/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test && /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... 
+ pool = console + restat = 1 + +build test/install/strip: phony test/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build test/CMakeFiles/install/local.util: CUSTOM_COMMAND test/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test && /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build test/install/local: phony test/CMakeFiles/install/local.util + + +############################################# +# Utility command for edit_cache + +build test/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test && /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build test/edit_cache: phony test/CMakeFiles/edit_cache.util + + +############################################# +# Utility command for install + +build test/CMakeFiles/install.util: CUSTOM_COMMAND test/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test && /usr/bin/cmake -P cmake_install.cmake + DESC = Install the project... 
+ pool = console + restat = 1 + +build test/install: phony test/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build test/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build test/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test && /usr/bin/cmake --regenerate-during-build -S/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi -B/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build + DESC = Running CMake to regenerate build system... + pool = console + restat = 1 + +build test/rebuild_cache: phony test/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for check-cxxabi + +build test/check-cxxabi: phony test/CMakeFiles/check-cxxabi lib/libc++abi.so + + +############################################# +# Custom command for test/CMakeFiles/check-cxxabi + +build test/CMakeFiles/check-cxxabi: CUSTOM_COMMAND || lib/libc++abi.so + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test && /usr/bin/python3.9 /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/utils/lit/lit.py -sv --show-xfail --show-unsupported /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test + DESC = Running libcxxabi tests + pool = console + +# ============================================================================= +# Write statements declared in CMakeLists.txt: +# /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/CMakeLists.txt +# ============================================================================= + + +############################################# +# Utility command for 
install/strip + +build fuzz/CMakeFiles/install/strip.util: CUSTOM_COMMAND fuzz/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz && /usr/bin/cmake -DCMAKE_INSTALL_DO_STRIP=1 -P cmake_install.cmake + DESC = Installing the project stripped... + pool = console + restat = 1 + +build fuzz/install/strip: phony fuzz/CMakeFiles/install/strip.util + + +############################################# +# Utility command for install/local + +build fuzz/CMakeFiles/install/local.util: CUSTOM_COMMAND fuzz/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz && /usr/bin/cmake -DCMAKE_INSTALL_LOCAL_ONLY=1 -P cmake_install.cmake + DESC = Installing only the local directory... + pool = console + restat = 1 + +build fuzz/install/local: phony fuzz/CMakeFiles/install/local.util + + +############################################# +# Utility command for install + +build fuzz/CMakeFiles/install.util: CUSTOM_COMMAND fuzz/all + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz && /usr/bin/cmake -P cmake_install.cmake + DESC = Install the project... + pool = console + restat = 1 + +build fuzz/install: phony fuzz/CMakeFiles/install.util + + +############################################# +# Utility command for list_install_components + +build fuzz/list_install_components: phony + + +############################################# +# Utility command for rebuild_cache + +build fuzz/CMakeFiles/rebuild_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz && /usr/bin/cmake --regenerate-during-build -S/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi -B/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build + DESC = Running CMake to regenerate build system... 
+ pool = console + restat = 1 + +build fuzz/rebuild_cache: phony fuzz/CMakeFiles/rebuild_cache.util + + +############################################# +# Utility command for edit_cache + +build fuzz/CMakeFiles/edit_cache.util: CUSTOM_COMMAND + COMMAND = cd /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz && /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. + DESC = No interactive CMake dialog available... + restat = 1 + +build fuzz/edit_cache: phony fuzz/CMakeFiles/edit_cache.util + +# ============================================================================= +# Target aliases. + +build check-cxxabi: phony test/check-cxxabi + +build cxxabi: phony src/cxxabi + +build cxxabi_shared: phony lib/libc++abi.so + +build cxxabi_static: phony lib/libc++abi.a + +build install-cxxabi: phony src/install-cxxabi + +build install-cxxabi-stripped: phony src/install-cxxabi-stripped + +build libc++abi.a: phony lib/libc++abi.a + +build libc++abi.so: phony lib/libc++abi.so + +# ============================================================================= +# Folder targets. 
+ +# ============================================================================= + +############################################# +# Folder: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build + +build all: phony src/all test/all fuzz/all + +# ============================================================================= + +############################################# +# Folder: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz + +build fuzz/all: phony + +# ============================================================================= + +############################################# +# Folder: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src + +build src/all: phony lib/libc++abi.a lib/libc++abi.so + +# ============================================================================= + +############################################# +# Folder: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test + +build test/all: phony + +# ============================================================================= +# Built-in targets + + +############################################# +# Re-run CMake if any of its inputs changed. 
+ +build build.ninja: RERUN_CMAKE | ../CMakeLists.txt ../cmake/Modules/HandleCompilerRT.cmake ../cmake/Modules/HandleLibcxxabiFlags.cmake ../cmake/Modules/MacroEnsureOutOfSourceBuild.cmake ../cmake/config-ix.cmake ../fuzz/CMakeLists.txt ../src/CMakeLists.txt ../test/CMakeLists.txt ../test/lit.site.cfg.in /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/AddLLVM.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/DetermineGCCCompatible.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/GetHostTriple.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/LLVM-Config.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/LLVMDistributionSupport.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/LLVMProcessSources.cmake /usr/share/cmake-3.18/Modules/AddFileDependencies.cmake /usr/share/cmake-3.18/Modules/CMakeCCompiler.cmake.in /usr/share/cmake-3.18/Modules/CMakeCCompilerABI.c /usr/share/cmake-3.18/Modules/CMakeCInformation.cmake /usr/share/cmake-3.18/Modules/CMakeCXXCompiler.cmake.in /usr/share/cmake-3.18/Modules/CMakeCXXCompilerABI.cpp /usr/share/cmake-3.18/Modules/CMakeCXXInformation.cmake /usr/share/cmake-3.18/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake /usr/share/cmake-3.18/Modules/CMakeCommonLanguageInclude.cmake /usr/share/cmake-3.18/Modules/CMakeCompilerIdDetection.cmake /usr/share/cmake-3.18/Modules/CMakeDependentOption.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCXXCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCompileFeatures.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCompiler.cmake 
/usr/share/cmake-3.18/Modules/CMakeDetermineCompilerABI.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCompilerId.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineSystem.cmake /usr/share/cmake-3.18/Modules/CMakeFindBinUtils.cmake /usr/share/cmake-3.18/Modules/CMakeGenericSystem.cmake /usr/share/cmake-3.18/Modules/CMakeInitializeConfigs.cmake /usr/share/cmake-3.18/Modules/CMakeLanguageInformation.cmake /usr/share/cmake-3.18/Modules/CMakeNinjaFindMake.cmake /usr/share/cmake-3.18/Modules/CMakeParseArguments.cmake /usr/share/cmake-3.18/Modules/CMakeParseImplicitIncludeInfo.cmake /usr/share/cmake-3.18/Modules/CMakeParseImplicitLinkInfo.cmake /usr/share/cmake-3.18/Modules/CMakePushCheckState.cmake /usr/share/cmake-3.18/Modules/CMakeSystem.cmake.in /usr/share/cmake-3.18/Modules/CMakeSystemSpecificInformation.cmake /usr/share/cmake-3.18/Modules/CMakeSystemSpecificInitialize.cmake /usr/share/cmake-3.18/Modules/CMakeTestCCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeTestCXXCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeTestCompilerCommon.cmake /usr/share/cmake-3.18/Modules/CheckCCompilerFlag.cmake /usr/share/cmake-3.18/Modules/CheckCSourceCompiles.cmake /usr/share/cmake-3.18/Modules/CheckCXXCompilerFlag.cmake /usr/share/cmake-3.18/Modules/CheckCXXSourceCompiles.cmake /usr/share/cmake-3.18/Modules/CheckFunctionExists.c /usr/share/cmake-3.18/Modules/CheckLibraryExists.cmake /usr/share/cmake-3.18/Modules/Compiler/ADSP-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/ARMCC-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/ARMClang-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/AppleClang-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Borland-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Bruce-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/CMakeCommonCompilerMacros.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-C.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-CXX.cmake 
/usr/share/cmake-3.18/Modules/Compiler/Clang-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-DetermineCompilerInternal.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-FindBinUtils.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang.cmake /usr/share/cmake-3.18/Modules/Compiler/Comeau-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Compaq-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Compaq-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Cray-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Embarcadero-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Fujitsu-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GHS-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GNU-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GNU-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GNU.cmake /usr/share/cmake-3.18/Modules/Compiler/HP-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/HP-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/IAR-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/IBMCPP-C-DetermineVersionInternal.cmake /usr/share/cmake-3.18/Modules/Compiler/IBMCPP-CXX-DetermineVersionInternal.cmake /usr/share/cmake-3.18/Modules/Compiler/Intel-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/MSVC-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/NVIDIA-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/OpenWatcom-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/PGI-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/PathScale-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/SCO-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/SDCC-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/SunPro-C-DetermineCompiler.cmake 
/usr/share/cmake-3.18/Modules/Compiler/SunPro-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/TI-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/TinyCC-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/VisualAge-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/VisualAge-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Watcom-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XL-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XL-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XLClang-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XLClang-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/zOS-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/zOS-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/FindPackageHandleStandardArgs.cmake /usr/share/cmake-3.18/Modules/FindPackageMessage.cmake /usr/share/cmake-3.18/Modules/FindPython/Support.cmake /usr/share/cmake-3.18/Modules/FindPython3.cmake /usr/share/cmake-3.18/Modules/Internal/CMakeCheckCompilerFlag.cmake /usr/share/cmake-3.18/Modules/Internal/FeatureTesting.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-Clang-C.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-Clang-CXX.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-Determine-CXX.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-GNU-C.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-GNU-CXX.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-GNU.cmake /usr/share/cmake-3.18/Modules/Platform/Linux.cmake /usr/share/cmake-3.18/Modules/Platform/UnixPaths.cmake CMakeCache.txt CMakeFiles/3.18.4/CMakeCCompiler.cmake CMakeFiles/3.18.4/CMakeCXXCompiler.cmake CMakeFiles/3.18.4/CMakeSystem.cmake + pool = console + + +############################################# +# A missing CMake input file is not an error. 
+ +build ../CMakeLists.txt ../cmake/Modules/HandleCompilerRT.cmake ../cmake/Modules/HandleLibcxxabiFlags.cmake ../cmake/Modules/MacroEnsureOutOfSourceBuild.cmake ../cmake/config-ix.cmake ../fuzz/CMakeLists.txt ../src/CMakeLists.txt ../test/CMakeLists.txt ../test/lit.site.cfg.in /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx/cmake/Modules/HandleOutOfTreeLLVM.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/AddLLVM.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/DetermineGCCCompatible.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/GetHostTriple.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/LLVM-Config.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/LLVMDistributionSupport.cmake /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/llvm/cmake/modules/LLVMProcessSources.cmake /usr/share/cmake-3.18/Modules/AddFileDependencies.cmake /usr/share/cmake-3.18/Modules/CMakeCCompiler.cmake.in /usr/share/cmake-3.18/Modules/CMakeCCompilerABI.c /usr/share/cmake-3.18/Modules/CMakeCInformation.cmake /usr/share/cmake-3.18/Modules/CMakeCXXCompiler.cmake.in /usr/share/cmake-3.18/Modules/CMakeCXXCompilerABI.cpp /usr/share/cmake-3.18/Modules/CMakeCXXInformation.cmake /usr/share/cmake-3.18/Modules/CMakeCheckCompilerFlagCommonPatterns.cmake /usr/share/cmake-3.18/Modules/CMakeCommonLanguageInclude.cmake /usr/share/cmake-3.18/Modules/CMakeCompilerIdDetection.cmake /usr/share/cmake-3.18/Modules/CMakeDependentOption.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCXXCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCompileFeatures.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCompiler.cmake 
/usr/share/cmake-3.18/Modules/CMakeDetermineCompilerABI.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineCompilerId.cmake /usr/share/cmake-3.18/Modules/CMakeDetermineSystem.cmake /usr/share/cmake-3.18/Modules/CMakeFindBinUtils.cmake /usr/share/cmake-3.18/Modules/CMakeGenericSystem.cmake /usr/share/cmake-3.18/Modules/CMakeInitializeConfigs.cmake /usr/share/cmake-3.18/Modules/CMakeLanguageInformation.cmake /usr/share/cmake-3.18/Modules/CMakeNinjaFindMake.cmake /usr/share/cmake-3.18/Modules/CMakeParseArguments.cmake /usr/share/cmake-3.18/Modules/CMakeParseImplicitIncludeInfo.cmake /usr/share/cmake-3.18/Modules/CMakeParseImplicitLinkInfo.cmake /usr/share/cmake-3.18/Modules/CMakePushCheckState.cmake /usr/share/cmake-3.18/Modules/CMakeSystem.cmake.in /usr/share/cmake-3.18/Modules/CMakeSystemSpecificInformation.cmake /usr/share/cmake-3.18/Modules/CMakeSystemSpecificInitialize.cmake /usr/share/cmake-3.18/Modules/CMakeTestCCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeTestCXXCompiler.cmake /usr/share/cmake-3.18/Modules/CMakeTestCompilerCommon.cmake /usr/share/cmake-3.18/Modules/CheckCCompilerFlag.cmake /usr/share/cmake-3.18/Modules/CheckCSourceCompiles.cmake /usr/share/cmake-3.18/Modules/CheckCXXCompilerFlag.cmake /usr/share/cmake-3.18/Modules/CheckCXXSourceCompiles.cmake /usr/share/cmake-3.18/Modules/CheckFunctionExists.c /usr/share/cmake-3.18/Modules/CheckLibraryExists.cmake /usr/share/cmake-3.18/Modules/Compiler/ADSP-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/ARMCC-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/ARMClang-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/AppleClang-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Borland-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Bruce-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/CMakeCommonCompilerMacros.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-C.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-CXX.cmake 
/usr/share/cmake-3.18/Modules/Compiler/Clang-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-DetermineCompilerInternal.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang-FindBinUtils.cmake /usr/share/cmake-3.18/Modules/Compiler/Clang.cmake /usr/share/cmake-3.18/Modules/Compiler/Comeau-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Compaq-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Compaq-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Cray-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Embarcadero-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Fujitsu-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GHS-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GNU-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GNU-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/GNU.cmake /usr/share/cmake-3.18/Modules/Compiler/HP-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/HP-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/IAR-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/IBMCPP-C-DetermineVersionInternal.cmake /usr/share/cmake-3.18/Modules/Compiler/IBMCPP-CXX-DetermineVersionInternal.cmake /usr/share/cmake-3.18/Modules/Compiler/Intel-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/MSVC-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/NVIDIA-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/OpenWatcom-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/PGI-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/PathScale-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/SCO-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/SDCC-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/SunPro-C-DetermineCompiler.cmake 
/usr/share/cmake-3.18/Modules/Compiler/SunPro-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/TI-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/TinyCC-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/VisualAge-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/VisualAge-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/Watcom-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XL-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XL-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XLClang-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/XLClang-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/zOS-C-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/Compiler/zOS-CXX-DetermineCompiler.cmake /usr/share/cmake-3.18/Modules/FindPackageHandleStandardArgs.cmake /usr/share/cmake-3.18/Modules/FindPackageMessage.cmake /usr/share/cmake-3.18/Modules/FindPython/Support.cmake /usr/share/cmake-3.18/Modules/FindPython3.cmake /usr/share/cmake-3.18/Modules/Internal/CMakeCheckCompilerFlag.cmake /usr/share/cmake-3.18/Modules/Internal/FeatureTesting.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-Clang-C.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-Clang-CXX.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-Determine-CXX.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-GNU-C.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-GNU-CXX.cmake /usr/share/cmake-3.18/Modules/Platform/Linux-GNU.cmake /usr/share/cmake-3.18/Modules/Platform/Linux.cmake /usr/share/cmake-3.18/Modules/Platform/UnixPaths.cmake CMakeCache.txt CMakeFiles/3.18.4/CMakeCCompiler.cmake CMakeFiles/3.18.4/CMakeCXXCompiler.cmake CMakeFiles/3.18.4/CMakeSystem.cmake: phony + + +############################################# +# Clean all the built files. 
+ +build clean: CLEAN + + +############################################# +# Print all primary targets available. + +build help: HELP + + +############################################# +# Make the all target the default. + +default all diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeCCompiler.cmake llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeCCompiler.cmake --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeCCompiler.cmake 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeCCompiler.cmake 2021-09-19 21:51:03.000000000 +0000 @@ -0,0 +1,77 @@ +set(CMAKE_C_COMPILER "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang") +set(CMAKE_C_COMPILER_ARG1 "") +set(CMAKE_C_COMPILER_ID "Clang") +set(CMAKE_C_COMPILER_VERSION "14.0.0") +set(CMAKE_C_COMPILER_VERSION_INTERNAL "") +set(CMAKE_C_COMPILER_WRAPPER "") +set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "11") +set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert") +set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes") +set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros") +set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert") + +set(CMAKE_C_PLATFORM_ID "Linux") +set(CMAKE_C_SIMULATE_ID "") +set(CMAKE_C_COMPILER_FRONTEND_VARIANT "GNU") +set(CMAKE_C_SIMULATE_VERSION "") + + + + +set(CMAKE_AR "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ar") +set(CMAKE_C_COMPILER_AR "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ar") +set(CMAKE_RANLIB "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ranlib") +set(CMAKE_C_COMPILER_RANLIB 
"/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ranlib") +set(CMAKE_LINKER "/usr/bin/ld") +set(CMAKE_MT "") +set(CMAKE_COMPILER_IS_GNUCC ) +set(CMAKE_C_COMPILER_LOADED 1) +set(CMAKE_C_COMPILER_WORKS TRUE) +set(CMAKE_C_ABI_COMPILED TRUE) +set(CMAKE_COMPILER_IS_MINGW ) +set(CMAKE_COMPILER_IS_CYGWIN ) +if(CMAKE_COMPILER_IS_CYGWIN) + set(CYGWIN 1) + set(UNIX 1) +endif() + +set(CMAKE_C_COMPILER_ENV_VAR "CC") + +if(CMAKE_COMPILER_IS_MINGW) + set(MINGW 1) +endif() +set(CMAKE_C_COMPILER_ID_RUN 1) +set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) +set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) +set(CMAKE_C_LINKER_PREFERENCE 10) + +# Save compiler ABI information. +set(CMAKE_C_SIZEOF_DATA_PTR "8") +set(CMAKE_C_COMPILER_ABI "ELF") +set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") + +if(CMAKE_C_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_C_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") +endif() + +if(CMAKE_C_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") +endif() + +set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") +if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + + +set(CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") +set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s") +set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/10;/lib/x86_64-linux-gnu;/lib64;/usr/lib/x86_64-linux-gnu;/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib;/lib;/usr/lib") +set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") diff -Nru 
llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeCXXCompiler.cmake llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeCXXCompiler.cmake --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeCXXCompiler.cmake 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeCXXCompiler.cmake 2021-09-19 21:51:03.000000000 +0000 @@ -0,0 +1,89 @@ +set(CMAKE_CXX_COMPILER "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang++") +set(CMAKE_CXX_COMPILER_ARG1 "") +set(CMAKE_CXX_COMPILER_ID "Clang") +set(CMAKE_CXX_COMPILER_VERSION "14.0.0") +set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") +set(CMAKE_CXX_COMPILER_WRAPPER "") +set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "14") +set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_d
ecltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17;cxx_std_20") +set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") +set(CMAKE_CXX11_COMPILE_FEATURES "cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") +set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") +set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") +set(CMAKE_CXX20_COMPILE_FEATURES "cxx_std_20") + +set(CMAKE_CXX_PLATFORM_ID "Linux") +set(CMAKE_CXX_SIMULATE_ID "") +set(CMAKE_CXX_COMPILER_FRONTEND_VARIANT "GNU") +set(CMAKE_CXX_SIMULATE_VERSION "") + + + + +set(CMAKE_AR "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ar") 
+set(CMAKE_CXX_COMPILER_AR "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ar") +set(CMAKE_RANLIB "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ranlib") +set(CMAKE_CXX_COMPILER_RANLIB "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ranlib") +set(CMAKE_LINKER "/usr/bin/ld") +set(CMAKE_MT "") +set(CMAKE_COMPILER_IS_GNUCXX ) +set(CMAKE_CXX_COMPILER_LOADED 1) +set(CMAKE_CXX_COMPILER_WORKS TRUE) +set(CMAKE_CXX_ABI_COMPILED TRUE) +set(CMAKE_COMPILER_IS_MINGW ) +set(CMAKE_COMPILER_IS_CYGWIN ) +if(CMAKE_COMPILER_IS_CYGWIN) + set(CYGWIN 1) + set(UNIX 1) +endif() + +set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") + +if(CMAKE_COMPILER_IS_MINGW) + set(MINGW 1) +endif() +set(CMAKE_CXX_COMPILER_ID_RUN 1) +set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;m;mm;CPP) +set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) + +foreach (lang C OBJC OBJCXX) + if (CMAKE_${lang}_COMPILER_ID_RUN) + foreach(extension IN LISTS CMAKE_${lang}_SOURCE_FILE_EXTENSIONS) + list(REMOVE_ITEM CMAKE_CXX_SOURCE_FILE_EXTENSIONS ${extension}) + endforeach() + endif() +endforeach() + +set(CMAKE_CXX_LINKER_PREFERENCE 30) +set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) + +# Save compiler ABI information. 
+set(CMAKE_CXX_SIZEOF_DATA_PTR "8") +set(CMAKE_CXX_COMPILER_ABI "ELF") +set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") + +if(CMAKE_CXX_SIZEOF_DATA_PTR) + set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") +endif() + +if(CMAKE_CXX_COMPILER_ABI) + set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") +endif() + +if(CMAKE_CXX_LIBRARY_ARCHITECTURE) + set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") +endif() + +set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") +if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) + set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") +endif() + + + + + +set(CMAKE_CXX_IMPLICIT_INCLUDE_DIRECTORIES "/usr/include/c++/10;/usr/include/x86_64-linux-gnu/c++/10;/usr/include/c++/10/backward;/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include") +set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") +set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/10;/lib/x86_64-linux-gnu;/lib64;/usr/lib/x86_64-linux-gnu;/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib;/lib;/usr/lib") +set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeDetermineCompilerABI_C.bin and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeDetermineCompilerABI_C.bin differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeDetermineCompilerABI_CXX.bin and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeDetermineCompilerABI_CXX.bin differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeSystem.cmake 
llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeSystem.cmake --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CMakeSystem.cmake 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CMakeSystem.cmake 2021-09-19 21:51:02.000000000 +0000 @@ -0,0 +1,15 @@ +set(CMAKE_HOST_SYSTEM "Linux-5.4.0-4-amd64") +set(CMAKE_HOST_SYSTEM_NAME "Linux") +set(CMAKE_HOST_SYSTEM_VERSION "5.4.0-4-amd64") +set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") + + + +set(CMAKE_SYSTEM "Linux-5.4.0-4-amd64") +set(CMAKE_SYSTEM_NAME "Linux") +set(CMAKE_SYSTEM_VERSION "5.4.0-4-amd64") +set(CMAKE_SYSTEM_PROCESSOR "x86_64") + +set(CMAKE_CROSSCOMPILING "FALSE") + +set(CMAKE_SYSTEM_LOADED 1) Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdC/a.out and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdC/a.out differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdC/CMakeCCompilerId.c llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdC/CMakeCCompilerId.c --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdC/CMakeCCompilerId.c 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdC/CMakeCCompilerId.c 2021-09-19 21:51:02.000000000 +0000 @@ -0,0 +1,674 @@ +#ifdef __cplusplus +# error "A C++ compiler has been selected for C." 
+#endif + +#if defined(__18CXX) +# define ID_VOID_MAIN +#endif +#if defined(__CLASSIC_C__) +/* cv-qualifiers did not exist in K&R C */ +# define const +# define volatile +#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# if defined(__GNUC__) +# define SIMULATE_ID "GNU" +# endif + /* __INTEL_COMPILER = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) +# define COMPILER_VERSION_MINOR 
HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_C) +# define COMPILER_ID "SunPro" +# if __SUNPRO_C >= 0x5100 + /* __SUNPRO_C = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_C>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_C>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_C & 0xF) +# endif + +#elif defined(__HP_cc) +# define COMPILER_ID "HP" + /* __HP_cc = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_cc/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_cc/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_cc % 100) + +#elif defined(__DECC) +# define COMPILER_ID "Compaq" + /* __DECC_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECC_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECC_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECC_VER % 10000) + +#elif defined(__IBMC__) && 
defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__ibmxl__) && defined(__clang__) +# define COMPILER_ID "XLClang" +# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) +# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) +# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) +# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__) + + +#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ >= 800 +# define COMPILER_ID "XL" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__IBMC__) && !defined(__COMPILER_VER__) && __IBMC__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMC__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMC__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMC__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMC__ % 10) + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__FUJITSU) || defined(__FCC_VERSION) || defined(__fcc_version) +# define COMPILER_ID "Fujitsu" + +#elif 
defined(__ghs__) +# define COMPILER_ID "GHS" +/* __GHS_VERSION_NUMBER = VVVVRP */ +# ifdef __GHS_VERSION_NUMBER +# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) +# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) +# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) +# endif + +#elif defined(__TINYC__) +# define COMPILER_ID "TinyCC" + +#elif defined(__BCC__) +# define COMPILER_ID "Bruce" + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__ARMCC_VERSION) && !defined(__clang__) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) +# define COMPILER_ID "ARMClang" + # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000) +# define COMPILER_VERSION_INTERNAL 
DEC(__ARMCOMPILER_VERSION) + +#elif defined(__clang__) +# define COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__GNUC__) +# define COMPILER_ID "GNU" +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +# define COMPILER_ID "ADSP" +#if defined(__VISUALDSPVERSION__) + /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ +# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) +# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" +# if defined(__VER__) && defined(__ICCARM__) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) +# define COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 
1000) +# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__)) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) +# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) +# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# endif + +#elif defined(__SDCC_VERSION_MAJOR) || defined(SDCC) +# define COMPILER_ID "SDCC" +# if defined(__SDCC_VERSION_MAJOR) +# define COMPILER_VERSION_MAJOR DEC(__SDCC_VERSION_MAJOR) +# define COMPILER_VERSION_MINOR DEC(__SDCC_VERSION_MINOR) +# define COMPILER_VERSION_PATCH DEC(__SDCC_VERSION_PATCH) +# else + /* SDCC = VRP */ +# define COMPILER_VERSION_MAJOR DEC(SDCC/100) +# define COMPILER_VERSION_MINOR DEC(SDCC/10 % 10) +# define COMPILER_VERSION_PATCH DEC(SDCC % 10) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXE) || defined(__CRAYXC) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. */ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" 
+ +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# elif defined(__VXWORKS__) +# define PLATFORM_ID "VxWorks" + +# else /* unknown platform */ +# define PLATFORM_ID +# endif + +#elif defined(__INTEGRITY) +# if defined(INT_178B) +# define PLATFORM_ID "Integrity178" + +# else /* regular Integrity */ +# define PLATFORM_ID "Integrity" +# endif + +#else /* unknown platform */ +# define PLATFORM_ID + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. 
This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM64) +# define ARCHITECTURE_ID "ARM64" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# if defined(__ICCARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__ICCRX__) +# define ARCHITECTURE_ID "RX" + +# elif defined(__ICCRH850__) +# define ARCHITECTURE_ID "RH850" + +# elif defined(__ICCRL78__) +# define ARCHITECTURE_ID "RL78" + +# elif defined(__ICCRISCV__) +# define ARCHITECTURE_ID "RISCV" + +# elif defined(__ICCAVR__) +# define ARCHITECTURE_ID "AVR" + +# elif defined(__ICC430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__ICCV850__) +# define ARCHITECTURE_ID "V850" + +# elif defined(__ICC8051__) +# define ARCHITECTURE_ID "8051" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__ghs__) +# if defined(__PPC64__) +# define ARCHITECTURE_ID "PPC64" + +# elif defined(__ppc__) +# define ARCHITECTURE_ID "PPC" + +# elif defined(__ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__x86_64__) +# 
define ARCHITECTURE_ID "x64" + +# elif defined(__i386__) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif +#else +# define ARCHITECTURE_ID +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number components. */ +#ifdef COMPILER_VERSION_MAJOR +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the internal version number. */ +#ifdef COMPILER_VERSION_INTERNAL +char const info_version_internal[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', + 'i','n','t','e','r','n','a','l','[', + COMPILER_VERSION_INTERNAL,']','\0'}; +#endif + +/* Construct a string literal encoding the version number components. 
*/ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + + +#if !defined(__STDC__) +# if (defined(_MSC_VER) && !defined(__clang__)) \ + || (defined(__ibmxl__) || defined(__IBMC__)) +# define C_DIALECT "90" +# else +# define C_DIALECT +# endif +#elif __STDC_VERSION__ >= 201000L +# define C_DIALECT "11" +#elif __STDC_VERSION__ >= 199901L +# define C_DIALECT "99" +#else +# define C_DIALECT "90" +#endif +const char* info_language_dialect_default = + "INFO" ":" "dialect_default[" C_DIALECT "]"; + +/*--------------------------------------------------------------------------*/ + +#ifdef ID_VOID_MAIN +void main() {} +#else +# if defined(__CLASSIC_C__) +int main(argc, argv) int argc; char *argv[]; +# else +int main(int argc, char* argv[]) +# endif +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; + require += info_arch[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef COMPILER_VERSION_INTERNAL + require += info_version_internal[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXE) || defined(__CRAYXC) + 
require += info_cray[argc]; +#endif + require += info_language_dialect_default[argc]; + (void)argv; + return require; +} +#endif Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdCXX/a.out and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdCXX/a.out differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdCXX/CMakeCXXCompilerId.cpp llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdCXX/CMakeCXXCompilerId.cpp --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdCXX/CMakeCXXCompilerId.cpp 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdCXX/CMakeCXXCompilerId.cpp 2021-09-19 21:51:02.000000000 +0000 @@ -0,0 +1,663 @@ +/* This source file must have a .cpp extension so that all C++ compilers + recognize the extension without flags. Borland does not know .cxx for + example. */ +#ifndef __cplusplus +# error "A C compiler has been selected for C++." 
+#endif + + +/* Version number components: V=Version, R=Revision, P=Patch + Version date components: YYYY=Year, MM=Month, DD=Day */ + +#if defined(__COMO__) +# define COMPILER_ID "Comeau" + /* __COMO_VERSION__ = VRR */ +# define COMPILER_VERSION_MAJOR DEC(__COMO_VERSION__ / 100) +# define COMPILER_VERSION_MINOR DEC(__COMO_VERSION__ % 100) + +#elif defined(__INTEL_COMPILER) || defined(__ICC) +# define COMPILER_ID "Intel" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# if defined(__GNUC__) +# define SIMULATE_ID "GNU" +# endif + /* __INTEL_COMPILER = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__INTEL_COMPILER/100) +# define COMPILER_VERSION_MINOR DEC(__INTEL_COMPILER/10 % 10) +# if defined(__INTEL_COMPILER_UPDATE) +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER_UPDATE) +# else +# define COMPILER_VERSION_PATCH DEC(__INTEL_COMPILER % 10) +# endif +# if defined(__INTEL_COMPILER_BUILD_DATE) + /* __INTEL_COMPILER_BUILD_DATE = YYYYMMDD */ +# define COMPILER_VERSION_TWEAK DEC(__INTEL_COMPILER_BUILD_DATE) +# endif +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# if defined(__GNUC__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUC__) +# elif defined(__GNUG__) +# define SIMULATE_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define SIMULATE_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define SIMULATE_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(__PATHCC__) +# define COMPILER_ID "PathScale" +# define COMPILER_VERSION_MAJOR DEC(__PATHCC__) +# define COMPILER_VERSION_MINOR DEC(__PATHCC_MINOR__) +# if defined(__PATHCC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PATHCC_PATCHLEVEL__) +# endif + +#elif defined(__BORLANDC__) && defined(__CODEGEARC_VERSION__) +# define COMPILER_ID "Embarcadero" +# define COMPILER_VERSION_MAJOR HEX(__CODEGEARC_VERSION__>>24 & 0x00FF) 
+# define COMPILER_VERSION_MINOR HEX(__CODEGEARC_VERSION__>>16 & 0x00FF) +# define COMPILER_VERSION_PATCH DEC(__CODEGEARC_VERSION__ & 0xFFFF) + +#elif defined(__BORLANDC__) +# define COMPILER_ID "Borland" + /* __BORLANDC__ = 0xVRR */ +# define COMPILER_VERSION_MAJOR HEX(__BORLANDC__>>8) +# define COMPILER_VERSION_MINOR HEX(__BORLANDC__ & 0xFF) + +#elif defined(__WATCOMC__) && __WATCOMC__ < 1200 +# define COMPILER_ID "Watcom" + /* __WATCOMC__ = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(__WATCOMC__ / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__WATCOMC__) +# define COMPILER_ID "OpenWatcom" + /* __WATCOMC__ = VVRP + 1100 */ +# define COMPILER_VERSION_MAJOR DEC((__WATCOMC__ - 1100) / 100) +# define COMPILER_VERSION_MINOR DEC((__WATCOMC__ / 10) % 10) +# if (__WATCOMC__ % 10) > 0 +# define COMPILER_VERSION_PATCH DEC(__WATCOMC__ % 10) +# endif + +#elif defined(__SUNPRO_CC) +# define COMPILER_ID "SunPro" +# if __SUNPRO_CC >= 0x5100 + /* __SUNPRO_CC = 0xVRRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>12) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# else + /* __SUNPRO_CC = 0xVRP */ +# define COMPILER_VERSION_MAJOR HEX(__SUNPRO_CC>>8) +# define COMPILER_VERSION_MINOR HEX(__SUNPRO_CC>>4 & 0xF) +# define COMPILER_VERSION_PATCH HEX(__SUNPRO_CC & 0xF) +# endif + +#elif defined(__HP_aCC) +# define COMPILER_ID "HP" + /* __HP_aCC = VVRRPP */ +# define COMPILER_VERSION_MAJOR DEC(__HP_aCC/10000) +# define COMPILER_VERSION_MINOR DEC(__HP_aCC/100 % 100) +# define COMPILER_VERSION_PATCH DEC(__HP_aCC % 100) + +#elif defined(__DECCXX) +# define COMPILER_ID "Compaq" + /* __DECCXX_VER = VVRRTPPPP */ +# define COMPILER_VERSION_MAJOR DEC(__DECCXX_VER/10000000) +# define COMPILER_VERSION_MINOR DEC(__DECCXX_VER/100000 % 100) +# define COMPILER_VERSION_PATCH DEC(__DECCXX_VER % 
10000) + +#elif defined(__IBMCPP__) && defined(__COMPILER_VER__) +# define COMPILER_ID "zOS" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__ibmxl__) && defined(__clang__) +# define COMPILER_ID "XLClang" +# define COMPILER_VERSION_MAJOR DEC(__ibmxl_version__) +# define COMPILER_VERSION_MINOR DEC(__ibmxl_release__) +# define COMPILER_VERSION_PATCH DEC(__ibmxl_modification__) +# define COMPILER_VERSION_TWEAK DEC(__ibmxl_ptf_fix_level__) + + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ >= 800 +# define COMPILER_ID "XL" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__IBMCPP__) && !defined(__COMPILER_VER__) && __IBMCPP__ < 800 +# define COMPILER_ID "VisualAge" + /* __IBMCPP__ = VRP */ +# define COMPILER_VERSION_MAJOR DEC(__IBMCPP__/100) +# define COMPILER_VERSION_MINOR DEC(__IBMCPP__/10 % 10) +# define COMPILER_VERSION_PATCH DEC(__IBMCPP__ % 10) + +#elif defined(__PGI) +# define COMPILER_ID "PGI" +# define COMPILER_VERSION_MAJOR DEC(__PGIC__) +# define COMPILER_VERSION_MINOR DEC(__PGIC_MINOR__) +# if defined(__PGIC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__PGIC_PATCHLEVEL__) +# endif + +#elif defined(_CRAYC) +# define COMPILER_ID "Cray" +# define COMPILER_VERSION_MAJOR DEC(_RELEASE_MAJOR) +# define COMPILER_VERSION_MINOR DEC(_RELEASE_MINOR) + +#elif defined(__TI_COMPILER_VERSION__) +# define COMPILER_ID "TI" + /* __TI_COMPILER_VERSION__ = VVVRRRPPP */ +# define COMPILER_VERSION_MAJOR DEC(__TI_COMPILER_VERSION__/1000000) +# define COMPILER_VERSION_MINOR DEC(__TI_COMPILER_VERSION__/1000 % 1000) +# define COMPILER_VERSION_PATCH DEC(__TI_COMPILER_VERSION__ % 1000) + +#elif defined(__FUJITSU) || defined(__FCC_VERSION) || 
defined(__fcc_version) +# define COMPILER_ID "Fujitsu" + +#elif defined(__ghs__) +# define COMPILER_ID "GHS" +/* __GHS_VERSION_NUMBER = VVVVRP */ +# ifdef __GHS_VERSION_NUMBER +# define COMPILER_VERSION_MAJOR DEC(__GHS_VERSION_NUMBER / 100) +# define COMPILER_VERSION_MINOR DEC(__GHS_VERSION_NUMBER / 10 % 10) +# define COMPILER_VERSION_PATCH DEC(__GHS_VERSION_NUMBER % 10) +# endif + +#elif defined(__SCO_VERSION__) +# define COMPILER_ID "SCO" + +#elif defined(__ARMCC_VERSION) && !defined(__clang__) +# define COMPILER_ID "ARMCC" +#if __ARMCC_VERSION >= 1000000 + /* __ARMCC_VERSION = VRRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#else + /* __ARMCC_VERSION = VRPPPP */ + # define COMPILER_VERSION_MAJOR DEC(__ARMCC_VERSION/100000) + # define COMPILER_VERSION_MINOR DEC(__ARMCC_VERSION/10000 % 10) + # define COMPILER_VERSION_PATCH DEC(__ARMCC_VERSION % 10000) +#endif + + +#elif defined(__clang__) && defined(__apple_build_version__) +# define COMPILER_ID "AppleClang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif +# define COMPILER_VERSION_TWEAK DEC(__apple_build_version__) + +#elif defined(__clang__) && defined(__ARMCOMPILER_VERSION) +# define COMPILER_ID "ARMClang" + # define COMPILER_VERSION_MAJOR DEC(__ARMCOMPILER_VERSION/1000000) + # define COMPILER_VERSION_MINOR DEC(__ARMCOMPILER_VERSION/10000 % 100) + # define COMPILER_VERSION_PATCH DEC(__ARMCOMPILER_VERSION % 10000) +# define COMPILER_VERSION_INTERNAL DEC(__ARMCOMPILER_VERSION) + +#elif defined(__clang__) +# define 
COMPILER_ID "Clang" +# if defined(_MSC_VER) +# define SIMULATE_ID "MSVC" +# endif +# define COMPILER_VERSION_MAJOR DEC(__clang_major__) +# define COMPILER_VERSION_MINOR DEC(__clang_minor__) +# define COMPILER_VERSION_PATCH DEC(__clang_patchlevel__) +# if defined(_MSC_VER) + /* _MSC_VER = VVRR */ +# define SIMULATE_VERSION_MAJOR DEC(_MSC_VER / 100) +# define SIMULATE_VERSION_MINOR DEC(_MSC_VER % 100) +# endif + +#elif defined(__GNUC__) || defined(__GNUG__) +# define COMPILER_ID "GNU" +# if defined(__GNUC__) +# define COMPILER_VERSION_MAJOR DEC(__GNUC__) +# else +# define COMPILER_VERSION_MAJOR DEC(__GNUG__) +# endif +# if defined(__GNUC_MINOR__) +# define COMPILER_VERSION_MINOR DEC(__GNUC_MINOR__) +# endif +# if defined(__GNUC_PATCHLEVEL__) +# define COMPILER_VERSION_PATCH DEC(__GNUC_PATCHLEVEL__) +# endif + +#elif defined(_MSC_VER) +# define COMPILER_ID "MSVC" + /* _MSC_VER = VVRR */ +# define COMPILER_VERSION_MAJOR DEC(_MSC_VER / 100) +# define COMPILER_VERSION_MINOR DEC(_MSC_VER % 100) +# if defined(_MSC_FULL_VER) +# if _MSC_VER >= 1400 + /* _MSC_FULL_VER = VVRRPPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 100000) +# else + /* _MSC_FULL_VER = VVRRPPPP */ +# define COMPILER_VERSION_PATCH DEC(_MSC_FULL_VER % 10000) +# endif +# endif +# if defined(_MSC_BUILD) +# define COMPILER_VERSION_TWEAK DEC(_MSC_BUILD) +# endif + +#elif defined(__VISUALDSPVERSION__) || defined(__ADSPBLACKFIN__) || defined(__ADSPTS__) || defined(__ADSP21000__) +# define COMPILER_ID "ADSP" +#if defined(__VISUALDSPVERSION__) + /* __VISUALDSPVERSION__ = 0xVVRRPP00 */ +# define COMPILER_VERSION_MAJOR HEX(__VISUALDSPVERSION__>>24) +# define COMPILER_VERSION_MINOR HEX(__VISUALDSPVERSION__>>16 & 0xFF) +# define COMPILER_VERSION_PATCH HEX(__VISUALDSPVERSION__>>8 & 0xFF) +#endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# define COMPILER_ID "IAR" +# if defined(__VER__) && defined(__ICCARM__) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 1000000) +# define 
COMPILER_VERSION_MINOR DEC(((__VER__) / 1000) % 1000) +# define COMPILER_VERSION_PATCH DEC((__VER__) % 1000) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# elif defined(__VER__) && (defined(__ICCAVR__) || defined(__ICCRX__) || defined(__ICCRH850__) || defined(__ICCRL78__) || defined(__ICC430__) || defined(__ICCRISCV__) || defined(__ICCV850__) || defined(__ICC8051__)) +# define COMPILER_VERSION_MAJOR DEC((__VER__) / 100) +# define COMPILER_VERSION_MINOR DEC((__VER__) - (((__VER__) / 100)*100)) +# define COMPILER_VERSION_PATCH DEC(__SUBVERSION__) +# define COMPILER_VERSION_INTERNAL DEC(__IAR_SYSTEMS_ICC__) +# endif + + +/* These compilers are either not known or too old to define an + identification macro. Try to identify the platform and guess that + it is the native compiler. */ +#elif defined(__hpux) || defined(__hpua) +# define COMPILER_ID "HP" + +#else /* unknown compiler */ +# define COMPILER_ID "" +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. */ +char const* info_compiler = "INFO" ":" "compiler[" COMPILER_ID "]"; +#ifdef SIMULATE_ID +char const* info_simulate = "INFO" ":" "simulate[" SIMULATE_ID "]"; +#endif + +#ifdef __QNXNTO__ +char const* qnxnto = "INFO" ":" "qnxnto[]"; +#endif + +#if defined(__CRAYXE) || defined(__CRAYXC) +char const *info_cray = "INFO" ":" "compiler_wrapper[CrayPrgEnv]"; +#endif + +#define STRINGIFY_HELPER(X) #X +#define STRINGIFY(X) STRINGIFY_HELPER(X) + +/* Identify known platforms by name. 
*/ +#if defined(__linux) || defined(__linux__) || defined(linux) +# define PLATFORM_ID "Linux" + +#elif defined(__CYGWIN__) +# define PLATFORM_ID "Cygwin" + +#elif defined(__MINGW32__) +# define PLATFORM_ID "MinGW" + +#elif defined(__APPLE__) +# define PLATFORM_ID "Darwin" + +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) +# define PLATFORM_ID "Windows" + +#elif defined(__FreeBSD__) || defined(__FreeBSD) +# define PLATFORM_ID "FreeBSD" + +#elif defined(__NetBSD__) || defined(__NetBSD) +# define PLATFORM_ID "NetBSD" + +#elif defined(__OpenBSD__) || defined(__OPENBSD) +# define PLATFORM_ID "OpenBSD" + +#elif defined(__sun) || defined(sun) +# define PLATFORM_ID "SunOS" + +#elif defined(_AIX) || defined(__AIX) || defined(__AIX__) || defined(__aix) || defined(__aix__) +# define PLATFORM_ID "AIX" + +#elif defined(__hpux) || defined(__hpux__) +# define PLATFORM_ID "HP-UX" + +#elif defined(__HAIKU__) +# define PLATFORM_ID "Haiku" + +#elif defined(__BeOS) || defined(__BEOS__) || defined(_BEOS) +# define PLATFORM_ID "BeOS" + +#elif defined(__QNX__) || defined(__QNXNTO__) +# define PLATFORM_ID "QNX" + +#elif defined(__tru64) || defined(_tru64) || defined(__TRU64__) +# define PLATFORM_ID "Tru64" + +#elif defined(__riscos) || defined(__riscos__) +# define PLATFORM_ID "RISCos" + +#elif defined(__sinix) || defined(__sinix__) || defined(__SINIX__) +# define PLATFORM_ID "SINIX" + +#elif defined(__UNIX_SV__) +# define PLATFORM_ID "UNIX_SV" + +#elif defined(__bsdos__) +# define PLATFORM_ID "BSDOS" + +#elif defined(_MPRAS) || defined(MPRAS) +# define PLATFORM_ID "MP-RAS" + +#elif defined(__osf) || defined(__osf__) +# define PLATFORM_ID "OSF1" + +#elif defined(_SCO_SV) || defined(SCO_SV) || defined(sco_sv) +# define PLATFORM_ID "SCO_SV" + +#elif defined(__ultrix) || defined(__ultrix__) || defined(_ULTRIX) +# define PLATFORM_ID "ULTRIX" + +#elif defined(__XENIX__) || defined(_XENIX) || defined(XENIX) +# define PLATFORM_ID "Xenix" + +#elif defined(__WATCOMC__) +# if 
defined(__LINUX__) +# define PLATFORM_ID "Linux" + +# elif defined(__DOS__) +# define PLATFORM_ID "DOS" + +# elif defined(__OS2__) +# define PLATFORM_ID "OS2" + +# elif defined(__WINDOWS__) +# define PLATFORM_ID "Windows3x" + +# elif defined(__VXWORKS__) +# define PLATFORM_ID "VxWorks" + +# else /* unknown platform */ +# define PLATFORM_ID +# endif + +#elif defined(__INTEGRITY) +# if defined(INT_178B) +# define PLATFORM_ID "Integrity178" + +# else /* regular Integrity */ +# define PLATFORM_ID "Integrity" +# endif + +#else /* unknown platform */ +# define PLATFORM_ID + +#endif + +/* For windows compilers MSVC and Intel we can determine + the architecture of the compiler being used. This is because + the compilers do not have flags that can change the architecture, + but rather depend on which compiler is being used +*/ +#if defined(_WIN32) && defined(_MSC_VER) +# if defined(_M_IA64) +# define ARCHITECTURE_ID "IA64" + +# elif defined(_M_X64) || defined(_M_AMD64) +# define ARCHITECTURE_ID "x64" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# elif defined(_M_ARM64) +# define ARCHITECTURE_ID "ARM64" + +# elif defined(_M_ARM) +# if _M_ARM == 4 +# define ARCHITECTURE_ID "ARMV4I" +# elif _M_ARM == 5 +# define ARCHITECTURE_ID "ARMV5I" +# else +# define ARCHITECTURE_ID "ARMV" STRINGIFY(_M_ARM) +# endif + +# elif defined(_M_MIPS) +# define ARCHITECTURE_ID "MIPS" + +# elif defined(_M_SH) +# define ARCHITECTURE_ID "SHx" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__WATCOMC__) +# if defined(_M_I86) +# define ARCHITECTURE_ID "I86" + +# elif defined(_M_IX86) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__IAR_SYSTEMS_ICC__) || defined(__IAR_SYSTEMS_ICC) +# if defined(__ICCARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__ICCRX__) +# define ARCHITECTURE_ID "RX" + +# elif defined(__ICCRH850__) +# define ARCHITECTURE_ID "RH850" + +# 
elif defined(__ICCRL78__) +# define ARCHITECTURE_ID "RL78" + +# elif defined(__ICCRISCV__) +# define ARCHITECTURE_ID "RISCV" + +# elif defined(__ICCAVR__) +# define ARCHITECTURE_ID "AVR" + +# elif defined(__ICC430__) +# define ARCHITECTURE_ID "MSP430" + +# elif defined(__ICCV850__) +# define ARCHITECTURE_ID "V850" + +# elif defined(__ICC8051__) +# define ARCHITECTURE_ID "8051" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif + +#elif defined(__ghs__) +# if defined(__PPC64__) +# define ARCHITECTURE_ID "PPC64" + +# elif defined(__ppc__) +# define ARCHITECTURE_ID "PPC" + +# elif defined(__ARM__) +# define ARCHITECTURE_ID "ARM" + +# elif defined(__x86_64__) +# define ARCHITECTURE_ID "x64" + +# elif defined(__i386__) +# define ARCHITECTURE_ID "X86" + +# else /* unknown architecture */ +# define ARCHITECTURE_ID "" +# endif +#else +# define ARCHITECTURE_ID +#endif + +/* Convert integer to decimal digit literals. */ +#define DEC(n) \ + ('0' + (((n) / 10000000)%10)), \ + ('0' + (((n) / 1000000)%10)), \ + ('0' + (((n) / 100000)%10)), \ + ('0' + (((n) / 10000)%10)), \ + ('0' + (((n) / 1000)%10)), \ + ('0' + (((n) / 100)%10)), \ + ('0' + (((n) / 10)%10)), \ + ('0' + ((n) % 10)) + +/* Convert integer to hex digit literals. */ +#define HEX(n) \ + ('0' + ((n)>>28 & 0xF)), \ + ('0' + ((n)>>24 & 0xF)), \ + ('0' + ((n)>>20 & 0xF)), \ + ('0' + ((n)>>16 & 0xF)), \ + ('0' + ((n)>>12 & 0xF)), \ + ('0' + ((n)>>8 & 0xF)), \ + ('0' + ((n)>>4 & 0xF)), \ + ('0' + ((n) & 0xF)) + +/* Construct a string literal encoding the version number components. 
*/ +#ifdef COMPILER_VERSION_MAJOR +char const info_version[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','[', + COMPILER_VERSION_MAJOR, +# ifdef COMPILER_VERSION_MINOR + '.', COMPILER_VERSION_MINOR, +# ifdef COMPILER_VERSION_PATCH + '.', COMPILER_VERSION_PATCH, +# ifdef COMPILER_VERSION_TWEAK + '.', COMPILER_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct a string literal encoding the internal version number. */ +#ifdef COMPILER_VERSION_INTERNAL +char const info_version_internal[] = { + 'I', 'N', 'F', 'O', ':', + 'c','o','m','p','i','l','e','r','_','v','e','r','s','i','o','n','_', + 'i','n','t','e','r','n','a','l','[', + COMPILER_VERSION_INTERNAL,']','\0'}; +#endif + +/* Construct a string literal encoding the version number components. */ +#ifdef SIMULATE_VERSION_MAJOR +char const info_simulate_version[] = { + 'I', 'N', 'F', 'O', ':', + 's','i','m','u','l','a','t','e','_','v','e','r','s','i','o','n','[', + SIMULATE_VERSION_MAJOR, +# ifdef SIMULATE_VERSION_MINOR + '.', SIMULATE_VERSION_MINOR, +# ifdef SIMULATE_VERSION_PATCH + '.', SIMULATE_VERSION_PATCH, +# ifdef SIMULATE_VERSION_TWEAK + '.', SIMULATE_VERSION_TWEAK, +# endif +# endif +# endif + ']','\0'}; +#endif + +/* Construct the string literal in pieces to prevent the source from + getting matched. Store it in a pointer rather than an array + because some compilers will just produce instructions to fill the + array rather than assigning a pointer to a static array. 
*/ +char const* info_platform = "INFO" ":" "platform[" PLATFORM_ID "]"; +char const* info_arch = "INFO" ":" "arch[" ARCHITECTURE_ID "]"; + + + + +#if defined(__INTEL_COMPILER) && defined(_MSVC_LANG) && _MSVC_LANG < 201403L +# if defined(__INTEL_CXX11_MODE__) +# if defined(__cpp_aggregate_nsdmi) +# define CXX_STD 201402L +# else +# define CXX_STD 201103L +# endif +# else +# define CXX_STD 199711L +# endif +#elif defined(_MSC_VER) && defined(_MSVC_LANG) +# define CXX_STD _MSVC_LANG +#else +# define CXX_STD __cplusplus +#endif + +const char* info_language_dialect_default = "INFO" ":" "dialect_default[" +#if CXX_STD > 201703L + "20" +#elif CXX_STD >= 201703L + "17" +#elif CXX_STD >= 201402L + "14" +#elif CXX_STD >= 201103L + "11" +#else + "98" +#endif +"]"; + +/*--------------------------------------------------------------------------*/ + +int main(int argc, char* argv[]) +{ + int require = 0; + require += info_compiler[argc]; + require += info_platform[argc]; +#ifdef COMPILER_VERSION_MAJOR + require += info_version[argc]; +#endif +#ifdef COMPILER_VERSION_INTERNAL + require += info_version_internal[argc]; +#endif +#ifdef SIMULATE_ID + require += info_simulate[argc]; +#endif +#ifdef SIMULATE_VERSION_MAJOR + require += info_simulate_version[argc]; +#endif +#if defined(__CRAYXE) || defined(__CRAYXC) + require += info_cray[argc]; +#endif + require += info_language_dialect_default[argc]; + (void)argv; + return require; +} diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/cmake.check_cache llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/cmake.check_cache --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/cmake.check_cache 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/cmake.check_cache 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1 @@ +# This file is generated by cmake for dependency checking of the CMakeCache.txt file diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/CMakeError.log 
llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/CMakeError.log --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/CMakeError.log 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/CMakeError.log 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,36 @@ +Determining if the function write exists in the System failed with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_f386c && [1/2] Building C object CMakeFiles/cmTC_f386c.dir/CheckFunctionExists.c.o +[2/2] Linking C executable cmTC_f386c +FAILED: cmTC_f386c +: && /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. -fstack-protector-strong -Wformat -Werror=format-security --target=x86_64-pc-linux-gnu -DCHECK_FUNCTION_EXISTS=write -nostdlib++ CMakeFiles/cmTC_f386c.dir/CheckFunctionExists.c.o -o cmTC_f386c -lSystem -lc /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/lib/linux/libclang_rt.builtins-x86_64.a && : +/usr/bin/ld: cannot find -lSystem +clang-14: error: linker command failed with exit code 1 (use -v to see invocation) +ninja: build stopped: subcommand failed. + + + +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WX_FLAG failed with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_b6a54 && [1/2] Building CXX object CMakeFiles/cmTC_b6a54.dir/src.cxx.o +warning: unknown warning option '-WX-' [-Wunknown-warning-option] +1 warning generated. 
+[2/2] Linking CXX executable cmTC_b6a54 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_EHSC_FLAG failed with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_19ec7 && [1/2] Building CXX object CMakeFiles/cmTC_19ec7.dir/src.cxx.o +FAILED: CMakeFiles/cmTC_19ec7.dir/src.cxx.o +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang++ -g -O2 -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 --target=x86_64-pc-linux-gnu -DLIBCXXABI_SUPPORTS_EHSC_FLAG -nostdlib++ -fPIE -EHsc -MD -MT CMakeFiles/cmTC_19ec7.dir/src.cxx.o -MF CMakeFiles/cmTC_19ec7.dir/src.cxx.o.d -o CMakeFiles/cmTC_19ec7.dir/src.cxx.o -c src.cxx +clang-14: error: unknown argument: '-EHsc' +ninja: build stopped: subcommand failed. + + +Source file was: +int main() { return 0; } diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/CMakeOutput.log llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/CMakeOutput.log --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/CMakeOutput.log 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/CMakeOutput.log 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,641 @@ +The system is: Linux - 5.4.0-4-amd64 - x86_64 +Compiling the CXX compiler identification source file "CMakeCXXCompilerId.cpp" succeeded. 
+Compiler: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang++ +Build flags: -g;-O2;-fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=.;-fstack-protector-strong;-Wformat;-Werror=format-security;-Wdate-time;-D_FORTIFY_SOURCE=2 +Id flags: + +The output was: +0 + + +Compilation of the CXX compiler identification source "CMakeCXXCompilerId.cpp" produced "a.out" + +The CXX compiler identification is Clang, found in "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdCXX/a.out" + +Compiling the C compiler identification source file "CMakeCCompilerId.c" succeeded. +Compiler: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang +Build flags: -g;-O2;-fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=.;-fstack-protector-strong;-Wformat;-Werror=format-security +Id flags: + +The output was: +0 + + +Compilation of the C compiler identification source "CMakeCCompilerId.c" produced "a.out" + +The C compiler identification is Clang, found in "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/3.18.4/CompilerIdC/a.out" + +Detecting CXX compiler ABI info compiled with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_18cec && [1/2] Building CXX object CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o +Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c) +Target: x86_64-pc-linux-gnu +Thread model: posix +InstalledDir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin +Found candidate GCC installation: 
/usr/lib/gcc/x86_64-linux-gnu/10 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9 +Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10 +Candidate multilib: .;@m64 +Candidate multilib: 32;@m32 +Candidate multilib: x32;@mx32 +Selected multilib: .;@m64 + (in-process) + "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang-14" -cc1 -triple x86_64-pc-linux-gnu -emit-obj --mrelax-relocations -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CMakeCXXCompilerABI.cpp -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debug-info-kind=constructor -dwarf-version=4 -debugger-tuning=gdb -v -fcoverage-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -resource-dir /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0 -dependency-file CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o.d -MT CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o -sys-header-deps -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include 
-O2 -Wformat -Wdate-time -fdeprecated-macro -fdebug-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. -ferror-limit 19 -stack-protector 2 -fgnuc-version=4.2.1 -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o -x c++ /usr/share/cmake-3.18/Modules/CMakeCXXCompilerABI.cpp +clang -cc1 version 14.0.0 based upon LLVM 14.0.0 default target x86_64-pc-linux-gnu +ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include" +ignoring nonexistent directory "/include" +#include "..." search starts here: +#include <...> search starts here: + /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 + /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 + /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward + /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include + /usr/local/include + /usr/include/x86_64-linux-gnu + /usr/include +End of search list. 
+[2/2] Linking CXX executable cmTC_18cec +Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c) +Target: x86_64-pc-linux-gnu +Thread model: posix +InstalledDir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9 +Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10 +Candidate multilib: .;@m64 +Candidate multilib: 32;@m32 +Candidate multilib: x32;@mx32 +Selected multilib: .;@m64 + "/usr/bin/ld" --hash-style=both --build-id --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o cmTC_18cec /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/10 -L/lib/x86_64-linux-gnu -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib -L/lib -L/usr/lib CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/10/crtend.o /usr/lib/x86_64-linux-gnu/crtn.o + + + +Parsed CXX implicit include dir info from above output: rv=done + found start of include info + found start of implicit include info + add: [/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10] + add: [/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10] + add: [/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward] + add: [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] + add: [/usr/local/include] + add: [/usr/include/x86_64-linux-gnu] + add: [/usr/include] + end of search list found + 
collapse include dir [/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10] ==> [/usr/include/c++/10] + collapse include dir [/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10] ==> [/usr/include/x86_64-linux-gnu/c++/10] + collapse include dir [/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward] ==> [/usr/include/c++/10/backward] + collapse include dir [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] ==> [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] + collapse include dir [/usr/local/include] ==> [/usr/local/include] + collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu] + collapse include dir [/usr/include] ==> [/usr/include] + implicit include dirs: [/usr/include/c++/10;/usr/include/x86_64-linux-gnu/c++/10;/usr/include/c++/10/backward;/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include] + + +Parsed CXX implicit link information from above output: + link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)] + ignore line: [Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp] + ignore line: [] + ignore line: [Run Build Command(s):/usr/bin/ninja cmTC_18cec && [1/2] Building CXX object CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o] + ignore line: [Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c)] + ignore line: [Target: x86_64-pc-linux-gnu] + ignore line: [Thread model: posix] + ignore line: [InstalledDir: 
/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9] + ignore line: [Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Candidate multilib: .] + ignore line: [@m64] + ignore line: [Candidate multilib: 32] + ignore line: [@m32] + ignore line: [Candidate multilib: x32] + ignore line: [@mx32] + ignore line: [Selected multilib: .] + ignore line: [@m64] + ignore line: [ (in-process)] + ignore line: [ "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang-14" -cc1 -triple x86_64-pc-linux-gnu -emit-obj --mrelax-relocations -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CMakeCXXCompilerABI.cpp -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debug-info-kind=constructor -dwarf-version=4 -debugger-tuning=gdb -v -fcoverage-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -resource-dir /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0 -dependency-file CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o.d -MT CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o -sys-header-deps -D _FORTIFY_SOURCE=2 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wformat -Wdate-time -fdeprecated-macro -fdebug-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. -ferror-limit 19 -stack-protector 2 -fgnuc-version=4.2.1 -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o -x c++ /usr/share/cmake-3.18/Modules/CMakeCXXCompilerABI.cpp] + ignore line: [clang -cc1 version 14.0.0 based upon LLVM 14.0.0 default target x86_64-pc-linux-gnu] + ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include"] + ignore line: [ignoring nonexistent directory "/include"] + ignore line: [#include "..." search starts here:] + ignore line: [#include <...> search starts here:] + ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10] + ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10] + ignore line: [ /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward] + ignore line: [ /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] + ignore line: [ /usr/local/include] + ignore line: [ /usr/include/x86_64-linux-gnu] + ignore line: [ /usr/include] + ignore line: [End of search list.] 
+ ignore line: [[2/2] Linking CXX executable cmTC_18cec] + ignore line: [Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c)] + ignore line: [Target: x86_64-pc-linux-gnu] + ignore line: [Thread model: posix] + ignore line: [InstalledDir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9] + ignore line: [Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Candidate multilib: .] + ignore line: [@m64] + ignore line: [Candidate multilib: 32] + ignore line: [@m32] + ignore line: [Candidate multilib: x32] + ignore line: [@mx32] + ignore line: [Selected multilib: .] + ignore line: [@m64] + link line: [ "/usr/bin/ld" --hash-style=both --build-id --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o cmTC_18cec /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/10 -L/lib/x86_64-linux-gnu -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib -L/lib -L/usr/lib CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o -lstdc++ -lm -lgcc_s -lgcc -lc -lgcc_s -lgcc /usr/lib/gcc/x86_64-linux-gnu/10/crtend.o /usr/lib/x86_64-linux-gnu/crtn.o] + arg [/usr/bin/ld] ==> ignore + arg [--hash-style=both] ==> ignore + arg [--build-id] ==> ignore + arg [--eh-frame-hdr] ==> ignore + arg [-m] ==> ignore + arg [elf_x86_64] ==> ignore + arg [-dynamic-linker] ==> ignore + arg [/lib64/ld-linux-x86-64.so.2] ==> ignore + arg [-o] ==> ignore + arg [cmTC_18cec] ==> ignore + arg 
[/usr/lib/x86_64-linux-gnu/crt1.o] ==> ignore + arg [/usr/lib/x86_64-linux-gnu/crti.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o] ==> ignore + arg [-L/usr/lib/gcc/x86_64-linux-gnu/10] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/10] + arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu] + arg [-L/lib/../lib64] ==> dir [/lib/../lib64] + arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu] + arg [-L/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib] ==> dir [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib] + arg [-L/lib] ==> dir [/lib] + arg [-L/usr/lib] ==> dir [/usr/lib] + arg [CMakeFiles/cmTC_18cec.dir/CMakeCXXCompilerABI.cpp.o] ==> ignore + arg [-lstdc++] ==> lib [stdc++] + arg [-lm] ==> lib [m] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [-lc] ==> lib [c] + arg [-lgcc_s] ==> lib [gcc_s] + arg [-lgcc] ==> lib [gcc] + arg [/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o] ==> ignore + arg [/usr/lib/x86_64-linux-gnu/crtn.o] ==> ignore + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/10] ==> [/usr/lib/gcc/x86_64-linux-gnu/10] + collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu] + collapse library dir [/lib/../lib64] ==> [/lib64] + collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib] ==> [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib] + collapse library dir [/lib] ==> [/lib] + collapse library dir [/usr/lib] ==> [/usr/lib] + implicit libs: [stdc++;m;gcc_s;gcc;c;gcc_s;gcc] + implicit dirs: 
[/usr/lib/gcc/x86_64-linux-gnu/10;/lib/x86_64-linux-gnu;/lib64;/usr/lib/x86_64-linux-gnu;/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib;/lib;/usr/lib] + implicit fwks: [] + + +Detecting C compiler ABI info compiled with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_6b1f3 && [1/2] Building C object CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o +Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c) +Target: x86_64-pc-linux-gnu +Thread model: posix +InstalledDir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9 +Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10 +Candidate multilib: .;@m64 +Candidate multilib: 32;@m32 +Candidate multilib: x32;@mx32 +Selected multilib: .;@m64 + (in-process) + "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang-14" -cc1 -triple x86_64-pc-linux-gnu -emit-obj --mrelax-relocations -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CMakeCCompilerABI.c -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debug-info-kind=constructor -dwarf-version=4 -debugger-tuning=gdb -v -fcoverage-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -resource-dir 
/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0 -dependency-file CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o.d -MT CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o -sys-header-deps -internal-isystem /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wformat -fdebug-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. -ferror-limit 19 -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o -x c /usr/share/cmake-3.18/Modules/CMakeCCompilerABI.c +clang -cc1 version 14.0.0 based upon LLVM 14.0.0 default target x86_64-pc-linux-gnu +ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include" +ignoring nonexistent directory "/include" +#include "..." search starts here: +#include <...> search starts here: + /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include + /usr/local/include + /usr/include/x86_64-linux-gnu + /usr/include +End of search list. 
+[2/2] Linking C executable cmTC_6b1f3 +Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c) +Target: x86_64-pc-linux-gnu +Thread model: posix +InstalledDir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8 +Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9 +Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10 +Candidate multilib: .;@m64 +Candidate multilib: 32;@m32 +Candidate multilib: x32;@mx32 +Selected multilib: .;@m64 + "/usr/bin/ld" --hash-style=both --build-id --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o cmTC_6b1f3 /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/10 -L/lib/x86_64-linux-gnu -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib -L/lib -L/usr/lib CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-linux-gnu/10/crtend.o /usr/lib/x86_64-linux-gnu/crtn.o + + + +Parsed C implicit include dir info from above output: rv=done + found start of include info + found start of implicit include info + add: [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] + add: [/usr/local/include] + add: [/usr/include/x86_64-linux-gnu] + add: [/usr/include] + end of search list found + collapse include dir [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] ==> 
[/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] + collapse include dir [/usr/local/include] ==> [/usr/local/include] + collapse include dir [/usr/include/x86_64-linux-gnu] ==> [/usr/include/x86_64-linux-gnu] + collapse include dir [/usr/include] ==> [/usr/include] + implicit include dirs: [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include;/usr/local/include;/usr/include/x86_64-linux-gnu;/usr/include] + + +Parsed C implicit link information from above output: + link line regex: [^( *|.*[/\])(ld|CMAKE_LINK_STARTFILE-NOTFOUND|([^/\]+-)?ld|collect2)[^/\]*( |$)] + ignore line: [Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp] + ignore line: [] + ignore line: [Run Build Command(s):/usr/bin/ninja cmTC_6b1f3 && [1/2] Building C object CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o] + ignore line: [Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c)] + ignore line: [Target: x86_64-pc-linux-gnu] + ignore line: [Thread model: posix] + ignore line: [InstalledDir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9] + ignore line: [Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Candidate multilib: .] + ignore line: [@m64] + ignore line: [Candidate multilib: 32] + ignore line: [@m32] + ignore line: [Candidate multilib: x32] + ignore line: [@mx32] + ignore line: [Selected multilib: .] 
+ ignore line: [@m64] + ignore line: [ (in-process)] + ignore line: [ "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang-14" -cc1 -triple x86_64-pc-linux-gnu -emit-obj --mrelax-relocations -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CMakeCCompilerABI.c -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debug-info-kind=constructor -dwarf-version=4 -debugger-tuning=gdb -v -fcoverage-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -resource-dir /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0 -dependency-file CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o.d -MT CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o -sys-header-deps -internal-isystem /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wformat -fdebug-compilation-dir=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp -fdebug-prefix-map=/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project=. 
-ferror-limit 19 -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o -x c /usr/share/cmake-3.18/Modules/CMakeCCompilerABI.c] + ignore line: [clang -cc1 version 14.0.0 based upon LLVM 14.0.0 default target x86_64-pc-linux-gnu] + ignore line: [ignoring nonexistent directory "/usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include"] + ignore line: [ignoring nonexistent directory "/include"] + ignore line: [#include "..." search starts here:] + ignore line: [#include <...> search starts here:] + ignore line: [ /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/include] + ignore line: [ /usr/local/include] + ignore line: [ /usr/include/x86_64-linux-gnu] + ignore line: [ /usr/include] + ignore line: [End of search list.] + ignore line: [[2/2] Linking C executable cmTC_6b1f3] + ignore line: [Debian clang version 14.0.0 (git@github.com:llvm/llvm-project.git 210d72e9d6b4a8e7633921d0bd7186fd3c7a2c8c)] + ignore line: [Target: x86_64-pc-linux-gnu] + ignore line: [Thread model: posix] + ignore line: [InstalledDir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/8] + ignore line: [Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/9] + ignore line: [Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/10] + ignore line: [Candidate multilib: .] + ignore line: [@m64] + ignore line: [Candidate multilib: 32] + ignore line: [@m32] + ignore line: [Candidate multilib: x32] + ignore line: [@mx32] + ignore line: [Selected multilib: .] 
+ ignore line: [@m64] + link line: [ "/usr/bin/ld" --hash-style=both --build-id --eh-frame-hdr -m elf_x86_64 -dynamic-linker /lib64/ld-linux-x86-64.so.2 -o cmTC_6b1f3 /usr/lib/x86_64-linux-gnu/crt1.o /usr/lib/x86_64-linux-gnu/crti.o /usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o -L/usr/lib/gcc/x86_64-linux-gnu/10 -L/lib/x86_64-linux-gnu -L/lib/../lib64 -L/usr/lib/x86_64-linux-gnu -L/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib -L/lib -L/usr/lib CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o -lgcc --as-needed -lgcc_s --no-as-needed -lc -lgcc --as-needed -lgcc_s --no-as-needed /usr/lib/gcc/x86_64-linux-gnu/10/crtend.o /usr/lib/x86_64-linux-gnu/crtn.o] + arg [/usr/bin/ld] ==> ignore + arg [--hash-style=both] ==> ignore + arg [--build-id] ==> ignore + arg [--eh-frame-hdr] ==> ignore + arg [-m] ==> ignore + arg [elf_x86_64] ==> ignore + arg [-dynamic-linker] ==> ignore + arg [/lib64/ld-linux-x86-64.so.2] ==> ignore + arg [-o] ==> ignore + arg [cmTC_6b1f3] ==> ignore + arg [/usr/lib/x86_64-linux-gnu/crt1.o] ==> ignore + arg [/usr/lib/x86_64-linux-gnu/crti.o] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/10/crtbegin.o] ==> ignore + arg [-L/usr/lib/gcc/x86_64-linux-gnu/10] ==> dir [/usr/lib/gcc/x86_64-linux-gnu/10] + arg [-L/lib/x86_64-linux-gnu] ==> dir [/lib/x86_64-linux-gnu] + arg [-L/lib/../lib64] ==> dir [/lib/../lib64] + arg [-L/usr/lib/x86_64-linux-gnu] ==> dir [/usr/lib/x86_64-linux-gnu] + arg [-L/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib] ==> dir [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib] + arg [-L/lib] ==> dir [/lib] + arg [-L/usr/lib] ==> dir [/usr/lib] + arg [CMakeFiles/cmTC_6b1f3.dir/CMakeCCompilerABI.c.o] ==> ignore + arg [-lgcc] ==> lib [gcc] + arg [--as-needed] ==> ignore + arg [-lgcc_s] ==> lib [gcc_s] + arg [--no-as-needed] ==> 
ignore + arg [-lc] ==> lib [c] + arg [-lgcc] ==> lib [gcc] + arg [--as-needed] ==> ignore + arg [-lgcc_s] ==> lib [gcc_s] + arg [--no-as-needed] ==> ignore + arg [/usr/lib/gcc/x86_64-linux-gnu/10/crtend.o] ==> ignore + arg [/usr/lib/x86_64-linux-gnu/crtn.o] ==> ignore + collapse library dir [/usr/lib/gcc/x86_64-linux-gnu/10] ==> [/usr/lib/gcc/x86_64-linux-gnu/10] + collapse library dir [/lib/x86_64-linux-gnu] ==> [/lib/x86_64-linux-gnu] + collapse library dir [/lib/../lib64] ==> [/lib64] + collapse library dir [/usr/lib/x86_64-linux-gnu] ==> [/usr/lib/x86_64-linux-gnu] + collapse library dir [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/../lib] ==> [/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib] + collapse library dir [/lib] ==> [/lib] + collapse library dir [/usr/lib] ==> [/usr/lib] + implicit libs: [gcc;gcc_s;c;gcc;gcc_s] + implicit dirs: [/usr/lib/gcc/x86_64-linux-gnu/10;/lib/x86_64-linux-gnu;/lib64;/usr/lib/x86_64-linux-gnu;/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib;/lib;/usr/lib] + implicit fwks: [] + + +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_TARGET_EQ_X86_64_PC_LINUX_GNU_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_fe681 && [1/2] Building CXX object CMakeFiles/cmTC_fe681.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_fe681 + + +Source file was: +int main() { return 0; } +Determining if the function fopen exists in the c passed with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_de77d && [1/2] Building C object 
CMakeFiles/cmTC_de77d.dir/CheckFunctionExists.c.o +/usr/share/cmake-3.18/Modules/CheckFunctionExists.c:7:3: warning: declaration of built-in function 'fopen' requires inclusion of the header [-Wbuiltin-requires-header] + CHECK_FUNCTION_EXISTS(void); + ^ +:1:31: note: expanded from here +#define CHECK_FUNCTION_EXISTS fopen + ^ +1 warning generated. +[2/2] Linking C executable cmTC_de77d + + + +Performing C SOURCE FILE Test LIBCXXABI_SUPPORTS_NOSTDLIBXX_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_04126 && [1/2] Building C object CMakeFiles/cmTC_04126.dir/src.c.o +[2/2] Linking C executable cmTC_04126 + + +Source file was: +int main(void) { return 0; } +Performing C SOURCE FILE Test LIBCXXABI_HAS_COMMENT_LIB_PRAGMA succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_eb770 && [1/2] Building C object CMakeFiles/cmTC_eb770.dir/src.c.o +[2/2] Linking C executable cmTC_eb770 + + +Source file was: + +#pragma comment(lib, "c") +int main() { return 0; } + +Performing C++ SOURCE FILE Test LIBCXXABI_HAS_NOSTDINCXX_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_6e618 && [1/2] Building CXX object CMakeFiles/cmTC_6e618.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_6e618 + + +Source file was: +int main() { return 0; } +Determining if the function dladdr exists in the dl passed with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_efd1e && [1/2] Building C object 
CMakeFiles/cmTC_efd1e.dir/CheckFunctionExists.c.o +[2/2] Linking C executable cmTC_efd1e + + + +Determining if the function pthread_once exists in the pthread passed with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_25490 && [1/2] Building C object CMakeFiles/cmTC_25490.dir/CheckFunctionExists.c.o +[2/2] Linking C executable cmTC_25490 + + + +Determining if the function __cxa_thread_atexit_impl exists in the c passed with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_07398 && [1/2] Building C object CMakeFiles/cmTC_07398.dir/CheckFunctionExists.c.o +[2/2] Linking C executable cmTC_07398 + + + +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WERROR_EQ_RETURN_TYPE_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_31ff7 && [1/2] Building CXX object CMakeFiles/cmTC_31ff7.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_31ff7 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_W_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_88ba1 && [1/2] Building CXX object CMakeFiles/cmTC_88ba1.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_88ba1 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WALL_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build 
Command(s):/usr/bin/ninja cmTC_622e5 && [1/2] Building CXX object CMakeFiles/cmTC_622e5.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_622e5 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WCHAR_SUBSCRIPTS_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_cb804 && [1/2] Building CXX object CMakeFiles/cmTC_cb804.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_cb804 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WCONVERSION_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_4ac7a && [1/2] Building CXX object CMakeFiles/cmTC_4ac7a.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_4ac7a + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WMISMATCHED_TAGS_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_2ffd2 && [1/2] Building CXX object CMakeFiles/cmTC_2ffd2.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_2ffd2 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WMISSING_BRACES_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_1774b && [1/2] Building CXX object CMakeFiles/cmTC_1774b.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_1774b + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test 
LIBCXXABI_SUPPORTS_WNEWLINE_EOF_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_f87bf && [1/2] Building CXX object CMakeFiles/cmTC_f87bf.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_f87bf + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WUNUSED_FUNCTION_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_b89f2 && [1/2] Building CXX object CMakeFiles/cmTC_b89f2.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_b89f2 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WSHADOW_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_2e9fb && [1/2] Building CXX object CMakeFiles/cmTC_2e9fb.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_2e9fb + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WSHORTEN_64_TO_32_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_6712a && [1/2] Building CXX object CMakeFiles/cmTC_6712a.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_6712a + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WSIGN_COMPARE_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_fcc1d && [1/2] 
Building CXX object CMakeFiles/cmTC_fcc1d.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_fcc1d + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WSIGN_CONVERSION_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_3e06f && [1/2] Building CXX object CMakeFiles/cmTC_3e06f.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_3e06f + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WSTRICT_ALIASING_EQ_2_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_87d9b && [1/2] Building CXX object CMakeFiles/cmTC_87d9b.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_87d9b + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WSTRICT_OVERFLOW_EQ_4_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_050f5 && [1/2] Building CXX object CMakeFiles/cmTC_050f5.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_050f5 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WUNUSED_PARAMETER_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_47090 && [1/2] Building CXX object CMakeFiles/cmTC_47090.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_47090 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WUNUSED_VARIABLE_FLAG succeeded with the 
following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_9afa7 && [1/2] Building CXX object CMakeFiles/cmTC_9afa7.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_9afa7 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WWRITE_STRINGS_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_8a36b && [1/2] Building CXX object CMakeFiles/cmTC_8a36b.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_8a36b + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WUNDEF_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_f1412 && [1/2] Building CXX object CMakeFiles/cmTC_f1412.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_f1412 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WNO_SUGGEST_OVERRIDE_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_fc71c && [1/2] Building CXX object CMakeFiles/cmTC_fc71c.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_fc71c + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_WNO_ERROR_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_7efa7 && [1/2] Building CXX object CMakeFiles/cmTC_7efa7.dir/src.cxx.o +[2/2] 
Linking CXX executable cmTC_7efa7 + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_PEDANTIC_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_7a29d && [1/2] Building CXX object CMakeFiles/cmTC_7a29d.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_7a29d + + +Source file was: +int main() { return 0; } +Performing C++ SOURCE FILE Test LIBCXXABI_SUPPORTS_FSTRICT_ALIASING_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_bf577 && [1/2] Building CXX object CMakeFiles/cmTC_bf577.dir/src.cxx.o +[2/2] Linking CXX executable cmTC_bf577 + + +Source file was: +int main() { return 0; } +Performing C SOURCE FILE Test LIBCXXABI_SUPPORTS_FUNWIND_TABLES_FLAG succeeded with the following output: +Change Dir: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/CMakeTmp + +Run Build Command(s):/usr/bin/ninja cmTC_42c22 && [1/2] Building C object CMakeFiles/cmTC_42c22.dir/src.c.o +[2/2] Linking C executable cmTC_42c22 + + +Source file was: +int main(void) { return 0; } diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/rules.ninja llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/rules.ninja --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/rules.ninja 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/rules.ninja 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,91 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Ninja" Generator, CMake Version 3.18 + +# This file contains all the rules used to get the outputs files +# built from the input files. +# It is included in the main 'build.ninja'. 
+ +# ============================================================================= +# Project: libcxxabi +# Configurations: RelWithDebInfo +# ============================================================================= +# ============================================================================= + +############################################# +# Rule for running custom commands. + +rule CUSTOM_COMMAND + command = $COMMAND + description = $DESC + + +############################################# +# Rule for compiling CXX files. + +rule CXX_COMPILER__cxxabi_static_RelWithDebInfo + depfile = $DEP_FILE + deps = gcc + command = /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang++ $DEFINES $INCLUDES $FLAGS -MD -MT $out -MF $DEP_FILE -o $out -c $in + description = Building CXX object $out + + +############################################# +# Rule for linking CXX static library. + +rule CXX_STATIC_LIBRARY_LINKER__cxxabi_static_RelWithDebInfo + command = $PRE_LINK && /usr/bin/cmake -E rm -f $TARGET_FILE && /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ar qc $TARGET_FILE $LINK_FLAGS $in && /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/llvm-ranlib $TARGET_FILE && $POST_BUILD + description = Linking CXX static library $TARGET_FILE + restat = $RESTAT + + +############################################# +# Rule for compiling CXX files. + +rule CXX_COMPILER__cxxabi_shared_RelWithDebInfo + depfile = $DEP_FILE + deps = gcc + command = /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang++ $DEFINES $INCLUDES $FLAGS -MD -MT $out -MF $DEP_FILE -o $out -c $in + description = Building CXX object $out + + +############################################# +# Rule for linking CXX shared library. 
+ +rule CXX_SHARED_LIBRARY_LINKER__cxxabi_shared_RelWithDebInfo + command = $PRE_LINK && /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang++ -fPIC $LANGUAGE_COMPILE_FLAGS $ARCH_FLAGS $LINK_FLAGS -shared $SONAME_FLAG$SONAME -o $TARGET_FILE $in $LINK_PATH $LINK_LIBRARIES && $POST_BUILD + description = Linking CXX shared library $TARGET_FILE + restat = $RESTAT + + +############################################# +# Rule for creating library symlink. + +rule CMAKE_SYMLINK_LIBRARY + command = /usr/bin/cmake -E cmake_symlink_library $in $SONAME $out && $POST_BUILD + description = Creating library symlink $out + + +############################################# +# Rule for re-running cmake. + +rule RERUN_CMAKE + command = /usr/bin/cmake --regenerate-during-build -S/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi -B/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build + description = Re-running CMake... + generator = 1 + + +############################################# +# Rule for cleaning all built files. + +rule CLEAN + command = /usr/bin/ninja $FILE_ARG -t clean $TARGETS + description = Cleaning all built files... + + +############################################# +# Rule for printing all primary targets available. 
+ +rule HELP + command = /usr/bin/ninja -t targets + description = All primary targets available: + diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/TargetDirectories.txt llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/TargetDirectories.txt --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/CMakeFiles/TargetDirectories.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/CMakeFiles/TargetDirectories.txt 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,30 @@ +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/install/strip.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/install/local.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/install.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/list_install_components.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/rebuild_cache.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/CMakeFiles/edit_cache.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/install/local.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/install.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/list_install_components.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/rebuild_cache.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/edit_cache.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/install-cxxabi-stripped.dir 
+/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/install-cxxabi.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/cxxabi.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/install/strip.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/CMakeFiles/install/strip.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/CMakeFiles/install/local.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/CMakeFiles/edit_cache.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/CMakeFiles/install.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/CMakeFiles/list_install_components.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/CMakeFiles/rebuild_cache.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/CMakeFiles/check-cxxabi.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz/CMakeFiles/install/strip.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz/CMakeFiles/install/local.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz/CMakeFiles/install.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz/CMakeFiles/list_install_components.dir 
+/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz/CMakeFiles/rebuild_cache.dir +/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz/CMakeFiles/edit_cache.dir diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/cmake_install.cmake llvm-toolchain-13-13.0.0/libcxxabi/build/cmake_install.cmake --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/cmake_install.cmake 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/cmake_install.cmake 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,62 @@ +# Install script for directory: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/lib/llvm-14") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "RelWithDebInfo") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "1") +endif() + +# Is this installation the result of a crosscompile? +if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +# Set default install directory permissions. 
+if(NOT DEFINED CMAKE_OBJDUMP) + set(CMAKE_OBJDUMP "/usr/bin/objdump") +endif() + +if(NOT CMAKE_INSTALL_LOCAL_ONLY) + # Include the install script for each subdirectory. + include("/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/src/cmake_install.cmake") + include("/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/test/cmake_install.cmake") + include("/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/fuzz/cmake_install.cmake") + +endif() + +if(CMAKE_INSTALL_COMPONENT) + set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") +else() + set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") +endif() + +string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT + "${CMAKE_INSTALL_MANIFEST_FILES}") +file(WRITE "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/${CMAKE_INSTALL_MANIFEST}" + "${CMAKE_INSTALL_MANIFEST_CONTENT}") diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/fuzz/cmake_install.cmake llvm-toolchain-13-13.0.0/libcxxabi/build/fuzz/cmake_install.cmake --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/fuzz/cmake_install.cmake 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/fuzz/cmake_install.cmake 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,44 @@ +# Install script for directory: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/fuzz + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/lib/llvm-14") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. 
+if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "RelWithDebInfo") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "1") +endif() + +# Is this installation the result of a crosscompile? +if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +# Set default install directory permissions. +if(NOT DEFINED CMAKE_OBJDUMP) + set(CMAKE_OBJDUMP "/usr/bin/objdump") +endif() + Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/lib/libc++abi.a and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/lib/libc++abi.a differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/lib/libc++abi.so and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/lib/libc++abi.so differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/lib/libc++abi.so.1 and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/lib/libc++abi.so.1 differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/lib/libc++abi.so.1.0 and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/lib/libc++abi.so.1.0 differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/.ninja_deps and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/.ninja_deps differ diff -Nru 
llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/.ninja_log llvm-toolchain-13-13.0.0/libcxxabi/build/.ninja_log --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/.ninja_log 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/.ninja_log 2021-09-19 21:51:11.000000000 +0000 @@ -0,0 +1,41 @@ +# ninja log v5 +3 48 1632088268434510336 src/CMakeFiles/cxxabi_static.dir/cxa_virtual.cpp.o b08268c6039fe3a3 +5 85 1632088268470510433 src/CMakeFiles/cxxabi_static.dir/abort_message.cpp.o db4b461b4dda9496 +17 94 1632088268466510422 src/CMakeFiles/cxxabi_shared.dir/cxa_virtual.cpp.o d50e27cf010f1133 +4 106 1632088268494510498 src/CMakeFiles/cxxabi_static.dir/stdlib_exception.cpp.o 404ba44797267af2 +7 117 1632088268490510487 src/CMakeFiles/cxxabi_shared.dir/cxa_aux_runtime.cpp.o 87dd9f693ebc8da5 +4 121 1632088268502510520 src/CMakeFiles/cxxabi_static.dir/stdlib_typeinfo.cpp.o 5f0ea511dd014858 +17 125 1632088268506510531 src/CMakeFiles/cxxabi_shared.dir/stdlib_exception.cpp.o 6c5a9ea7427bf379 +1 125 1632088268506510531 src/CMakeFiles/cxxabi_static.dir/cxa_aux_runtime.cpp.o 96b6bcec4ad1de42 +24 126 1632088268510510542 src/CMakeFiles/cxxabi_shared.dir/abort_message.cpp.o b991adadf671b2ed +23 148 1632088268534510607 src/CMakeFiles/cxxabi_shared.dir/stdlib_typeinfo.cpp.o 6b29d65ea4257ea3 +6 178 1632088268562510683 src/CMakeFiles/cxxabi_static.dir/stdlib_new_delete.cpp.o 4e4874eaf31ceeb4 +3 197 1632088268582510737 src/CMakeFiles/cxxabi_static.dir/cxa_vector.cpp.o a8bd84e20df6080b +13 208 1632088268594510771 src/CMakeFiles/cxxabi_shared.dir/cxa_vector.cpp.o 32aef2143df08a0f +94 227 1632088268614510824 src/CMakeFiles/cxxabi_shared.dir/stdlib_new_delete.cpp.o b98b76c6e8517a94 +7 259 1632088268642510900 src/CMakeFiles/cxxabi_static.dir/cxa_personality.cpp.o 673161b4dc89bee9 +9 266 1632088268650510922 src/CMakeFiles/cxxabi_shared.dir/cxa_exception_storage.cpp.o f540caed24b1e9b4 +5 288 1632088268674510988 src/CMakeFiles/cxxabi_static.dir/private_typeinfo.cpp.o 
1153c6e7736317f +7 304 1632088268690511031 src/CMakeFiles/cxxabi_static.dir/cxa_thread_atexit.cpp.o 83ebffa9f02b54f4 +2 306 1632088268690511031 src/CMakeFiles/cxxabi_static.dir/cxa_exception_storage.cpp.o ce17d0e0862bd702 +5 333 1632088268718511108 src/CMakeFiles/cxxabi_static.dir/fallback_malloc.cpp.o 420db3b8b65e793d +117 351 1632088268738511161 src/CMakeFiles/cxxabi_shared.dir/cxa_personality.cpp.o 352545dd31f34de9 +121 417 1632088268802511336 src/CMakeFiles/cxxabi_shared.dir/cxa_thread_atexit.cpp.o b4317c99ca5e8a23 +86 423 1632088268806511346 src/CMakeFiles/cxxabi_shared.dir/private_typeinfo.cpp.o 5929e7b0e6895a52 +48 433 1632088268818511379 src/CMakeFiles/cxxabi_shared.dir/fallback_malloc.cpp.o 5ab0922c4cf92f43 +1 463 1632088268850511466 src/CMakeFiles/cxxabi_static.dir/cxa_default_handlers.cpp.o 171d1c4ed54b9514 +2 485 1632088268870511521 src/CMakeFiles/cxxabi_static.dir/cxa_handlers.cpp.o f17307ca3754d820 +8 505 1632088268890511575 src/CMakeFiles/cxxabi_shared.dir/cxa_default_handlers.cpp.o 19ef0a3cea231d15 +11 513 1632088268898511596 src/CMakeFiles/cxxabi_shared.dir/cxa_handlers.cpp.o 72ed382beef7280d +4 535 1632088268918511651 src/CMakeFiles/cxxabi_static.dir/stdlib_stdexcept.cpp.o 65ac00423dcb0517 +2 546 1632088268930511683 src/CMakeFiles/cxxabi_static.dir/cxa_guard.cpp.o 3437f0becd0fb928 +20 572 1632088268958511760 src/CMakeFiles/cxxabi_shared.dir/stdlib_stdexcept.cpp.o 8282a4e21c47e1ce +10 595 1632088268982511825 src/CMakeFiles/cxxabi_shared.dir/cxa_guard.cpp.o 1e7610c0c6161bde +6 630 1632088269014511912 src/CMakeFiles/cxxabi_static.dir/cxa_exception.cpp.o a751a6d4f541bbf4 +107 683 1632088269070512064 src/CMakeFiles/cxxabi_shared.dir/cxa_exception.cpp.o b8336a4178881bdd +1 3183 1632088271562518841 src/CMakeFiles/cxxabi_static.dir/cxa_demangle.cpp.o e981660348596566 +3184 3215 1632088271594518928 lib/libc++abi.a 693fcdd85747757b +9 3261 1632088271642519058 src/CMakeFiles/cxxabi_shared.dir/cxa_demangle.cpp.o c44584d3672133b3 +3269 3349 1632088271734519309 
lib/libc++abi.so.1.0 2b3ee210a7fa6eb2 +3350 3368 1632088271734519309 lib/libc++abi.so.1 7c94b09a2527b963 +3350 3368 1632088271734519309 lib/libc++abi.so 7c94b09a2527b963 Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/abort_message.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/abort_message.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_aux_runtime.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_aux_runtime.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_default_handlers.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_default_handlers.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_demangle.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_demangle.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_exception.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_exception.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_exception_storage.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_exception_storage.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_guard.cpp.o and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_guard.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_handlers.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_handlers.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_personality.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_personality.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_thread_atexit.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_thread_atexit.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_vector.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_vector.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_virtual.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/cxa_virtual.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/fallback_malloc.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/fallback_malloc.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/private_typeinfo.cpp.o and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/private_typeinfo.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_exception.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_exception.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_new_delete.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_new_delete.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_stdexcept.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_stdexcept.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_typeinfo.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_shared.dir/stdlib_typeinfo.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/abort_message.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/abort_message.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_aux_runtime.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_aux_runtime.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_default_handlers.cpp.o and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_default_handlers.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_demangle.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_demangle.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_exception.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_exception.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_exception_storage.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_exception_storage.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_guard.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_guard.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_handlers.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_handlers.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_personality.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_personality.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_thread_atexit.cpp.o and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_thread_atexit.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_vector.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_vector.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_virtual.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/cxa_virtual.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/fallback_malloc.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/fallback_malloc.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/private_typeinfo.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/private_typeinfo.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_exception.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_exception.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_new_delete.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_new_delete.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_stdexcept.cpp.o and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_stdexcept.cpp.o differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_typeinfo.cpp.o and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/libcxxabi/build/src/CMakeFiles/cxxabi_static.dir/stdlib_typeinfo.cpp.o differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/cmake_install.cmake llvm-toolchain-13-13.0.0/libcxxabi/build/src/cmake_install.cmake --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/src/cmake_install.cmake 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/src/cmake_install.cmake 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,93 @@ +# Install script for directory: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/src + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/lib/llvm-14") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "RelWithDebInfo") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "1") +endif() + +# Is this installation the result of a crosscompile? 
+if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +# Set default install directory permissions. +if(NOT DEFINED CMAKE_OBJDUMP) + set(CMAKE_OBJDUMP "/usr/bin/objdump") +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xcxxabix" OR NOT CMAKE_INSTALL_COMPONENT) + foreach(file + "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so.1.0" + "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so.1" + ) + if(EXISTS "${file}" AND + NOT IS_SYMLINK "${file}") + file(RPATH_CHECK + FILE "${file}" + RPATH "") + endif() + endforeach() + file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/lib" TYPE SHARED_LIBRARY FILES + "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/lib/libc++abi.so.1.0" + "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/lib/libc++abi.so.1" + ) + foreach(file + "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so.1.0" + "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so.1" + ) + if(EXISTS "${file}" AND + NOT IS_SYMLINK "${file}") + if(CMAKE_INSTALL_DO_STRIP) + execute_process(COMMAND "/usr/bin/strip" "${file}") + endif() + endif() + endforeach() +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xcxxabix" OR NOT CMAKE_INSTALL_COMPONENT) + if(EXISTS "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so" AND + NOT IS_SYMLINK "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so") + file(RPATH_CHECK + FILE "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so" + RPATH "") + endif() + file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/lib" TYPE SHARED_LIBRARY FILES "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/lib/libc++abi.so") + if(EXISTS "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so" AND + NOT IS_SYMLINK "$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so") + if(CMAKE_INSTALL_DO_STRIP) + execute_process(COMMAND "/usr/bin/strip" 
"$ENV{DESTDIR}${CMAKE_INSTALL_PREFIX}/lib/libc++abi.so") + endif() + endif() +endif() + +if("x${CMAKE_INSTALL_COMPONENT}x" STREQUAL "xcxxabix" OR NOT CMAKE_INSTALL_COMPONENT) + file(INSTALL DESTINATION "${CMAKE_INSTALL_PREFIX}/lib" TYPE STATIC_LIBRARY FILES "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/lib/libc++abi.a") +endif() + diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/test/cmake_install.cmake llvm-toolchain-13-13.0.0/libcxxabi/build/test/cmake_install.cmake --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/test/cmake_install.cmake 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/test/cmake_install.cmake 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,44 @@ +# Install script for directory: /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/test + +# Set the install prefix +if(NOT DEFINED CMAKE_INSTALL_PREFIX) + set(CMAKE_INSTALL_PREFIX "/usr/lib/llvm-14") +endif() +string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") + +# Set the install configuration name. +if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) + if(BUILD_TYPE) + string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" + CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") + else() + set(CMAKE_INSTALL_CONFIG_NAME "RelWithDebInfo") + endif() + message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") +endif() + +# Set the component getting installed. +if(NOT CMAKE_INSTALL_COMPONENT) + if(COMPONENT) + message(STATUS "Install component: \"${COMPONENT}\"") + set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") + else() + set(CMAKE_INSTALL_COMPONENT) + endif() +endif() + +# Install shared libraries without execute permission? +if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) + set(CMAKE_INSTALL_SO_NO_EXE "1") +endif() + +# Is this installation the result of a crosscompile? 
+if(NOT DEFINED CMAKE_CROSSCOMPILING) + set(CMAKE_CROSSCOMPILING "FALSE") +endif() + +# Set default install directory permissions. +if(NOT DEFINED CMAKE_OBJDUMP) + set(CMAKE_OBJDUMP "/usr/bin/objdump") +endif() + diff -Nru llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/test/lit.site.cfg llvm-toolchain-13-13.0.0/libcxxabi/build/test/lit.site.cfg --- llvm-toolchain-13-13.0.0~+rc4/libcxxabi/build/test/lit.site.cfg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/libcxxabi/build/test/lit.site.cfg 2021-09-19 21:51:08.000000000 +0000 @@ -0,0 +1,65 @@ +## Autogenerated by libcxxabi configuration. +# Do not edit! + +# Lit parameters serialized here for llvm-lit to pick them up +config.enable_experimental = False +config.target_triple = "x86_64-pc-linux-gnu" + + +import os +import site + +config.cxx_under_test = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/bin/clang++" +config.project_obj_root = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build" +config.libcxxabi_hdr_root = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build" +config.libcxxabi_src_root = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi" +config.libcxxabi_obj_root = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build" +config.abi_library_root = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxxabi/build/lib" +config.libcxx_src_root = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx" +config.cxx_headers = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/include/c++/v1" +config.libunwind_headers = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libunwind/include" +config.cxx_library_root = 
"/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx/build/lib" +config.llvm_unwinder = True +config.builtins_library = "/home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/build-llvm/tools/clang/stage2-bins/lib/clang/14.0.0/lib/linux/libclang_rt.builtins-x86_64.a" +config.enable_threads = True +config.target_info = "libcxx.test.target_info.LocalTI" +config.executor = "/usr/bin/python3.9 /home/sylvestre/dev/debian/pkg-llvm/llvm-toolchain/branches/llvm-project/libcxx/utils/run.py" +config.libcxxabi_shared = True +config.enable_shared = True +config.host_triple = "x86_64-pc-linux-gnu" +config.sysroot = "" +config.gcc_toolchain = "" +config.cxx_ext_threads = False + +config.pstl_src_root = "" if False else None +config.pstl_obj_root = "" if False else None + +# Code signing +config.llvm_codesign_identity = "" + +site.addsitedir(os.path.join(config.libcxxabi_src_root, 'test')) +site.addsitedir(os.path.join(config.libcxx_src_root, 'utils')) + +# name: The name of this test suite. +config.name = 'libc++abi' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.cpp', '.s'] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.join(config.libcxxabi_src_root, 'test') + +# Allow expanding substitutions that are based on other substitutions +config.recursiveExpansionLimit = 10 + +# Infer the test_exec_root from the build directory. 
+config.test_exec_root = os.path.join(config.libcxxabi_obj_root, 'test') + +import libcxx.test.format +config.test_format = libcxx.test.format.CxxStandardLibraryTest() + +lit_config.note('Using configuration variant: libcxxabi') +import libcxxabi.test.config +configuration = libcxxabi.test.config.Configuration(lit_config, config) +configuration.configure() +configuration.print_config_info() diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/CMakeLists.txt.orig llvm-toolchain-13-13.0.0/llvm/CMakeLists.txt.orig --- llvm-toolchain-13-13.0.0~+rc4/llvm/CMakeLists.txt.orig 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/CMakeLists.txt.orig 2021-09-21 08:43:12.000000000 +0000 @@ -0,0 +1,1194 @@ +# See docs/CMake.html for instructions about how to build LLVM with CMake. + +cmake_minimum_required(VERSION 3.13.4) + +# CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()` +# New in CMake 3.20. https://cmake.org/cmake/help/latest/policy/CMP0116.html +if(POLICY CMP0116) + cmake_policy(SET CMP0116 OLD) +endif() + +set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON) + +if(NOT DEFINED LLVM_VERSION_MAJOR) + set(LLVM_VERSION_MAJOR 14) +endif() +if(NOT DEFINED LLVM_VERSION_MINOR) + set(LLVM_VERSION_MINOR 0) +endif() +if(NOT DEFINED LLVM_VERSION_PATCH) + set(LLVM_VERSION_PATCH 0) +endif() +if(NOT DEFINED LLVM_VERSION_SUFFIX) + set(LLVM_VERSION_SUFFIX git) +endif() + +if (NOT PACKAGE_VERSION) + set(PACKAGE_VERSION + "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") +endif() + +if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (CMAKE_GENERATOR_TOOLSET STREQUAL "")) + message(WARNING "Visual Studio generators use the x86 host compiler by " + "default, even for 64-bit targets. This can result in linker " + "instability and out of memory errors. 
To use the 64-bit " + "host compiler, pass -Thost=x64 on the CMake command line.") +endif() + +if (CMAKE_GENERATOR STREQUAL "Xcode" AND NOT CMAKE_OSX_ARCHITECTURES) + # Some CMake features like object libraries get confused if you don't + # explicitly specify an architecture setting with the Xcode generator. + set(CMAKE_OSX_ARCHITECTURES "x86_64") +endif() + +project(LLVM + VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH} + LANGUAGES C CXX ASM) + +set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to") +set(CMAKE_CXX_STANDARD_REQUIRED YES) +if (CYGWIN) + # Cygwin is a bit stricter and lack things like 'strdup', 'stricmp', etc in + # c++xx mode. + set(CMAKE_CXX_EXTENSIONS YES) +else() + set(CMAKE_CXX_EXTENSIONS NO) +endif() + +if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "No build type selected, default to Debug") + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Build type (default Debug)" FORCE) +endif() + +# Side-by-side subprojects layout: automatically set the +# LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS +# This allows an easy way of setting up a build directory for llvm and another +# one for llvm+clang+... using the same sources. 
+set(LLVM_ALL_PROJECTS "clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;mlir;openmp;parallel-libs;polly;pstl") +# The flang project is not yet part of "all" projects (see C++ requirements) +set(LLVM_EXTRA_PROJECTS "flang") +# List of all known projects in the mono repo +set(LLVM_KNOWN_PROJECTS "${LLVM_ALL_PROJECTS};${LLVM_EXTRA_PROJECTS}") +set(LLVM_ENABLE_PROJECTS "" CACHE STRING + "Semicolon-separated list of projects to build (${LLVM_KNOWN_PROJECTS}), or \"all\".") + +option(LLVM_CHECK_ENABLED_PROJECTS "Whether to check LLVM_ENABLE_PROJECTS" ON) +if (LLVM_CHECK_ENABLED_PROJECTS) + foreach(proj ${LLVM_ENABLE_PROJECTS}) + if (NOT proj STREQUAL "all" AND NOT proj STREQUAL "llvm" AND NOT "${proj}" IN_LIST LLVM_KNOWN_PROJECTS) + MESSAGE(FATAL_ERROR "${proj} isn't a known project: ${LLVM_KNOWN_PROJECTS}") + endif() + endforeach() +endif() + +if( LLVM_ENABLE_PROJECTS STREQUAL "all" ) + set( LLVM_ENABLE_PROJECTS ${LLVM_ALL_PROJECTS}) +endif() + +if ("flang" IN_LIST LLVM_ENABLE_PROJECTS) + if (NOT "mlir" IN_LIST LLVM_ENABLE_PROJECTS) + message(STATUS "Enabling MLIR as a dependency to flang") + list(APPEND LLVM_ENABLE_PROJECTS "mlir") + endif() + + if (NOT "clang" IN_LIST LLVM_ENABLE_PROJECTS) + message(FATAL_ERROR "Clang is not enabled, but is required for the Flang driver") + endif() +endif() + +# LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the +# `LLVM_ENABLE_PROJECTS` CMake cache variable. This exists for +# several reasons: +# +# * As an indicator that the `LLVM_ENABLE_PROJECTS` list is now the single +# source of truth for which projects to build. This means we will ignore user +# supplied `LLVM_TOOL__BUILD` CMake cache variables and overwrite +# them. +# +# * The case where the user previously had `LLVM_ENABLE_PROJECTS` set to a +# non-empty list but now the user wishes to disable building all other projects +# by setting `LLVM_ENABLE_PROJECTS` to an empty string. 
In that case we still +# need to set the `LLVM_TOOL_${upper_proj}_BUILD` variables so that we disable +# building all the projects that were previously enabled. +set(LLVM_ENABLE_PROJECTS_USED OFF CACHE BOOL "") +mark_as_advanced(LLVM_ENABLE_PROJECTS_USED) + +if (LLVM_ENABLE_PROJECTS_USED OR NOT LLVM_ENABLE_PROJECTS STREQUAL "") + set(LLVM_ENABLE_PROJECTS_USED ON CACHE BOOL "" FORCE) + foreach(proj ${LLVM_KNOWN_PROJECTS} ${LLVM_EXTERNAL_PROJECTS}) + string(TOUPPER "${proj}" upper_proj) + string(REGEX REPLACE "-" "_" upper_proj ${upper_proj}) + if ("${proj}" IN_LIST LLVM_ENABLE_PROJECTS) + message(STATUS "${proj} project is enabled") + set(SHOULD_ENABLE_PROJECT TRUE) + set(PROJ_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${proj}") + if(NOT EXISTS "${PROJ_DIR}" OR NOT IS_DIRECTORY "${PROJ_DIR}") + message(FATAL_ERROR "LLVM_ENABLE_PROJECTS requests ${proj} but directory not found: ${PROJ_DIR}") + endif() + if( LLVM_EXTERNAL_${upper_proj}_SOURCE_DIR STREQUAL "" ) + set(LLVM_EXTERNAL_${upper_proj}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${proj}" CACHE PATH "" FORCE) + else() + set(LLVM_EXTERNAL_${upper_proj}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${proj}" CACHE PATH "") + endif() + elseif ("${proj}" IN_LIST LLVM_EXTERNAL_PROJECTS) + message(STATUS "${proj} project is enabled") + set(SHOULD_ENABLE_PROJECT TRUE) + else() + message(STATUS "${proj} project is disabled") + set(SHOULD_ENABLE_PROJECT FALSE) + endif() + # Force `LLVM_TOOL_${upper_proj}_BUILD` variables to have values that + # corresponds with `LLVM_ENABLE_PROJECTS`. This prevents the user setting + # `LLVM_TOOL_${upper_proj}_BUILD` variables externally. At some point + # we should deprecate allowing users to set these variables by turning them + # into normal CMake variables rather than cache variables. 
+ set(LLVM_TOOL_${upper_proj}_BUILD + ${SHOULD_ENABLE_PROJECT} + CACHE + BOOL "Whether to build ${upper_proj} as part of LLVM" FORCE + ) + endforeach() +endif() +unset(SHOULD_ENABLE_PROJECT) + +# Build llvm with ccache if the package is present +set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build") +if(LLVM_CCACHE_BUILD) + find_program(CCACHE_PROGRAM ccache) + if(CCACHE_PROGRAM) + set(LLVM_CCACHE_MAXSIZE "" CACHE STRING "Size of ccache") + set(LLVM_CCACHE_DIR "" CACHE STRING "Directory to keep ccached data") + set(LLVM_CCACHE_PARAMS "CCACHE_CPP2=yes CCACHE_HASHDIR=yes" + CACHE STRING "Parameters to pass through to ccache") + + set(CCACHE_PROGRAM "${LLVM_CCACHE_PARAMS} ${CCACHE_PROGRAM}") + if (LLVM_CCACHE_MAXSIZE) + set(CCACHE_PROGRAM "CCACHE_MAXSIZE=${LLVM_CCACHE_MAXSIZE} ${CCACHE_PROGRAM}") + endif() + if (LLVM_CCACHE_DIR) + set(CCACHE_PROGRAM "CCACHE_DIR=${LLVM_CCACHE_DIR} ${CCACHE_PROGRAM}") + endif() + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PROGRAM}) + else() + message(FATAL_ERROR "Unable to find the program ccache. Set LLVM_CCACHE_BUILD to OFF") + endif() +endif() + +option(LLVM_DEPENDENCY_DEBUGGING "Dependency debugging mode to verify correctly expressed library dependencies (Darwin only)" OFF) + +# Some features of the LLVM build may be disallowed when dependency debugging is +# enabled. In particular you cannot use ccache because we want to force compile +# operations to always happen. 
+if(LLVM_DEPENDENCY_DEBUGGING) + if(NOT CMAKE_HOST_APPLE) + message(FATAL_ERROR "Dependency debugging is only currently supported on Darwin hosts.") + endif() + if(LLVM_CCACHE_BUILD) + message(FATAL_ERROR "Cannot enable dependency debugging while using ccache.") + endif() +endif() + +option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF) +option(LLVM_ENABLE_GISEL_COV "Enable collection of GlobalISel rule coverage" OFF) +if(LLVM_ENABLE_GISEL_COV) + set(LLVM_GISEL_COV_PREFIX "${CMAKE_BINARY_DIR}/gisel-coverage-" CACHE STRING "Provide a filename prefix to collect the GlobalISel rule coverage") +endif() + +# Add path for custom modules +set(CMAKE_MODULE_PATH + ${CMAKE_MODULE_PATH} + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" + ) + +# Generate a CompilationDatabase (compile_commands.json file) for our build, +# for use by clang_complete, YouCompleteMe, etc. +set(CMAKE_EXPORT_COMPILE_COMMANDS 1) + +option(LLVM_INSTALL_BINUTILS_SYMLINKS + "Install symlinks from the binutils tool names to the corresponding LLVM tools." OFF) + +option(LLVM_INSTALL_CCTOOLS_SYMLINKS + "Install symlinks from the cctools tool names to the corresponding LLVM tools." OFF) + +option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF) + +option(LLVM_INSTALL_TOOLCHAIN_ONLY "Only include toolchain files in the 'install' target." OFF) + +# Unfortunatly Clang is too eager to search directories for module maps, which can cause the +# installed version of the maps to be found when building LLVM from source. Therefore we turn off +# the installation by default. See llvm.org/PR31905. +option(LLVM_INSTALL_MODULEMAPS "Install the modulemap files in the 'install' target." OFF) + +option(LLVM_USE_FOLDERS "Enable solution folders in Visual Studio. Disable for Express versions." 
ON) +if ( LLVM_USE_FOLDERS ) + set_property(GLOBAL PROPERTY USE_FOLDERS ON) +endif() + +include(VersionFromVCS) + +option(LLVM_APPEND_VC_REV + "Embed the version control system revision in LLVM" ON) + +set(PACKAGE_NAME LLVM) +set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") +set(PACKAGE_BUGREPORT "https://bugs.llvm.org/") + +set(BUG_REPORT_URL "${PACKAGE_BUGREPORT}" CACHE STRING + "Default URL where bug reports are to be submitted.") + +# Configure CPack. +set(CPACK_PACKAGE_INSTALL_DIRECTORY "LLVM") +set(CPACK_PACKAGE_VENDOR "LLVM") +set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR}) +set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR}) +set(CPACK_PACKAGE_VERSION_PATCH ${LLVM_VERSION_PATCH}) +set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION}) +set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.TXT") +set(CPACK_NSIS_COMPRESSOR "/SOLID lzma \r\n SetCompressorDictSize 32") +if(WIN32 AND NOT UNIX) + set(CPACK_PACKAGE_INSTALL_REGISTRY_KEY "LLVM") + set(CPACK_PACKAGE_ICON "${CMAKE_CURRENT_SOURCE_DIR}\\\\cmake\\\\nsis_logo.bmp") + set(CPACK_NSIS_MUI_ICON "${CMAKE_CURRENT_SOURCE_DIR}\\\\cmake\\\\nsis_icon.ico") + set(CPACK_NSIS_MUI_UNIICON "${CMAKE_CURRENT_SOURCE_DIR}\\\\cmake\\\\nsis_icon.ico") + set(CPACK_NSIS_MODIFY_PATH "ON") + set(CPACK_NSIS_ENABLE_UNINSTALL_BEFORE_INSTALL "ON") + if( CMAKE_CL_64 ) + set(CPACK_NSIS_INSTALL_ROOT "$PROGRAMFILES64") + endif() +endif() +include(CPack) + +# Sanity check our source directory to make sure that we are not trying to +# generate an in-source build (unless on MSVC_IDE, where it is ok), and to make +# sure that we don't have any stray generated files lying around in the tree +# (which would end up getting picked up by header search, instead of the correct +# versions). +if( CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR AND NOT MSVC_IDE ) + message(FATAL_ERROR "In-source builds are not allowed. 
+Please create a directory and run cmake from there, passing the path +to this source directory as the last argument. +This process created the file `CMakeCache.txt' and the directory `CMakeFiles'. +Please delete them.") +endif() + +string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) + +if (CMAKE_BUILD_TYPE AND + NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$") + message(FATAL_ERROR "Invalid value for CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") +endif() + +set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) + +set(LLVM_TOOLS_INSTALL_DIR "bin" CACHE STRING "Path for binary subdirectory (defaults to 'bin')") +mark_as_advanced(LLVM_TOOLS_INSTALL_DIR) + +set(LLVM_UTILS_INSTALL_DIR "${LLVM_TOOLS_INSTALL_DIR}" CACHE STRING + "Path to install LLVM utilities (enabled by LLVM_INSTALL_UTILS=ON) (defaults to LLVM_TOOLS_INSTALL_DIR)") +mark_as_advanced(LLVM_UTILS_INSTALL_DIR) + +# They are used as destination of target generators. +set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin) +set(LLVM_LIBRARY_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/lib${LLVM_LIBDIR_SUFFIX}) +if(WIN32 OR CYGWIN) + # DLL platform -- put DLLs into bin. + set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) +else() + set(LLVM_SHLIB_OUTPUT_INTDIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) +endif() + +# Each of them corresponds to llvm-config's. 
+set(LLVM_TOOLS_BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) # --bindir +set(LLVM_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) # --libdir +set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) # --src-root +set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include ) # --includedir +set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) # --prefix + +# Note: LLVM_CMAKE_DIR does not include generated files +set(LLVM_CMAKE_DIR ${LLVM_MAIN_SRC_DIR}/cmake/modules) +set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) +set(LLVM_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include) + +# List of all targets to be built by default: +set(LLVM_ALL_TARGETS + AArch64 + AMDGPU + ARM + AVR + BPF + Hexagon + Lanai + Mips + MSP430 + NVPTX + PowerPC + RISCV + Sparc + SystemZ + WebAssembly + X86 + XCore + ) + +# List of targets with JIT support: +set(LLVM_TARGETS_WITH_JIT X86 PowerPC AArch64 ARM Mips SystemZ) + +set(LLVM_TARGETS_TO_BUILD "all" + CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") + +set(LLVM_EXPERIMENTAL_TARGETS_TO_BUILD "" + CACHE STRING "Semicolon-separated list of experimental targets to build.") + +option(BUILD_SHARED_LIBS + "Build all libraries as shared libraries instead of static" OFF) + +option(LLVM_ENABLE_BACKTRACES "Enable embedding backtraces on crash." ON) +if(LLVM_ENABLE_BACKTRACES) + set(ENABLE_BACKTRACES 1) +endif() + +option(LLVM_ENABLE_UNWIND_TABLES "Emit unwind tables for the libraries" ON) + +option(LLVM_ENABLE_CRASH_OVERRIDES "Enable crash overrides." ON) +if(LLVM_ENABLE_CRASH_OVERRIDES) + set(ENABLE_CRASH_OVERRIDES 1) +endif() + +option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." 
OFF) + +option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF) +set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so") +set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h") + +set(LLVM_TARGET_ARCH "host" + CACHE STRING "Set target to use for LLVM JIT or use \"host\" for automatic detection.") + +option(LLVM_ENABLE_TERMINFO "Use terminfo database if available." ON) + +set(LLVM_ENABLE_LIBXML2 "ON" CACHE STRING "Use libxml2 if available. Can be ON, OFF, or FORCE_ON") + +option(LLVM_ENABLE_LIBEDIT "Use libedit if available." ON) + +option(LLVM_ENABLE_LIBPFM "Use libpfm for performance counters if available." ON) + +# On z/OS, threads cannot be used because TLS is not supported. +if (CMAKE_SYSTEM_NAME MATCHES "OS390") + option(LLVM_ENABLE_THREADS "Use threads if available." OFF) +else() + option(LLVM_ENABLE_THREADS "Use threads if available." ON) +endif() + +set(LLVM_ENABLE_ZLIB "ON" CACHE STRING "Use zlib for compression/decompression if available. Can be ON, OFF, or FORCE_ON") + +set(LLVM_Z3_INSTALL_DIR "" CACHE STRING "Install directory of the Z3 solver.") + +option(LLVM_ENABLE_Z3_SOLVER + "Enable Support for the Z3 constraint solver in LLVM." 
+ ${LLVM_ENABLE_Z3_SOLVER_DEFAULT} +) + +if (LLVM_ENABLE_Z3_SOLVER) + find_package(Z3 4.7.1) + + if (LLVM_Z3_INSTALL_DIR) + if (NOT Z3_FOUND) + message(FATAL_ERROR "Z3 >= 4.7.1 has not been found in LLVM_Z3_INSTALL_DIR: ${LLVM_Z3_INSTALL_DIR}.") + endif() + endif() + + if (NOT Z3_FOUND) + message(FATAL_ERROR "LLVM_ENABLE_Z3_SOLVER cannot be enabled when Z3 is not available.") + endif() + + set(LLVM_WITH_Z3 1) +endif() + +set(LLVM_ENABLE_Z3_SOLVER_DEFAULT "${Z3_FOUND}") + + +if( LLVM_TARGETS_TO_BUILD STREQUAL "all" ) + set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} ) +endif() + +set(LLVM_TARGETS_TO_BUILD + ${LLVM_TARGETS_TO_BUILD} + ${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD}) +list(REMOVE_DUPLICATES LLVM_TARGETS_TO_BUILD) + +option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON) +option(LLVM_ENABLE_MODULES "Compile with C++ modules enabled." OFF) +if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + option(LLVM_ENABLE_MODULE_DEBUGGING "Compile with -gmodules." ON) +else() + option(LLVM_ENABLE_MODULE_DEBUGGING "Compile with -gmodules." OFF) +endif() +option(LLVM_ENABLE_LOCAL_SUBMODULE_VISIBILITY "Compile with -fmodules-local-submodule-visibility." ON) +option(LLVM_ENABLE_LIBCXX "Use libc++ if available." OFF) +option(LLVM_STATIC_LINK_CXX_STDLIB "Statically link the standard library." OFF) +option(LLVM_ENABLE_LLD "Use lld as C and C++ linker." OFF) +option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) +option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." 
OFF) + +option(LLVM_ENABLE_DUMP "Enable dump functions even when assertions are disabled" OFF) + +if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" ) + option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF) +else() + option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON) +endif() + +option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF) + +# While adding scalable vector support to LLVM, we temporarily want to +# allow an implicit conversion of TypeSize to uint64_t, and to allow +# code to get the fixed number of elements from a possibly scalable vector. +# This CMake flag enables a more strict mode where it asserts that the type +# is not a scalable vector type. +# +# Enabling this flag makes it easier to find cases where the compiler makes +# assumptions on the size being 'fixed size', when building tests for +# SVE/SVE2 or other scalable vector architectures. +option(LLVM_ENABLE_STRICT_FIXED_SIZE_VECTORS + "Enable assertions that type is not scalable in implicit conversion from TypeSize to uint64_t and calls to getNumElements" OFF) + +set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING + "Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.") + +option(LLVM_FORCE_USE_OLD_TOOLCHAIN + "Set to ON to force using an old, unsupported host toolchain." OFF) + +set(LLVM_LOCAL_RPATH "" CACHE FILEPATH + "If set, an absolute path added as rpath on binaries that do not already contain an executable-relative rpath.") + +option(LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN + "Set to ON to only warn when using a toolchain which is about to be deprecated, instead of emitting an error." 
OFF) + +option(LLVM_USE_INTEL_JITEVENTS + "Use Intel JIT API to inform Intel(R) VTune(TM) Amplifier XE 2011 about JIT code" + OFF) + +if( LLVM_USE_INTEL_JITEVENTS ) + # Verify we are on a supported platform + if( NOT CMAKE_SYSTEM_NAME MATCHES "Windows" AND NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) + message(FATAL_ERROR + "Intel JIT API support is available on Linux and Windows only.") + endif() +endif( LLVM_USE_INTEL_JITEVENTS ) + +option(LLVM_USE_OPROFILE + "Use opagent JIT interface to inform OProfile about JIT code" OFF) + +option(LLVM_EXTERNALIZE_DEBUGINFO + "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF) + +set(LLVM_CODESIGNING_IDENTITY "" CACHE STRING + "Sign executables and dylibs with the given identity or skip if empty (Darwin Only)") + +# If enabled, verify we are on a platform that supports oprofile. +if( LLVM_USE_OPROFILE ) + if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) + message(FATAL_ERROR "OProfile support is available on Linux only.") + endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) +endif( LLVM_USE_OPROFILE ) + +option(LLVM_USE_PERF + "Use perf JIT interface to inform perf about JIT code" OFF) + +# If enabled, verify we are on a platform that supports perf. +if( LLVM_USE_PERF ) + if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) + message(FATAL_ERROR "perf support is available on Linux only.") + endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) +endif( LLVM_USE_PERF ) + +set(LLVM_USE_SANITIZER "" CACHE STRING + "Define the sanitizer used to build binaries and tests.") +option(LLVM_OPTIMIZE_SANITIZED_BUILDS "Pass -O1 on debug sanitizer builds" ON) +set(LLVM_UBSAN_FLAGS + "-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all" + CACHE STRING + "Compile flags set to enable UBSan. 
Only used if LLVM_USE_SANITIZER contains 'Undefined'.") +set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH + "Path to fuzzing library for linking with fuzz targets") + +option(LLVM_USE_SPLIT_DWARF + "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF) + +# Define an option controlling whether we should build for 32-bit on 64-bit +# platforms, where supported. +if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX")) + # TODO: support other platforms and toolchains. + option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF) +endif() + +# Define the default arguments to use with 'lit', and an option for the user to +# override. +set(LIT_ARGS_DEFAULT "-sv") +if (MSVC_IDE OR XCODE) + set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") +endif() +set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") + +# On Win32 hosts, provide an option to specify the path to the GnuWin32 tools. +if( WIN32 AND NOT CYGWIN ) + set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") +endif() + +set(LLVM_INTEGRATED_CRT_ALLOC "" CACHE PATH "Replace the Windows CRT allocator with any of {rpmalloc|mimalloc|snmalloc}. Only works with /MT enabled.") +if(LLVM_INTEGRATED_CRT_ALLOC) + if(NOT WIN32) + message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC is only supported on Windows.") + endif() + if(LLVM_USE_SANITIZER) + message(FATAL_ERROR "LLVM_INTEGRATED_CRT_ALLOC cannot be used along with LLVM_USE_SANITIZER!") + endif() + if(CMAKE_BUILD_TYPE AND uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") + message(FATAL_ERROR "The Debug target isn't supported along with LLVM_INTEGRATED_CRT_ALLOC!") + endif() +endif() + +# Define options to control the inclusion and default build behavior for +# components which may not strictly be necessary (tools, examples, and tests). +# +# This is primarily to support building smaller or faster project files. 
+option(LLVM_INCLUDE_TOOLS "Generate build targets for the LLVM tools." ON) +option(LLVM_BUILD_TOOLS + "Build the LLVM tools. If OFF, just generate build targets." ON) + +option(LLVM_INCLUDE_UTILS "Generate build targets for the LLVM utils." ON) +option(LLVM_BUILD_UTILS + "Build LLVM utility binaries. If OFF, just generate build targets." ON) + +option(LLVM_INCLUDE_RUNTIMES "Generate build targets for the LLVM runtimes." ON) +option(LLVM_BUILD_RUNTIMES + "Build the LLVM runtimes. If OFF, just generate build targets." ON) + +option(LLVM_BUILD_RUNTIME + "Build the LLVM runtime libraries." ON) +option(LLVM_BUILD_EXAMPLES + "Build the LLVM example programs. If OFF, just generate build targets." OFF) +option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON) + +if(LLVM_BUILD_EXAMPLES) + add_definitions(-DBUILD_EXAMPLES) +endif(LLVM_BUILD_EXAMPLES) + +option(LLVM_BUILD_TESTS + "Build LLVM unit tests. If OFF, just generate build targets." OFF) +option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON) +option(LLVM_INCLUDE_GO_TESTS "Include the Go bindings tests in test build targets." ON) + +option(LLVM_BUILD_BENCHMARKS "Add LLVM benchmark targets to the list of default +targets. If OFF, benchmarks still could be built using Benchmarks target." OFF) +option(LLVM_INCLUDE_BENCHMARKS "Generate benchmark targets. If OFF, benchmarks can't be built." ON) + +option (LLVM_BUILD_DOCS "Build the llvm documentation." OFF) +option (LLVM_INCLUDE_DOCS "Generate build targets for llvm documentation." ON) +option (LLVM_ENABLE_DOXYGEN "Use doxygen to generate llvm API documentation." OFF) +option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF) +option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON) +option (LLVM_ENABLE_BINDINGS "Build bindings." 
ON) + +set(LLVM_INSTALL_DOXYGEN_HTML_DIR "share/doc/llvm/doxygen-html" + CACHE STRING "Doxygen-generated HTML documentation install directory") +set(LLVM_INSTALL_OCAMLDOC_HTML_DIR "share/doc/llvm/ocaml-html" + CACHE STRING "OCamldoc-generated HTML documentation install directory") + +option (LLVM_BUILD_EXTERNAL_COMPILER_RT + "Build compiler-rt as an external project." OFF) + +option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO + "Show target and host info when tools are invoked with --version." ON) + +# You can configure which libraries from LLVM you want to include in the +# shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited +# list of LLVM components. All component names handled by llvm-config are valid. +if(NOT DEFINED LLVM_DYLIB_COMPONENTS) + set(LLVM_DYLIB_COMPONENTS "all" CACHE STRING + "Semicolon-separated list of components to include in libLLVM, or \"all\".") +endif() + +if(MSVC) + option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON) + # Set this variable to OFF here so it can't be set with a command-line + # argument. + set (LLVM_LINK_LLVM_DYLIB OFF) + if (BUILD_SHARED_LIBS) + message(FATAL_ERROR "BUILD_SHARED_LIBS options is not supported on Windows.") + endif() +else() + option(LLVM_LINK_LLVM_DYLIB "Link tools against the libllvm dynamic library" OFF) + option(LLVM_BUILD_LLVM_C_DYLIB "Build libllvm-c re-export library (Darwin only)" OFF) + set(LLVM_BUILD_LLVM_DYLIB_default OFF) + if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB) + set(LLVM_BUILD_LLVM_DYLIB_default ON) + endif() + option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default}) +endif() + +if (LLVM_LINK_LLVM_DYLIB AND BUILD_SHARED_LIBS) + message(FATAL_ERROR "Cannot enable BUILD_SHARED_LIBS with LLVM_LINK_LLVM_DYLIB. 
We recommend disabling BUILD_SHARED_LIBS.") +endif() + +option(LLVM_OPTIMIZED_TABLEGEN "Force TableGen to be built with optimization" OFF) +if(CMAKE_CROSSCOMPILING OR (LLVM_OPTIMIZED_TABLEGEN AND (LLVM_ENABLE_ASSERTIONS OR CMAKE_CONFIGURATION_TYPES))) + set(LLVM_USE_HOST_TOOLS ON) +endif() + +if (MSVC_IDE) + option(LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION "Configure project to use Visual Studio native visualizers" TRUE) +endif() + +if (LLVM_BUILD_INSTRUMENTED OR LLVM_BUILD_INSTRUMENTED_COVERAGE OR + LLVM_ENABLE_IR_PGO) + if(NOT LLVM_PROFILE_MERGE_POOL_SIZE) + # A pool size of 1-2 is probably sufficient on a SSD. 3-4 should be fine + # for spining disks. Anything higher may only help on slower mediums. + set(LLVM_PROFILE_MERGE_POOL_SIZE "4") + endif() + if(NOT LLVM_PROFILE_FILE_PATTERN) + if(NOT LLVM_PROFILE_DATA_DIR) + file(TO_NATIVE_PATH "${LLVM_BINARY_DIR}/profiles" LLVM_PROFILE_DATA_DIR) + endif() + file(TO_NATIVE_PATH "${LLVM_PROFILE_DATA_DIR}/%${LLVM_PROFILE_MERGE_POOL_SIZE}m.profraw" LLVM_PROFILE_FILE_PATTERN) + endif() + if(NOT LLVM_CSPROFILE_FILE_PATTERN) + if(NOT LLVM_CSPROFILE_DATA_DIR) + file(TO_NATIVE_PATH "${LLVM_BINARY_DIR}/csprofiles" LLVM_CSPROFILE_DATA_DIR) + endif() + file(TO_NATIVE_PATH "${LLVM_CSPROFILE_DATA_DIR}/%${LLVM_PROFILE_MERGE_POOL_SIZE}m.profraw" LLVM_CSPROFILE_FILE_PATTERN) + endif() +endif() + +if (LLVM_BUILD_STATIC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") + # Remove shared library suffixes from use in find_library + foreach (shared_lib_suffix ${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_IMPORT_LIBRARY_SUFFIX}) + list(FIND CMAKE_FIND_LIBRARY_SUFFIXES ${shared_lib_suffix} shared_lib_suffix_idx) + if(NOT ${shared_lib_suffix_idx} EQUAL -1) + list(REMOVE_AT CMAKE_FIND_LIBRARY_SUFFIXES ${shared_lib_suffix_idx}) + endif() + endforeach() +endif() + +# Use libtool instead of ar if you are both on an Apple host, and targeting Apple. 
+if(CMAKE_HOST_APPLE AND APPLE) + include(UseLibtool) +endif() + +# Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. +set(LLVM_TARGET_TRIPLE_ENV CACHE STRING "The name of environment variable to override default target. Disabled by blank.") +mark_as_advanced(LLVM_TARGET_TRIPLE_ENV) + +if(CMAKE_SYSTEM_NAME MATCHES "Linux") + set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_default ON) +else() + set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_default OFF) +endif() +set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_default} CACHE BOOL + "Enable per-target runtimes directory") + +set(LLVM_PROFDATA_FILE "" CACHE FILEPATH + "Profiling data file to use when compiling in order to improve runtime performance.") + +# All options referred to from HandleLLVMOptions have to be specified +# BEFORE this include, otherwise options will not be correctly set on +# first cmake run +include(config-ix) + +# By default, we target the host, but this can be overridden at CMake +# invocation time. +set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}" CACHE STRING + "Default target for which LLVM will generate code." ) +set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}") +message(STATUS "LLVM host triple: ${LLVM_HOST_TRIPLE}") +message(STATUS "LLVM default target triple: ${LLVM_DEFAULT_TARGET_TRIPLE}") + +if(WIN32 OR CYGWIN) + if(BUILD_SHARED_LIBS OR LLVM_BUILD_LLVM_DYLIB) + set(LLVM_ENABLE_PLUGINS_default ON) + else() + set(LLVM_ENABLE_PLUGINS_default OFF) + endif() +else() + set(LLVM_ENABLE_PLUGINS_default ${LLVM_ENABLE_PIC}) +endif() +option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default}) + +set(LLVM_ENABLE_NEW_PASS_MANAGER TRUE CACHE BOOL + "Enable the new pass manager by default.") +if(NOT LLVM_ENABLE_NEW_PASS_MANAGER) + message(WARNING "Using the legacy pass manager for the optimization pipeline" + " is deprecated. 
The functionality will degrade over time and" + " be removed in a future release.") +endif() + +include(HandleLLVMOptions) + +find_package(Python3 ${LLVM_MINIMUM_PYTHON_VERSION} REQUIRED + COMPONENTS Interpreter) + +###### + +# Configure all of the various header file fragments LLVM uses which depend on +# configuration variables. +set(LLVM_ENUM_TARGETS "") +set(LLVM_ENUM_ASM_PRINTERS "") +set(LLVM_ENUM_ASM_PARSERS "") +set(LLVM_ENUM_DISASSEMBLERS "") +set(LLVM_ENUM_TARGETMCAS "") +foreach(t ${LLVM_TARGETS_TO_BUILD}) + set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} ) + + list(FIND LLVM_ALL_TARGETS ${t} idx) + list(FIND LLVM_EXPERIMENTAL_TARGETS_TO_BUILD ${t} idy) + # At this point, LLVMBUILDTOOL already checked all the targets passed in + # LLVM_TARGETS_TO_BUILD and LLVM_EXPERIMENTAL_TARGETS_TO_BUILD, so + # this test just makes sure that any experimental targets were passed via + # LLVM_EXPERIMENTAL_TARGETS_TO_BUILD, not LLVM_TARGETS_TO_BUILD. + if( idx LESS 0 AND idy LESS 0 ) + message(FATAL_ERROR "The target `${t}' is experimental and must be passed " + "via LLVM_EXPERIMENTAL_TARGETS_TO_BUILD.") + else() + set(LLVM_ENUM_TARGETS "${LLVM_ENUM_TARGETS}LLVM_TARGET(${t})\n") + endif() + + file(GLOB asmp_file "${td}/*AsmPrinter.cpp") + if( asmp_file ) + set(LLVM_ENUM_ASM_PRINTERS + "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n") + endif() + if( EXISTS ${td}/AsmParser/CMakeLists.txt ) + set(LLVM_ENUM_ASM_PARSERS + "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n") + endif() + if( EXISTS ${td}/Disassembler/CMakeLists.txt ) + set(LLVM_ENUM_DISASSEMBLERS + "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n") + endif() + if( EXISTS ${td}/MCA/CMakeLists.txt ) + set(LLVM_ENUM_TARGETMCAS + "${LLVM_ENUM_TARGETMCAS}LLVM_TARGETMCA(${t})\n") + endif() +endforeach(t) + +# Produce the target definition files, which provide a way for clients to easily +# include various classes of targets. 
+configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in + ${LLVM_INCLUDE_DIR}/llvm/Config/AsmPrinters.def + ) +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in + ${LLVM_INCLUDE_DIR}/llvm/Config/AsmParsers.def + ) +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in + ${LLVM_INCLUDE_DIR}/llvm/Config/Disassemblers.def + ) +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Targets.def.in + ${LLVM_INCLUDE_DIR}/llvm/Config/Targets.def + ) +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/TargetMCAs.def.in + ${LLVM_INCLUDE_DIR}/llvm/Config/TargetMCAs.def + ) + +# They are not referenced. See set_output_directory(). +set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/bin ) +set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) +set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) + +if(LLVM_INCLUDE_TESTS) + include(GetErrcMessages) + get_errc_messages(LLVM_LIT_ERRC_MESSAGES) +endif() + +# For up-to-date instructions for installing the Tensorflow dependency, refer to +# the bot setup script: https://github.com/google/ml-compiler-opt/blob/master/buildbot/buildbot_init.sh +# In this case, the latest C API library is available for download from +# https://www.tensorflow.org/install/lang_c. +# We will expose the conditional compilation variable, +# LLVM_HAVE_TF_API, through llvm-config.h, so that a user of the LLVM library may +# also leverage the dependency. +set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install") +if (TENSORFLOW_C_LIB_PATH) + find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib NO_DEFAULT_PATH REQUIRED) + # Currently, the protobuf headers are distributed with the pip package that corresponds to the version + # of the C API library. 
+ find_library(tensorflow_fx tensorflow_framework PATHS ${TENSORFLOW_C_LIB_PATH}/lib NO_DEFAULT_PATH REQUIRED) + set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available") + include_directories(${TENSORFLOW_C_LIB_PATH}/include) + if (NOT TF_PROTO_HEADERS) + message(STATUS "TF_PROTO_HEADERS not defined. Looking for tensorflow pip package.") + execute_process(COMMAND + ${Python3_EXECUTABLE} "-m" "pip" "show" "tensorflow" + OUTPUT_VARIABLE TF_PIP_OUT) + if ("${TF_PIP_OUT}" STREQUAL "") + message(FATAL ERROR "Tensorflow pip package is also required for 'development' mode (protobuf headers)") + endif() + string(REGEX MATCH "Location: ([^\n]*\n)" TF_PIP_LOC "${TF_PIP_OUT}") + string(REPLACE "Location: " "" TF_PIP ${TF_PIP_LOC}) + string(STRIP ${TF_PIP} TF_PIP) + set(TF_PROTO_HEADERS "${TF_PIP}/tensorflow/include") + endif() + message(STATUS "Using Tensorflow headers under: ${TF_PROTO_HEADERS}") + include_directories(${TF_PROTO_HEADERS}) + add_definitions("-DGOOGLE_PROTOBUF_NO_RTTI") + add_definitions("-D_GLIBCXX_USE_CXX11_ABI=0") +endif() + +# For up-to-date instructions for installing the Tensorflow dependency, refer to +# the bot setup script: https://github.com/google/ml-compiler-opt/blob/master/buildbot/buildbot_init.sh +# Specifically, assuming python3 is installed: +# python3 -m pip install --upgrade pip && python3 -m pip install --user tf_nightly==2.3.0.dev20200528 +# Then set TENSORFLOW_AOT_PATH to the package install - usually it's ~/.local/lib/python3.7/site-packages/tensorflow +# +set(TENSORFLOW_AOT_PATH "" CACHE PATH "Path to TensorFlow pip install dir") + +if (NOT TENSORFLOW_AOT_PATH STREQUAL "") + set(LLVM_HAVE_TF_AOT "ON" CACHE BOOL "Tensorflow AOT available") + set(TENSORFLOW_AOT_COMPILER + "${TENSORFLOW_AOT_PATH}/../../../../bin/saved_model_cli" + CACHE PATH "Path to the Tensorflow AOT compiler") + include_directories(${TENSORFLOW_AOT_PATH}/include) + add_subdirectory(${TENSORFLOW_AOT_PATH}/xla_aot_runtime_src + 
${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime) + install(TARGETS tf_xla_runtime EXPORT LLVMExports + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT tf_xla_runtime) + set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS tf_xla_runtime) +endif() + +# Configure the three LLVM configuration header files. +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake + ${LLVM_INCLUDE_DIR}/llvm/Config/config.h) +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake + ${LLVM_INCLUDE_DIR}/llvm/Config/llvm-config.h) +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/abi-breaking.h.cmake + ${LLVM_INCLUDE_DIR}/llvm/Config/abi-breaking.h) + +# Add target for generating source rpm package. +set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in + CACHE FILEPATH ".spec file to use for srpm generation") +set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec) +set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm") + +get_source_info(${CMAKE_CURRENT_SOURCE_DIR} revision repository) +string(LENGTH "${revision}" revision_length) +set(LLVM_RPM_SPEC_REVISION "${revision}") + +configure_file( + ${LLVM_SRPM_USER_BINARY_SPECFILE} + ${LLVM_SRPM_BINARY_SPECFILE} @ONLY) + +add_custom_target(srpm + COMMAND cpack -G TGZ --config CPackSourceConfig.cmake -B ${LLVM_SRPM_DIR}/SOURCES + COMMAND rpmbuild -bs --define '_topdir ${LLVM_SRPM_DIR}' ${LLVM_SRPM_BINARY_SPECFILE}) +set_target_properties(srpm PROPERTIES FOLDER "Misc") + +if(APPLE AND DARWIN_LTO_LIBRARY) + set(CMAKE_EXE_LINKER_FLAGS + "${CMAKE_EXE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}") + set(CMAKE_SHARED_LINKER_FLAGS + "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}") + set(CMAKE_MODULE_LINKER_FLAGS + "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}") +endif() + +# Build with _XOPEN_SOURCE on AIX, as stray macros in _ALL_SOURCE mode tend to +# break things. 
In this case we need to enable the large-file API as well. +if (UNIX AND ${CMAKE_SYSTEM_NAME} MATCHES "AIX") + add_definitions("-D_XOPEN_SOURCE=700") + add_definitions("-D_LARGE_FILE_API") + + # CMake versions less than 3.16 set default linker flags to include -brtl, as + # well as setting -G when building libraries, so clear them out. Note we only + # try to clear the form that CMake will set as part of its initial + # configuration, it is still possible the user may force it as part of a + # compound option. + if(CMAKE_VERSION VERSION_LESS 3.16) + string(REGEX REPLACE "(^|[ \t]+)-Wl,-brtl([ \t]+|$)" " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)-Wl,-brtl([ \t]+|$)" " " CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)-Wl,-brtl([ \t]+|$)" " " CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)(-Wl,)?-G([ \t]+|$)" " " CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS + "${CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)(-Wl,)?-G([ \t]+|$)" " " CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS + "${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)(-Wl,)?-G([ \t]+|$)" " " CMAKE_SHARED_LIBRARY_CREATE_ASM_FLAGS + "${CMAKE_SHARED_LIBRARY_CREATE_ASM_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)-Wl,-G," " -Wl," CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS + "${CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)-Wl,-G," " -Wl," CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS + "${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS}") + string(REGEX REPLACE "(^|[ \t]+)-Wl,-G," " -Wl," CMAKE_SHARED_LIBRARY_CREATE_ASM_FLAGS + "${CMAKE_SHARED_LIBRARY_CREATE_ASM_FLAGS}") + endif() + + # Modules should be built with -shared -Wl,-G, so we can use runtime linking + # with plugins. + string(APPEND CMAKE_MODULE_LINKER_FLAGS " -shared -Wl,-G") + + # Also set the correct flags for building shared libraries. 
+ string(APPEND CMAKE_SHARED_LINKER_FLAGS " -shared") +endif() + +# Build with _XOPEN_SOURCE on z/OS. +if (CMAKE_SYSTEM_NAME MATCHES "OS390") + add_definitions("-D_XOPEN_SOURCE=600") + add_definitions("-D_OPEN_SYS") # Needed for process information. + add_definitions("-D_OPEN_SYS_FILE_EXT") # Needed for EBCDIC I/O. +endif() + +# Build with _FILE_OFFSET_BITS=64 on Solaris to match g++ >= 9. +if (UNIX AND ${CMAKE_SYSTEM_NAME} MATCHES "SunOS") + add_definitions("-D_FILE_OFFSET_BITS=64") +endif() + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +include_directories( ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR}) + +# when crosscompiling import the executable targets from a file +if(LLVM_USE_HOST_TOOLS) + include(CrossCompile) + llvm_create_cross_target(LLVM NATIVE "" Release) +endif(LLVM_USE_HOST_TOOLS) +if(LLVM_TARGET_IS_CROSSCOMPILE_HOST) +# Dummy use to avoid CMake Warning: Manually-specified variables were not used +# (this is a variable that CrossCompile sets on recursive invocations) +endif() + +if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) + # special hack for Solaris to handle crazy system sys/regset.h + include_directories("${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/Solaris") +endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) + +# Make sure we don't get -rdynamic in every binary. For those that need it, +# use export_executable_symbols(target). +set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") + +include(AddLLVM) +include(TableGen) + +include(LLVMDistributionSupport) + +if( MINGW AND NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) + # People report that -O3 is unreliable on MinGW. The traditional + # build also uses -O2 for that reason: + llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2") +endif() + +# Put this before tblgen. Else we have a circular dependence. 
+add_subdirectory(lib/Demangle) +add_subdirectory(lib/Support) +add_subdirectory(lib/TableGen) + +add_subdirectory(utils/TableGen) + +add_subdirectory(include/llvm) + +add_subdirectory(lib) + +if( LLVM_INCLUDE_UTILS ) + add_subdirectory(utils/FileCheck) + add_subdirectory(utils/PerfectShuffle) + add_subdirectory(utils/count) + add_subdirectory(utils/not) + add_subdirectory(utils/yaml-bench) +else() + if ( LLVM_INCLUDE_TESTS ) + message(FATAL_ERROR "Including tests when not building utils will not work. + Either set LLVM_INCLUDE_UTILS to On, or set LLVM_INCLUDE_TESTS to Off.") + endif() +endif() + +# Use LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION instead of LLVM_INCLUDE_UTILS because it is not really a util +if (LLVM_ADD_NATIVE_VISUALIZERS_TO_SOLUTION) + add_subdirectory(utils/LLVMVisualizers) +endif() + +foreach( binding ${LLVM_BINDINGS_LIST} ) + if( EXISTS "${LLVM_MAIN_SRC_DIR}/bindings/${binding}/CMakeLists.txt" ) + add_subdirectory(bindings/${binding}) + endif() +endforeach() + +add_subdirectory(projects) + +if( LLVM_INCLUDE_TOOLS ) + add_subdirectory(tools) +endif() + +if( LLVM_INCLUDE_RUNTIMES ) + add_subdirectory(runtimes) +endif() + +if( LLVM_INCLUDE_EXAMPLES ) + add_subdirectory(examples) +endif() + +if( LLVM_INCLUDE_TESTS ) + if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang) + include(LLVMExternalProjectUtils) + llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite + USE_TOOLCHAIN + EXCLUDE_FROM_ALL + NO_INSTALL + ALWAYS_CLEAN) + endif() + add_subdirectory(utils/lit) + add_subdirectory(test) + add_subdirectory(unittests) + if( LLVM_INCLUDE_UTILS ) + add_subdirectory(utils/unittest) + endif() + + if (WIN32) + # This utility is used to prevent crashing tests from calling Dr. Watson on + # Windows. + add_subdirectory(utils/KillTheDoctor) + endif() + + # Add a global check rule now that all subdirectories have been traversed + # and we know the total set of lit testsuites. 
+ get_property(LLVM_LIT_TESTSUITES GLOBAL PROPERTY LLVM_LIT_TESTSUITES) + get_property(LLVM_LIT_PARAMS GLOBAL PROPERTY LLVM_LIT_PARAMS) + get_property(LLVM_LIT_DEPENDS GLOBAL PROPERTY LLVM_LIT_DEPENDS) + get_property(LLVM_LIT_EXTRA_ARGS GLOBAL PROPERTY LLVM_LIT_EXTRA_ARGS) + get_property(LLVM_ADDITIONAL_TEST_TARGETS + GLOBAL PROPERTY LLVM_ADDITIONAL_TEST_TARGETS) + get_property(LLVM_ADDITIONAL_TEST_DEPENDS + GLOBAL PROPERTY LLVM_ADDITIONAL_TEST_DEPENDS) + add_lit_target(check-all + "Running all regression tests" + ${LLVM_LIT_TESTSUITES} + PARAMS ${LLVM_LIT_PARAMS} + DEPENDS ${LLVM_LIT_DEPENDS} ${LLVM_ADDITIONAL_TEST_TARGETS} + ARGS ${LLVM_LIT_EXTRA_ARGS} + ) + if(TARGET check-runtimes) + add_dependencies(check-all check-runtimes) + endif() + add_custom_target(test-depends + DEPENDS ${LLVM_LIT_DEPENDS} ${LLVM_ADDITIONAL_TEST_DEPENDS}) + set_target_properties(test-depends PROPERTIES FOLDER "Tests") +endif() + +if (LLVM_INCLUDE_DOCS) + add_subdirectory(docs) +endif() + +add_subdirectory(cmake/modules) + +# Do this last so that all lit targets have already been created. 
+if (LLVM_INCLUDE_UTILS) + add_subdirectory(utils/llvm-lit) +endif() + +if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + install(DIRECTORY include/llvm include/llvm-c + DESTINATION include + COMPONENT llvm-headers + FILES_MATCHING + PATTERN "*.def" + PATTERN "*.h" + PATTERN "*.td" + PATTERN "*.inc" + PATTERN "LICENSE.TXT" + ) + + install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm ${LLVM_INCLUDE_DIR}/llvm-c + DESTINATION include + COMPONENT llvm-headers + FILES_MATCHING + PATTERN "*.def" + PATTERN "*.h" + PATTERN "*.gen" + PATTERN "*.inc" + # Exclude include/llvm/CMakeFiles/intrinsics_gen.dir, matched by "*.def" + PATTERN "CMakeFiles" EXCLUDE + PATTERN "config.h" EXCLUDE + ) + + if (LLVM_INSTALL_MODULEMAPS) + install(DIRECTORY include/llvm include/llvm-c + DESTINATION include + COMPONENT llvm-headers + FILES_MATCHING + PATTERN "module.modulemap" + ) + install(FILES include/llvm/module.install.modulemap + DESTINATION include/llvm + COMPONENT llvm-headers + RENAME "module.extern.modulemap" + ) + endif(LLVM_INSTALL_MODULEMAPS) + + # Installing the headers needs to depend on generating any public + # tablegen'd headers. + add_custom_target(llvm-headers DEPENDS intrinsics_gen omp_gen) + set_target_properties(llvm-headers PROPERTIES FOLDER "Misc") + + if (NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-llvm-headers + DEPENDS llvm-headers + COMPONENT llvm-headers) + endif() + + # Custom target to install all libraries. 
+ add_custom_target(llvm-libraries) + set_target_properties(llvm-libraries PROPERTIES FOLDER "Misc") + + if (NOT LLVM_ENABLE_IDE) + add_llvm_install_targets(install-llvm-libraries + DEPENDS llvm-libraries + COMPONENT llvm-libraries) + endif() + + get_property(LLVM_LIBS GLOBAL PROPERTY LLVM_LIBS) + if(LLVM_LIBS) + list(REMOVE_DUPLICATES LLVM_LIBS) + foreach(lib ${LLVM_LIBS}) + add_dependencies(llvm-libraries ${lib}) + if (NOT LLVM_ENABLE_IDE) + add_dependencies(install-llvm-libraries install-${lib}) + add_dependencies(install-llvm-libraries-stripped install-${lib}-stripped) + endif() + endforeach() + endif() +endif() + +# This must be at the end of the LLVM root CMakeLists file because it must run +# after all targets are created. +llvm_distribution_add_targets() +process_llvm_pass_plugins(GEN_CONFIG) +include(CoverageReport) + +# This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake +if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows" AND CMAKE_INSTALL_UCRT_LIBRARIES) + include(InstallRequiredSystemLibraries) +endif() + +if (LLVM_INCLUDE_BENCHMARKS) + # Override benchmark defaults so that when the library itself is updated these + # modifications are not lost. + set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing" FORCE) + set(BENCHMARK_ENABLE_EXCEPTIONS OFF CACHE BOOL "Disable benchmark exceptions" FORCE) + set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "Don't install benchmark" FORCE) + set(BENCHMARK_DOWNLOAD_DEPENDENCIES OFF CACHE BOOL "Don't download dependencies" FORCE) + set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "Disable Google Test in benchmark" FORCE) + # Since LLVM requires C++11 it is safe to assume that std::regex is available. 
+ set(HAVE_STD_REGEX ON CACHE BOOL "OK" FORCE) + + add_subdirectory(utils/benchmark) + add_subdirectory(benchmarks) +endif() + +if (LLVM_INCLUDE_UTILS AND LLVM_INCLUDE_TOOLS) + add_subdirectory(utils/llvm-locstats) +endif() Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AddingConstrainedIntrinsics.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AddingConstrainedIntrinsics.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AdvancedBuilds.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AdvancedBuilds.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AliasAnalysis.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AliasAnalysis.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX1011.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX1011.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX8.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX900.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX900.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX904.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX904.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX906.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX906.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX908.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX908.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX90a.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX90a.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/AMDGPUAsmGFX9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src_2.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx1011_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx1011_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx1011_type_deviation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx1011_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx1011_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx1011_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx1011_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_attr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_attr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_dst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_dst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_fx_operand.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_fx_operand.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_hwreg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_hwreg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_imm16_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_imm16_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_imm16_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_imm16_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_imm16.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_imm16.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_label.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_label.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_m_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_m_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_m.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_msg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_msg.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_opt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_opt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_param.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_param.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_probe.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_probe.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_saddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_saddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_saddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_saddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sbase_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sbase_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sbase_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sbase_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sbase.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sbase.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_2.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_sdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_simm32_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_simm32_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_simm32_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_simm32_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_simm32.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_simm32.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_soffset_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_soffset_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_soffset_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_soffset_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_soffset.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_soffset.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_3.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_srsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_srsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_srsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_srsrc.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssamp.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssamp.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_7.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_ssrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_tgt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_tgt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_type_deviation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_4.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vaddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vcc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vcc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata0_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata0_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata0.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata0.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata1_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata1_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_8.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_11.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_11.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_12.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_12.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_13.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_13.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_2.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vdst.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx10_waitcnt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx10_waitcnt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_attr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_attr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_dst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_dst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_hwreg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_hwreg.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_imm16_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_imm16_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_imm16_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_imm16_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_imm16.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_imm16.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_label.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_label.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_m.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_msg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_msg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_opt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_opt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_param.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_param.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sbase_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sbase_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sbase.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sbase.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_6.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_sdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_simm32_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_simm32_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_simm32.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_simm32.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_soffset_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_soffset_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_soffset.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_soffset.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src_9.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_srsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_srsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_srsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_srsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssamp.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssamp.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_3.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_ssrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_tgt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_tgt.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_type_deviation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vaddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vcc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vcc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata0_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata0_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata0.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata0.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata1_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata1_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_6.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_11.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_11.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_12.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_12.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst_9.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx7_waitcnt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx7_waitcnt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_attr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_attr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_dst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_dst.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_hwreg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_hwreg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_imask.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_imask.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_imm16_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_imm16_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_imm16_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_imm16_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_imm16.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_imm16.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_label.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_label.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_m_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_m_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_m.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_msg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_msg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_opt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_opt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_param.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_param.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_probe.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_probe.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sbase_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sbase_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sbase.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sbase.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdata_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdata_2.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst_7.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_sdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_simm32_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_simm32_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_simm32_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_simm32_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_simm32.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_simm32.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_soffset_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_soffset_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_soffset_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_soffset_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_soffset.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_soffset.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_10.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_8.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_srsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_srsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_srsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_srsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssamp.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssamp.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_3.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_ssrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_tgt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_tgt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_type_deviation.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vaddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vcc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vcc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata0_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata0_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata0.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata0.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_10.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_11.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_11.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata1_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata1_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_12.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_12.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_13.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_13.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_14.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_14.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_2.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdata.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_11.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_11.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_12.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_12.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_13.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_13.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_14.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_14.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_15.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_15.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_16.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_16.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_17.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_17.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_8.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx8_waitcnt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx8_waitcnt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx900_fx_operand.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx900_fx_operand.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx900_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx900_m.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx900_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx900_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx900_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx900_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx900_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx900_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx904_fx_operand.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx904_fx_operand.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx904_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx904_m.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx904_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx904_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx904_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx904_src.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx904_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx904_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_fx_operand.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_fx_operand.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_m_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_m_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_m.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_src_4.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_type_deviation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx906_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx906_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_dst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_dst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_fx_operand.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_fx_operand.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_m_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_m_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_m.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_opt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_opt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_saddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_saddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_soffset.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_soffset.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_src_5.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_srsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_srsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_type_deviation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vaddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vaddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vaddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vaddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdata_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_3.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx908_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_dst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_dst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_fx_operand.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_fx_operand.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_hwreg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_hwreg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imask.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imask.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imm16_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imm16_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imm16_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imm16_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imm16.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_imm16.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_label.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_label.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_m_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_m_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_m.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_msg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_msg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_opt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_opt.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_probe.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_probe.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_saddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_saddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_saddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_saddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sbase_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sbase_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sbase_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sbase_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sbase.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sbase.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_2.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_4.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_sdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_simm32_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_simm32_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_simm32_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_simm32_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_simm32.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_simm32.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_soffset_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_soffset_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_soffset_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_soffset_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_soffset.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_soffset.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_11.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_11.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_4.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_srsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_srsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_srsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_srsrc.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssamp.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssamp.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_7.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_ssrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_type_deviation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr_5.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vaddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vcc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vcc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata0_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata0_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata0.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata0.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata1_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata1_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata_9.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_11.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_11.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_12.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_12.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_13.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_13.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_14.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_14.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_15.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_15.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_16.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_16.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_17.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_17.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_18.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_18.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_19.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_19.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_5.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_3.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx90a_waitcnt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx90a_waitcnt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_attr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_attr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_dst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_dst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_hwreg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_hwreg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_imask.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_imask.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_imm16_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_imm16_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_imm16_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_imm16_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_imm16.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_imm16.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_label.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_label.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_m_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_m_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_m.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_m.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_msg.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_msg.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_opt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_opt.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_param.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_param.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_probe.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_probe.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_saddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_saddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_saddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_saddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sbase_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sbase_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sbase_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sbase_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sbase.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sbase.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_3.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_sdst.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_simm32_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_simm32_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_simm32_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_simm32_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_simm32.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_simm32.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_soffset_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_soffset_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_soffset_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_soffset_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_soffset.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_soffset.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_4.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_src.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_src.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_srsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_srsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_srsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_srsrc.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssamp.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssamp.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_7.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_ssrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_tgt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_tgt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_type_deviation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_type_deviation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_4.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vaddr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vcc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vcc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata0_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata0_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata0.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata0.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata1_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata1_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_1.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_8.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_11.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_11.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_12.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_12.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_13.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_13.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_2.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_5.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_5.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_6.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_6.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_7.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_7.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_8.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_8.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_9.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst_9.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vdst.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc_1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc_1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc_2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc_2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc_3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc_3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_vsrc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPU/gfx9_waitcnt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPU/gfx9_waitcnt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPUDwarfExtensionsForHeterogeneousDebugging.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPUDwarfExtensionsForHeterogeneousDebugging.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPUInstructionNotation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPUInstructionNotation.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPUInstructionSyntax.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPUInstructionSyntax.doctree differ Binary 
files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPUModifierSyntax.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPUModifierSyntax.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPUOperandSyntax.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPUOperandSyntax.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/AMDGPUUsage.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/AMDGPUUsage.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Atomics.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Atomics.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Benchmarking.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Benchmarking.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/BigEndianNEON.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/BigEndianNEON.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/BitCodeFormat.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/BitCodeFormat.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/BlockFrequencyTerminology.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/BlockFrequencyTerminology.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/BranchWeightMetadata.doctree 
and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/BranchWeightMetadata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/BugLifeCycle.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/BugLifeCycle.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Bugpoint.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Bugpoint.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/BugpointRedesign.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/BugpointRedesign.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/BuildingADistribution.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/BuildingADistribution.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CFIVerify.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CFIVerify.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CMake.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CMake.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CMakePrimer.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CMakePrimer.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CodeGenerator.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CodeGenerator.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CodeOfConduct.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CodeOfConduct.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CodeReview.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CodeReview.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CodingStandards.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CodingStandards.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/bugpoint.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/bugpoint.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/clang-tblgen.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/clang-tblgen.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/dsymutil.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/dsymutil.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/FileCheck.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/FileCheck.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/index.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/lit.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/lit.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llc.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llc.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/lldb-tblgen.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/lldb-tblgen.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/lli.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/lli.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-addr2line.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-addr2line.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-ar.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-ar.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-as.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-as.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-bcanalyzer.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-bcanalyzer.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-config.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-config.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-cov.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-cov.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-cxxfilt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-cxxfilt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-cxxmap.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-cxxmap.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-diff.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-diff.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-dis.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-dis.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-dwarfdump.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-dwarfdump.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-exegesis.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-exegesis.doctree 
differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-extract.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-extract.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-install-name-tool.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-install-name-tool.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-lib.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-lib.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-libtool-darwin.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-libtool-darwin.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-link.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-link.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-lipo.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-lipo.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-locstats.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-locstats.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-mca.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-mca.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-nm.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-nm.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-objcopy.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-objcopy.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-objdump.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-objdump.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-otool.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-otool.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-pdbutil.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-pdbutil.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-profdata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-profdata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-profgen.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-profgen.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-ranlib.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-ranlib.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-readelf.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-readelf.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-readobj.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-readobj.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-size.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-size.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-stress.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-stress.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-strings.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-strings.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-strip.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-strip.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-symbolizer.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-symbolizer.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/llvm-tblgen.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/llvm-tblgen.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/mlir-tblgen.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/mlir-tblgen.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/opt.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/opt.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandGuide/tblgen.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandGuide/tblgen.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CommandLine.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CommandLine.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CompileCudaWithLLVM.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CompileCudaWithLLVM.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CompilerWriterInfo.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CompilerWriterInfo.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Contributing.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Contributing.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Coroutines.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Coroutines.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/CoverageMappingFormat.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/CoverageMappingFormat.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/DebuggingJITedCode.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/DebuggingJITedCode.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/DependenceGraphs/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/DependenceGraphs/index.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/DeveloperPolicy.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/DeveloperPolicy.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Docker.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Docker.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/environment.pickle and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/environment.pickle differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ExceptionHandling.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ExceptionHandling.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ExtendingLLVM.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ExtendingLLVM.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Extensions.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Extensions.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/FAQ.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/FAQ.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/FaultMaps.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/FaultMaps.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Frontend/PerformanceTips.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Frontend/PerformanceTips.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/FuzzingLLVM.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/FuzzingLLVM.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GarbageCollection.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GarbageCollection.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GetElementPtr.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GetElementPtr.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GettingInvolved.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GettingInvolved.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GettingStarted.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GettingStarted.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GettingStartedTutorials.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GettingStartedTutorials.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GettingStartedVS.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GettingStartedVS.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GitBisecting.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GitBisecting.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/GenericOpcode.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/GenericOpcode.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/GMIR.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/GMIR.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/index.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/InstructionSelect.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/InstructionSelect.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/IRTranslator.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/IRTranslator.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/KnownBits.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/KnownBits.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/Legalizer.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/Legalizer.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/Pipeline.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/Pipeline.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/Porting.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/Porting.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/RegBankSelect.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/RegBankSelect.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GlobalISel/Resources.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GlobalISel/Resources.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GoldPlugin.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GoldPlugin.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/GwpAsan.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/GwpAsan.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToAddABuilder.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToAddABuilder.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToBuildOnARM.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToBuildOnARM.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToBuildWindowsItaniumPrograms.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToBuildWindowsItaniumPrograms.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToBuildWithPGO.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToBuildWithPGO.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToCrossCompileBuiltinsOnArm.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToCrossCompileBuiltinsOnArm.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToCrossCompileLLVM.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToCrossCompileLLVM.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToReleaseLLVM.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToReleaseLLVM.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToSetUpLLVMStyleRTTI.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToSetUpLLVMStyleRTTI.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToSubmitABug.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToSubmitABug.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToUpdateDebugInfo.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToUpdateDebugInfo.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToUseAttributes.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToUseAttributes.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/HowToUseInstrMappings.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/HowToUseInstrMappings.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/InAlloca.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/InAlloca.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/index.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/JITLink.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/JITLink.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/LangRef.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/LangRef.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Lexicon.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Lexicon.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/LibFuzzer.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/LibFuzzer.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/LinkTimeOptimization.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/LinkTimeOptimization.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/LoopTerminology.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/LoopTerminology.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MarkdownQuickstartTemplate.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MarkdownQuickstartTemplate.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MarkedUpDisassembly.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MarkedUpDisassembly.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MCJITDesignAndImplementation.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MCJITDesignAndImplementation.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MeetupGuidelines.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MeetupGuidelines.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MemorySSA.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MemorySSA.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MemTagSanitizer.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MemTagSanitizer.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MergeFunctions.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MergeFunctions.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MIRLangRef.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MIRLangRef.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/MyFirstTypoFix.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/MyFirstTypoFix.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/NewPassManager.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/NewPassManager.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/NVPTXUsage.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/NVPTXUsage.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/OpaquePointers.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/OpaquePointers.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/OptBisect.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/OptBisect.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ORCv2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ORCv2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Packaging.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Packaging.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Passes.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Passes.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/CodeViewSymbols.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/CodeViewSymbols.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/CodeViewTypes.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/CodeViewTypes.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/DbiStream.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/DbiStream.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/GlobalStream.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/GlobalStream.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/HashTable.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/HashTable.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/index.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/ModiStream.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/ModiStream.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/MsfFile.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/MsfFile.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/PdbStream.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/PdbStream.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/PublicStream.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/PublicStream.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/PDB/TpiStream.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/PDB/TpiStream.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Phabricator.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Phabricator.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ProgrammersManual.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ProgrammersManual.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Projects.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Projects.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Proposals/GitHubMove.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Proposals/GitHubMove.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Proposals/LLVMLibC.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Proposals/LLVMLibC.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Proposals/TestSuite.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Proposals/TestSuite.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Proposals/VariableNames.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Proposals/VariableNames.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Proposals/VectorizationPlan.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Proposals/VectorizationPlan.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Proposals/VectorPredication.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Proposals/VectorPredication.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Reference.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Reference.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ReleaseNotes.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ReleaseNotes.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ReleaseProcess.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ReleaseProcess.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Remarks.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Remarks.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ReportingGuide.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ReportingGuide.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/ScudoHardenedAllocator.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/ScudoHardenedAllocator.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Security.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Security.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/SegmentedStacks.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/SegmentedStacks.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/SourceLevelDebugging.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/SourceLevelDebugging.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/SpeculativeLoadHardening.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/SpeculativeLoadHardening.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/SphinxQuickstartTemplate.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/SphinxQuickstartTemplate.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/StackMaps.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/StackMaps.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/StackSafetyAnalysis.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/StackSafetyAnalysis.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Statepoints.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Statepoints.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/SupportLibrary.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/SupportLibrary.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/SupportPolicy.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/SupportPolicy.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/SystemLibrary.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/SystemLibrary.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TableGen/BackEnds.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TableGen/BackEnds.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TableGen/BackGuide.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TableGen/BackGuide.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TableGen/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TableGen/index.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TableGen/ProgRef.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TableGen/ProgRef.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TableGenFundamentals.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TableGenFundamentals.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TestingGuide.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TestingGuide.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TestSuiteGuide.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TestSuiteGuide.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TestSuiteMakefileGuide.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TestSuiteMakefileGuide.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TransformMetadata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TransformMetadata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/BuildingAJIT1.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/BuildingAJIT1.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/BuildingAJIT2.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/BuildingAJIT2.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/BuildingAJIT3.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/BuildingAJIT3.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/BuildingAJIT4.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/BuildingAJIT4.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/index.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl01.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl01.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl02.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl02.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl03.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl03.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl04.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl04.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl05.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl05.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl06.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl06.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl07.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl07.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl08.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl08.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl09.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl09.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/LangImpl10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/LangImpl10.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/index.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/index.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl01.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl01.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl02.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl02.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl03.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl03.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl04.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl04.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl05.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl05.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl06.doctree and 
/tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl06.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl07.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl07.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl08.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl08.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl09.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl09.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl10.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/tutorial/MyFirstLanguageFrontend/LangImpl10.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/TypeMetadata.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/TypeMetadata.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/UserGuides.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/UserGuides.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/Vectorizers.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/Vectorizers.doctree differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/WritingAnLLVMBackend.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/WritingAnLLVMBackend.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/WritingAnLLVMNewPMPass.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/WritingAnLLVMNewPMPass.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/WritingAnLLVMPass.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/WritingAnLLVMPass.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/XRay.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/XRay.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/XRayExample.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/XRayExample.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/XRayFDRFormat.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/XRayFDRFormat.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/yaml2obj.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/yaml2obj.doctree differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/doctrees/YamlIO.doctree and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/doctrees/YamlIO.doctree differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AddingConstrainedIntrinsics.html 
llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AddingConstrainedIntrinsics.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AddingConstrainedIntrinsics.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AddingConstrainedIntrinsics.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,247 @@ + + + + + + + + + How To Add A Constrained Floating-Point Intrinsic — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Add A Constrained Floating-Point Intrinsic

+ +
+

Warning

+

This is a work in progress.

+
+
+

Add the intrinsic

+

Multiple files need to be updated when adding a new constrained intrinsic.

+

Add the new intrinsic to the table of intrinsics:

+
include/llvm/IR/Intrinsics.td
+
+
+
+
+

Add SelectionDAG node types

+

Add the new STRICT version of the node type to the ISD::NodeType enum:

+
include/llvm/CodeGen/ISDOpcodes.h
+
+
+

The strict version name must be a concatenation of the prefix STRICT_ and the name +of the corresponding non-strict node. For instance, the strict version of the +node FADD must be STRICT_FADD.

+
+
+

Update mappings

+

Add new record to the mapping of instructions to constrained intrinsic and +DAG nodes:

+
include/llvm/IR/ConstrainedOps.def
+
+
+

Follow instructions provided in this file.

+
+
+

Update IR components

+

Update the IR verifier:

+
lib/IR/Verifier.cpp
+
+
+
+
+

Update Selector components

+
+

Building the SelectionDAG

+

The function SelectionDAGBuilder::visitConstrainedFPIntrinsic builds DAG nodes +using mappings specified in ConstrainedOps.def. If however this default build is +not sufficient, the build can be modified, see how it is implemented for +STRICT_FP_ROUND. The new STRICT node will eventually be converted +to the matching non-STRICT node. For this reason it should have the same +operands and values as the non-STRICT version but should also use the chain. +This makes subsequent sharing of code for STRICT and non-STRICT code paths +easier:

+
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+
+
+

Most of the STRICT nodes get legalized the same as their matching non-STRICT +counterparts. A new STRICT node with this property must get added to the +switch in SelectionDAGLegalize::LegalizeOp():

+
lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+
+
+

Other parts of the legalizer may need to be updated as well. Look for +places where the non-STRICT counterpart is legalized and update as needed. +Be careful of the chain since STRICT nodes use it but their counterparts +often don’t.

+

The code to do the conversion or mutation of the STRICT node to a non-STRICT +version of the node happens in SelectionDAG::mutateStrictFPToFP(). In most cases +the function can do the conversion using information from ConstrainedOps.def. Be +careful updating this function since some nodes have the same return type +as their input operand, but some are different. Both of these cases must +be properly handled:

+
lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+
+
+

Whether the mutation may happen or not depends on how the new node has been +registered in TargetLoweringBase::initActions(). By default all strict nodes are +registered with the Expand action:

+
lib/CodeGen/TargetLoweringBase.cpp
+
+
+

To make debug logs readable it is helpful to update the SelectionDAG’s +debug logger:

+
lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+
+
+
+
+
+

Add documentation and tests

+
docs/LangRef.rst
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AdvancedBuilds.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AdvancedBuilds.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AdvancedBuilds.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AdvancedBuilds.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,296 @@ + + + + + + + + + Advanced Build Configurations — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Advanced Build Configurations

+ +
+

Introduction

+

CMake is a cross-platform build-generator tool. CMake +does not build the project, it generates the files needed by your build tool +(GNU make, Visual Studio, etc.) for building LLVM.

+

If you are a new contributor, please start with the Getting Started with the LLVM System or +Building LLVM with CMake pages. This page is intended for users doing more complex builds.

+

Many of the examples below are written assuming specific CMake Generators. +Unless otherwise explicitly called out these commands should work with any CMake +generator.

+
+
+

Bootstrap Builds

+

The Clang CMake build system supports bootstrap (aka multi-stage) builds. At a +high level a multi-stage build is a chain of builds that pass data from one +stage into the next. The most common and simple version of this is a traditional +bootstrap build.

+

In a simple two-stage bootstrap build, we build clang using the system compiler, +then use that just-built clang to build clang again. In CMake this simplest form +of a bootstrap build can be configured with a single option, +CLANG_ENABLE_BOOTSTRAP.

+
$ cmake -G Ninja -DCLANG_ENABLE_BOOTSTRAP=On <path to source>
+$ ninja stage2
+
+
+

This command itself isn’t terribly useful because it assumes default +configurations for each stage. The next series of examples utilize CMake cache +scripts to provide more complex options.

+

By default, only a few CMake options will be passed between stages. +The list, called _BOOTSTRAP_DEFAULT_PASSTHROUGH, is defined in clang/CMakeLists.txt. +To force the passing of the variables between stages, use the -DCLANG_BOOTSTRAP_PASSTHROUGH +CMake option, with each variable separated by a “;”. For example:

+
$ cmake -G Ninja -DCLANG_ENABLE_BOOTSTRAP=On -DCLANG_BOOTSTRAP_PASSTHROUGH="CMAKE_INSTALL_PREFIX;CMAKE_VERBOSE_MAKEFILE" <path to source>
+$ ninja stage2
+
+
+

CMake options starting with BOOTSTRAP_ will be passed only to the stage2 build. +This gives the opportunity to use Clang-specific build flags. +For example, the following CMake call will enable ‘-fno-addrsig’ only during +the stage2 build for C and C++.

+
$ cmake [..]  -DBOOTSTRAP_CMAKE_CXX_FLAGS='-fno-addrsig' -DBOOTSTRAP_CMAKE_C_FLAGS='-fno-addrsig' [..]
+
+
+

The clang build system refers to builds as stages. A stage1 build is a standard +build using the compiler installed on the host, and a stage2 build is built +using the stage1 compiler. This nomenclature holds up to more stages too. In +general a stage*n* build is built using the output from stage*n-1*.

+
+
+

Apple Clang Builds (A More Complex Bootstrap)

+

Apple’s Clang builds are a slightly more complicated example of the simple +bootstrapping scenario. Apple Clang is built using a 2-stage build.

+

The stage1 compiler is a host-only compiler with some options set. The stage1 +compiler is a balance of optimization vs build time because it is a throwaway. +The stage2 compiler is the fully optimized compiler intended to ship to users.

+

Setting up these compilers requires a lot of options. To simplify the +configuration the Apple Clang build settings are contained in CMake Cache files. +You can build an Apple Clang compiler using the following commands:

+
$ cmake -G Ninja -C <path to clang>/cmake/caches/Apple-stage1.cmake <path to source>
+$ ninja stage2-distribution
+
+
+

This CMake invocation configures the stage1 host compiler, and sets +CLANG_BOOTSTRAP_CMAKE_ARGS to pass the Apple-stage2.cmake cache script to the +stage2 configuration step.

+

When you build the stage2-distribution target it builds the minimal stage1 +compiler and required tools, then configures and builds the stage2 compiler +based on the settings in Apple-stage2.cmake.

+

This pattern of using cache scripts to set complex settings, and specifically to +make later stage builds include cache scripts is common in our more advanced +build configurations.

+
+
+

Multi-stage PGO

+

Profile-Guided Optimizations (PGO) is a really great way to optimize the code +clang generates. Our multi-stage PGO builds are a workflow for generating PGO +profiles that can be used to optimize clang.

+

At a high level, the way PGO works is that you build an instrumented compiler, +then you run the instrumented compiler against sample source files. While the +instrumented compiler runs it will output a bunch of files containing +performance counters (.profraw files). After generating all the profraw files +you use llvm-profdata to merge the files into a single profdata file that you +can feed into the LLVM_PROFDATA_FILE option.

+

Our PGO.cmake cache script automates that whole process. You can use it by +running:

+
$ cmake -G Ninja -C <path_to_clang>/cmake/caches/PGO.cmake <source dir>
+$ ninja stage2-instrumented-generate-profdata
+
+
+

If you let that run for a few hours or so, it will place a profdata file in your +build directory. This takes a really long time because it builds clang twice, +and you must have compiler-rt in your build tree.

+

This process uses any source files under the perf-training directory as training +data as long as the source files are marked up with LIT-style RUN lines.

+

After it finishes you can use “find . -name clang.profdata” to find it, but it +should be at a path something like:

+
<build dir>/tools/clang/stage2-instrumented-bins/utils/perf-training/clang.profdata
+
+
+

You can feed that file into the LLVM_PROFDATA_FILE option when you build your +optimized compiler.

+

The PGO CMake cache has a slightly different stage naming scheme than other +multi-stage builds. It generates three stages: stage1, stage2-instrumented, and +stage2. Both of the stage2 builds are built using the stage1 compiler.

+

The PGO CMake cache generates the following additional targets:

+
+
stage2-instrumented

Builds a stage1 x86 compiler, runtime, and required tools (llvm-config, +llvm-profdata) then uses that compiler to build an instrumented stage2 compiler.

+
+
stage2-instrumented-generate-profdata

Depends on “stage2-instrumented” and will use the instrumented compiler to +generate profdata based on the training files in <clang>/utils/perf-training

+
+
stage2

Depends on “stage2-instrumented-generate-profdata” and will use the stage1 +compiler with the stage2 profdata to build a PGO-optimized compiler.

+
+
stage2-check-llvm

Depends on stage2 and runs check-llvm using the stage2 compiler.

+
+
stage2-check-clang

Depends on stage2 and runs check-clang using the stage2 compiler.

+
+
stage2-check-all

Depends on stage2 and runs check-all using the stage2 compiler.

+
+
stage2-test-suite

Depends on stage2 and runs the test-suite using the stage2 compiler (requires +in-tree test-suite).

+
+
+
+
+

3-Stage Non-Determinism

+

In the ancient lore of compilers non-determinism is like the multi-headed hydra. +Whenever its head pops up, terror and chaos ensue.

+

Historically one of the tests to verify that a compiler was deterministic would +be a three stage build. The idea of a three stage build is you take your sources +and build a compiler (stage1), then use that compiler to rebuild the sources +(stage2), then you use that compiler to rebuild the sources a third time +(stage3) with an identical configuration to the stage2 build. At the end of +this, you have a stage2 and stage3 compiler that should be bit-for-bit +identical.

+

You can perform one of these 3-stage builds with LLVM & clang using the +following commands:

+
$ cmake -G Ninja -C <path_to_clang>/cmake/caches/3-stage.cmake <source dir>
+$ cmake --build . --target stage3 --parallel
+
+
+

After the build you can compare the stage2 & stage3 compilers.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AliasAnalysis.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AliasAnalysis.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AliasAnalysis.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AliasAnalysis.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,794 @@ + + + + + + + + + LLVM Alias Analysis Infrastructure — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Alias Analysis Infrastructure

+ +
+

Introduction

+

Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt to +determine whether or not two pointers ever can point to the same object in +memory. There are many different algorithms for alias analysis and many +different ways of classifying them: flow-sensitive vs. flow-insensitive, +context-sensitive vs. context-insensitive, field-sensitive +vs. field-insensitive, unification-based vs. subset-based, etc. Traditionally, +alias analyses respond to a query with a Must, May, or No alias response, +indicating that two pointers always point to the same object, might point to the +same object, or are known to never point to the same object.

+

The LLVM AliasAnalysis class is the +primary interface used by clients and implementations of alias analyses in the +LLVM system. This class is the common interface between clients of alias +analysis information and the implementations providing it, and is designed to +support a wide range of implementations and clients (but currently all clients +are assumed to be flow-insensitive). In addition to simple alias analysis +information, this class exposes Mod/Ref information from those implementations +which can provide it, allowing for powerful analyses and transformations to work +well together.

+

This document contains information necessary to successfully implement this +interface, use it, and to test both sides. It also explains some of the finer +points about what exactly results mean.

+
+
+

AliasAnalysis Class Overview

+

The AliasAnalysis +class defines the interface that the various alias analysis implementations +should support. This class exports two important enums: AliasResult and +ModRefResult which represent the result of an alias query or a mod/ref +query, respectively.

+

The AliasAnalysis interface exposes information about memory, represented in +several different ways. In particular, memory objects are represented as a +starting address and size, and function calls are represented as the actual +call or invoke instructions that performs the call. The +AliasAnalysis interface also exposes some helper methods which allow you to +get mod/ref information for arbitrary instructions.

+

All AliasAnalysis interfaces require that in queries involving multiple +values, values which are not constants are all +defined within the same function.

+
+

Representation of Pointers

+

Most importantly, the AliasAnalysis class provides several methods which are +used to query whether or not two memory objects alias, whether function calls +can modify or read a memory object, etc. For all of these queries, memory +objects are represented as a pair of their starting address (a symbolic LLVM +Value*) and a static size.

+

Representing memory objects as a starting address and a size is critically +important for correct Alias Analyses. For example, consider this (silly, but +possible) C code:

+
int i;
+char C[2];
+char A[10];
+/* ... */
+for (i = 0; i != 10; ++i) {
+  C[0] = A[i];          /* One byte store */
+  C[1] = A[9-i];        /* One byte store */
+}
+
+
+

In this case, the basic-aa pass will disambiguate the stores to C[0] and +C[1] because they are accesses to two distinct locations one byte apart, and +the accesses are each one byte. In this case, the Loop Invariant Code Motion +(LICM) pass can use store motion to remove the stores from the loop. In +contrast, the following code:

+
int i;
+char C[2];
+char A[10];
+/* ... */
+for (i = 0; i != 10; ++i) {
+  ((short*)C)[0] = A[i];  /* Two byte store! */
+  C[1] = A[9-i];          /* One byte store */
+}
+
+
+

In this case, the two stores to C do alias each other, because the access to the +&C[0] element is a two byte access. If size information wasn’t available in +the query, even the first case would have to conservatively assume that the +accesses alias.

+
+
+

The alias method

+

The alias method is the primary interface used to determine whether or not +two memory objects alias each other. It takes two memory objects as input and +returns MustAlias, PartialAlias, MayAlias, or NoAlias as appropriate.

+

Like all AliasAnalysis interfaces, the alias method requires that either +the two pointer values be defined within the same function, or at least one of +the values is a constant.

+
+

Must, May, and No Alias Responses

+

The NoAlias response may be used when there is never an immediate dependence +between any memory reference based on one pointer and any memory reference +based on the other. The most obvious example is when the two pointers point to +non-overlapping memory ranges. Another is when the two pointers are only ever +used for reading memory. Another is when the memory is freed and reallocated +between accesses through one pointer and accesses through the other — in this +case, there is a dependence, but it’s mediated by the free and reallocation.

+

An exception to this is the noalias keyword; +the “irrelevant” dependencies are ignored.

+

The MayAlias response is used whenever the two pointers might refer to the +same object.

+

The PartialAlias response is used when the two memory objects are known to +be overlapping in some way, regardless whether they start at the same address +or not.

+

The MustAlias response may only be returned if the two memory objects are +guaranteed to always start at exactly the same location. A MustAlias +response does not imply that the pointers compare equal.

+
+
+
+

The getModRefInfo methods

+

The getModRefInfo methods return information about whether the execution of +an instruction can read or modify a memory location. Mod/Ref information is +always conservative: if an instruction might read or write a location, +ModRef is returned.

+

The AliasAnalysis class also provides a getModRefInfo method for testing +dependencies between function calls. This method takes two call sites (CS1 +& CS2), returns NoModRef if neither call writes to memory read or +written by the other, Ref if CS1 reads memory written by CS2, +Mod if CS1 writes to memory read or written by CS2, or ModRef if +CS1 might read or write memory written to by CS2. Note that this +relation is not commutative.

+
+
+

Other useful AliasAnalysis methods

+

Several other tidbits of information are often collected by various alias +analysis implementations and can be put to good use by various clients.

+
+

The pointsToConstantMemory method

+

The pointsToConstantMemory method returns true if and only if the analysis +can prove that the pointer only points to unchanging memory locations +(functions, constant global variables, and the null pointer). This information +can be used to refine mod/ref information: it is impossible for an unchanging +memory location to be modified.

+
+
+

The doesNotAccessMemory and onlyReadsMemory methods

+

These methods are used to provide very simple mod/ref information for function +calls. The doesNotAccessMemory method returns true for a function if the +analysis can prove that the function never reads or writes to memory, or if the +function only reads from constant memory. Functions with this property are +side-effect free and only depend on their input arguments, allowing them to be +eliminated if they form common subexpressions or be hoisted out of loops. Many +common functions behave this way (e.g., sin and cos) but many others do +not (e.g., acos, which modifies the errno variable).

+

The onlyReadsMemory method returns true for a function if analysis can prove +that (at most) the function only reads from non-volatile memory. Functions with +this property are side-effect free, only depending on their input arguments and +the state of memory when they are called. This property allows calls to these +functions to be eliminated and moved around, as long as there is no store +instruction that changes the contents of memory. Note that all functions that +satisfy the doesNotAccessMemory method also satisfy onlyReadsMemory.

+
+
+
+
+

Writing a new AliasAnalysis Implementation

+

Writing a new alias analysis implementation for LLVM is quite straight-forward. +There are already several implementations that you can use for examples, and the +following information should help fill in any details. For examples, take a +look at the various alias analysis implementations included with LLVM.

+
+

Different Pass styles

+

The first step is to determine what type of LLVM pass +you need to use for your Alias Analysis. As is the case with most other +analyses and transformations, the answer should be fairly obvious from what type +of problem you are trying to solve:

+
    +
  1. If you require interprocedural analysis, it should be a Pass.

+
  2. If you are a function-local analysis, subclass FunctionPass.

+
  3. If you don’t need to look at the program at all, subclass ImmutablePass.

+
+

In addition to the pass that you subclass, you should also inherit from the +AliasAnalysis interface, of course, and use the RegisterAnalysisGroup +template to register as an implementation of AliasAnalysis.

+
+
+

Required initialization calls

+

Your subclass of AliasAnalysis is required to invoke two methods on the +AliasAnalysis base class: getAnalysisUsage and +InitializeAliasAnalysis. In particular, your implementation of +getAnalysisUsage should explicitly call into the +AliasAnalysis::getAnalysisUsage method in addition to declaring +any pass dependencies your pass has. Thus you should have something like this:

+
void getAnalysisUsage(AnalysisUsage &AU) const {
+  AliasAnalysis::getAnalysisUsage(AU);
+  // declare your dependencies here.
+}
+
+
+

Additionally, you must invoke the InitializeAliasAnalysis method from your +analysis run method (run for a Pass, runOnFunction for a +FunctionPass, or InitializePass for an ImmutablePass). For example +(as part of a Pass):

+
bool run(Module &M) {
+  InitializeAliasAnalysis(this);
+  // Perform analysis here...
+  return false;
+}
+
+
+
+
+

Required methods to override

+

You must override the getAdjustedAnalysisPointer method on all subclasses +of AliasAnalysis. An example implementation of this method would look like:

+
void *getAdjustedAnalysisPointer(const void* ID) override {
+  if (ID == &AliasAnalysis::ID)
+    return (AliasAnalysis*)this;
+  return this;
+}
+
+
+
+
+

Interfaces which may be specified

+

All of the AliasAnalysis virtual methods +default to providing chaining to another alias +analysis implementation, which ends up returning conservatively correct +information (returning “May” Alias and “Mod/Ref” for alias and mod/ref queries +respectively). Depending on the capabilities of the analysis you are +implementing, you just override the interfaces you can improve.

+
+
+

AliasAnalysis chaining behavior

+

Every alias analysis pass chains to another alias analysis implementation (for +example, the user can specify “-basic-aa -ds-aa -licm” to get the maximum +benefit from both alias analyses). The alias analysis class automatically +takes care of most of this for methods that you don’t override. For methods +that you do override, in code paths that return a conservative MayAlias or +Mod/Ref result, simply return whatever the superclass computes. For example:

+
AliasResult alias(const Value *V1, unsigned V1Size,
+                  const Value *V2, unsigned V2Size) {
+  if (...)
+    return NoAlias;
+  ...
+
+  // Couldn't determine a must or no-alias result.
+  return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+}
+
+
+

In addition to analysis queries, you must make sure to unconditionally pass LLVM +update notification methods to the superclass as well if you override them, +which allows all alias analyses in a chain to be updated.

+
+
+

Updating analysis results for transformations

+

Alias analysis information is initially computed for a static snapshot of the +program, but clients will use this information to make transformations to the +code. All but the most trivial forms of alias analysis will need to have their +analysis results updated to reflect the changes made by these transformations.

+

The AliasAnalysis interface exposes four methods which are used to +communicate program changes from the clients to the analysis implementations. +Various alias analysis implementations should use these methods to ensure that +their internal data structures are kept up-to-date as the program changes (for +example, when an instruction is deleted), and clients of alias analysis must be +sure to call these interfaces appropriately.

+
+

The deleteValue method

+

The deleteValue method is called by transformations when they remove an +instruction or any other value from the program (including values that do not +use pointers). Typically alias analyses keep data structures that have entries +for each value in the program. When this method is called, they should remove +any entries for the specified value, if they exist.

+
+
+

The copyValue method

+

The copyValue method is used when a new value is introduced into the +program. There is no way to introduce a value into the program that did not +exist before (this doesn’t make sense for a safe compiler transformation), so +this is the only way to introduce a new value. This method indicates that the +new value has exactly the same properties as the value being copied.

+
+
+

The replaceWithNewValue method

+

This method is a simple helper method that is provided to make clients easier to +use. It is implemented by copying the old analysis information to the new +value, then deleting the old value. This method cannot be overridden by alias +analysis implementations.

+
+
+

The addEscapingUse method

+

The addEscapingUse method is used when the uses of a pointer value have +changed in ways that may invalidate precomputed analysis information. +Implementations may either use this callback to provide conservative responses +for pointers whose uses have changed since analysis time, or may recompute some or +all of their internal state to continue providing accurate responses.

+

In general, any new use of a pointer value is considered an escaping use, and +must be reported through this callback, except for the uses below:

+
    +
  • A bitcast or getelementptr of the pointer

  • +
  • A store through the pointer (but not a store of the pointer)

  • +
  • A load through the pointer

  • +
+
+
+
+

Efficiency Issues

+

From the LLVM perspective, the only thing you need to do to provide an efficient +alias analysis is to make sure that alias analysis queries are serviced +quickly. The actual calculation of the alias analysis results (the “run” +method) is only performed once, but many (perhaps duplicate) queries may be +performed. Because of this, try to move as much computation to the run method +as possible (within reason).

+
+
+

Limitations

+

The AliasAnalysis infrastructure has several limitations which make writing a +new AliasAnalysis implementation difficult.

+

There is no way to override the default alias analysis. It would be very useful +to be able to do something like “opt -my-aa -O2” and have it use -my-aa +for all passes which need AliasAnalysis, but there is currently no support for +that, short of changing the source code and recompiling. Similarly, there is +also no way of setting a chain of analyses as the default.

+

There is no way for transform passes to declare that they preserve +AliasAnalysis implementations. The AliasAnalysis interface includes +deleteValue and copyValue methods which are intended to allow a pass to +keep an AliasAnalysis consistent, however there’s no way for a pass to declare +in its getAnalysisUsage that it does so. Some passes attempt to use +AU.addPreserved<AliasAnalysis>, however this doesn’t actually have any +effect.

+

Similarly, the opt -p option introduces ModulePass passes between each +pass, which prevents the use of FunctionPass alias analysis passes.

+

The AliasAnalysis API does have functions for notifying implementations when +values are deleted or copied, however these aren’t sufficient. There are many +other ways that LLVM IR can be modified which could be relevant to +AliasAnalysis implementations which can not be expressed.

+

The AliasAnalysisDebugger utility seems to suggest that AliasAnalysis +implementations can expect that they will be informed of any relevant Value +before it appears in an alias query. However, popular clients such as GVN +don’t support this, and are known to trigger errors when run with the +AliasAnalysisDebugger.

+

The AliasSetTracker class (which is used by LICM) makes a +non-deterministic number of alias queries. This can cause debugging techniques +involving pausing execution after a predetermined number of queries to be +unreliable.

+

Many alias queries can be reformulated in terms of other alias queries. When +multiple AliasAnalysis queries are chained together, it would make sense to +start those queries from the beginning of the chain, with care taken to avoid +infinite looping, however currently an implementation which wants to do this can +only start such queries from itself.

+
+
+
+

Using alias analysis results

+

There are several different ways to use alias analysis results. In order of +preference, these are:

+
+

Using the MemoryDependenceAnalysis Pass

+

The memdep pass uses alias analysis to provide high-level dependence +information about memory-using instructions. This will tell you which store +feeds into a load, for example. It uses caching and other techniques to be +efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations.

+
+
+

Using the AliasSetTracker class

+

Many transformations need information about alias sets that are active in +some scope, rather than information about pairwise aliasing. The +AliasSetTracker +class is used to efficiently build these Alias Sets from the pairwise alias +analysis information provided by the AliasAnalysis interface.

+

First you initialize the AliasSetTracker by using the “add” methods to add +information about various potentially aliasing instructions in the scope you are +interested in. Once all of the alias sets are completed, your pass should +simply iterate through the constructed alias sets, using the AliasSetTracker +begin()/end() methods.

+

The AliasSets formed by the AliasSetTracker are guaranteed to be +disjoint, calculate mod/ref information and volatility for the set, and keep +track of whether or not all of the pointers in the set are Must aliases. The +AliasSetTracker also makes sure that sets are properly folded due to call +instructions, and can provide a list of pointers in each set.

+

As an example user of this, the Loop Invariant Code Motion pass uses AliasSetTrackers to calculate alias +sets for each loop nest. If an AliasSet in a loop is not modified, then all +load instructions from that set may be hoisted out of the loop. If any alias +sets are stored to and are must alias sets, then the stores may be sunk +to outside of the loop, promoting the memory location to a register for the +duration of the loop nest. Both of these transformations only apply if the +pointer argument is loop-invariant.

+
+

The AliasSetTracker implementation

+

The AliasSetTracker class is implemented to be as efficient as possible. It +uses the union-find algorithm to efficiently merge AliasSets when a pointer is +inserted into the AliasSetTracker that aliases multiple sets. The primary data +structure is a hash table mapping pointers to the AliasSet they are in.

+

The AliasSetTracker class must maintain a list of all of the LLVM Value*s +that are in each AliasSet. Since the hash table already has entries for each +LLVM Value* of interest, the AliasSets thread the linked list through +these hash-table nodes to avoid having to allocate memory unnecessarily, and to +make merging alias sets extremely efficient (the linked list merge is constant +time).

+

You shouldn’t need to understand these details if you are just a client of the +AliasSetTracker, but if you look at the code, hopefully this brief description +will help make sense of why things are designed the way they are.

+
+
+
+

Using the AliasAnalysis interface directly

+

If neither of these utility classes is what your pass needs, you should use the +interfaces exposed by the AliasAnalysis class directly. Try to use the +higher-level methods when possible (e.g., use mod/ref information instead of the +alias method directly if possible) to get the best precision and efficiency.

+
+
+
+

Existing alias analysis implementations and clients

+

If you’re going to be working with the LLVM alias analysis infrastructure, you +should know what clients and implementations of alias analysis are available. +In particular, if you are implementing an alias analysis, you should be aware of +the clients that are useful for monitoring and evaluating different +implementations.

+
+

Available AliasAnalysis implementations

+

This section lists the various implementations of the AliasAnalysis +interface. All of these chain to other +alias analysis implementations.

+
+

The -basic-aa pass

+

The -basic-aa pass is an aggressive local analysis that knows many +important facts:

+
    +
  • Distinct globals, stack allocations, and heap allocations can never alias.

  • +
  • Globals, stack allocations, and heap allocations never alias the null pointer.

  • +
  • Different fields of a structure do not alias.

  • +
  • Indexes into arrays with statically differing subscripts cannot alias.

  • +
  • Many common standard C library functions never access memory or only read +memory.

  • +
  • Pointers that obviously point to constant globals “pointToConstantMemory”.

  • +
  • Function calls cannot modify or reference stack allocations if they never +escape from the function that allocates them (a common case for automatic +arrays).

  • +
+
+
+

The -globalsmodref-aa pass

+

This pass implements a simple context-sensitive mod/ref and alias analysis for +internal global variables that don’t “have their address taken”. If a global +does not have its address taken, the pass knows that no pointers alias the +global. This pass also keeps track of functions that it knows never access +memory or never read memory. This allows certain optimizations (e.g. GVN) to +eliminate call instructions entirely.

+

The real power of this pass is that it provides context-sensitive mod/ref +information for call instructions. This allows the optimizer to know that calls +to a function do not clobber or read the value of the global, allowing loads and +stores to be eliminated.

+
+

Note

+

This pass is somewhat limited in its scope (it only supports non-address-taken +globals), but is a very quick analysis.

+
+
+
+

The -steens-aa pass

+

The -steens-aa pass implements a variation on the well-known “Steensgaard’s +algorithm” for interprocedural alias analysis. Steensgaard’s algorithm is a +unification-based, flow-insensitive, context-insensitive, and field-insensitive +alias analysis that is also very scalable (effectively linear time).

+

The LLVM -steens-aa pass implements a “speculatively field-sensitive” +version of Steensgaard’s algorithm using the Data Structure Analysis framework. +This gives it substantially more precision than the standard algorithm while +maintaining excellent analysis scalability.

+
+

Note

+

-steens-aa is available in the optional “poolalloc” module. It is not part +of the LLVM core.

+
+
+
+

The -ds-aa pass

+

The -ds-aa pass implements the full Data Structure Analysis algorithm. Data +Structure Analysis is a modular unification-based, flow-insensitive, +context-sensitive, and speculatively field-sensitive alias +analysis that is also quite scalable, usually at O(n * log(n)).

+

This algorithm is capable of responding to a full variety of alias analysis +queries, and can provide context-sensitive mod/ref information as well. The +only major facility not implemented so far is support for must-alias +information.

+
+

Note

+

-ds-aa is available in the optional “poolalloc” module. It is not part of +the LLVM core.

+
+
+
+

The -scev-aa pass

+

The -scev-aa pass implements AliasAnalysis queries by translating them into +ScalarEvolution queries. This gives it a more complete understanding of +getelementptr instructions and loop induction variables than other alias +analyses have.

+
+
+
+

Alias analysis driven transformations

+

LLVM includes several alias-analysis driven transformations which can be used +with any of the implementations above.

+
+

The -adce pass

+

The -adce pass, which implements Aggressive Dead Code Elimination, uses the +AliasAnalysis interface to delete calls to functions that do not have +side-effects and are not used.

+
+
+

The -licm pass

+

The -licm pass implements various Loop Invariant Code Motion related +transformations. It uses the AliasAnalysis interface for several different +transformations:

+
    +
  • It uses mod/ref information to hoist or sink load instructions out of loops if +there are no instructions in the loop that modify the memory loaded.

  • +
  • It uses mod/ref information to hoist function calls out of loops that do not +write to memory and are loop-invariant.

  • +
  • It uses alias information to promote memory objects that are loaded and stored +to in loops to live in a register instead. It can do this if there are no may +aliases to the loaded/stored memory location.

  • +
+
+
+

The -argpromotion pass

+

The -argpromotion pass promotes by-reference arguments to be passed in +by-value instead. In particular, if pointer arguments are only loaded from, it +passes in the value loaded instead of the address to the function. This pass +uses alias information to make sure that the value loaded from the argument +pointer is not modified between the entry of the function and any load of the +pointer.

+
+
+

The -gvn, -memcpyopt, and -dse passes

+

These passes use AliasAnalysis information to reason about loads and stores.

+
+
+
+

Clients for debugging and evaluation of implementations

+

These passes are useful for evaluating the various alias analysis +implementations. You can use them with commands like:

+
% opt -ds-aa -aa-eval foo.bc -disable-output -stats
+
+
+
+

The -print-alias-sets pass

+

The -print-alias-sets pass is exposed as part of the opt tool to print +out the Alias Sets formed by the AliasSetTracker class. This is useful if +you’re using the AliasSetTracker class. To use it, use something like:

+
% opt -ds-aa -print-alias-sets -disable-output
+
+
+
+
+

The -aa-eval pass

+

The -aa-eval pass simply iterates through all pairs of pointers in a +function and asks an alias analysis whether or not the pointers alias. This +gives an indication of the precision of the alias analysis. Statistics are +printed indicating the percent of no/may/must aliases found (a more precise +algorithm will have a lower number of may aliases).

+
+
+
+
+

Memory Dependence Analysis

+
+

Note

+

We are currently in the process of migrating things from +MemoryDependenceAnalysis to MemorySSA. Please try to use +that instead.

+
+

If you’re just looking to be a client of alias analysis information, consider +using the Memory Dependence Analysis interface instead. MemDep is a lazy, +caching layer on top of alias analysis that is able to answer the question of +what preceding memory operations a given instruction depends on, either at an +intra- or inter-block level. Because of its laziness and caching policy, using +MemDep can be a significant performance win over accessing alias analysis +directly.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX1011.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX1011.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX1011.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX1011.html 2021-09-19 16:16:14.000000000 +0000 @@ -0,0 +1,210 @@ + + + + + + + + + Syntax of gfx1011 and gfx1012 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of gfx1011 and gfx1012 Instructions

+ +
+

Introduction

+

This document describes the syntax of instructions specific to gfx1011 and gfx1012.

+

For a description of other gfx1011 and gfx1012 instructions see Syntax of Core GFX10 Instructions.

+
+
+

Notation

+

Notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

DPP16

+
INSTRUCTION            DST      SRC0         SRC1         MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————
+v_dot2c_f32_f16_dpp    vdst,    vsrc0:f16x2, vsrc1:f16x2  dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_dot4c_i32_i8_dpp     vdst,    vsrc0:i8x4,  vsrc1:i8x4   dpp16_ctrl row_mask bank_mask bound_ctrl fi
+
+
+

DPP8

+
INSTRUCTION                    DST       SRC0         SRC1             MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————
+v_dot2c_f32_f16_dpp            vdst,     vsrc0:f16x2, vsrc1:f16x2      dpp8_sel fi
+v_dot4c_i32_i8_dpp             vdst,     vsrc0:i8x4,  vsrc1:i8x4       dpp8_sel fi
+
+
+

VOP2

+
INSTRUCTION                    DST       SRC0        SRC1
+————————————————————————————————————————————————————————————————
+v_dot2c_f32_f16                vdst,     src0:f16x2, vsrc1:f16x2
+v_dot4c_i32_i8                 vdst,     src0:i8x4,  vsrc1:i8x4
+
+
+

VOP3P

+
INSTRUCTION                    DST       SRC0        SRC1        SRC2           MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————
+v_dot2_f32_f16                 vdst,     src0:f16x2, src1:f16x2, src2:f32       neg_lo neg_hi clamp
+v_dot2_i32_i16                 vdst,     src0:i16x2, src1:i16x2, src2:i32       clamp
+v_dot2_u32_u16                 vdst,     src0:u16x2, src1:u16x2, src2:u32       clamp
+v_dot4_i32_i8                  vdst,     src0:i8x4,  src1:i8x4,  src2:i32       clamp
+v_dot4_u32_u8                  vdst,     src0:u8x4,  src1:u8x4,  src2:u32       clamp
+v_dot8_i32_i4                  vdst,     src0:i4x8,  src1:i4x8,  src2:i32       clamp
+v_dot8_u32_u4                  vdst,     src0:u4x8,  src1:u4x8,  src2:u32       clamp
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX10.html 2021-09-19 16:16:14.000000000 +0000 @@ -0,0 +1,2243 @@ + + + + + + + + + Syntax of Core GFX10 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of Core GFX10 Instructions

+ +
+

Introduction

+

This document describes the syntax of core GFX10 instructions.

+
+
+

Notation

+

Notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

DPP16

+
INSTRUCTION              DST0       DST1 SRC0       SRC1      SRC2  MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_co_ci_u32_dpp      vdst,      vcc, vsrc0,     vsrc1,    vcc   dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_add_f16_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_add_f32_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_add_nc_u32_dpp         vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_and_b32_dpp            vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_ashrrev_i32_dpp        vdst,           vsrc0:u32, vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_bfrev_b32_dpp          vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_ceil_f16_dpp           vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_ceil_f32_dpp           vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cndmask_b32_dpp        vdst,           vsrc0,     vsrc1,    vcc   dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cos_f16_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cos_f32_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f16_f32_dpp        vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f16_i16_dpp        vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f16_u16_dpp        vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f32_f16_dpp        vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f32_i32_dpp        vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f32_u32_dpp        vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f32_ubyte0_dpp     vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f32_ubyte1_dpp     vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f32_ubyte2_dpp     vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_f32_ubyte3_dpp     vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_flr_i32_f32_dpp    vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_i16_f16_dpp        vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_i32_f32_dpp        vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_norm_i16_f16_dpp   vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_norm_u16_f16_dpp   vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_off_f32_i4_dpp     vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_rpi_i32_f32_dpp    vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_u16_f16_dpp        vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_cvt_u32_f32_dpp        vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_exp_f16_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_exp_f32_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_ffbh_i32_dpp           vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_ffbh_u32_dpp           vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_ffbl_b32_dpp           vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_floor_f16_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_floor_f32_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_fmac_f16_dpp           vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_fmac_f32_dpp           vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_fract_f16_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_fract_f32_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_frexp_exp_i16_f16_dpp  vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_frexp_exp_i32_f32_dpp  vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_frexp_mant_f16_dpp     vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_frexp_mant_f32_dpp     vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_ldexp_f16_dpp          vdst,           vsrc0:m,   vsrc1:i16       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_log_f16_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_log_f32_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_lshlrev_b32_dpp        vdst,           vsrc0:u32, vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_lshrrev_b32_dpp        vdst,           vsrc0:u32, vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mac_f32_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_max_f16_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_max_f32_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_max_i32_dpp            vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_max_u32_dpp            vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_min_f16_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_min_f32_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_min_i32_dpp            vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_min_u32_dpp            vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mov_b32_dpp            vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_movreld_b32_dpp        vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_movrels_b32_dpp        vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_movrelsd_2_b32_dpp     vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_movrelsd_b32_dpp       vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mul_f16_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mul_f32_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mul_hi_i32_i24_dpp     vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mul_hi_u32_u24_dpp     vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mul_i32_i24_dpp        vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mul_legacy_f32_dpp     vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_mul_u32_u24_dpp        vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_not_b32_dpp            vdst,           vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_or_b32_dpp             vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_rcp_f16_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_rcp_f32_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_rcp_iflag_f32_dpp      vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_rndne_f16_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_rndne_f32_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_rsq_f16_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_rsq_f32_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sat_pk_u8_i16_dpp      vdst:u8x4,      vsrc                       dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sin_f16_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sin_f32_dpp            vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sqrt_f16_dpp           vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sqrt_f32_dpp           vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sub_co_ci_u32_dpp      vdst,      vcc, vsrc0,     vsrc1,    vcc   dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sub_f16_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sub_f32_dpp            vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_sub_nc_u32_dpp         vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_subrev_co_ci_u32_dpp   vdst,      vcc, vsrc0,     vsrc1,    vcc   dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_subrev_f16_dpp         vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_subrev_f32_dpp         vdst,           vsrc0:m,   vsrc1:m         dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_subrev_nc_u32_dpp      vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_trunc_f16_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_trunc_f32_dpp          vdst,           vsrc:m                     dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_xnor_b32_dpp           vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+v_xor_b32_dpp            vdst,           vsrc0,     vsrc1           dpp16_ctrl row_mask bank_mask bound_ctrl fi
+
+
+

DPP8

+
INSTRUCTION                    DST0       DST1      SRC0       SRC1      SRC2           MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_co_ci_u32_dpp            vdst,      vcc,      vsrc0,     vsrc1,    vcc            dpp8_sel fi
+v_add_f16_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_add_f32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_add_nc_u32_dpp               vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_and_b32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_ashrrev_i32_dpp              vdst,                vsrc0:u32, vsrc1                    dpp8_sel fi
+v_bfrev_b32_dpp                vdst,                vsrc                                dpp8_sel fi
+v_ceil_f16_dpp                 vdst,                vsrc                                dpp8_sel fi
+v_ceil_f32_dpp                 vdst,                vsrc                                dpp8_sel fi
+v_cndmask_b32_dpp              vdst,                vsrc0,     vsrc1,    vcc            dpp8_sel fi
+v_cos_f16_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_cos_f32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_cvt_f16_f32_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_f16_i16_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_f16_u16_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_f32_f16_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_f32_i32_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_f32_u32_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_f32_ubyte0_dpp           vdst,                vsrc                                dpp8_sel fi
+v_cvt_f32_ubyte1_dpp           vdst,                vsrc                                dpp8_sel fi
+v_cvt_f32_ubyte2_dpp           vdst,                vsrc                                dpp8_sel fi
+v_cvt_f32_ubyte3_dpp           vdst,                vsrc                                dpp8_sel fi
+v_cvt_flr_i32_f32_dpp          vdst,                vsrc                                dpp8_sel fi
+v_cvt_i16_f16_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_i32_f32_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_norm_i16_f16_dpp         vdst,                vsrc                                dpp8_sel fi
+v_cvt_norm_u16_f16_dpp         vdst,                vsrc                                dpp8_sel fi
+v_cvt_off_f32_i4_dpp           vdst,                vsrc                                dpp8_sel fi
+v_cvt_rpi_i32_f32_dpp          vdst,                vsrc                                dpp8_sel fi
+v_cvt_u16_f16_dpp              vdst,                vsrc                                dpp8_sel fi
+v_cvt_u32_f32_dpp              vdst,                vsrc                                dpp8_sel fi
+v_exp_f16_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_exp_f32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_ffbh_i32_dpp                 vdst,                vsrc                                dpp8_sel fi
+v_ffbh_u32_dpp                 vdst,                vsrc                                dpp8_sel fi
+v_ffbl_b32_dpp                 vdst,                vsrc                                dpp8_sel fi
+v_floor_f16_dpp                vdst,                vsrc                                dpp8_sel fi
+v_floor_f32_dpp                vdst,                vsrc                                dpp8_sel fi
+v_fmac_f16_dpp                 vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_fmac_f32_dpp                 vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_fract_f16_dpp                vdst,                vsrc                                dpp8_sel fi
+v_fract_f32_dpp                vdst,                vsrc                                dpp8_sel fi
+v_frexp_exp_i16_f16_dpp        vdst,                vsrc                                dpp8_sel fi
+v_frexp_exp_i32_f32_dpp        vdst,                vsrc                                dpp8_sel fi
+v_frexp_mant_f16_dpp           vdst,                vsrc                                dpp8_sel fi
+v_frexp_mant_f32_dpp           vdst,                vsrc                                dpp8_sel fi
+v_ldexp_f16_dpp                vdst,                vsrc0,     vsrc1:i16                dpp8_sel fi
+v_log_f16_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_log_f32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_lshlrev_b32_dpp              vdst,                vsrc0:u32, vsrc1                    dpp8_sel fi
+v_lshrrev_b32_dpp              vdst,                vsrc0:u32, vsrc1                    dpp8_sel fi
+v_mac_f32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_max_f16_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_max_f32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_max_i32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_max_u32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_min_f16_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_min_f32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_min_i32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_min_u32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_mov_b32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_movreld_b32_dpp              vdst,                vsrc                                dpp8_sel fi
+v_movrels_b32_dpp              vdst,                vsrc                                dpp8_sel fi
+v_movrelsd_2_b32_dpp           vdst,                vsrc                                dpp8_sel fi
+v_movrelsd_b32_dpp             vdst,                vsrc                                dpp8_sel fi
+v_mul_f16_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_mul_f32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_mul_hi_i32_i24_dpp           vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_mul_hi_u32_u24_dpp           vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_mul_i32_i24_dpp              vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_mul_legacy_f32_dpp           vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_mul_u32_u24_dpp              vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_not_b32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_or_b32_dpp                   vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_rcp_f16_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_rcp_f32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_rcp_iflag_f32_dpp            vdst,                vsrc                                dpp8_sel fi
+v_rndne_f16_dpp                vdst,                vsrc                                dpp8_sel fi
+v_rndne_f32_dpp                vdst,                vsrc                                dpp8_sel fi
+v_rsq_f16_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_rsq_f32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_sat_pk_u8_i16_dpp            vdst:u8x4,           vsrc                                dpp8_sel fi
+v_sin_f16_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_sin_f32_dpp                  vdst,                vsrc                                dpp8_sel fi
+v_sqrt_f16_dpp                 vdst,                vsrc                                dpp8_sel fi
+v_sqrt_f32_dpp                 vdst,                vsrc                                dpp8_sel fi
+v_sub_co_ci_u32_dpp            vdst,      vcc,      vsrc0,     vsrc1,    vcc            dpp8_sel fi
+v_sub_f16_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_sub_f32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_sub_nc_u32_dpp               vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_subrev_co_ci_u32_dpp         vdst,      vcc,      vsrc0,     vsrc1,    vcc            dpp8_sel fi
+v_subrev_f16_dpp               vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_subrev_f32_dpp               vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_subrev_nc_u32_dpp            vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_trunc_f16_dpp                vdst,                vsrc                                dpp8_sel fi
+v_trunc_f32_dpp                vdst,                vsrc                                dpp8_sel fi
+v_xnor_b32_dpp                 vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+v_xor_b32_dpp                  vdst,                vsrc0,     vsrc1                    dpp8_sel fi
+
+
+

DS

+
INSTRUCTION                    DST         SRC0      SRC1      SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————
+ds_add_f32                                 vaddr,    vdata                    offset gds
+ds_add_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_add_src2_f32                            vaddr                              offset gds
+ds_add_src2_u32                            vaddr                              offset gds
+ds_add_src2_u64                            vaddr                              offset gds
+ds_add_u32                                 vaddr,    vdata                    offset gds
+ds_add_u64                                 vaddr,    vdata                    offset gds
+ds_and_b32                                 vaddr,    vdata                    offset gds
+ds_and_b64                                 vaddr,    vdata                    offset gds
+ds_and_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_and_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_and_src2_b32                            vaddr                              offset gds
+ds_and_src2_b64                            vaddr                              offset gds
+ds_append                      vdst                                           offset gds
+ds_bpermute_b32                vdst,       vaddr,    vdata                    offset
+ds_cmpst_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_condxchg32_rtn_b64          vdst,       vaddr,    vdata                    offset gds
+ds_consume                     vdst                                           offset gds
+ds_dec_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_src2_u32                            vaddr                              offset gds
+ds_dec_src2_u64                            vaddr                              offset gds
+ds_dec_u32                                 vaddr,    vdata                    offset gds
+ds_dec_u64                                 vaddr,    vdata                    offset gds
+ds_gws_barrier                             vdata                              offset gds
+ds_gws_init                                vdata                              offset gds
+ds_gws_sema_br                             vdata                              offset gds
+ds_gws_sema_p                                                                 offset gds
+ds_gws_sema_release_all                                                       offset gds
+ds_gws_sema_v                                                                 offset gds
+ds_inc_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_src2_u32                            vaddr                              offset gds
+ds_inc_src2_u64                            vaddr                              offset gds
+ds_inc_u32                                 vaddr,    vdata                    offset gds
+ds_inc_u64                                 vaddr,    vdata                    offset gds
+ds_max_f32                                 vaddr,    vdata                    offset gds
+ds_max_f64                                 vaddr,    vdata                    offset gds
+ds_max_i32                                 vaddr,    vdata                    offset gds
+ds_max_i64                                 vaddr,    vdata                    offset gds
+ds_max_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_src2_f32                            vaddr                              offset gds
+ds_max_src2_f64                            vaddr                              offset gds
+ds_max_src2_i32                            vaddr                              offset gds
+ds_max_src2_i64                            vaddr                              offset gds
+ds_max_src2_u32                            vaddr                              offset gds
+ds_max_src2_u64                            vaddr                              offset gds
+ds_max_u32                                 vaddr,    vdata                    offset gds
+ds_max_u64                                 vaddr,    vdata                    offset gds
+ds_min_f32                                 vaddr,    vdata                    offset gds
+ds_min_f64                                 vaddr,    vdata                    offset gds
+ds_min_i32                                 vaddr,    vdata                    offset gds
+ds_min_i64                                 vaddr,    vdata                    offset gds
+ds_min_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_src2_f32                            vaddr                              offset gds
+ds_min_src2_f64                            vaddr                              offset gds
+ds_min_src2_i32                            vaddr                              offset gds
+ds_min_src2_i64                            vaddr                              offset gds
+ds_min_src2_u32                            vaddr                              offset gds
+ds_min_src2_u64                            vaddr                              offset gds
+ds_min_u32                                 vaddr,    vdata                    offset gds
+ds_min_u64                                 vaddr,    vdata                    offset gds
+ds_mskor_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_nop
+ds_or_b32                                  vaddr,    vdata                    offset gds
+ds_or_b64                                  vaddr,    vdata                    offset gds
+ds_or_rtn_b32                  vdst,       vaddr,    vdata                    offset gds
+ds_or_rtn_b64                  vdst,       vaddr,    vdata                    offset gds
+ds_or_src2_b32                             vaddr                              offset gds
+ds_or_src2_b64                             vaddr                              offset gds
+ds_ordered_count               vdst,       vaddr                              offset gds
+ds_permute_b32                 vdst,       vaddr,    vdata                    offset
+ds_read2_b32                   vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2_b64                   vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b32               vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b64               vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read_addtid_b32             vdst                                           offset gds
+ds_read_b128                   vdst,       vaddr                              offset gds
+ds_read_b32                    vdst,       vaddr                              offset gds
+ds_read_b64                    vdst,       vaddr                              offset gds
+ds_read_b96                    vdst,       vaddr                              offset gds
+ds_read_i16                    vdst,       vaddr                              offset gds
+ds_read_i8                     vdst,       vaddr                              offset gds
+ds_read_i8_d16                 vdst,       vaddr                              offset gds
+ds_read_i8_d16_hi              vdst,       vaddr                              offset gds
+ds_read_u16                    vdst,       vaddr                              offset gds
+ds_read_u16_d16                vdst,       vaddr                              offset gds
+ds_read_u16_d16_hi             vdst,       vaddr                              offset gds
+ds_read_u8                     vdst,       vaddr                              offset gds
+ds_read_u8_d16                 vdst,       vaddr                              offset gds
+ds_read_u8_d16_hi              vdst,       vaddr                              offset gds
+ds_rsub_rtn_u32                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_rtn_u64                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_src2_u32                           vaddr                              offset gds
+ds_rsub_src2_u64                           vaddr                              offset gds
+ds_rsub_u32                                vaddr,    vdata                    offset gds
+ds_rsub_u64                                vaddr,    vdata                    offset gds
+ds_sub_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_src2_u32                            vaddr                              offset gds
+ds_sub_src2_u64                            vaddr                              offset gds
+ds_sub_u32                                 vaddr,    vdata                    offset gds
+ds_sub_u64                                 vaddr,    vdata                    offset gds
+ds_swizzle_b32                 vdst,       vaddr                              pattern gds
+ds_wrap_rtn_b32                vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_write2_b32                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2_b64                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b32                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b64                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write_addtid_b32                        vdata                              offset gds
+ds_write_b128                              vaddr,    vdata                    offset gds
+ds_write_b16                               vaddr,    vdata                    offset gds
+ds_write_b16_d16_hi                        vaddr,    vdata                    offset gds
+ds_write_b32                               vaddr,    vdata                    offset gds
+ds_write_b64                               vaddr,    vdata                    offset gds
+ds_write_b8                                vaddr,    vdata                    offset gds
+ds_write_b8_d16_hi                         vaddr,    vdata                    offset gds
+ds_write_b96                               vaddr,    vdata                    offset gds
+ds_write_src2_b32                          vaddr                              offset gds
+ds_write_src2_b64                          vaddr                              offset gds
+ds_wrxchg2_rtn_b32             vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2_rtn_b64             vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b32         vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b64         vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg_rtn_b32              vdst,       vaddr,    vdata                    offset gds
+ds_wrxchg_rtn_b64              vdst,       vaddr,    vdata                    offset gds
+ds_xor_b32                                 vaddr,    vdata                    offset gds
+ds_xor_b64                                 vaddr,    vdata                    offset gds
+ds_xor_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_src2_b32                            vaddr                              offset gds
+ds_xor_src2_b64                            vaddr                              offset gds
+
+
+

EXP

+
INSTRUCTION                    DST       SRC0      SRC1      SRC2      SRC3           MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————
+exp                            tgt,      vsrc0,    vsrc1,    vsrc2,    vsrc3          done compr vm
+
+
+

FLAT

+
INSTRUCTION                    DST           SRC0      SRC1         SRC2       MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+flat_atomic_add                vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_add_x2             vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_and                vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_and_x2             vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_cmpswap            vdst:opt,     vaddr,    vdata:b32x2             offset11 glc slc
+flat_atomic_cmpswap_x2         vdst:opt,     vaddr,    vdata:b64x2             offset11 glc slc
+flat_atomic_dec                vdst:opt:u32, vaddr,    vdata:u32               offset11 glc slc
+flat_atomic_dec_x2             vdst:opt:u64, vaddr,    vdata:u64               offset11 glc slc
+flat_atomic_fcmpswap           vdst:opt:f32, vaddr,    vdata:f32x2             offset11 glc slc
+flat_atomic_fcmpswap_x2        vdst:opt:f64, vaddr,    vdata:f64x2             offset11 glc slc
+flat_atomic_fmax               vdst:opt:f32, vaddr,    vdata:f32               offset11 glc slc
+flat_atomic_fmax_x2            vdst:opt:f64, vaddr,    vdata:f64               offset11 glc slc
+flat_atomic_fmin               vdst:opt:f32, vaddr,    vdata:f32               offset11 glc slc
+flat_atomic_fmin_x2            vdst:opt:f64, vaddr,    vdata:f64               offset11 glc slc
+flat_atomic_inc                vdst:opt:u32, vaddr,    vdata:u32               offset11 glc slc
+flat_atomic_inc_x2             vdst:opt:u64, vaddr,    vdata:u64               offset11 glc slc
+flat_atomic_or                 vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_or_x2              vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_smax               vdst:opt:i32, vaddr,    vdata:i32               offset11 glc slc
+flat_atomic_smax_x2            vdst:opt:i64, vaddr,    vdata:i64               offset11 glc slc
+flat_atomic_smin               vdst:opt:i32, vaddr,    vdata:i32               offset11 glc slc
+flat_atomic_smin_x2            vdst:opt:i64, vaddr,    vdata:i64               offset11 glc slc
+flat_atomic_sub                vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_sub_x2             vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_swap               vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_swap_x2            vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_umax               vdst:opt:u32, vaddr,    vdata:u32               offset11 glc slc
+flat_atomic_umax_x2            vdst:opt:u64, vaddr,    vdata:u64               offset11 glc slc
+flat_atomic_umin               vdst:opt:u32, vaddr,    vdata:u32               offset11 glc slc
+flat_atomic_umin_x2            vdst:opt:u64, vaddr,    vdata:u64               offset11 glc slc
+flat_atomic_xor                vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_atomic_xor_x2             vdst:opt,     vaddr,    vdata                   offset11 glc slc
+flat_load_dword                vdst,         vaddr                             offset11 glc slc dlc
+flat_load_dwordx2              vdst,         vaddr                             offset11 glc slc dlc
+flat_load_dwordx3              vdst,         vaddr                             offset11 glc slc dlc
+flat_load_dwordx4              vdst,         vaddr                             offset11 glc slc dlc
+flat_load_sbyte                vdst,         vaddr                             offset11 glc slc dlc
+flat_load_sbyte_d16            vdst,         vaddr                             offset11 glc slc dlc
+flat_load_sbyte_d16_hi         vdst,         vaddr                             offset11 glc slc dlc
+flat_load_short_d16            vdst,         vaddr                             offset11 glc slc dlc
+flat_load_short_d16_hi         vdst,         vaddr                             offset11 glc slc dlc
+flat_load_sshort               vdst,         vaddr                             offset11 glc slc dlc
+flat_load_ubyte                vdst,         vaddr                             offset11 glc slc dlc
+flat_load_ubyte_d16            vdst,         vaddr                             offset11 glc slc dlc
+flat_load_ubyte_d16_hi         vdst,         vaddr                             offset11 glc slc dlc
+flat_load_ushort               vdst,         vaddr                             offset11 glc slc dlc
+flat_store_byte                              vaddr,    vdata                   offset11 glc slc dlc
+flat_store_byte_d16_hi                       vaddr,    vdata                   offset11 glc slc dlc
+flat_store_dword                             vaddr,    vdata                   offset11 glc slc dlc
+flat_store_dwordx2                           vaddr,    vdata                   offset11 glc slc dlc
+flat_store_dwordx3                           vaddr,    vdata                   offset11 glc slc dlc
+flat_store_dwordx4                           vaddr,    vdata                   offset11 glc slc dlc
+flat_store_short                             vaddr,    vdata                   offset11 glc slc dlc
+flat_store_short_d16_hi                      vaddr,    vdata                   offset11 glc slc dlc
+global_atomic_add              vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_add_x2           vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_and              vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_and_x2           vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_cmpswap          vdst:opt,     vaddr,    vdata:b32x2, saddr      offset12s glc slc
+global_atomic_cmpswap_x2       vdst:opt,     vaddr,    vdata:b64x2, saddr      offset12s glc slc
+global_atomic_dec              vdst:opt:u32, vaddr,    vdata:u32,   saddr      offset12s glc slc
+global_atomic_dec_x2           vdst:opt:u64, vaddr,    vdata:u64,   saddr      offset12s glc slc
+global_atomic_fmax             vdst:opt:f32, vaddr,    vdata:f32,   saddr      offset12s glc slc
+global_atomic_fmax_x2          vdst:opt:f64, vaddr,    vdata:f64,   saddr      offset12s glc slc
+global_atomic_fmin             vdst:opt:f32, vaddr,    vdata:f32,   saddr      offset12s glc slc
+global_atomic_fmin_x2          vdst:opt:f64, vaddr,    vdata:f64,   saddr      offset12s glc slc
+global_atomic_inc              vdst:opt:u32, vaddr,    vdata:u32,   saddr      offset12s glc slc
+global_atomic_inc_x2           vdst:opt:u64, vaddr,    vdata:u64,   saddr      offset12s glc slc
+global_atomic_or               vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_or_x2            vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_smax             vdst:opt:i32, vaddr,    vdata:i32,   saddr      offset12s glc slc
+global_atomic_smax_x2          vdst:opt:i64, vaddr,    vdata:i64,   saddr      offset12s glc slc
+global_atomic_smin             vdst:opt:i32, vaddr,    vdata:i32,   saddr      offset12s glc slc
+global_atomic_smin_x2          vdst:opt:i64, vaddr,    vdata:i64,   saddr      offset12s glc slc
+global_atomic_sub              vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_sub_x2           vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_swap             vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_swap_x2          vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_umax             vdst:opt:u32, vaddr,    vdata:u32,   saddr      offset12s glc slc
+global_atomic_umax_x2          vdst:opt:u64, vaddr,    vdata:u64,   saddr      offset12s glc slc
+global_atomic_umin             vdst:opt:u32, vaddr,    vdata:u32,   saddr      offset12s glc slc
+global_atomic_umin_x2          vdst:opt:u64, vaddr,    vdata:u64,   saddr      offset12s glc slc
+global_atomic_xor              vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_atomic_xor_x2           vdst:opt,     vaddr,    vdata,       saddr      offset12s glc slc
+global_load_dword              vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_dwordx2            vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_dwordx3            vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_dwordx4            vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_sbyte              vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_sbyte_d16          vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_sbyte_d16_hi       vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_short_d16          vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_short_d16_hi       vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_sshort             vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_ubyte              vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_ubyte_d16          vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_ubyte_d16_hi       vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_load_ushort             vdst,         vaddr,    saddr                   offset12s glc slc dlc
+global_store_byte                            vaddr,    vdata,       saddr      offset12s glc slc dlc
+global_store_byte_d16_hi                     vaddr,    vdata,       saddr      offset12s glc slc dlc
+global_store_dword                           vaddr,    vdata,       saddr      offset12s glc slc dlc
+global_store_dwordx2                         vaddr,    vdata,       saddr      offset12s glc slc dlc
+global_store_dwordx3                         vaddr,    vdata,       saddr      offset12s glc slc dlc
+global_store_dwordx4                         vaddr,    vdata,       saddr      offset12s glc slc dlc
+global_store_short                           vaddr,    vdata,       saddr      offset12s glc slc dlc
+global_store_short_d16_hi                    vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_load_dword             vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_dwordx2           vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_dwordx3           vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_dwordx4           vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_sbyte             vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_sbyte_d16         vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_sbyte_d16_hi      vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_short_d16         vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_short_d16_hi      vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_sshort            vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_ubyte             vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_ubyte_d16         vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_ubyte_d16_hi      vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_load_ushort            vdst,         vaddr,    saddr                   offset12s glc slc dlc
+scratch_store_byte                           vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_store_byte_d16_hi                    vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_store_dword                          vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_store_dwordx2                        vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_store_dwordx3                        vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_store_dwordx4                        vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_store_short                          vaddr,    vdata,       saddr      offset12s glc slc dlc
+scratch_store_short_d16_hi                   vaddr,    vdata,       saddr      offset12s glc slc dlc
+
+
+

MIMG

+
INSTRUCTION                 DST   SRC0       SRC1   SRC2   MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————
+image_atomic_add                  vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_and                  vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_cmpswap              vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_dec                  vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_fcmpswap             vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_fmax                 vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_fmin                 vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_inc                  vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_or                   vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_smax                 vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_smin                 vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_sub                  vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_swap                 vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_umax                 vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_umin                 vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_atomic_xor                  vdata:dst, vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_gather4               vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_b             vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc lwe d16
+image_gather4_b_cl          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc lwe d16
+image_gather4_b_cl_o        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc lwe d16
+image_gather4_b_o           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc lwe d16
+image_gather4_c             vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_b           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_b_cl        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_b_cl_o      vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_b_o         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_cl          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_cl_o        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_l           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_l_o         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_lz          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_lz_o        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_c_o           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_cl            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_cl_o          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_l             vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_l_o           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_lz            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_lz_o          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_gather4_o             vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 lwe d16
+image_get_lod               vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe
+image_get_resinfo           vdst, vaddr,     srsrc         dmask dim unorm glc slc dlc a16 tfe lwe
+image_load                  vdst, vaddr,     srsrc         dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_load_mip              vdst, vaddr,     srsrc         dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_load_mip_pck          vdst, vaddr,     srsrc         dmask dim unorm glc slc dlc a16 tfe lwe
+image_load_mip_pck_sgn      vdst, vaddr,     srsrc         dmask dim unorm glc slc dlc a16 tfe lwe
+image_load_pck              vdst, vaddr,     srsrc         dmask dim unorm glc slc dlc a16 tfe lwe
+image_load_pck_sgn          vdst, vaddr,     srsrc         dmask dim unorm glc slc dlc a16 tfe lwe
+image_sample                vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_b              vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc tfe lwe d16
+image_sample_b_cl           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc tfe lwe d16
+image_sample_b_cl_o         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc tfe lwe d16
+image_sample_b_o            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc tfe lwe d16
+image_sample_c              vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_b            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_b_cl         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_b_cl_o       vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_b_o          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd_cl        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd_cl_g16    vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd_cl_o      vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd_cl_o_g16  vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd_g16       vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd_o         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cd_o_g16     vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cl           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_cl_o         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d_cl         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d_cl_g16     vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d_cl_o       vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d_cl_o_g16   vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d_g16        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d_o          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_d_o_g16      vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_l            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_l_o          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_lz           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_lz_o         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_c_o            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd             vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd_cl          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd_cl_g16      vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd_cl_o        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd_cl_o_g16    vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd_g16         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd_o           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cd_o_g16       vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cl             vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_cl_o           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d              vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d_cl           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d_cl_g16       vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d_cl_o         vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d_cl_o_g16     vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d_g16          vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d_o            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_d_o_g16        vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_l              vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_l_o            vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_lz             vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_lz_o           vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_sample_o              vdst, vaddr,     srsrc, ssamp  dmask dim unorm glc slc dlc a16 tfe lwe d16
+image_store                       vdata,     vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe d16
+image_store_mip                   vdata,     vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe d16
+image_store_mip_pck               vdata,     vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+image_store_pck                   vdata,     vaddr, srsrc  dmask dim unorm glc slc dlc a16 lwe
+
+
+

+MTBUF

+
+INSTRUCTION                    DST   SRC0   SRC1   SRC2    SRC3     MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————————
+tbuffer_load_format_d16_x      vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_load_format_d16_xy     vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_load_format_d16_xyz    vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_load_format_d16_xyzw   vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_load_format_x          vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_load_format_xy         vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_load_format_xyz        vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_load_format_xyzw       vdst, vaddr, srsrc, soffset          ufmt idxen offen offset12 glc slc dlc
+tbuffer_store_format_d16_x           vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xy          vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyz         vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyzw        vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+tbuffer_store_format_x               vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+tbuffer_store_format_xy              vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+tbuffer_store_format_xyz             vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+tbuffer_store_format_xyzw            vdata, vaddr, srsrc,  soffset  ufmt idxen offen offset12 glc slc
+
+
+

+MUBUF

+
+INSTRUCTION                   DST   SRC0             SRC1   SRC2    SRC3     MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————————————————
+buffer_atomic_add                   vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_add_x2                vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and                   vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and_x2                vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap               vdata:dst:b32x2, vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap_x2            vdata:dst:b64x2, vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec                   vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec_x2                vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_fcmpswap              vdata:dst:f32x2, vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_fcmpswap_x2           vdata:dst:f64x2, vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_fmax                  vdata:dst:f32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_fmax_x2               vdata:dst:f64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_fmin                  vdata:dst:f32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_fmin_x2               vdata:dst:f64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc                   vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc_x2                vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or                    vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or_x2                 vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax                  vdata:dst:i32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax_x2               vdata:dst:i64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin                  vdata:dst:i32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin_x2               vdata:dst:i64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub                   vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub_x2                vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap                  vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap_x2               vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax                  vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax_x2               vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin                  vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin_x2               vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor                   vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor_x2                vdata:dst,       vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_gl0_inv
+buffer_gl1_inv
+buffer_load_dword             vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc lds
+buffer_load_dwordx2           vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_dwordx3           vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_dwordx4           vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_format_d16_x      vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_format_d16_xy     vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_format_d16_xyz    vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_format_d16_xyzw   vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_format_x          vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc lds
+buffer_load_format_xy         vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_format_xyz        vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_format_xyzw       vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_sbyte             vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc lds
+buffer_load_sbyte_d16         vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_sbyte_d16_hi      vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_short_d16         vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_short_d16_hi      vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_sshort            vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc lds
+buffer_load_ubyte             vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc lds
+buffer_load_ubyte_d16         vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_ubyte_d16_hi      vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc
+buffer_load_ushort            vdst, vaddr,           srsrc, soffset          idxen offen offset12 glc slc dlc lds
+buffer_store_byte                   vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_byte_d16_hi            vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dword                  vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx2                vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx3                vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx4                vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_x           vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xy          vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyz         vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyzw        vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_x               vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xy              vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyz             vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyzw            vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_short                  vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_short_d16_hi           vdata,           vaddr, srsrc,  soffset  idxen offen offset12 glc slc
+
+
+

+SDWA

+
+INSTRUCTION               DST0       DST1 SRC0        SRC1       SRC2  MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_co_ci_u32_sdwa      vdst,      vcc, src0:m,     src1:m,    vcc   clamp dst_sel dst_unused src0_sel src1_sel
+v_add_f16_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_add_f32_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_add_nc_u32_sdwa         vdst,           src0:m,     src1:m           clamp dst_sel dst_unused src0_sel src1_sel
+v_and_b32_sdwa            vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_ashrrev_i32_sdwa        vdst,           src0:m:u32, src1:m           dst_sel dst_unused src0_sel src1_sel
+v_bfrev_b32_sdwa          vdst,           src:m                        dst_sel dst_unused src0_sel
+v_ceil_f16_sdwa           vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_ceil_f32_sdwa           vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cmp_class_f16_sdwa      sdst,           src0:m,     src1:m:b32       src0_sel src1_sel
+v_cmp_class_f32_sdwa      sdst,           src0:m,     src1:m:b32       src0_sel src1_sel
+v_cmp_eq_f16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_eq_f32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_eq_i16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_eq_i32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_eq_u16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_eq_u32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_f_f16_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_f_f32_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_f_i32_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_f_u32_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ge_f16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ge_f32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ge_i16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ge_i32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ge_u16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ge_u32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_gt_f16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_gt_f32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_gt_i16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_gt_i32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_gt_u16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_gt_u32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_le_f16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_le_f32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_le_i16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_le_i32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_le_u16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_le_u32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lg_f16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lg_f32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lt_f16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lt_f32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lt_i16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lt_i32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lt_u16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_lt_u32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ne_i16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ne_i32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ne_u16_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ne_u32_sdwa         sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_neq_f16_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_neq_f32_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nge_f16_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nge_f32_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ngt_f16_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_ngt_f32_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nle_f16_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nle_f32_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nlg_f16_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nlg_f32_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nlt_f16_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_nlt_f32_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_o_f16_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_o_f32_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_t_i32_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_t_u32_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_tru_f16_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_tru_f32_sdwa        sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_u_f16_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmp_u_f32_sdwa          sdst,           src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_class_f16_sdwa                     src0:m,     src1:m:b32       src0_sel src1_sel
+v_cmpx_class_f32_sdwa                     src0:m,     src1:m:b32       src0_sel src1_sel
+v_cmpx_eq_f16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_eq_f32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_eq_i16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_eq_i32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_eq_u16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_eq_u32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_f_f16_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_f_f32_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_f_i32_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_f_u32_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ge_f16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ge_f32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ge_i16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ge_i32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ge_u16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ge_u32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_gt_f16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_gt_f32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_gt_i16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_gt_i32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_gt_u16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_gt_u32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_le_f16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_le_f32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_le_i16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_le_i32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_le_u16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_le_u32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lg_f16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lg_f32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lt_f16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lt_f32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lt_i16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lt_i32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lt_u16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_lt_u32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ne_i16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ne_i32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ne_u16_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ne_u32_sdwa                        src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_neq_f16_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_neq_f32_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nge_f16_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nge_f32_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ngt_f16_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_ngt_f32_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nle_f16_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nle_f32_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nlg_f16_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nlg_f32_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nlt_f16_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_nlt_f32_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_o_f16_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_o_f32_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_t_i32_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_t_u32_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_tru_f16_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_tru_f32_sdwa                       src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_u_f16_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cmpx_u_f32_sdwa                         src0:m,     src1:m           src0_sel src1_sel
+v_cndmask_b32_sdwa        vdst,           src0:m,     src1:m,    vcc   dst_sel dst_unused src0_sel src1_sel
+v_cos_f16_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cos_f32_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_f32_sdwa        vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_i16_sdwa        vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_u16_sdwa        vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_f16_sdwa        vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_i32_sdwa        vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_u32_sdwa        vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte0_sdwa     vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte1_sdwa     vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte2_sdwa     vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte3_sdwa     vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_flr_i32_f32_sdwa    vdst,           src:m                        dst_sel dst_unused src0_sel
+v_cvt_i16_f16_sdwa        vdst,           src:m                        clamp dst_sel dst_unused src0_sel
+v_cvt_i32_f32_sdwa        vdst,           src:m                        clamp dst_sel dst_unused src0_sel
+v_cvt_norm_i16_f16_sdwa   vdst,           src:m                        clamp dst_sel dst_unused src0_sel
+v_cvt_norm_u16_f16_sdwa   vdst,           src:m                        clamp dst_sel dst_unused src0_sel
+v_cvt_off_f32_i4_sdwa     vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_cvt_rpi_i32_f32_sdwa    vdst,           src:m                        dst_sel dst_unused src0_sel
+v_cvt_u16_f16_sdwa        vdst,           src:m                        clamp dst_sel dst_unused src0_sel
+v_cvt_u32_f32_sdwa        vdst,           src:m                        clamp dst_sel dst_unused src0_sel
+v_exp_f16_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_exp_f32_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_ffbh_i32_sdwa           vdst,           src:m                        dst_sel dst_unused src0_sel
+v_ffbh_u32_sdwa           vdst,           src:m                        dst_sel dst_unused src0_sel
+v_ffbl_b32_sdwa           vdst,           src:m                        dst_sel dst_unused src0_sel
+v_floor_f16_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_floor_f32_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_fract_f16_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_fract_f32_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_frexp_exp_i16_f16_sdwa  vdst,           src:m                        dst_sel dst_unused src0_sel
+v_frexp_exp_i32_f32_sdwa  vdst,           src:m                        dst_sel dst_unused src0_sel
+v_frexp_mant_f16_sdwa     vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_frexp_mant_f32_sdwa     vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_ldexp_f16_sdwa          vdst,           src0:m,     src1:m:i16       clamp omod dst_sel dst_unused src0_sel src1_sel
+v_log_f16_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_log_f32_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_lshlrev_b32_sdwa        vdst,           src0:m:u32, src1:m           dst_sel dst_unused src0_sel src1_sel
+v_lshrrev_b32_sdwa        vdst,           src0:m:u32, src1:m           dst_sel dst_unused src0_sel src1_sel
+v_max_f16_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_max_f32_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_max_i32_sdwa            vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_max_u32_sdwa            vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_min_f16_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_min_f32_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_min_i32_sdwa            vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_min_u32_sdwa            vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_mov_b32_sdwa            vdst,           src:m                        dst_sel dst_unused src0_sel
+v_movreld_b32_sdwa        vdst,           src:m                        dst_sel dst_unused src0_sel
+v_movrels_b32_sdwa        vdst,           vsrc:m                       dst_sel dst_unused src0_sel
+v_movrelsd_2_b32_sdwa     vdst,           vsrc:m                       dst_sel dst_unused src0_sel
+v_movrelsd_b32_sdwa       vdst,           vsrc:m                       dst_sel dst_unused src0_sel
+v_mul_f16_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_f32_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_i32_i24_sdwa     vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_u32_u24_sdwa     vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_mul_i32_i24_sdwa        vdst,           src0:m,     src1:m           clamp dst_sel dst_unused src0_sel src1_sel
+v_mul_legacy_f32_sdwa     vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_u32_u24_sdwa        vdst,           src0:m,     src1:m           clamp dst_sel dst_unused src0_sel src1_sel
+v_not_b32_sdwa            vdst,           src:m                        dst_sel dst_unused src0_sel
+v_or_b32_sdwa             vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_rcp_f16_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_rcp_f32_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_rcp_iflag_f32_sdwa      vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_rndne_f16_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_rndne_f32_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_rsq_f16_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_rsq_f32_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_sat_pk_u8_i16_sdwa      vdst:u8x4,      src:m                        dst_sel dst_unused src0_sel
+v_sin_f16_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_sin_f32_sdwa            vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f16_sdwa           vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f32_sdwa           vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_sub_co_ci_u32_sdwa      vdst,      vcc, src0:m,     src1:m,    vcc   clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_f16_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_sub_f32_sdwa            vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_sub_nc_u32_sdwa         vdst,           src0:m,     src1:m           clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_co_ci_u32_sdwa   vdst,      vcc, src0:m,     src1:m,    vcc   clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_f16_sdwa         vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_subrev_f32_sdwa         vdst,           src0:m,     src1:m           clamp omod dst_sel dst_unused src0_sel src1_sel
+v_subrev_nc_u32_sdwa      vdst,           src0:m,     src1:m           clamp dst_sel dst_unused src0_sel src1_sel
+v_trunc_f16_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_trunc_f32_sdwa          vdst,           src:m                        clamp omod dst_sel dst_unused src0_sel
+v_xnor_b32_sdwa           vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+v_xor_b32_sdwa            vdst,           src0:m,     src1:m           dst_sel dst_unused src0_sel src1_sel
+
+
+

SMEM

+
INSTRUCTION                    DST       SRC0             SRC1      SRC2           MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————
+s_atc_probe                              probe,           sbase,    soffset
+s_atc_probe_buffer                       probe,           sbase,    soffset
+s_atomic_add                             sdata:dst,       sbase,    soffset        glc
+s_atomic_add_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_and                             sdata:dst,       sbase,    soffset        glc
+s_atomic_and_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_cmpswap                         sdata:dst:b32x2, sbase,    soffset        glc
+s_atomic_cmpswap_x2                      sdata:dst:b64x2, sbase,    soffset        glc
+s_atomic_dec                             sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_dec_x2                          sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_inc                             sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_inc_x2                          sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_or                              sdata:dst,       sbase,    soffset        glc
+s_atomic_or_x2                           sdata:dst,       sbase,    soffset        glc
+s_atomic_smax                            sdata:dst:i32,   sbase,    soffset        glc
+s_atomic_smax_x2                         sdata:dst:i64,   sbase,    soffset        glc
+s_atomic_smin                            sdata:dst:i32,   sbase,    soffset        glc
+s_atomic_smin_x2                         sdata:dst:i64,   sbase,    soffset        glc
+s_atomic_sub                             sdata:dst,       sbase,    soffset        glc
+s_atomic_sub_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_swap                            sdata:dst,       sbase,    soffset        glc
+s_atomic_swap_x2                         sdata:dst,       sbase,    soffset        glc
+s_atomic_umax                            sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_umax_x2                         sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_umin                            sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_umin_x2                         sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_xor                             sdata:dst,       sbase,    soffset        glc
+s_atomic_xor_x2                          sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_add                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_add_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_and                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_and_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_cmpswap                  sdata:dst:b32x2, sbase,    soffset        glc
+s_buffer_atomic_cmpswap_x2               sdata:dst:b64x2, sbase,    soffset        glc
+s_buffer_atomic_dec                      sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_dec_x2                   sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_inc                      sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_inc_x2                   sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_or                       sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_or_x2                    sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_smax                     sdata:dst:i32,   sbase,    soffset        glc
+s_buffer_atomic_smax_x2                  sdata:dst:i64,   sbase,    soffset        glc
+s_buffer_atomic_smin                     sdata:dst:i32,   sbase,    soffset        glc
+s_buffer_atomic_smin_x2                  sdata:dst:i64,   sbase,    soffset        glc
+s_buffer_atomic_sub                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_sub_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_swap                     sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_swap_x2                  sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_umax                     sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_umax_x2                  sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_umin                     sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_umin_x2                  sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_xor                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_xor_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_load_dword            sdst,     sbase,           soffset                  glc dlc
+s_buffer_load_dwordx16         sdst,     sbase,           soffset                  glc dlc
+s_buffer_load_dwordx2          sdst,     sbase,           soffset                  glc dlc
+s_buffer_load_dwordx4          sdst,     sbase,           soffset                  glc dlc
+s_buffer_load_dwordx8          sdst,     sbase,           soffset                  glc dlc
+s_buffer_store_dword                     sdata,           sbase,    soffset        glc
+s_buffer_store_dwordx2                   sdata,           sbase,    soffset        glc
+s_buffer_store_dwordx4                   sdata,           sbase,    soffset        glc
+s_dcache_discard                         sbase,           soffset
+s_dcache_discard_x2                      sbase,           soffset
+s_dcache_inv
+s_dcache_wb
+s_get_waveid_in_workgroup      sdst
+s_gl1_inv
+s_load_dword                   sdst,     sbase,           soffset                  glc dlc
+s_load_dwordx16                sdst,     sbase,           soffset                  glc dlc
+s_load_dwordx2                 sdst,     sbase,           soffset                  glc dlc
+s_load_dwordx4                 sdst,     sbase,           soffset                  glc dlc
+s_load_dwordx8                 sdst,     sbase,           soffset                  glc dlc
+s_memrealtime                  sdst:b64
+s_memtime                      sdst:b64
+s_scratch_load_dword           sdst,     sbase,           soffset                  glc dlc
+s_scratch_load_dwordx2         sdst,     sbase,           soffset                  glc dlc
+s_scratch_load_dwordx4         sdst,     sbase,           soffset                  glc dlc
+s_scratch_store_dword                    sdata,           sbase,    soffset        glc
+s_scratch_store_dwordx2                  sdata,           sbase,    soffset        glc
+s_scratch_store_dwordx4                  sdata,           sbase,    soffset        glc
+s_store_dword                            sdata,           sbase,    soffset        glc
+s_store_dwordx2                          sdata,           sbase,    soffset        glc
+s_store_dwordx4                          sdata,           sbase,    soffset        glc
+
+
+

SOP1

+
INSTRUCTION                    DST       SRC
+———————————————————————————————————————————————————
+s_abs_i32                      sdst,     ssrc
+s_and_saveexec_b32             sdst,     ssrc
+s_and_saveexec_b64             sdst,     ssrc
+s_andn1_saveexec_b32           sdst,     ssrc
+s_andn1_saveexec_b64           sdst,     ssrc
+s_andn1_wrexec_b32             sdst,     ssrc
+s_andn1_wrexec_b64             sdst,     ssrc
+s_andn2_saveexec_b32           sdst,     ssrc
+s_andn2_saveexec_b64           sdst,     ssrc
+s_andn2_wrexec_b32             sdst,     ssrc
+s_andn2_wrexec_b64             sdst,     ssrc
+s_bcnt0_i32_b32                sdst,     ssrc
+s_bcnt0_i32_b64                sdst,     ssrc
+s_bcnt1_i32_b32                sdst,     ssrc
+s_bcnt1_i32_b64                sdst,     ssrc
+s_bitreplicate_b64_b32         sdst,     ssrc
+s_bitset0_b32                  sdst,     ssrc
+s_bitset0_b64                  sdst,     ssrc:b32
+s_bitset1_b32                  sdst,     ssrc
+s_bitset1_b64                  sdst,     ssrc:b32
+s_brev_b32                     sdst,     ssrc
+s_brev_b64                     sdst,     ssrc
+s_cmov_b32                     sdst,     ssrc
+s_cmov_b64                     sdst,     ssrc
+s_ff0_i32_b32                  sdst,     ssrc
+s_ff0_i32_b64                  sdst,     ssrc
+s_ff1_i32_b32                  sdst,     ssrc
+s_ff1_i32_b64                  sdst,     ssrc
+s_flbit_i32                    sdst,     ssrc
+s_flbit_i32_b32                sdst,     ssrc
+s_flbit_i32_b64                sdst,     ssrc
+s_flbit_i32_i64                sdst,     ssrc
+s_getpc_b64                    sdst
+s_mov_b32                      sdst,     ssrc
+s_mov_b64                      sdst,     ssrc
+s_movreld_b32                  sdst,     ssrc
+s_movreld_b64                  sdst,     ssrc
+s_movrels_b32                  sdst,     ssrc
+s_movrels_b64                  sdst,     ssrc
+s_movrelsd_2_b32               sdst,     ssrc
+s_nand_saveexec_b32            sdst,     ssrc
+s_nand_saveexec_b64            sdst,     ssrc
+s_nor_saveexec_b32             sdst,     ssrc
+s_nor_saveexec_b64             sdst,     ssrc
+s_not_b32                      sdst,     ssrc
+s_not_b64                      sdst,     ssrc
+s_or_saveexec_b32              sdst,     ssrc
+s_or_saveexec_b64              sdst,     ssrc
+s_orn1_saveexec_b32            sdst,     ssrc
+s_orn1_saveexec_b64            sdst,     ssrc
+s_orn2_saveexec_b32            sdst,     ssrc
+s_orn2_saveexec_b64            sdst,     ssrc
+s_quadmask_b32                 sdst,     ssrc
+s_quadmask_b64                 sdst,     ssrc
+s_rfe_b64                                ssrc
+s_setpc_b64                              ssrc
+s_sext_i32_i16                 sdst,     ssrc
+s_sext_i32_i8                  sdst,     ssrc
+s_swappc_b64                   sdst,     ssrc
+s_wqm_b32                      sdst,     ssrc
+s_wqm_b64                      sdst,     ssrc
+s_xnor_saveexec_b32            sdst,     ssrc
+s_xnor_saveexec_b64            sdst,     ssrc
+s_xor_saveexec_b32             sdst,     ssrc
+s_xor_saveexec_b64             sdst,     ssrc
+
+
+

SOP2

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+s_absdiff_i32                  sdst,     ssrc0,     ssrc1
+s_add_i32                      sdst,     ssrc0,     ssrc1
+s_add_u32                      sdst,     ssrc0,     ssrc1
+s_addc_u32                     sdst,     ssrc0,     ssrc1
+s_and_b32                      sdst,     ssrc0,     ssrc1
+s_and_b64                      sdst,     ssrc0,     ssrc1
+s_andn2_b32                    sdst,     ssrc0,     ssrc1
+s_andn2_b64                    sdst,     ssrc0,     ssrc1
+s_ashr_i32                     sdst,     ssrc0,     ssrc1:u32
+s_ashr_i64                     sdst,     ssrc0,     ssrc1:u32
+s_bfe_i32                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_i64                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_u32                      sdst,     ssrc0,     ssrc1
+s_bfe_u64                      sdst,     ssrc0,     ssrc1:u32
+s_bfm_b32                      sdst,     ssrc0,     ssrc1
+s_bfm_b64                      sdst,     ssrc0:b32, ssrc1:b32
+s_cselect_b32                  sdst,     ssrc0,     ssrc1
+s_cselect_b64                  sdst,     ssrc0,     ssrc1
+s_lshl1_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl2_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl3_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl4_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshl_b64                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b64                     sdst,     ssrc0,     ssrc1:u32
+s_max_i32                      sdst,     ssrc0,     ssrc1
+s_max_u32                      sdst,     ssrc0,     ssrc1
+s_min_i32                      sdst,     ssrc0,     ssrc1
+s_min_u32                      sdst,     ssrc0,     ssrc1
+s_mul_hi_i32                   sdst,     ssrc0,     ssrc1
+s_mul_hi_u32                   sdst,     ssrc0,     ssrc1
+s_mul_i32                      sdst,     ssrc0,     ssrc1
+s_nand_b32                     sdst,     ssrc0,     ssrc1
+s_nand_b64                     sdst,     ssrc0,     ssrc1
+s_nor_b32                      sdst,     ssrc0,     ssrc1
+s_nor_b64                      sdst,     ssrc0,     ssrc1
+s_or_b32                       sdst,     ssrc0,     ssrc1
+s_or_b64                       sdst,     ssrc0,     ssrc1
+s_orn2_b32                     sdst,     ssrc0,     ssrc1
+s_orn2_b64                     sdst,     ssrc0,     ssrc1
+s_pack_hh_b32_b16              sdst,     ssrc0:b32, ssrc1:b32
+s_pack_lh_b32_b16              sdst,     ssrc0,     ssrc1:b32
+s_pack_ll_b32_b16              sdst,     ssrc0,     ssrc1
+s_sub_i32                      sdst,     ssrc0,     ssrc1
+s_sub_u32                      sdst,     ssrc0,     ssrc1
+s_subb_u32                     sdst,     ssrc0,     ssrc1
+s_xnor_b32                     sdst,     ssrc0,     ssrc1
+s_xnor_b64                     sdst,     ssrc0,     ssrc1
+s_xor_b32                      sdst,     ssrc0,     ssrc1
+s_xor_b64                      sdst,     ssrc0,     ssrc1
+
+
+

SOPC

+
INSTRUCTION                    SRC0      SRC1
+———————————————————————————————————————————————————
+s_bitcmp0_b32                  ssrc0,    ssrc1
+s_bitcmp0_b64                  ssrc0,    ssrc1:u32
+s_bitcmp1_b32                  ssrc0,    ssrc1
+s_bitcmp1_b64                  ssrc0,    ssrc1:u32
+s_cmp_eq_i32                   ssrc0,    ssrc1
+s_cmp_eq_u32                   ssrc0,    ssrc1
+s_cmp_eq_u64                   ssrc0,    ssrc1
+s_cmp_ge_i32                   ssrc0,    ssrc1
+s_cmp_ge_u32                   ssrc0,    ssrc1
+s_cmp_gt_i32                   ssrc0,    ssrc1
+s_cmp_gt_u32                   ssrc0,    ssrc1
+s_cmp_le_i32                   ssrc0,    ssrc1
+s_cmp_le_u32                   ssrc0,    ssrc1
+s_cmp_lg_i32                   ssrc0,    ssrc1
+s_cmp_lg_u32                   ssrc0,    ssrc1
+s_cmp_lg_u64                   ssrc0,    ssrc1
+s_cmp_lt_i32                   ssrc0,    ssrc1
+s_cmp_lt_u32                   ssrc0,    ssrc1
+
+
+

SOPK

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+s_addk_i32                     sdst,     imm16
+s_call_b64                     sdst,     label
+s_cmovk_i32                    sdst,     imm16
+s_cmpk_eq_i32                            ssrc,     imm16
+s_cmpk_eq_u32                            ssrc,     imm16
+s_cmpk_ge_i32                            ssrc,     imm16
+s_cmpk_ge_u32                            ssrc,     imm16
+s_cmpk_gt_i32                            ssrc,     imm16
+s_cmpk_gt_u32                            ssrc,     imm16
+s_cmpk_le_i32                            ssrc,     imm16
+s_cmpk_le_u32                            ssrc,     imm16
+s_cmpk_lg_i32                            ssrc,     imm16
+s_cmpk_lg_u32                            ssrc,     imm16
+s_cmpk_lt_i32                            ssrc,     imm16
+s_cmpk_lt_u32                            ssrc,     imm16
+s_getreg_b32                   sdst,     hwreg
+s_movk_i32                     sdst,     imm16
+s_mulk_i32                     sdst,     imm16
+s_setreg_b32                   hwreg,    ssrc
+s_setreg_imm32_b32             hwreg,    simm32
+s_subvector_loop_begin         sdst,     label
+s_subvector_loop_end           sdst,     label
+s_version                                imm16
+s_waitcnt_expcnt                         ssrc,     imm16
+s_waitcnt_lgkmcnt                        ssrc,     imm16
+s_waitcnt_vmcnt                          ssrc,     imm16
+s_waitcnt_vscnt                          ssrc,     imm16
+
+
+

SOPP

+
INSTRUCTION                    SRC
+—————————————————————————————————————————
+s_barrier
+s_branch                       label
+s_cbranch_cdbgsys              label
+s_cbranch_cdbgsys_and_user     label
+s_cbranch_cdbgsys_or_user      label
+s_cbranch_cdbguser             label
+s_cbranch_execnz               label
+s_cbranch_execz                label
+s_cbranch_scc0                 label
+s_cbranch_scc1                 label
+s_cbranch_vccnz                label
+s_cbranch_vccz                 label
+s_clause                       imm16
+s_code_end
+s_decperflevel                 imm16
+s_denorm_mode                  imm16
+s_endpgm
+s_endpgm_ordered_ps_done
+s_endpgm_saved
+s_icache_inv
+s_incperflevel                 imm16
+s_inst_prefetch                imm16
+s_nop                          imm16
+s_round_mode                   imm16
+s_sendmsg                      msg
+s_sendmsghalt                  msg
+s_sethalt                      imm16
+s_setkill                      imm16
+s_setprio                      imm16
+s_sleep                        imm16
+s_trap                         imm16
+s_ttracedata
+s_ttracedata_imm               imm16
+s_waitcnt                      waitcnt
+s_wakeup
+
+
+

VINTRP

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+v_interp_mov_f32               vdst,     param:b32, attr:b32
+v_interp_p1_f32                vdst,     vsrc,      attr:b32
+v_interp_p2_f32                vdst,     vsrc,      attr:b32
+
+
+

VOP1

+
INSTRUCTION                    DST        SRC
+————————————————————————————————————————————————————
+v_bfrev_b32                    vdst,      src
+v_ceil_f16                     vdst,      src
+v_ceil_f32                     vdst,      src
+v_ceil_f64                     vdst,      src
+v_clrexcp
+v_cos_f16                      vdst,      src
+v_cos_f32                      vdst,      src
+v_cvt_f16_f32                  vdst,      src
+v_cvt_f16_i16                  vdst,      src
+v_cvt_f16_u16                  vdst,      src
+v_cvt_f32_f16                  vdst,      src
+v_cvt_f32_f64                  vdst,      src
+v_cvt_f32_i32                  vdst,      src
+v_cvt_f32_u32                  vdst,      src
+v_cvt_f32_ubyte0               vdst,      src
+v_cvt_f32_ubyte1               vdst,      src
+v_cvt_f32_ubyte2               vdst,      src
+v_cvt_f32_ubyte3               vdst,      src
+v_cvt_f64_f32                  vdst,      src
+v_cvt_f64_i32                  vdst,      src
+v_cvt_f64_u32                  vdst,      src
+v_cvt_flr_i32_f32              vdst,      src
+v_cvt_i16_f16                  vdst,      src
+v_cvt_i32_f32                  vdst,      src
+v_cvt_i32_f64                  vdst,      src
+v_cvt_norm_i16_f16             vdst,      src
+v_cvt_norm_u16_f16             vdst,      src
+v_cvt_off_f32_i4               vdst,      src
+v_cvt_rpi_i32_f32              vdst,      src
+v_cvt_u16_f16                  vdst,      src
+v_cvt_u32_f32                  vdst,      src
+v_cvt_u32_f64                  vdst,      src
+v_exp_f16                      vdst,      src
+v_exp_f32                      vdst,      src
+v_ffbh_i32                     vdst,      src
+v_ffbh_u32                     vdst,      src
+v_ffbl_b32                     vdst,      src
+v_floor_f16                    vdst,      src
+v_floor_f32                    vdst,      src
+v_floor_f64                    vdst,      src
+v_fract_f16                    vdst,      src
+v_fract_f32                    vdst,      src
+v_fract_f64                    vdst,      src
+v_frexp_exp_i16_f16            vdst,      src
+v_frexp_exp_i32_f32            vdst,      src
+v_frexp_exp_i32_f64            vdst,      src
+v_frexp_mant_f16               vdst,      src
+v_frexp_mant_f32               vdst,      src
+v_frexp_mant_f64               vdst,      src
+v_log_f16                      vdst,      src
+v_log_f32                      vdst,      src
+v_mov_b32                      vdst,      src
+v_movreld_b32                  vdst,      src
+v_movrels_b32                  vdst,      vsrc
+v_movrelsd_2_b32               vdst,      vsrc
+v_movrelsd_b32                 vdst,      vsrc
+v_nop
+v_not_b32                      vdst,      src
+v_pipeflush
+v_rcp_f16                      vdst,      src
+v_rcp_f32                      vdst,      src
+v_rcp_f64                      vdst,      src
+v_rcp_iflag_f32                vdst,      src
+v_readfirstlane_b32            sdst,      src
+v_rndne_f16                    vdst,      src
+v_rndne_f32                    vdst,      src
+v_rndne_f64                    vdst,      src
+v_rsq_f16                      vdst,      src
+v_rsq_f32                      vdst,      src
+v_rsq_f64                      vdst,      src
+v_sat_pk_u8_i16                vdst:u8x4, src
+v_sin_f16                      vdst,      src
+v_sin_f32                      vdst,      src
+v_sqrt_f16                     vdst,      src
+v_sqrt_f32                     vdst,      src
+v_sqrt_f64                     vdst,      src
+v_swap_b32                     vdst,      vsrc
+v_swaprel_b32                  vdst,      vsrc
+v_trunc_f16                    vdst,      src
+v_trunc_f32                    vdst,      src
+v_trunc_f64                    vdst,      src
+
+
+

VOP2

+
INSTRUCTION                    DST0      DST1      SRC0      SRC1      SRC2
+—————————————————————————————————————————————————————————————————————————————————
+v_add_co_ci_u32                vdst,     vcc,      src0,     vsrc1,    vcc
+v_add_f16                      vdst,               src0,     vsrc1
+v_add_f32                      vdst,               src0,     vsrc1
+v_add_nc_u32                   vdst,               src0,     vsrc1
+v_and_b32                      vdst,               src0,     vsrc1
+v_ashrrev_i32                  vdst,               src0:u32, vsrc1
+v_cndmask_b32                  vdst,               src0,     vsrc1,    vcc
+v_cvt_pkrtz_f16_f32            vdst,               src0:f32, vsrc1:f32
+v_fmaak_f16                    vdst,               src0,     vsrc1,    simm32
+v_fmaak_f32                    vdst,               src0,     vsrc1,    simm32
+v_fmac_f16                     vdst,               src0,     vsrc1
+v_fmac_f32                     vdst,               src0,     vsrc1
+v_fmamk_f16                    vdst,               src0,     simm32,   vsrc2
+v_fmamk_f32                    vdst,               src0,     simm32,   vsrc2
+v_ldexp_f16                    vdst,               src0,     vsrc1:i16
+v_lshlrev_b32                  vdst,               src0:u32, vsrc1
+v_lshrrev_b32                  vdst,               src0:u32, vsrc1
+v_mac_f32                      vdst,               src0,     vsrc1
+v_mac_legacy_f32               vdst,               src0,     vsrc1
+v_madak_f32                    vdst,               src0,     vsrc1,    simm32
+v_madmk_f32                    vdst,               src0,     simm32,   vsrc2
+v_max_f16                      vdst,               src0,     vsrc1
+v_max_f32                      vdst,               src0,     vsrc1
+v_max_i32                      vdst,               src0,     vsrc1
+v_max_u32                      vdst,               src0,     vsrc1
+v_min_f16                      vdst,               src0,     vsrc1
+v_min_f32                      vdst,               src0,     vsrc1
+v_min_i32                      vdst,               src0,     vsrc1
+v_min_u32                      vdst,               src0,     vsrc1
+v_mul_f16                      vdst,               src0,     vsrc1
+v_mul_f32                      vdst,               src0,     vsrc1
+v_mul_hi_i32_i24               vdst,               src0,     vsrc1
+v_mul_hi_u32_u24               vdst,               src0,     vsrc1
+v_mul_i32_i24                  vdst,               src0,     vsrc1
+v_mul_legacy_f32               vdst,               src0,     vsrc1
+v_mul_u32_u24                  vdst,               src0,     vsrc1
+v_or_b32                       vdst,               src0,     vsrc1
+v_pk_fmac_f16                  vdst,               src0,     vsrc1
+v_sub_co_ci_u32                vdst,     vcc,      src0,     vsrc1,    vcc
+v_sub_f16                      vdst,               src0,     vsrc1
+v_sub_f32                      vdst,               src0,     vsrc1
+v_sub_nc_u32                   vdst,               src0,     vsrc1
+v_subrev_co_ci_u32             vdst,     vcc,      src0,     vsrc1,    vcc
+v_subrev_f16                   vdst,               src0,     vsrc1
+v_subrev_f32                   vdst,               src0,     vsrc1
+v_subrev_nc_u32                vdst,               src0,     vsrc1
+v_xnor_b32                     vdst,               src0,     vsrc1
+v_xor_b32                      vdst,               src0,     vsrc1
+
+
+

VOP3

+
INSTRUCTION              DST0        DST1     SRC0         SRC1        SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add3_u32               vdst,                src0,        src1,       src2
+v_add_co_ci_u32_e64      vdst,       sdst,    src0,        src1,       ssrc2          clamp
+v_add_co_u32             vdst,       sdst,    src0,        src1                       clamp
+v_add_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_add_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_add_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_add_lshl_u32           vdst,                src0,        src1,       src2
+v_add_nc_i16             vdst,                src0,        src1                       op_sel clamp
+v_add_nc_i32             vdst,                src0,        src1                       clamp
+v_add_nc_u16             vdst,                src0,        src1                       clamp
+v_add_nc_u32_e64         vdst,                src0,        src1                       clamp
+v_alignbit_b32           vdst,                src0,        src1,       src2:b16
+v_alignbyte_b32          vdst,                src0,        src1,       src2:b16
+v_and_b32_e64            vdst,                src0,        src1
+v_and_or_b32             vdst,                src0,        src1,       src2
+v_ashrrev_i16            vdst,                src0:u16,    src1
+v_ashrrev_i32_e64        vdst,                src0:u32,    src1
+v_ashrrev_i64            vdst,                src0:u32,    src1
+v_bcnt_u32_b32           vdst,                src0,        src1
+v_bfe_i32                vdst,                src0,        src1:u32,   src2:u32
+v_bfe_u32                vdst,                src0,        src1,       src2
+v_bfi_b32                vdst,                src0,        src1,       src2
+v_bfm_b32                vdst,                src0,        src1
+v_bfrev_b32_e64          vdst,                src
+v_ceil_f16_e64           vdst,                src:m                                   clamp omod
+v_ceil_f32_e64           vdst,                src:m                                   clamp omod
+v_ceil_f64_e64           vdst,                src:m                                   clamp omod
+v_clrexcp_e64
+v_cmp_class_f16_e64      sdst,                src0:m,      src1:b32
+v_cmp_class_f32_e64      sdst,                src0:m,      src1:b32
+v_cmp_class_f64_e64      sdst,                src0:m,      src1:b32
+v_cmp_eq_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_eq_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_eq_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_eq_i16_e64         sdst,                src0,        src1
+v_cmp_eq_i32_e64         sdst,                src0,        src1
+v_cmp_eq_i64_e64         sdst,                src0,        src1
+v_cmp_eq_u16_e64         sdst,                src0,        src1
+v_cmp_eq_u32_e64         sdst,                src0,        src1
+v_cmp_eq_u64_e64         sdst,                src0,        src1
+v_cmp_f_f16_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_f_f32_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_f_f64_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_f_i32_e64          sdst,                src0,        src1
+v_cmp_f_i64_e64          sdst,                src0,        src1
+v_cmp_f_u32_e64          sdst,                src0,        src1
+v_cmp_f_u64_e64          sdst,                src0,        src1
+v_cmp_ge_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_ge_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_ge_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_ge_i16_e64         sdst,                src0,        src1
+v_cmp_ge_i32_e64         sdst,                src0,        src1
+v_cmp_ge_i64_e64         sdst,                src0,        src1
+v_cmp_ge_u16_e64         sdst,                src0,        src1
+v_cmp_ge_u32_e64         sdst,                src0,        src1
+v_cmp_ge_u64_e64         sdst,                src0,        src1
+v_cmp_gt_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_gt_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_gt_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_gt_i16_e64         sdst,                src0,        src1
+v_cmp_gt_i32_e64         sdst,                src0,        src1
+v_cmp_gt_i64_e64         sdst,                src0,        src1
+v_cmp_gt_u16_e64         sdst,                src0,        src1
+v_cmp_gt_u32_e64         sdst,                src0,        src1
+v_cmp_gt_u64_e64         sdst,                src0,        src1
+v_cmp_le_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_le_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_le_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_le_i16_e64         sdst,                src0,        src1
+v_cmp_le_i32_e64         sdst,                src0,        src1
+v_cmp_le_i64_e64         sdst,                src0,        src1
+v_cmp_le_u16_e64         sdst,                src0,        src1
+v_cmp_le_u32_e64         sdst,                src0,        src1
+v_cmp_le_u64_e64         sdst,                src0,        src1
+v_cmp_lg_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lg_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lg_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_i16_e64         sdst,                src0,        src1
+v_cmp_lt_i32_e64         sdst,                src0,        src1
+v_cmp_lt_i64_e64         sdst,                src0,        src1
+v_cmp_lt_u16_e64         sdst,                src0,        src1
+v_cmp_lt_u32_e64         sdst,                src0,        src1
+v_cmp_lt_u64_e64         sdst,                src0,        src1
+v_cmp_ne_i16_e64         sdst,                src0,        src1
+v_cmp_ne_i32_e64         sdst,                src0,        src1
+v_cmp_ne_i64_e64         sdst,                src0,        src1
+v_cmp_ne_u16_e64         sdst,                src0,        src1
+v_cmp_ne_u32_e64         sdst,                src0,        src1
+v_cmp_ne_u64_e64         sdst,                src0,        src1
+v_cmp_neq_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_neq_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_neq_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nge_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nge_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nge_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_ngt_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_ngt_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_ngt_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nle_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nle_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nle_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlg_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlg_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlg_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlt_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlt_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlt_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_o_f16_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_o_f32_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_o_f64_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_t_i32_e64          sdst,                src0,        src1
+v_cmp_t_i64_e64          sdst,                src0,        src1
+v_cmp_t_u32_e64          sdst,                src0,        src1
+v_cmp_t_u64_e64          sdst,                src0,        src1
+v_cmp_tru_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_tru_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_tru_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_u_f16_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_u_f32_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_u_f64_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmpx_class_f16_e64                          src0:m,      src1:b32
+v_cmpx_class_f32_e64                          src0:m,      src1:b32
+v_cmpx_class_f64_e64                          src0:m,      src1:b32
+v_cmpx_eq_f16_e64                             src0:m,      src1:m                     clamp
+v_cmpx_eq_f32_e64                             src0:m,      src1:m                     clamp
+v_cmpx_eq_f64_e64                             src0:m,      src1:m                     clamp
+v_cmpx_eq_i16_e64                             src0,        src1
+v_cmpx_eq_i32_e64                             src0,        src1
+v_cmpx_eq_i64_e64                             src0,        src1
+v_cmpx_eq_u16_e64                             src0,        src1
+v_cmpx_eq_u32_e64                             src0,        src1
+v_cmpx_eq_u64_e64                             src0,        src1
+v_cmpx_f_f16_e64                              src0:m,      src1:m                     clamp
+v_cmpx_f_f32_e64                              src0:m,      src1:m                     clamp
+v_cmpx_f_f64_e64                              src0:m,      src1:m                     clamp
+v_cmpx_f_i32_e64                              src0,        src1
+v_cmpx_f_i64_e64                              src0,        src1
+v_cmpx_f_u32_e64                              src0,        src1
+v_cmpx_f_u64_e64                              src0,        src1
+v_cmpx_ge_f16_e64                             src0:m,      src1:m                     clamp
+v_cmpx_ge_f32_e64                             src0:m,      src1:m                     clamp
+v_cmpx_ge_f64_e64                             src0:m,      src1:m                     clamp
+v_cmpx_ge_i16_e64                             src0,        src1
+v_cmpx_ge_i32_e64                             src0,        src1
+v_cmpx_ge_i64_e64                             src0,        src1
+v_cmpx_ge_u16_e64                             src0,        src1
+v_cmpx_ge_u32_e64                             src0,        src1
+v_cmpx_ge_u64_e64                             src0,        src1
+v_cmpx_gt_f16_e64                             src0:m,      src1:m                     clamp
+v_cmpx_gt_f32_e64                             src0:m,      src1:m                     clamp
+v_cmpx_gt_f64_e64                             src0:m,      src1:m                     clamp
+v_cmpx_gt_i16_e64                             src0,        src1
+v_cmpx_gt_i32_e64                             src0,        src1
+v_cmpx_gt_i64_e64                             src0,        src1
+v_cmpx_gt_u16_e64                             src0,        src1
+v_cmpx_gt_u32_e64                             src0,        src1
+v_cmpx_gt_u64_e64                             src0,        src1
+v_cmpx_le_f16_e64                             src0:m,      src1:m                     clamp
+v_cmpx_le_f32_e64                             src0:m,      src1:m                     clamp
+v_cmpx_le_f64_e64                             src0:m,      src1:m                     clamp
+v_cmpx_le_i16_e64                             src0,        src1
+v_cmpx_le_i32_e64                             src0,        src1
+v_cmpx_le_i64_e64                             src0,        src1
+v_cmpx_le_u16_e64                             src0,        src1
+v_cmpx_le_u32_e64                             src0,        src1
+v_cmpx_le_u64_e64                             src0,        src1
+v_cmpx_lg_f16_e64                             src0:m,      src1:m                     clamp
+v_cmpx_lg_f32_e64                             src0:m,      src1:m                     clamp
+v_cmpx_lg_f64_e64                             src0:m,      src1:m                     clamp
+v_cmpx_lt_f16_e64                             src0:m,      src1:m                     clamp
+v_cmpx_lt_f32_e64                             src0:m,      src1:m                     clamp
+v_cmpx_lt_f64_e64                             src0:m,      src1:m                     clamp
+v_cmpx_lt_i16_e64                             src0,        src1
+v_cmpx_lt_i32_e64                             src0,        src1
+v_cmpx_lt_i64_e64                             src0,        src1
+v_cmpx_lt_u16_e64                             src0,        src1
+v_cmpx_lt_u32_e64                             src0,        src1
+v_cmpx_lt_u64_e64                             src0,        src1
+v_cmpx_ne_i16_e64                             src0,        src1
+v_cmpx_ne_i32_e64                             src0,        src1
+v_cmpx_ne_i64_e64                             src0,        src1
+v_cmpx_ne_u16_e64                             src0,        src1
+v_cmpx_ne_u32_e64                             src0,        src1
+v_cmpx_ne_u64_e64                             src0,        src1
+v_cmpx_neq_f16_e64                            src0:m,      src1:m                     clamp
+v_cmpx_neq_f32_e64                            src0:m,      src1:m                     clamp
+v_cmpx_neq_f64_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nge_f16_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nge_f32_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nge_f64_e64                            src0:m,      src1:m                     clamp
+v_cmpx_ngt_f16_e64                            src0:m,      src1:m                     clamp
+v_cmpx_ngt_f32_e64                            src0:m,      src1:m                     clamp
+v_cmpx_ngt_f64_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nle_f16_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nle_f32_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nle_f64_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nlg_f16_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nlg_f32_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nlg_f64_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nlt_f16_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nlt_f32_e64                            src0:m,      src1:m                     clamp
+v_cmpx_nlt_f64_e64                            src0:m,      src1:m                     clamp
+v_cmpx_o_f16_e64                              src0:m,      src1:m                     clamp
+v_cmpx_o_f32_e64                              src0:m,      src1:m                     clamp
+v_cmpx_o_f64_e64                              src0:m,      src1:m                     clamp
+v_cmpx_t_i32_e64                              src0,        src1
+v_cmpx_t_i64_e64                              src0,        src1
+v_cmpx_t_u32_e64                              src0,        src1
+v_cmpx_t_u64_e64                              src0,        src1
+v_cmpx_tru_f16_e64                            src0:m,      src1:m                     clamp
+v_cmpx_tru_f32_e64                            src0:m,      src1:m                     clamp
+v_cmpx_tru_f64_e64                            src0:m,      src1:m                     clamp
+v_cmpx_u_f16_e64                              src0:m,      src1:m                     clamp
+v_cmpx_u_f32_e64                              src0:m,      src1:m                     clamp
+v_cmpx_u_f64_e64                              src0:m,      src1:m                     clamp
+v_cndmask_b32_e64        vdst,                src0:m,      src1:m,     ssrc2
+v_cos_f16_e64            vdst,                src:m                                   clamp omod
+v_cos_f32_e64            vdst,                src:m                                   clamp omod
+v_cubeid_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cubema_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cubesc_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cubetc_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cvt_f16_f32_e64        vdst,                src:m                                   clamp omod
+v_cvt_f16_i16_e64        vdst,                src                                     clamp omod
+v_cvt_f16_u16_e64        vdst,                src                                     clamp omod
+v_cvt_f32_f16_e64        vdst,                src:m                                   clamp omod
+v_cvt_f32_f64_e64        vdst,                src:m                                   clamp omod
+v_cvt_f32_i32_e64        vdst,                src                                     clamp omod
+v_cvt_f32_u32_e64        vdst,                src                                     clamp omod
+v_cvt_f32_ubyte0_e64     vdst,                src                                     clamp omod
+v_cvt_f32_ubyte1_e64     vdst,                src                                     clamp omod
+v_cvt_f32_ubyte2_e64     vdst,                src                                     clamp omod
+v_cvt_f32_ubyte3_e64     vdst,                src                                     clamp omod
+v_cvt_f64_f32_e64        vdst,                src:m                                   clamp omod
+v_cvt_f64_i32_e64        vdst,                src                                     clamp omod
+v_cvt_f64_u32_e64        vdst,                src                                     clamp omod
+v_cvt_flr_i32_f32_e64    vdst,                src:m
+v_cvt_i16_f16_e64        vdst,                src:m                                   clamp
+v_cvt_i32_f32_e64        vdst,                src:m                                   clamp
+v_cvt_i32_f64_e64        vdst,                src:m                                   clamp
+v_cvt_norm_i16_f16_e64   vdst,                src:m                                   clamp
+v_cvt_norm_u16_f16_e64   vdst,                src:m                                   clamp
+v_cvt_off_f32_i4_e64     vdst,                src                                     clamp omod
+v_cvt_pk_i16_i32         vdst,                src0:i32,    src1:i32
+v_cvt_pk_u16_u32         vdst,                src0:u32,    src1:u32
+v_cvt_pk_u8_f32          vdst:b32,            src0:m:f32,  src1:u32,   src2:u32
+v_cvt_pknorm_i16_f16     vdst,                src0:m:f16,  src1:m:f16                 op_sel
+v_cvt_pknorm_i16_f32     vdst,                src0:m:f32,  src1:m:f32
+v_cvt_pknorm_u16_f16     vdst,                src0:m:f16,  src1:m:f16                 op_sel
+v_cvt_pknorm_u16_f32     vdst,                src0:m:f32,  src1:m:f32
+v_cvt_pkrtz_f16_f32_e64  vdst,                src0:m:f32,  src1:m:f32                 clamp
+v_cvt_rpi_i32_f32_e64    vdst,                src:m
+v_cvt_u16_f16_e64        vdst,                src:m                                   clamp
+v_cvt_u32_f32_e64        vdst,                src:m                                   clamp
+v_cvt_u32_f64_e64        vdst,                src:m                                   clamp
+v_div_fixup_f16          vdst,                src0:m,      src1:m,     src2:m         op_sel clamp
+v_div_fixup_f32          vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_fixup_f64          vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_fmas_f32           vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_fmas_f64           vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_scale_f32          vdst,       vcc,     src0,        src1,       src2
+v_div_scale_f64          vdst,       vcc,     src0,        src1,       src2
+v_exp_f16_e64            vdst,                src:m                                   clamp omod
+v_exp_f32_e64            vdst,                src:m                                   clamp omod
+v_ffbh_i32_e64           vdst,                src
+v_ffbh_u32_e64           vdst,                src
+v_ffbl_b32_e64           vdst,                src
+v_floor_f16_e64          vdst,                src:m                                   clamp omod
+v_floor_f32_e64          vdst,                src:m                                   clamp omod
+v_floor_f64_e64          vdst,                src:m                                   clamp omod
+v_fma_f16                vdst,                src0:m,      src1:m,     src2:m         op_sel clamp
+v_fma_f32                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_fma_f64                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_fmac_f16_e64           vdst,                src0:m,      src1:m                     clamp omod
+v_fmac_f32_e64           vdst,                src0:m,      src1:m                     clamp omod
+v_fract_f16_e64          vdst,                src:m                                   clamp omod
+v_fract_f32_e64          vdst,                src:m                                   clamp omod
+v_fract_f64_e64          vdst,                src:m                                   clamp omod
+v_frexp_exp_i16_f16_e64  vdst,                src:m
+v_frexp_exp_i32_f32_e64  vdst,                src:m
+v_frexp_exp_i32_f64_e64  vdst,                src:m
+v_frexp_mant_f16_e64     vdst,                src:m                                   clamp omod
+v_frexp_mant_f32_e64     vdst,                src:m                                   clamp omod
+v_frexp_mant_f64_e64     vdst,                src:m                                   clamp omod
+v_interp_mov_f32_e64     vdst,                param:b32,   attr:b32                   clamp omod
+v_interp_p1_f32_e64      vdst,                vsrc:m,      attr:b32                   clamp omod
+v_interp_p1ll_f16        vdst:f32,            vsrc:m:f32,  attr:b32                   high clamp omod
+v_interp_p1lv_f16        vdst:f32,            vsrc0:m:f32, attr:b32,   vsrc2:m:f16x2  high clamp omod
+v_interp_p2_f16          vdst,                vsrc0:m:f32, attr:b32,   vsrc2:m:f32    high clamp
+v_interp_p2_f32_e64      vdst,                vsrc:m,      attr:b32                   clamp omod
+v_ldexp_f16_e64          vdst,                src0:m,      src1:i16                   clamp omod
+v_ldexp_f32              vdst,                src0:m,      src1:i32                   clamp omod
+v_ldexp_f64              vdst,                src0:m,      src1:i32                   clamp omod
+v_lerp_u8                vdst:u32,            src0:b32,    src1:b32,   src2:b32
+v_log_f16_e64            vdst,                src:m                                   clamp omod
+v_log_f32_e64            vdst,                src:m                                   clamp omod
+v_lshl_add_u32           vdst,                src0,        src1,       src2
+v_lshl_or_b32            vdst,                src0,        src1:u32,   src2
+v_lshlrev_b16            vdst,                src0:u16,    src1
+v_lshlrev_b32_e64        vdst,                src0:u32,    src1
+v_lshlrev_b64            vdst,                src0:u32,    src1
+v_lshrrev_b16            vdst,                src0:u16,    src1
+v_lshrrev_b32_e64        vdst,                src0:u32,    src1
+v_lshrrev_b64            vdst,                src0:u32,    src1
+v_mac_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_mac_legacy_f32_e64     vdst,                src0:m,      src1:m                     clamp omod
+v_mad_f32                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_mad_i16                vdst,                src0,        src1,       src2           op_sel clamp
+v_mad_i32_i16            vdst,                src0,        src1,       src2:i32       op_sel clamp
+v_mad_i32_i24            vdst,                src0,        src1,       src2:i32       clamp
+v_mad_i64_i32            vdst,       sdst,    src0,        src1,       src2:i64       clamp
+v_mad_legacy_f32         vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_mad_u16                vdst,                src0,        src1,       src2           op_sel clamp
+v_mad_u32_u16            vdst,                src0,        src1,       src2:u32       op_sel clamp
+v_mad_u32_u24            vdst,                src0,        src1,       src2:u32       clamp
+v_mad_u64_u32            vdst,       sdst,    src0,        src1,       src2:u64       clamp
+v_max3_f16               vdst,                src0:m,      src1:m,     src2:m         op_sel clamp
+v_max3_f32               vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_max3_i16               vdst,                src0,        src1,       src2           op_sel
+v_max3_i32               vdst,                src0,        src1,       src2
+v_max3_u16               vdst,                src0,        src1,       src2           op_sel
+v_max3_u32               vdst,                src0,        src1,       src2
+v_max_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_max_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_max_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_max_i16                vdst,                src0,        src1
+v_max_i32_e64            vdst,                src0,        src1
+v_max_u16                vdst,                src0,        src1
+v_max_u32_e64            vdst,                src0,        src1
+v_mbcnt_hi_u32_b32       vdst,                src0,        src1
+v_mbcnt_lo_u32_b32       vdst,                src0,        src1
+v_med3_f16               vdst,                src0:m,      src1:m,     src2:m         op_sel clamp
+v_med3_f32               vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_med3_i16               vdst,                src0,        src1,       src2           op_sel
+v_med3_i32               vdst,                src0,        src1,       src2
+v_med3_u16               vdst,                src0,        src1,       src2           op_sel
+v_med3_u32               vdst,                src0,        src1,       src2
+v_min3_f16               vdst,                src0:m,      src1:m,     src2:m         op_sel clamp
+v_min3_f32               vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_min3_i16               vdst,                src0,        src1,       src2           op_sel
+v_min3_i32               vdst,                src0,        src1,       src2
+v_min3_u16               vdst,                src0,        src1,       src2           op_sel
+v_min3_u32               vdst,                src0,        src1,       src2
+v_min_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_min_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_min_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_min_i16                vdst,                src0,        src1
+v_min_i32_e64            vdst,                src0,        src1
+v_min_u16                vdst,                src0,        src1
+v_min_u32_e64            vdst,                src0,        src1
+v_mov_b32_e64            vdst,                src
+v_movreld_b32_e64        vdst,                src
+v_movrels_b32_e64        vdst,                vsrc
+v_movrelsd_2_b32_e64     vdst,                vsrc
+v_movrelsd_b32_e64       vdst,                vsrc
+v_mqsad_pk_u16_u8        vdst:u16x4,          src0:u8x8,   src1:u8x4,  src2:u16x4     clamp
+v_mqsad_u32_u8           vdst:u32x4,          src0:u8x8,   src1:u8x4,  vsrc2:u32x4    clamp
+v_msad_u8                vdst:u32,            src0:b32,    src1:b32,   src2:b32       clamp
+v_mul_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_mul_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_mul_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_mul_hi_i32             vdst,                src0,        src1
+v_mul_hi_i32_i24_e64     vdst,                src0,        src1
+v_mul_hi_u32             vdst,                src0,        src1
+v_mul_hi_u32_u24_e64     vdst,                src0,        src1
+v_mul_i32_i24_e64        vdst,                src0,        src1                       clamp
+v_mul_legacy_f32_e64     vdst,                src0:m,      src1:m                     clamp omod
+v_mul_lo_u16             vdst,                src0,        src1
+v_mul_lo_u32             vdst,                src0,        src1
+v_mul_u32_u24_e64        vdst,                src0,        src1                       clamp
+v_mullit_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_nop_e64
+v_not_b32_e64            vdst,                src
+v_or3_b32                vdst,                src0,        src1,       src2
+v_or_b32_e64             vdst,                src0,        src1
+v_pack_b32_f16           vdst,                src0:m,      src1:m                     op_sel
+v_perm_b32               vdst,                src0,        src1,       src2
+v_permlane16_b32         vdst,                vdata,       ssrc1,      ssrc2          dpp_op_sel
+v_permlanex16_b32        vdst,                vdata,       ssrc1,      ssrc2          dpp_op_sel
+v_pipeflush_e64
+v_qsad_pk_u16_u8         vdst:u16x4,          src0:u8x8,   src1:u8x4,  src2:u16x4     clamp
+v_rcp_f16_e64            vdst,                src:m                                   clamp omod
+v_rcp_f32_e64            vdst,                src:m                                   clamp omod
+v_rcp_f64_e64            vdst,                src:m                                   clamp omod
+v_rcp_iflag_f32_e64      vdst,                src:m                                   clamp omod
+v_readlane_b32           sdst,                src0,        ssrc1
+v_rndne_f16_e64          vdst,                src:m                                   clamp omod
+v_rndne_f32_e64          vdst,                src:m                                   clamp omod
+v_rndne_f64_e64          vdst,                src:m                                   clamp omod
+v_rsq_f16_e64            vdst,                src:m                                   clamp omod
+v_rsq_f32_e64            vdst,                src:m                                   clamp omod
+v_rsq_f64_e64            vdst,                src:m                                   clamp omod
+v_sad_hi_u8              vdst:u32,            src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_sad_u16                vdst:u32,            src0:u16x2,  src1:u16x2, src2:u32       clamp
+v_sad_u32                vdst,                src0,        src1,       src2           clamp
+v_sad_u8                 vdst:u32,            src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_sat_pk_u8_i16_e64      vdst:u8x4,           src
+v_sin_f16_e64            vdst,                src:m                                   clamp omod
+v_sin_f32_e64            vdst,                src:m                                   clamp omod
+v_sqrt_f16_e64           vdst,                src:m                                   clamp omod
+v_sqrt_f32_e64           vdst,                src:m                                   clamp omod
+v_sqrt_f64_e64           vdst,                src:m                                   clamp omod
+v_sub_co_ci_u32_e64      vdst,       sdst,    src0,        src1,       ssrc2          clamp
+v_sub_co_u32             vdst,       sdst,    src0,        src1                       clamp
+v_sub_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_sub_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_sub_nc_i16             vdst,                src0,        src1                       op_sel clamp
+v_sub_nc_i32             vdst,                src0,        src1                       clamp
+v_sub_nc_u16             vdst,                src0,        src1                       clamp
+v_sub_nc_u32_e64         vdst,                src0,        src1                       clamp
+v_subrev_co_ci_u32_e64   vdst,       sdst,    src0,        src1,       ssrc2          clamp
+v_subrev_co_u32          vdst,       sdst,    src0,        src1                       clamp
+v_subrev_f16_e64         vdst,                src0:m,      src1:m                     clamp omod
+v_subrev_f32_e64         vdst,                src0:m,      src1:m                     clamp omod
+v_subrev_nc_u32_e64      vdst,                src0,        src1                       clamp
+v_trig_preop_f64         vdst,                src0:m,      src1:u32                   clamp omod
+v_trunc_f16_e64          vdst,                src:m                                   clamp omod
+v_trunc_f32_e64          vdst,                src:m                                   clamp omod
+v_trunc_f64_e64          vdst,                src:m                                   clamp omod
+v_writelane_b32          vdst,                ssrc0,       ssrc1
+v_xad_u32                vdst,                src0,        src1,       src2
+v_xnor_b32_e64           vdst,                src0,        src1
+v_xor3_b32               vdst,                src0,        src1,       src2
+v_xor_b32_e64            vdst,                src0,        src1
+
+
+

VOP3P

+
INSTRUCTION          DST   SRC0        SRC1       SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————
+v_fma_mix_f32        vdst, src0:m:fx,  src1:m:fx, src2:m:fx      m_op_sel m_op_sel_hi clamp
+v_fma_mixhi_f16      vdst, src0:m:fx,  src1:m:fx, src2:m:fx      m_op_sel m_op_sel_hi clamp
+v_fma_mixlo_f16      vdst, src0:m:fx,  src1:m:fx, src2:m:fx      m_op_sel m_op_sel_hi clamp
+v_pk_add_f16         vdst, src0,       src1                      op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_add_i16         vdst, src0,       src1                      op_sel op_sel_hi clamp
+v_pk_add_u16         vdst, src0,       src1                      op_sel op_sel_hi clamp
+v_pk_ashrrev_i16     vdst, src0:u16x2, src1                      op_sel op_sel_hi
+v_pk_fma_f16         vdst, src0,       src1,      src2           op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_lshlrev_b16     vdst, src0:u16x2, src1                      op_sel op_sel_hi
+v_pk_lshrrev_b16     vdst, src0:u16x2, src1                      op_sel op_sel_hi
+v_pk_mad_i16         vdst, src0,       src1,      src2           op_sel op_sel_hi clamp
+v_pk_mad_u16         vdst, src0,       src1,      src2           op_sel op_sel_hi clamp
+v_pk_max_f16         vdst, src0,       src1                      op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_max_i16         vdst, src0,       src1                      op_sel op_sel_hi
+v_pk_max_u16         vdst, src0,       src1                      op_sel op_sel_hi
+v_pk_min_f16         vdst, src0,       src1                      op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_min_i16         vdst, src0,       src1                      op_sel op_sel_hi
+v_pk_min_u16         vdst, src0,       src1                      op_sel op_sel_hi
+v_pk_mul_f16         vdst, src0,       src1                      op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_mul_lo_u16      vdst, src0,       src1                      op_sel op_sel_hi
+v_pk_sub_i16         vdst, src0,       src1                      op_sel op_sel_hi clamp
+v_pk_sub_u16         vdst, src0,       src1                      op_sel op_sel_hi clamp
+
+
+

VOPC

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+v_cmp_class_f16                vcc,      src0,     vsrc1:b32
+v_cmp_class_f32                vcc,      src0,     vsrc1:b32
+v_cmp_class_f64                vcc,      src0,     vsrc1:b32
+v_cmp_eq_f16                   vcc,      src0,     vsrc1
+v_cmp_eq_f32                   vcc,      src0,     vsrc1
+v_cmp_eq_f64                   vcc,      src0,     vsrc1
+v_cmp_eq_i16                   vcc,      src0,     vsrc1
+v_cmp_eq_i32                   vcc,      src0,     vsrc1
+v_cmp_eq_i64                   vcc,      src0,     vsrc1
+v_cmp_eq_u16                   vcc,      src0,     vsrc1
+v_cmp_eq_u32                   vcc,      src0,     vsrc1
+v_cmp_eq_u64                   vcc,      src0,     vsrc1
+v_cmp_f_f16                    vcc,      src0,     vsrc1
+v_cmp_f_f32                    vcc,      src0,     vsrc1
+v_cmp_f_f64                    vcc,      src0,     vsrc1
+v_cmp_f_i32                    vcc,      src0,     vsrc1
+v_cmp_f_i64                    vcc,      src0,     vsrc1
+v_cmp_f_u32                    vcc,      src0,     vsrc1
+v_cmp_f_u64                    vcc,      src0,     vsrc1
+v_cmp_ge_f16                   vcc,      src0,     vsrc1
+v_cmp_ge_f32                   vcc,      src0,     vsrc1
+v_cmp_ge_f64                   vcc,      src0,     vsrc1
+v_cmp_ge_i16                   vcc,      src0,     vsrc1
+v_cmp_ge_i32                   vcc,      src0,     vsrc1
+v_cmp_ge_i64                   vcc,      src0,     vsrc1
+v_cmp_ge_u16                   vcc,      src0,     vsrc1
+v_cmp_ge_u32                   vcc,      src0,     vsrc1
+v_cmp_ge_u64                   vcc,      src0,     vsrc1
+v_cmp_gt_f16                   vcc,      src0,     vsrc1
+v_cmp_gt_f32                   vcc,      src0,     vsrc1
+v_cmp_gt_f64                   vcc,      src0,     vsrc1
+v_cmp_gt_i16                   vcc,      src0,     vsrc1
+v_cmp_gt_i32                   vcc,      src0,     vsrc1
+v_cmp_gt_i64                   vcc,      src0,     vsrc1
+v_cmp_gt_u16                   vcc,      src0,     vsrc1
+v_cmp_gt_u32                   vcc,      src0,     vsrc1
+v_cmp_gt_u64                   vcc,      src0,     vsrc1
+v_cmp_le_f16                   vcc,      src0,     vsrc1
+v_cmp_le_f32                   vcc,      src0,     vsrc1
+v_cmp_le_f64                   vcc,      src0,     vsrc1
+v_cmp_le_i16                   vcc,      src0,     vsrc1
+v_cmp_le_i32                   vcc,      src0,     vsrc1
+v_cmp_le_i64                   vcc,      src0,     vsrc1
+v_cmp_le_u16                   vcc,      src0,     vsrc1
+v_cmp_le_u32                   vcc,      src0,     vsrc1
+v_cmp_le_u64                   vcc,      src0,     vsrc1
+v_cmp_lg_f16                   vcc,      src0,     vsrc1
+v_cmp_lg_f32                   vcc,      src0,     vsrc1
+v_cmp_lg_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_f16                   vcc,      src0,     vsrc1
+v_cmp_lt_f32                   vcc,      src0,     vsrc1
+v_cmp_lt_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_i16                   vcc,      src0,     vsrc1
+v_cmp_lt_i32                   vcc,      src0,     vsrc1
+v_cmp_lt_i64                   vcc,      src0,     vsrc1
+v_cmp_lt_u16                   vcc,      src0,     vsrc1
+v_cmp_lt_u32                   vcc,      src0,     vsrc1
+v_cmp_lt_u64                   vcc,      src0,     vsrc1
+v_cmp_ne_i16                   vcc,      src0,     vsrc1
+v_cmp_ne_i32                   vcc,      src0,     vsrc1
+v_cmp_ne_i64                   vcc,      src0,     vsrc1
+v_cmp_ne_u16                   vcc,      src0,     vsrc1
+v_cmp_ne_u32                   vcc,      src0,     vsrc1
+v_cmp_ne_u64                   vcc,      src0,     vsrc1
+v_cmp_neq_f16                  vcc,      src0,     vsrc1
+v_cmp_neq_f32                  vcc,      src0,     vsrc1
+v_cmp_neq_f64                  vcc,      src0,     vsrc1
+v_cmp_nge_f16                  vcc,      src0,     vsrc1
+v_cmp_nge_f32                  vcc,      src0,     vsrc1
+v_cmp_nge_f64                  vcc,      src0,     vsrc1
+v_cmp_ngt_f16                  vcc,      src0,     vsrc1
+v_cmp_ngt_f32                  vcc,      src0,     vsrc1
+v_cmp_ngt_f64                  vcc,      src0,     vsrc1
+v_cmp_nle_f16                  vcc,      src0,     vsrc1
+v_cmp_nle_f32                  vcc,      src0,     vsrc1
+v_cmp_nle_f64                  vcc,      src0,     vsrc1
+v_cmp_nlg_f16                  vcc,      src0,     vsrc1
+v_cmp_nlg_f32                  vcc,      src0,     vsrc1
+v_cmp_nlg_f64                  vcc,      src0,     vsrc1
+v_cmp_nlt_f16                  vcc,      src0,     vsrc1
+v_cmp_nlt_f32                  vcc,      src0,     vsrc1
+v_cmp_nlt_f64                  vcc,      src0,     vsrc1
+v_cmp_o_f16                    vcc,      src0,     vsrc1
+v_cmp_o_f32                    vcc,      src0,     vsrc1
+v_cmp_o_f64                    vcc,      src0,     vsrc1
+v_cmp_t_i32                    vcc,      src0,     vsrc1
+v_cmp_t_i64                    vcc,      src0,     vsrc1
+v_cmp_t_u32                    vcc,      src0,     vsrc1
+v_cmp_t_u64                    vcc,      src0,     vsrc1
+v_cmp_tru_f16                  vcc,      src0,     vsrc1
+v_cmp_tru_f32                  vcc,      src0,     vsrc1
+v_cmp_tru_f64                  vcc,      src0,     vsrc1
+v_cmp_u_f16                    vcc,      src0,     vsrc1
+v_cmp_u_f32                    vcc,      src0,     vsrc1
+v_cmp_u_f64                    vcc,      src0,     vsrc1
+v_cmpx_class_f16                         src0,     vsrc1:b32
+v_cmpx_class_f32                         src0,     vsrc1:b32
+v_cmpx_class_f64                         src0,     vsrc1:b32
+v_cmpx_eq_f16                            src0,     vsrc1
+v_cmpx_eq_f32                            src0,     vsrc1
+v_cmpx_eq_f64                            src0,     vsrc1
+v_cmpx_eq_i16                            src0,     vsrc1
+v_cmpx_eq_i32                            src0,     vsrc1
+v_cmpx_eq_i64                            src0,     vsrc1
+v_cmpx_eq_u16                            src0,     vsrc1
+v_cmpx_eq_u32                            src0,     vsrc1
+v_cmpx_eq_u64                            src0,     vsrc1
+v_cmpx_f_f16                             src0,     vsrc1
+v_cmpx_f_f32                             src0,     vsrc1
+v_cmpx_f_f64                             src0,     vsrc1
+v_cmpx_f_i32                             src0,     vsrc1
+v_cmpx_f_i64                             src0,     vsrc1
+v_cmpx_f_u32                             src0,     vsrc1
+v_cmpx_f_u64                             src0,     vsrc1
+v_cmpx_ge_f16                            src0,     vsrc1
+v_cmpx_ge_f32                            src0,     vsrc1
+v_cmpx_ge_f64                            src0,     vsrc1
+v_cmpx_ge_i16                            src0,     vsrc1
+v_cmpx_ge_i32                            src0,     vsrc1
+v_cmpx_ge_i64                            src0,     vsrc1
+v_cmpx_ge_u16                            src0,     vsrc1
+v_cmpx_ge_u32                            src0,     vsrc1
+v_cmpx_ge_u64                            src0,     vsrc1
+v_cmpx_gt_f16                            src0,     vsrc1
+v_cmpx_gt_f32                            src0,     vsrc1
+v_cmpx_gt_f64                            src0,     vsrc1
+v_cmpx_gt_i16                            src0,     vsrc1
+v_cmpx_gt_i32                            src0,     vsrc1
+v_cmpx_gt_i64                            src0,     vsrc1
+v_cmpx_gt_u16                            src0,     vsrc1
+v_cmpx_gt_u32                            src0,     vsrc1
+v_cmpx_gt_u64                            src0,     vsrc1
+v_cmpx_le_f16                            src0,     vsrc1
+v_cmpx_le_f32                            src0,     vsrc1
+v_cmpx_le_f64                            src0,     vsrc1
+v_cmpx_le_i16                            src0,     vsrc1
+v_cmpx_le_i32                            src0,     vsrc1
+v_cmpx_le_i64                            src0,     vsrc1
+v_cmpx_le_u16                            src0,     vsrc1
+v_cmpx_le_u32                            src0,     vsrc1
+v_cmpx_le_u64                            src0,     vsrc1
+v_cmpx_lg_f16                            src0,     vsrc1
+v_cmpx_lg_f32                            src0,     vsrc1
+v_cmpx_lg_f64                            src0,     vsrc1
+v_cmpx_lt_f16                            src0,     vsrc1
+v_cmpx_lt_f32                            src0,     vsrc1
+v_cmpx_lt_f64                            src0,     vsrc1
+v_cmpx_lt_i16                            src0,     vsrc1
+v_cmpx_lt_i32                            src0,     vsrc1
+v_cmpx_lt_i64                            src0,     vsrc1
+v_cmpx_lt_u16                            src0,     vsrc1
+v_cmpx_lt_u32                            src0,     vsrc1
+v_cmpx_lt_u64                            src0,     vsrc1
+v_cmpx_ne_i16                            src0,     vsrc1
+v_cmpx_ne_i32                            src0,     vsrc1
+v_cmpx_ne_i64                            src0,     vsrc1
+v_cmpx_ne_u16                            src0,     vsrc1
+v_cmpx_ne_u32                            src0,     vsrc1
+v_cmpx_ne_u64                            src0,     vsrc1
+v_cmpx_neq_f16                           src0,     vsrc1
+v_cmpx_neq_f32                           src0,     vsrc1
+v_cmpx_neq_f64                           src0,     vsrc1
+v_cmpx_nge_f16                           src0,     vsrc1
+v_cmpx_nge_f32                           src0,     vsrc1
+v_cmpx_nge_f64                           src0,     vsrc1
+v_cmpx_ngt_f16                           src0,     vsrc1
+v_cmpx_ngt_f32                           src0,     vsrc1
+v_cmpx_ngt_f64                           src0,     vsrc1
+v_cmpx_nle_f16                           src0,     vsrc1
+v_cmpx_nle_f32                           src0,     vsrc1
+v_cmpx_nle_f64                           src0,     vsrc1
+v_cmpx_nlg_f16                           src0,     vsrc1
+v_cmpx_nlg_f32                           src0,     vsrc1
+v_cmpx_nlg_f64                           src0,     vsrc1
+v_cmpx_nlt_f16                           src0,     vsrc1
+v_cmpx_nlt_f32                           src0,     vsrc1
+v_cmpx_nlt_f64                           src0,     vsrc1
+v_cmpx_o_f16                             src0,     vsrc1
+v_cmpx_o_f32                             src0,     vsrc1
+v_cmpx_o_f64                             src0,     vsrc1
+v_cmpx_t_i32                             src0,     vsrc1
+v_cmpx_t_i64                             src0,     vsrc1
+v_cmpx_t_u32                             src0,     vsrc1
+v_cmpx_t_u64                             src0,     vsrc1
+v_cmpx_tru_f16                           src0,     vsrc1
+v_cmpx_tru_f32                           src0,     vsrc1
+v_cmpx_tru_f64                           src0,     vsrc1
+v_cmpx_u_f16                             src0,     vsrc1
+v_cmpx_u_f32                             src0,     vsrc1
+v_cmpx_u_f64                             src0,     vsrc1
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX7.html 2021-09-19 16:16:15.000000000 +0000 @@ -0,0 +1,1459 @@ + + + + + + + + + Syntax of GFX7 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of GFX7 Instructions

+ +
+

Introduction

+

This document describes the syntax of GFX7 instructions.

+
+
+

Notation

+

The notation used in this document is explained in the AMDGPU Instructions Notation guide (AMDGPUInstructionNotation).

+
+
+

Overview

+

An overview of the generic syntax and other features of AMDGPU instructions may be found in the AMDGPU Instruction Syntax guide (AMDGPUInstructionSyntax).

+
+
+

Instructions

+
+

DS

+
INSTRUCTION                    DST         SRC0      SRC1      SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————
+ds_add_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_add_src2_u32                            vaddr                              offset gds
+ds_add_src2_u64                            vaddr                              offset gds
+ds_add_u32                                 vaddr,    vdata                    offset gds
+ds_add_u64                                 vaddr,    vdata                    offset gds
+ds_and_b32                                 vaddr,    vdata                    offset gds
+ds_and_b64                                 vaddr,    vdata                    offset gds
+ds_and_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_and_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_and_src2_b32                            vaddr                              offset gds
+ds_and_src2_b64                            vaddr                              offset gds
+ds_append                      vdst                                           offset gds
+ds_cmpst_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_condxchg32_rtn_b64          vdst,       vaddr,    vdata                    offset gds
+ds_consume                     vdst                                           offset gds
+ds_dec_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_src2_u32                            vaddr                              offset gds
+ds_dec_src2_u64                            vaddr                              offset gds
+ds_dec_u32                                 vaddr,    vdata                    offset gds
+ds_dec_u64                                 vaddr,    vdata                    offset gds
+ds_gws_barrier                             vdata                              offset gds
+ds_gws_init                                vdata                              offset gds
+ds_gws_sema_br                             vdata                              offset gds
+ds_gws_sema_p                                                                 offset gds
+ds_gws_sema_release_all                                                       offset gds
+ds_gws_sema_v                                                                 offset gds
+ds_inc_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_src2_u32                            vaddr                              offset gds
+ds_inc_src2_u64                            vaddr                              offset gds
+ds_inc_u32                                 vaddr,    vdata                    offset gds
+ds_inc_u64                                 vaddr,    vdata                    offset gds
+ds_max_f32                                 vaddr,    vdata                    offset gds
+ds_max_f64                                 vaddr,    vdata                    offset gds
+ds_max_i32                                 vaddr,    vdata                    offset gds
+ds_max_i64                                 vaddr,    vdata                    offset gds
+ds_max_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_src2_f32                            vaddr                              offset gds
+ds_max_src2_f64                            vaddr                              offset gds
+ds_max_src2_i32                            vaddr                              offset gds
+ds_max_src2_i64                            vaddr                              offset gds
+ds_max_src2_u32                            vaddr                              offset gds
+ds_max_src2_u64                            vaddr                              offset gds
+ds_max_u32                                 vaddr,    vdata                    offset gds
+ds_max_u64                                 vaddr,    vdata                    offset gds
+ds_min_f32                                 vaddr,    vdata                    offset gds
+ds_min_f64                                 vaddr,    vdata                    offset gds
+ds_min_i32                                 vaddr,    vdata                    offset gds
+ds_min_i64                                 vaddr,    vdata                    offset gds
+ds_min_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_src2_f32                            vaddr                              offset gds
+ds_min_src2_f64                            vaddr                              offset gds
+ds_min_src2_i32                            vaddr                              offset gds
+ds_min_src2_i64                            vaddr                              offset gds
+ds_min_src2_u32                            vaddr                              offset gds
+ds_min_src2_u64                            vaddr                              offset gds
+ds_min_u32                                 vaddr,    vdata                    offset gds
+ds_min_u64                                 vaddr,    vdata                    offset gds
+ds_mskor_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_nop
+ds_or_b32                                  vaddr,    vdata                    offset gds
+ds_or_b64                                  vaddr,    vdata                    offset gds
+ds_or_rtn_b32                  vdst,       vaddr,    vdata                    offset gds
+ds_or_rtn_b64                  vdst,       vaddr,    vdata                    offset gds
+ds_or_src2_b32                             vaddr                              offset gds
+ds_or_src2_b64                             vaddr                              offset gds
+ds_ordered_count               vdst,       vaddr                              offset gds
+ds_read2_b32                   vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2_b64                   vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b32               vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b64               vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read_b128                   vdst,       vaddr                              offset gds
+ds_read_b32                    vdst,       vaddr                              offset gds
+ds_read_b64                    vdst,       vaddr                              offset gds
+ds_read_b96                    vdst,       vaddr                              offset gds
+ds_read_i16                    vdst,       vaddr                              offset gds
+ds_read_i8                     vdst,       vaddr                              offset gds
+ds_read_u16                    vdst,       vaddr                              offset gds
+ds_read_u8                     vdst,       vaddr                              offset gds
+ds_rsub_rtn_u32                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_rtn_u64                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_src2_u32                           vaddr                              offset gds
+ds_rsub_src2_u64                           vaddr                              offset gds
+ds_rsub_u32                                vaddr,    vdata                    offset gds
+ds_rsub_u64                                vaddr,    vdata                    offset gds
+ds_sub_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_src2_u32                            vaddr                              offset gds
+ds_sub_src2_u64                            vaddr                              offset gds
+ds_sub_u32                                 vaddr,    vdata                    offset gds
+ds_sub_u64                                 vaddr,    vdata                    offset gds
+ds_swizzle_b32                 vdst,       vaddr                              pattern gds
+ds_wrap_rtn_b32                vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_write2_b32                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2_b64                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b32                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b64                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write_b128                              vaddr,    vdata                    offset gds
+ds_write_b16                               vaddr,    vdata                    offset gds
+ds_write_b32                               vaddr,    vdata                    offset gds
+ds_write_b64                               vaddr,    vdata                    offset gds
+ds_write_b8                                vaddr,    vdata                    offset gds
+ds_write_b96                               vaddr,    vdata                    offset gds
+ds_write_src2_b32                          vaddr                              offset gds
+ds_write_src2_b64                          vaddr                              offset gds
+ds_wrxchg2_rtn_b32             vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2_rtn_b64             vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b32         vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b64         vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg_rtn_b32              vdst,       vaddr,    vdata                    offset gds
+ds_wrxchg_rtn_b64              vdst,       vaddr,    vdata                    offset gds
+ds_xor_b32                                 vaddr,    vdata                    offset gds
+ds_xor_b64                                 vaddr,    vdata                    offset gds
+ds_xor_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_src2_b32                            vaddr                              offset gds
+ds_xor_src2_b64                            vaddr                              offset gds
+
+
+

EXP

+
INSTRUCTION                    DST       SRC0      SRC1      SRC2      SRC3           MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————
+exp                            tgt,      vsrc0,    vsrc1,    vsrc2,    vsrc3          done compr vm
+
+
+

FLAT

+
INSTRUCTION                    DST           SRC0      SRC1             MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————
+flat_atomic_add                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_add_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_and                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_and_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_cmpswap            vdst:opt,     vaddr,    vdata:b32x2      glc slc
+flat_atomic_cmpswap_x2         vdst:opt,     vaddr,    vdata:b64x2      glc slc
+flat_atomic_dec                vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_dec_x2             vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_fcmpswap           vdst:opt:f32, vaddr,    vdata:f32x2      glc slc
+flat_atomic_fcmpswap_x2        vdst:opt:f64, vaddr,    vdata:f64x2      glc slc
+flat_atomic_fmax               vdst:opt:f32, vaddr,    vdata:f32        glc slc
+flat_atomic_fmax_x2            vdst:opt:f64, vaddr,    vdata:f64        glc slc
+flat_atomic_fmin               vdst:opt:f32, vaddr,    vdata:f32        glc slc
+flat_atomic_fmin_x2            vdst:opt:f64, vaddr,    vdata:f64        glc slc
+flat_atomic_inc                vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_inc_x2             vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_or                 vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_or_x2              vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_smax               vdst:opt:i32, vaddr,    vdata:i32        glc slc
+flat_atomic_smax_x2            vdst:opt:i64, vaddr,    vdata:i64        glc slc
+flat_atomic_smin               vdst:opt:i32, vaddr,    vdata:i32        glc slc
+flat_atomic_smin_x2            vdst:opt:i64, vaddr,    vdata:i64        glc slc
+flat_atomic_sub                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_sub_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_swap               vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_swap_x2            vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_umax               vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_umax_x2            vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_umin               vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_umin_x2            vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_xor                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_xor_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_load_dword                vdst,         vaddr                      glc slc
+flat_load_dwordx2              vdst,         vaddr                      glc slc
+flat_load_dwordx3              vdst,         vaddr                      glc slc
+flat_load_dwordx4              vdst,         vaddr                      glc slc
+flat_load_sbyte                vdst,         vaddr                      glc slc
+flat_load_sshort               vdst,         vaddr                      glc slc
+flat_load_ubyte                vdst,         vaddr                      glc slc
+flat_load_ushort               vdst,         vaddr                      glc slc
+flat_store_byte                              vaddr,    vdata            glc slc
+flat_store_dword                             vaddr,    vdata            glc slc
+flat_store_dwordx2                           vaddr,    vdata            glc slc
+flat_store_dwordx3                           vaddr,    vdata            glc slc
+flat_store_dwordx4                           vaddr,    vdata            glc slc
+flat_store_short                             vaddr,    vdata            glc slc
+
+
+

MIMG

+
INSTRUCTION                DST      SRC0       SRC1     SRC2          MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+image_atomic_add                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_and                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_cmpswap                vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_dec                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_fcmpswap               vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_fmax                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_fmin                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_inc                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_or                     vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_smax                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_smin                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_sub                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_swap                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_umax                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_umin                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_xor                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_gather4              vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_b            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_b_cl         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_b_cl_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_b_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_b          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_b_cl       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_b_cl_o     vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_b_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_cl         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_cl_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_l          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_l_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_lz         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_lz_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_c_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_cl           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_cl_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_l            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_l_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_lz           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_lz_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_gather4_o            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_get_lod              vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_get_resinfo          vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load                 vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_mip             vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_mip_pck         vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_mip_pck_sgn     vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_pck             vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_pck_sgn         vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_sample               vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_b             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_b_cl          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_b_cl_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_b_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_b           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_b_cl        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_b_cl_o      vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_b_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_cd          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_cd_cl       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_cd_cl_o     vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_cd_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_cl          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_cl_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_d           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_d_cl        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_d_cl_o      vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_d_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_l           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_l_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_lz          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_lz_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_c_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_cd            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_cd_cl         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_cd_cl_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_cd_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_cl            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_cl_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_d             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_d_cl          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_d_cl_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_d_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_l             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_l_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_lz            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_lz_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_sample_o             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_store                         vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da
+image_store_mip                     vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da
+image_store_mip_pck                 vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da
+image_store_pck                     vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da
+
+
+

MTBUF

+
INSTRUCTION                DST   SRC0   SRC1   SRC2    SRC3     MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————————
+tbuffer_load_format_x      vdst, vaddr, srsrc, soffset          fmt idxen offen addr64 offset12 glc slc
+tbuffer_load_format_xy     vdst, vaddr, srsrc, soffset          fmt idxen offen addr64 offset12 glc slc
+tbuffer_load_format_xyz    vdst, vaddr, srsrc, soffset          fmt idxen offen addr64 offset12 glc slc
+tbuffer_load_format_xyzw   vdst, vaddr, srsrc, soffset          fmt idxen offen addr64 offset12 glc slc
+tbuffer_store_format_x           vdata, vaddr, srsrc,  soffset  fmt idxen offen addr64 offset12 glc slc
+tbuffer_store_format_xy          vdata, vaddr, srsrc,  soffset  fmt idxen offen addr64 offset12 glc slc
+tbuffer_store_format_xyz         vdata, vaddr, srsrc,  soffset  fmt idxen offen addr64 offset12 glc slc
+tbuffer_store_format_xyzw        vdata, vaddr, srsrc,  soffset  fmt idxen offen addr64 offset12 glc slc
+
+
+

MUBUF

+
INSTRUCTION                DST   SRC0             SRC1   SRC2    SRC3     MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————————————————
+buffer_atomic_add                vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_add_x2             vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_and                vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_and_x2             vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_cmpswap            vdata:dst:b32x2, vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_cmpswap_x2         vdata:dst:b64x2, vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_dec                vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_dec_x2             vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_fcmpswap           vdata:dst:f32x2, vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_fcmpswap_x2        vdata:dst:f64x2, vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_fmax               vdata:dst:f32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_fmax_x2            vdata:dst:f64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_fmin               vdata:dst:f32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_fmin_x2            vdata:dst:f64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_inc                vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_inc_x2             vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_or                 vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_or_x2              vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_smax               vdata:dst:i32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_smax_x2            vdata:dst:i64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_smin               vdata:dst:i32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_smin_x2            vdata:dst:i64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_sub                vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_sub_x2             vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_swap               vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_swap_x2            vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_umax               vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_umax_x2            vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_umin               vdata:dst:u32,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_umin_x2            vdata:dst:u64,   vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_xor                vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_atomic_xor_x2             vdata:dst,       vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_load_dword          vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc lds
+buffer_load_dwordx2        vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc
+buffer_load_dwordx3        vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc
+buffer_load_dwordx4        vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc
+buffer_load_format_x       vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc lds
+buffer_load_format_xy      vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc
+buffer_load_format_xyz     vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc
+buffer_load_format_xyzw    vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc
+buffer_load_sbyte          vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc lds
+buffer_load_sshort         vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc lds
+buffer_load_ubyte          vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc lds
+buffer_load_ushort         vdst, vaddr,           srsrc, soffset          idxen offen addr64 offset12 glc slc lds
+buffer_store_byte                vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_dword               vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_dwordx2             vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_dwordx3             vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_dwordx4             vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_format_x            vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_format_xy           vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_format_xyz          vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_format_xyzw         vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_store_short               vdata,           vaddr, srsrc,  soffset  idxen offen addr64 offset12 glc slc
+buffer_wbinvl1
+buffer_wbinvl1_vol
+
+
+

SMRD

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+s_buffer_load_dword            sdst,     sbase,    soffset
+s_buffer_load_dwordx16         sdst,     sbase,    soffset
+s_buffer_load_dwordx2          sdst,     sbase,    soffset
+s_buffer_load_dwordx4          sdst,     sbase,    soffset
+s_buffer_load_dwordx8          sdst,     sbase,    soffset
+s_dcache_inv
+s_dcache_inv_vol
+s_load_dword                   sdst,     sbase,    soffset
+s_load_dwordx16                sdst,     sbase,    soffset
+s_load_dwordx2                 sdst,     sbase,    soffset
+s_load_dwordx4                 sdst,     sbase,    soffset
+s_load_dwordx8                 sdst,     sbase,    soffset
+s_memtime                      sdst:b64
+
+
+

SOP1

+
INSTRUCTION                    DST       SRC
+———————————————————————————————————————————————————
+s_abs_i32                      sdst,     ssrc
+s_and_saveexec_b64             sdst,     ssrc
+s_andn2_saveexec_b64           sdst,     ssrc
+s_bcnt0_i32_b32                sdst,     ssrc
+s_bcnt0_i32_b64                sdst,     ssrc
+s_bcnt1_i32_b32                sdst,     ssrc
+s_bcnt1_i32_b64                sdst,     ssrc
+s_bitset0_b32                  sdst,     ssrc
+s_bitset0_b64                  sdst,     ssrc:b32
+s_bitset1_b32                  sdst,     ssrc
+s_bitset1_b64                  sdst,     ssrc:b32
+s_brev_b32                     sdst,     ssrc
+s_brev_b64                     sdst,     ssrc
+s_cbranch_join                           ssrc
+s_cmov_b32                     sdst,     ssrc
+s_cmov_b64                     sdst,     ssrc
+s_ff0_i32_b32                  sdst,     ssrc
+s_ff0_i32_b64                  sdst,     ssrc
+s_ff1_i32_b32                  sdst,     ssrc
+s_ff1_i32_b64                  sdst,     ssrc
+s_flbit_i32                    sdst,     ssrc
+s_flbit_i32_b32                sdst,     ssrc
+s_flbit_i32_b64                sdst,     ssrc
+s_flbit_i32_i64                sdst,     ssrc
+s_getpc_b64                    sdst
+s_mov_b32                      sdst,     ssrc
+s_mov_b64                      sdst,     ssrc
+s_movreld_b32                  sdst,     ssrc
+s_movreld_b64                  sdst,     ssrc
+s_movrels_b32                  sdst,     ssrc
+s_movrels_b64                  sdst,     ssrc
+s_nand_saveexec_b64            sdst,     ssrc
+s_nor_saveexec_b64             sdst,     ssrc
+s_not_b32                      sdst,     ssrc
+s_not_b64                      sdst,     ssrc
+s_or_saveexec_b64              sdst,     ssrc
+s_orn2_saveexec_b64            sdst,     ssrc
+s_quadmask_b32                 sdst,     ssrc
+s_quadmask_b64                 sdst,     ssrc
+s_rfe_b64                                ssrc
+s_setpc_b64                              ssrc
+s_sext_i32_i16                 sdst,     ssrc
+s_sext_i32_i8                  sdst,     ssrc
+s_swappc_b64                   sdst,     ssrc
+s_wqm_b32                      sdst,     ssrc
+s_wqm_b64                      sdst,     ssrc
+s_xnor_saveexec_b64            sdst,     ssrc
+s_xor_saveexec_b64             sdst,     ssrc
+
+
+

SOP2

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+s_absdiff_i32                  sdst,     ssrc0,     ssrc1
+s_add_i32                      sdst,     ssrc0,     ssrc1
+s_add_u32                      sdst,     ssrc0,     ssrc1
+s_addc_u32                     sdst,     ssrc0,     ssrc1
+s_and_b32                      sdst,     ssrc0,     ssrc1
+s_and_b64                      sdst,     ssrc0,     ssrc1
+s_andn2_b32                    sdst,     ssrc0,     ssrc1
+s_andn2_b64                    sdst,     ssrc0,     ssrc1
+s_ashr_i32                     sdst,     ssrc0,     ssrc1:u32
+s_ashr_i64                     sdst,     ssrc0,     ssrc1:u32
+s_bfe_i32                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_i64                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_u32                      sdst,     ssrc0,     ssrc1
+s_bfe_u64                      sdst,     ssrc0,     ssrc1:u32
+s_bfm_b32                      sdst,     ssrc0,     ssrc1
+s_bfm_b64                      sdst,     ssrc0:b32, ssrc1:b32
+s_cbranch_g_fork                         ssrc0,     ssrc1
+s_cselect_b32                  sdst,     ssrc0,     ssrc1
+s_cselect_b64                  sdst,     ssrc0,     ssrc1
+s_lshl_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshl_b64                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b64                     sdst,     ssrc0,     ssrc1:u32
+s_max_i32                      sdst,     ssrc0,     ssrc1
+s_max_u32                      sdst,     ssrc0,     ssrc1
+s_min_i32                      sdst,     ssrc0,     ssrc1
+s_min_u32                      sdst,     ssrc0,     ssrc1
+s_mul_i32                      sdst,     ssrc0,     ssrc1
+s_nand_b32                     sdst,     ssrc0,     ssrc1
+s_nand_b64                     sdst,     ssrc0,     ssrc1
+s_nor_b32                      sdst,     ssrc0,     ssrc1
+s_nor_b64                      sdst,     ssrc0,     ssrc1
+s_or_b32                       sdst,     ssrc0,     ssrc1
+s_or_b64                       sdst,     ssrc0,     ssrc1
+s_orn2_b32                     sdst,     ssrc0,     ssrc1
+s_orn2_b64                     sdst,     ssrc0,     ssrc1
+s_sub_i32                      sdst,     ssrc0,     ssrc1
+s_sub_u32                      sdst,     ssrc0,     ssrc1
+s_subb_u32                     sdst,     ssrc0,     ssrc1
+s_xnor_b32                     sdst,     ssrc0,     ssrc1
+s_xnor_b64                     sdst,     ssrc0,     ssrc1
+s_xor_b32                      sdst,     ssrc0,     ssrc1
+s_xor_b64                      sdst,     ssrc0,     ssrc1
+
+
+

SOPC

+
INSTRUCTION                    SRC0      SRC1
+———————————————————————————————————————————————————
+s_bitcmp0_b32                  ssrc0,    ssrc1
+s_bitcmp0_b64                  ssrc0,    ssrc1:u32
+s_bitcmp1_b32                  ssrc0,    ssrc1
+s_bitcmp1_b64                  ssrc0,    ssrc1:u32
+s_cmp_eq_i32                   ssrc0,    ssrc1
+s_cmp_eq_u32                   ssrc0,    ssrc1
+s_cmp_ge_i32                   ssrc0,    ssrc1
+s_cmp_ge_u32                   ssrc0,    ssrc1
+s_cmp_gt_i32                   ssrc0,    ssrc1
+s_cmp_gt_u32                   ssrc0,    ssrc1
+s_cmp_le_i32                   ssrc0,    ssrc1
+s_cmp_le_u32                   ssrc0,    ssrc1
+s_cmp_lg_i32                   ssrc0,    ssrc1
+s_cmp_lg_u32                   ssrc0,    ssrc1
+s_cmp_lt_i32                   ssrc0,    ssrc1
+s_cmp_lt_u32                   ssrc0,    ssrc1
+s_setvskip                     ssrc0,    ssrc1
+
+
+

SOPK

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+s_addk_i32                     sdst,     imm16
+s_cbranch_i_fork                         ssrc,     label
+s_cmovk_i32                    sdst,     imm16
+s_cmpk_eq_i32                            ssrc,     imm16
+s_cmpk_eq_u32                            ssrc,     imm16
+s_cmpk_ge_i32                            ssrc,     imm16
+s_cmpk_ge_u32                            ssrc,     imm16
+s_cmpk_gt_i32                            ssrc,     imm16
+s_cmpk_gt_u32                            ssrc,     imm16
+s_cmpk_le_i32                            ssrc,     imm16
+s_cmpk_le_u32                            ssrc,     imm16
+s_cmpk_lg_i32                            ssrc,     imm16
+s_cmpk_lg_u32                            ssrc,     imm16
+s_cmpk_lt_i32                            ssrc,     imm16
+s_cmpk_lt_u32                            ssrc,     imm16
+s_getreg_b32                   sdst,     hwreg
+s_movk_i32                     sdst,     imm16
+s_mulk_i32                     sdst,     imm16
+s_setreg_b32                   hwreg,    ssrc
+s_setreg_imm32_b32             hwreg,    simm32
+
+
+

SOPP

+
INSTRUCTION                    SRC
+—————————————————————————————————————————
+s_barrier
+s_branch                       label
+s_cbranch_cdbgsys              label
+s_cbranch_cdbgsys_and_user     label
+s_cbranch_cdbgsys_or_user      label
+s_cbranch_cdbguser             label
+s_cbranch_execnz               label
+s_cbranch_execz                label
+s_cbranch_scc0                 label
+s_cbranch_scc1                 label
+s_cbranch_vccnz                label
+s_cbranch_vccz                 label
+s_decperflevel                 imm16
+s_endpgm
+s_icache_inv
+s_incperflevel                 imm16
+s_nop                          imm16
+s_sendmsg                      msg
+s_sendmsghalt                  msg
+s_sethalt                      imm16
+s_setkill                      imm16
+s_setprio                      imm16
+s_sleep                        imm16
+s_trap                         imm16
+s_ttracedata
+s_waitcnt                      waitcnt
+
+
+

VINTRP

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+v_interp_mov_f32               vdst,     param:b32, attr:b32
+v_interp_p1_f32                vdst,     vsrc,      attr:b32
+v_interp_p2_f32                vdst,     vsrc,      attr:b32
+
+
+

VOP1

+
INSTRUCTION                    DST       SRC
+———————————————————————————————————————————————————
+v_bfrev_b32                    vdst,     src
+v_ceil_f32                     vdst,     src
+v_ceil_f64                     vdst,     src
+v_clrexcp
+v_cos_f32                      vdst,     src
+v_cvt_f16_f32                  vdst,     src
+v_cvt_f32_f16                  vdst,     src
+v_cvt_f32_f64                  vdst,     src
+v_cvt_f32_i32                  vdst,     src
+v_cvt_f32_u32                  vdst,     src
+v_cvt_f32_ubyte0               vdst,     src
+v_cvt_f32_ubyte1               vdst,     src
+v_cvt_f32_ubyte2               vdst,     src
+v_cvt_f32_ubyte3               vdst,     src
+v_cvt_f64_f32                  vdst,     src
+v_cvt_f64_i32                  vdst,     src
+v_cvt_f64_u32                  vdst,     src
+v_cvt_flr_i32_f32              vdst,     src
+v_cvt_i32_f32                  vdst,     src
+v_cvt_i32_f64                  vdst,     src
+v_cvt_off_f32_i4               vdst,     src
+v_cvt_rpi_i32_f32              vdst,     src
+v_cvt_u32_f32                  vdst,     src
+v_cvt_u32_f64                  vdst,     src
+v_exp_f32                      vdst,     src
+v_exp_legacy_f32               vdst,     src
+v_ffbh_i32                     vdst,     src
+v_ffbh_u32                     vdst,     src
+v_ffbl_b32                     vdst,     src
+v_floor_f32                    vdst,     src
+v_floor_f64                    vdst,     src
+v_fract_f32                    vdst,     src
+v_fract_f64                    vdst,     src
+v_frexp_exp_i32_f32            vdst,     src
+v_frexp_exp_i32_f64            vdst,     src
+v_frexp_mant_f32               vdst,     src
+v_frexp_mant_f64               vdst,     src
+v_log_clamp_f32                vdst,     src
+v_log_f32                      vdst,     src
+v_log_legacy_f32               vdst,     src
+v_mov_b32                      vdst,     src
+v_movreld_b32                  vdst,     src
+v_movrels_b32                  vdst,     vsrc
+v_movrelsd_b32                 vdst,     vsrc
+v_nop
+v_not_b32                      vdst,     src
+v_rcp_clamp_f32                vdst,     src
+v_rcp_clamp_f64                vdst,     src
+v_rcp_f32                      vdst,     src
+v_rcp_f64                      vdst,     src
+v_rcp_iflag_f32                vdst,     src
+v_rcp_legacy_f32               vdst,     src
+v_readfirstlane_b32            sdst,     src
+v_rndne_f32                    vdst,     src
+v_rndne_f64                    vdst,     src
+v_rsq_clamp_f32                vdst,     src
+v_rsq_clamp_f64                vdst,     src
+v_rsq_f32                      vdst,     src
+v_rsq_f64                      vdst,     src
+v_rsq_legacy_f32               vdst,     src
+v_sin_f32                      vdst,     src
+v_sqrt_f32                     vdst,     src
+v_sqrt_f64                     vdst,     src
+v_trunc_f32                    vdst,     src
+v_trunc_f64                    vdst,     src
+
+
+

VOP2

+
INSTRUCTION                    DST0      DST1      SRC0      SRC1      SRC2
+—————————————————————————————————————————————————————————————————————————————————
+v_add_f32                      vdst,               src0,     vsrc1
+v_add_i32                      vdst,     vcc,      src0,     vsrc1
+v_addc_u32                     vdst,     vcc,      src0,     vsrc1,    vcc
+v_and_b32                      vdst,               src0,     vsrc1
+v_ashr_i32                     vdst,               src0,     vsrc1:u32
+v_ashrrev_i32                  vdst,               src0:u32, vsrc1
+v_bcnt_u32_b32                 vdst,               src0,     vsrc1
+v_bfm_b32                      vdst,               src0,     vsrc1
+v_cndmask_b32                  vdst,               src0,     vsrc1,    vcc
+v_cvt_pk_i16_i32               vdst,               src0:i32, vsrc1:i32
+v_cvt_pk_u16_u32               vdst,               src0:u32, vsrc1:u32
+v_cvt_pkaccum_u8_f32           vdst:b32,           src0:f32, vsrc1:u32
+v_cvt_pknorm_i16_f32           vdst,               src0:f32, vsrc1:f32
+v_cvt_pknorm_u16_f32           vdst,               src0:f32, vsrc1:f32
+v_cvt_pkrtz_f16_f32            vdst,               src0:f32, vsrc1:f32
+v_ldexp_f32                    vdst,               src0,     vsrc1:i32
+v_lshl_b32                     vdst,               src0,     vsrc1:u32
+v_lshlrev_b32                  vdst,               src0:u32, vsrc1
+v_lshr_b32                     vdst,               src0,     vsrc1:u32
+v_lshrrev_b32                  vdst,               src0:u32, vsrc1
+v_mac_f32                      vdst,               src0,     vsrc1
+v_mac_legacy_f32               vdst,               src0,     vsrc1
+v_madak_f32                    vdst,               src0,     vsrc1,    simm32
+v_madmk_f32                    vdst,               src0,     simm32,   vsrc2
+v_max_f32                      vdst,               src0,     vsrc1
+v_max_i32                      vdst,               src0,     vsrc1
+v_max_legacy_f32               vdst,               src0,     vsrc1
+v_max_u32                      vdst,               src0,     vsrc1
+v_mbcnt_hi_u32_b32             vdst,               src0,     vsrc1
+v_mbcnt_lo_u32_b32             vdst,               src0,     vsrc1
+v_min_f32                      vdst,               src0,     vsrc1
+v_min_i32                      vdst,               src0,     vsrc1
+v_min_legacy_f32               vdst,               src0,     vsrc1
+v_min_u32                      vdst,               src0,     vsrc1
+v_mul_f32                      vdst,               src0,     vsrc1
+v_mul_hi_i32_i24               vdst,               src0,     vsrc1
+v_mul_hi_u32_u24               vdst,               src0,     vsrc1
+v_mul_i32_i24                  vdst,               src0,     vsrc1
+v_mul_legacy_f32               vdst,               src0,     vsrc1
+v_mul_u32_u24                  vdst,               src0,     vsrc1
+v_or_b32                       vdst,               src0,     vsrc1
+v_readlane_b32                 sdst,               src0,     ssrc1
+v_sub_f32                      vdst,               src0,     vsrc1
+v_sub_i32                      vdst,     vcc,      src0,     vsrc1
+v_subb_u32                     vdst,     vcc,      src0,     vsrc1,    vcc
+v_subbrev_u32                  vdst,     vcc,      src0,     vsrc1,    vcc
+v_subrev_f32                   vdst,               src0,     vsrc1
+v_subrev_i32                   vdst,     vcc,      src0,     vsrc1
+v_writelane_b32                vdst,               ssrc0,    ssrc1
+v_xor_b32                      vdst,               src0,     vsrc1
+
+
+

VOP3

+
INSTRUCTION                    DST0        DST1      SRC0        SRC1        SRC2         MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_f32_e64                  vdst,                 src0:m,     src1:m                   clamp omod
+v_add_f64                      vdst,                 src0:m,     src1:m                   clamp omod
+v_add_i32_e64                  vdst,       sdst,     src0,       src1
+v_addc_u32_e64                 vdst,       sdst,     src0,       src1,       ssrc2
+v_alignbit_b32                 vdst,                 src0,       src1,       src2
+v_alignbyte_b32                vdst,                 src0,       src1,       src2
+v_and_b32_e64                  vdst,                 src0,       src1
+v_ashr_i32_e64                 vdst,                 src0,       src1:u32
+v_ashr_i64                     vdst,                 src0,       src1:u32
+v_ashrrev_i32_e64              vdst,                 src0:u32,   src1
+v_bcnt_u32_b32_e64             vdst,                 src0,       src1
+v_bfe_i32                      vdst,                 src0,       src1:u32,   src2:u32
+v_bfe_u32                      vdst,                 src0,       src1,       src2
+v_bfi_b32                      vdst,                 src0,       src1,       src2
+v_bfm_b32_e64                  vdst,                 src0,       src1
+v_bfrev_b32_e64                vdst,                 src
+v_ceil_f32_e64                 vdst,                 src:m                                clamp omod
+v_ceil_f64_e64                 vdst,                 src:m                                clamp omod
+v_clrexcp_e64
+v_cmp_class_f32_e64            sdst,                 src0:m,     src1:b32
+v_cmp_class_f64_e64            sdst,                 src0:m,     src1:b32
+v_cmp_eq_f32_e64               sdst,                 src0:m,     src1:m
+v_cmp_eq_f64_e64               sdst,                 src0:m,     src1:m
+v_cmp_eq_i32_e64               sdst,                 src0,       src1
+v_cmp_eq_i64_e64               sdst,                 src0,       src1
+v_cmp_eq_u32_e64               sdst,                 src0,       src1
+v_cmp_eq_u64_e64               sdst,                 src0,       src1
+v_cmp_f_f32_e64                sdst,                 src0:m,     src1:m
+v_cmp_f_f64_e64                sdst,                 src0:m,     src1:m
+v_cmp_f_i32_e64                sdst,                 src0,       src1
+v_cmp_f_i64_e64                sdst,                 src0,       src1
+v_cmp_f_u32_e64                sdst,                 src0,       src1
+v_cmp_f_u64_e64                sdst,                 src0,       src1
+v_cmp_ge_f32_e64               sdst,                 src0:m,     src1:m
+v_cmp_ge_f64_e64               sdst,                 src0:m,     src1:m
+v_cmp_ge_i32_e64               sdst,                 src0,       src1
+v_cmp_ge_i64_e64               sdst,                 src0,       src1
+v_cmp_ge_u32_e64               sdst,                 src0,       src1
+v_cmp_ge_u64_e64               sdst,                 src0,       src1
+v_cmp_gt_f32_e64               sdst,                 src0:m,     src1:m
+v_cmp_gt_f64_e64               sdst,                 src0:m,     src1:m
+v_cmp_gt_i32_e64               sdst,                 src0,       src1
+v_cmp_gt_i64_e64               sdst,                 src0,       src1
+v_cmp_gt_u32_e64               sdst,                 src0,       src1
+v_cmp_gt_u64_e64               sdst,                 src0,       src1
+v_cmp_le_f32_e64               sdst,                 src0:m,     src1:m
+v_cmp_le_f64_e64               sdst,                 src0:m,     src1:m
+v_cmp_le_i32_e64               sdst,                 src0,       src1
+v_cmp_le_i64_e64               sdst,                 src0,       src1
+v_cmp_le_u32_e64               sdst,                 src0,       src1
+v_cmp_le_u64_e64               sdst,                 src0,       src1
+v_cmp_lg_f32_e64               sdst,                 src0:m,     src1:m
+v_cmp_lg_f64_e64               sdst,                 src0:m,     src1:m
+v_cmp_lt_f32_e64               sdst,                 src0:m,     src1:m
+v_cmp_lt_f64_e64               sdst,                 src0:m,     src1:m
+v_cmp_lt_i32_e64               sdst,                 src0,       src1
+v_cmp_lt_i64_e64               sdst,                 src0,       src1
+v_cmp_lt_u32_e64               sdst,                 src0,       src1
+v_cmp_lt_u64_e64               sdst,                 src0,       src1
+v_cmp_ne_i32_e64               sdst,                 src0,       src1
+v_cmp_ne_i64_e64               sdst,                 src0,       src1
+v_cmp_ne_u32_e64               sdst,                 src0,       src1
+v_cmp_ne_u64_e64               sdst,                 src0,       src1
+v_cmp_neq_f32_e64              sdst,                 src0:m,     src1:m
+v_cmp_neq_f64_e64              sdst,                 src0:m,     src1:m
+v_cmp_nge_f32_e64              sdst,                 src0:m,     src1:m
+v_cmp_nge_f64_e64              sdst,                 src0:m,     src1:m
+v_cmp_ngt_f32_e64              sdst,                 src0:m,     src1:m
+v_cmp_ngt_f64_e64              sdst,                 src0:m,     src1:m
+v_cmp_nle_f32_e64              sdst,                 src0:m,     src1:m
+v_cmp_nle_f64_e64              sdst,                 src0:m,     src1:m
+v_cmp_nlg_f32_e64              sdst,                 src0:m,     src1:m
+v_cmp_nlg_f64_e64              sdst,                 src0:m,     src1:m
+v_cmp_nlt_f32_e64              sdst,                 src0:m,     src1:m
+v_cmp_nlt_f64_e64              sdst,                 src0:m,     src1:m
+v_cmp_o_f32_e64                sdst,                 src0:m,     src1:m
+v_cmp_o_f64_e64                sdst,                 src0:m,     src1:m
+v_cmp_t_i32_e64                sdst,                 src0,       src1
+v_cmp_t_i64_e64                sdst,                 src0,       src1
+v_cmp_t_u32_e64                sdst,                 src0,       src1
+v_cmp_t_u64_e64                sdst,                 src0,       src1
+v_cmp_tru_f32_e64              sdst,                 src0:m,     src1:m
+v_cmp_tru_f64_e64              sdst,                 src0:m,     src1:m
+v_cmp_u_f32_e64                sdst,                 src0:m,     src1:m
+v_cmp_u_f64_e64                sdst,                 src0:m,     src1:m
+v_cmps_eq_f32_e64              sdst,                 src0:m,     src1:m
+v_cmps_eq_f64_e64              sdst,                 src0:m,     src1:m
+v_cmps_f_f32_e64               sdst,                 src0:m,     src1:m
+v_cmps_f_f64_e64               sdst,                 src0:m,     src1:m
+v_cmps_ge_f32_e64              sdst,                 src0:m,     src1:m
+v_cmps_ge_f64_e64              sdst,                 src0:m,     src1:m
+v_cmps_gt_f32_e64              sdst,                 src0:m,     src1:m
+v_cmps_gt_f64_e64              sdst,                 src0:m,     src1:m
+v_cmps_le_f32_e64              sdst,                 src0:m,     src1:m
+v_cmps_le_f64_e64              sdst,                 src0:m,     src1:m
+v_cmps_lg_f32_e64              sdst,                 src0:m,     src1:m
+v_cmps_lg_f64_e64              sdst,                 src0:m,     src1:m
+v_cmps_lt_f32_e64              sdst,                 src0:m,     src1:m
+v_cmps_lt_f64_e64              sdst,                 src0:m,     src1:m
+v_cmps_neq_f32_e64             sdst,                 src0:m,     src1:m
+v_cmps_neq_f64_e64             sdst,                 src0:m,     src1:m
+v_cmps_nge_f32_e64             sdst,                 src0:m,     src1:m
+v_cmps_nge_f64_e64             sdst,                 src0:m,     src1:m
+v_cmps_ngt_f32_e64             sdst,                 src0:m,     src1:m
+v_cmps_ngt_f64_e64             sdst,                 src0:m,     src1:m
+v_cmps_nle_f32_e64             sdst,                 src0:m,     src1:m
+v_cmps_nle_f64_e64             sdst,                 src0:m,     src1:m
+v_cmps_nlg_f32_e64             sdst,                 src0:m,     src1:m
+v_cmps_nlg_f64_e64             sdst,                 src0:m,     src1:m
+v_cmps_nlt_f32_e64             sdst,                 src0:m,     src1:m
+v_cmps_nlt_f64_e64             sdst,                 src0:m,     src1:m
+v_cmps_o_f32_e64               sdst,                 src0:m,     src1:m
+v_cmps_o_f64_e64               sdst,                 src0:m,     src1:m
+v_cmps_tru_f32_e64             sdst,                 src0:m,     src1:m
+v_cmps_tru_f64_e64             sdst,                 src0:m,     src1:m
+v_cmps_u_f32_e64               sdst,                 src0:m,     src1:m
+v_cmps_u_f64_e64               sdst,                 src0:m,     src1:m
+v_cmpsx_eq_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_eq_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_f_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpsx_f_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpsx_ge_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_ge_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_gt_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_gt_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_le_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_le_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_lg_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_lg_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_lt_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_lt_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpsx_neq_f32_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_neq_f64_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nge_f32_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nge_f64_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_ngt_f32_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_ngt_f64_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nle_f32_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nle_f64_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nlg_f32_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nlg_f64_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nlt_f32_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_nlt_f64_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_o_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpsx_o_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpsx_tru_f32_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_tru_f64_e64            sdst,                 src0:m,     src1:m
+v_cmpsx_u_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpsx_u_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpx_class_f32_e64           sdst,                 src0:m,     src1:b32
+v_cmpx_class_f64_e64           sdst,                 src0:m,     src1:b32
+v_cmpx_eq_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpx_eq_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpx_eq_i32_e64              sdst,                 src0,       src1
+v_cmpx_eq_i64_e64              sdst,                 src0,       src1
+v_cmpx_eq_u32_e64              sdst,                 src0,       src1
+v_cmpx_eq_u64_e64              sdst,                 src0,       src1
+v_cmpx_f_f32_e64               sdst,                 src0:m,     src1:m
+v_cmpx_f_f64_e64               sdst,                 src0:m,     src1:m
+v_cmpx_f_i32_e64               sdst,                 src0,       src1
+v_cmpx_f_i64_e64               sdst,                 src0,       src1
+v_cmpx_f_u32_e64               sdst,                 src0,       src1
+v_cmpx_f_u64_e64               sdst,                 src0,       src1
+v_cmpx_ge_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpx_ge_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpx_ge_i32_e64              sdst,                 src0,       src1
+v_cmpx_ge_i64_e64              sdst,                 src0,       src1
+v_cmpx_ge_u32_e64              sdst,                 src0,       src1
+v_cmpx_ge_u64_e64              sdst,                 src0,       src1
+v_cmpx_gt_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpx_gt_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpx_gt_i32_e64              sdst,                 src0,       src1
+v_cmpx_gt_i64_e64              sdst,                 src0,       src1
+v_cmpx_gt_u32_e64              sdst,                 src0,       src1
+v_cmpx_gt_u64_e64              sdst,                 src0,       src1
+v_cmpx_le_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpx_le_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpx_le_i32_e64              sdst,                 src0,       src1
+v_cmpx_le_i64_e64              sdst,                 src0,       src1
+v_cmpx_le_u32_e64              sdst,                 src0,       src1
+v_cmpx_le_u64_e64              sdst,                 src0,       src1
+v_cmpx_lg_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpx_lg_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpx_lt_f32_e64              sdst,                 src0:m,     src1:m
+v_cmpx_lt_f64_e64              sdst,                 src0:m,     src1:m
+v_cmpx_lt_i32_e64              sdst,                 src0,       src1
+v_cmpx_lt_i64_e64              sdst,                 src0,       src1
+v_cmpx_lt_u32_e64              sdst,                 src0,       src1
+v_cmpx_lt_u64_e64              sdst,                 src0,       src1
+v_cmpx_ne_i32_e64              sdst,                 src0,       src1
+v_cmpx_ne_i64_e64              sdst,                 src0,       src1
+v_cmpx_ne_u32_e64              sdst,                 src0,       src1
+v_cmpx_ne_u64_e64              sdst,                 src0,       src1
+v_cmpx_neq_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpx_neq_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nge_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nge_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpx_ngt_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpx_ngt_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nle_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nle_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nlg_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nlg_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nlt_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpx_nlt_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpx_o_f32_e64               sdst,                 src0:m,     src1:m
+v_cmpx_o_f64_e64               sdst,                 src0:m,     src1:m
+v_cmpx_t_i32_e64               sdst,                 src0,       src1
+v_cmpx_t_i64_e64               sdst,                 src0,       src1
+v_cmpx_t_u32_e64               sdst,                 src0,       src1
+v_cmpx_t_u64_e64               sdst,                 src0,       src1
+v_cmpx_tru_f32_e64             sdst,                 src0:m,     src1:m
+v_cmpx_tru_f64_e64             sdst,                 src0:m,     src1:m
+v_cmpx_u_f32_e64               sdst,                 src0:m,     src1:m
+v_cmpx_u_f64_e64               sdst,                 src0:m,     src1:m
+v_cndmask_b32_e64              vdst,                 src0,       src1,       ssrc2
+v_cos_f32_e64                  vdst,                 src:m                                clamp omod
+v_cubeid_f32                   vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_cubema_f32                   vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_cubesc_f32                   vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_cubetc_f32                   vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_cvt_f16_f32_e64              vdst,                 src:m
+v_cvt_f32_f16_e64              vdst,                 src                                  clamp omod
+v_cvt_f32_f64_e64              vdst,                 src:m                                clamp omod
+v_cvt_f32_i32_e64              vdst,                 src                                  clamp omod
+v_cvt_f32_u32_e64              vdst,                 src                                  clamp omod
+v_cvt_f32_ubyte0_e64           vdst,                 src
+v_cvt_f32_ubyte1_e64           vdst,                 src
+v_cvt_f32_ubyte2_e64           vdst,                 src
+v_cvt_f32_ubyte3_e64           vdst,                 src
+v_cvt_f64_f32_e64              vdst,                 src:m                                clamp omod
+v_cvt_f64_i32_e64              vdst,                 src                                  clamp omod
+v_cvt_f64_u32_e64              vdst,                 src                                  clamp omod
+v_cvt_flr_i32_f32_e64          vdst,                 src:m
+v_cvt_i32_f32_e64              vdst,                 src:m
+v_cvt_i32_f64_e64              vdst,                 src:m
+v_cvt_off_f32_i4_e64           vdst,                 src                                  clamp omod
+v_cvt_pk_i16_i32_e64           vdst,                 src0:i32,   src1:i32
+v_cvt_pk_u16_u32_e64           vdst,                 src0:u32,   src1:u32
+v_cvt_pk_u8_f32                vdst:b32,             src0:f32,   src1:u32,   src2:u32
+v_cvt_pkaccum_u8_f32_e64       vdst:b32,             src0:m:f32, src1:u32
+v_cvt_pknorm_i16_f32_e64       vdst,                 src0:m:f32, src1:m:f32
+v_cvt_pknorm_u16_f32_e64       vdst,                 src0:m:f32, src1:m:f32
+v_cvt_pkrtz_f16_f32_e64        vdst,                 src0:m:f32, src1:m:f32
+v_cvt_rpi_i32_f32_e64          vdst,                 src:m
+v_cvt_u32_f32_e64              vdst,                 src:m
+v_cvt_u32_f64_e64              vdst,                 src:m
+v_div_fixup_f32                vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_div_fixup_f64                vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_div_fmas_f32                 vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_div_fmas_f64                 vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_div_scale_f32                vdst,       vcc,      src0,       src1,       src2
+v_div_scale_f64                vdst,       vcc,      src0,       src1,       src2
+v_exp_f32_e64                  vdst,                 src:m                                clamp omod
+v_exp_legacy_f32_e64           vdst,                 src:m                                clamp omod
+v_ffbh_i32_e64                 vdst,                 src
+v_ffbh_u32_e64                 vdst,                 src
+v_ffbl_b32_e64                 vdst,                 src
+v_floor_f32_e64                vdst,                 src:m                                clamp omod
+v_floor_f64_e64                vdst,                 src:m                                clamp omod
+v_fma_f32                      vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_fma_f64                      vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_fract_f32_e64                vdst,                 src:m                                clamp omod
+v_fract_f64_e64                vdst,                 src:m                                clamp omod
+v_frexp_exp_i32_f32_e64        vdst,                 src
+v_frexp_exp_i32_f64_e64        vdst,                 src:m
+v_frexp_mant_f32_e64           vdst,                 src
+v_frexp_mant_f64_e64           vdst,                 src:m                                clamp omod
+v_ldexp_f32_e64                vdst,                 src0:m,     src1:i32                 clamp omod
+v_ldexp_f64                    vdst,                 src0:m,     src1:i32                 clamp omod
+v_lerp_u8                      vdst:u32,             src0:b32,   src1:b32,   src2:b32
+v_log_clamp_f32_e64            vdst,                 src:m                                clamp omod
+v_log_f32_e64                  vdst,                 src:m                                clamp omod
+v_log_legacy_f32_e64           vdst,                 src:m                                clamp omod
+v_lshl_b32_e64                 vdst,                 src0,       src1:u32
+v_lshl_b64                     vdst,                 src0,       src1:u32
+v_lshlrev_b32_e64              vdst,                 src0:u32,   src1
+v_lshr_b32_e64                 vdst,                 src0,       src1:u32
+v_lshr_b64                     vdst,                 src0,       src1:u32
+v_lshrrev_b32_e64              vdst,                 src0:u32,   src1
+v_mac_f32_e64                  vdst,                 src0:m,     src1:m                   clamp omod
+v_mac_legacy_f32_e64           vdst,                 src0:m,     src1:m                   clamp omod
+v_mad_f32                      vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_mad_i32_i24                  vdst,                 src0,       src1,       src2:i32
+v_mad_i64_i32                  vdst,       sdst,     src0,       src1,       src2:i64
+v_mad_legacy_f32               vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_mad_u32_u24                  vdst,                 src0,       src1,       src2:u32
+v_mad_u64_u32                  vdst,       sdst,     src0,       src1,       src2:u64
+v_max3_f32                     vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_max3_i32                     vdst,                 src0,       src1,       src2
+v_max3_u32                     vdst,                 src0,       src1,       src2
+v_max_f32_e64                  vdst,                 src0:m,     src1:m                   clamp omod
+v_max_f64                      vdst,                 src0:m,     src1:m                   clamp omod
+v_max_i32_e64                  vdst,                 src0,       src1
+v_max_legacy_f32_e64           vdst,                 src0:m,     src1:m                   clamp omod
+v_max_u32_e64                  vdst,                 src0,       src1
+v_mbcnt_hi_u32_b32_e64         vdst,                 src0,       src1
+v_mbcnt_lo_u32_b32_e64         vdst,                 src0,       src1
+v_med3_f32                     vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_med3_i32                     vdst,                 src0,       src1,       src2
+v_med3_u32                     vdst,                 src0,       src1,       src2
+v_min3_f32                     vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_min3_i32                     vdst,                 src0,       src1,       src2
+v_min3_u32                     vdst,                 src0,       src1,       src2
+v_min_f32_e64                  vdst,                 src0:m,     src1:m                   clamp omod
+v_min_f64                      vdst,                 src0:m,     src1:m                   clamp omod
+v_min_i32_e64                  vdst,                 src0,       src1
+v_min_legacy_f32_e64           vdst,                 src0:m,     src1:m                   clamp omod
+v_min_u32_e64                  vdst,                 src0,       src1
+v_mov_b32_e64                  vdst,                 src
+v_movreld_b32_e64              vdst,                 src
+v_movrels_b32_e64              vdst,                 vsrc
+v_movrelsd_b32_e64             vdst,                 vsrc
+v_mqsad_pk_u16_u8              vdst:u16x4,           src0:u8x8,  src1:u8x4,  src2:u16x4
+v_mqsad_u32_u8                 vdst:u32x4,           src0:u8x8,  src1:u8x4,  vsrc2:u32x4
+v_msad_u8                      vdst:u32,             src0:u8x4,  src1:u8x4,  src2:u32
+v_mul_f32_e64                  vdst,                 src0:m,     src1:m                   clamp omod
+v_mul_f64                      vdst,                 src0:m,     src1:m                   clamp omod
+v_mul_hi_i32                   vdst,                 src0,       src1
+v_mul_hi_i32_i24_e64           vdst,                 src0,       src1
+v_mul_hi_u32                   vdst,                 src0,       src1
+v_mul_hi_u32_u24_e64           vdst,                 src0,       src1
+v_mul_i32_i24_e64              vdst,                 src0,       src1
+v_mul_legacy_f32_e64           vdst,                 src0:m,     src1:m                   clamp omod
+v_mul_lo_i32                   vdst,                 src0,       src1
+v_mul_lo_u32                   vdst,                 src0,       src1
+v_mul_u32_u24_e64              vdst,                 src0,       src1
+v_mullit_f32                   vdst,                 src0:m,     src1:m,     src2:m       clamp omod
+v_nop_e64
+v_not_b32_e64                  vdst,                 src
+v_or_b32_e64                   vdst,                 src0,       src1
+v_qsad_pk_u16_u8               vdst:u16x4,           src0:u8x8,  src1:u8x4,  src2:u16x4
+v_rcp_clamp_f32_e64            vdst,                 src:m                                clamp omod
+v_rcp_clamp_f64_e64            vdst,                 src:m                                clamp omod
+v_rcp_f32_e64                  vdst,                 src:m                                clamp omod
+v_rcp_f64_e64                  vdst,                 src:m                                clamp omod
+v_rcp_iflag_f32_e64            vdst,                 src:m                                clamp omod
+v_rcp_legacy_f32_e64           vdst,                 src:m                                clamp omod
+v_rndne_f32_e64                vdst,                 src:m                                clamp omod
+v_rndne_f64_e64                vdst,                 src:m                                clamp omod
+v_rsq_clamp_f32_e64            vdst,                 src:m                                clamp omod
+v_rsq_clamp_f64_e64            vdst,                 src:m                                clamp omod
+v_rsq_f32_e64                  vdst,                 src:m                                clamp omod
+v_rsq_f64_e64                  vdst,                 src:m                                clamp omod
+v_rsq_legacy_f32_e64           vdst,                 src:m                                clamp omod
+v_sad_hi_u8                    vdst:u32,             src0:u8x4,  src1:u8x4,  src2:u32
+v_sad_u16                      vdst:u32,             src0:u16x2, src1:u16x2, src2:u32
+v_sad_u32                      vdst,                 src0,       src1,       src2
+v_sad_u8                       vdst:u32,             src0:u8x4,  src1:u8x4,  src2:u32
+v_sin_f32_e64                  vdst,                 src:m                                clamp omod
+v_sqrt_f32_e64                 vdst,                 src:m                                clamp omod
+v_sqrt_f64_e64                 vdst,                 src:m                                clamp omod
+v_sub_f32_e64                  vdst,                 src0:m,     src1:m                   clamp omod
+v_sub_i32_e64                  vdst,       sdst,     src0,       src1
+v_subb_u32_e64                 vdst,       sdst,     src0,       src1,       ssrc2
+v_subbrev_u32_e64              vdst,       sdst,     src0,       src1,       ssrc2
+v_subrev_f32_e64               vdst,                 src0:m,     src1:m                   clamp omod
+v_subrev_i32_e64               vdst,       sdst,     src0,       src1
+v_trig_preop_f64               vdst,                 src0:m,     src1:u32                 clamp omod
+v_trunc_f32_e64                vdst,                 src:m                                clamp omod
+v_trunc_f64_e64                vdst,                 src:m                                clamp omod
+v_xor_b32_e64                  vdst,                 src0,       src1
+
+
+

VOPC

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+v_cmp_class_f32                vcc,      src0,     vsrc1:b32
+v_cmp_class_f64                vcc,      src0,     vsrc1:b32
+v_cmp_eq_f32                   vcc,      src0,     vsrc1
+v_cmp_eq_f64                   vcc,      src0,     vsrc1
+v_cmp_eq_i32                   vcc,      src0,     vsrc1
+v_cmp_eq_i64                   vcc,      src0,     vsrc1
+v_cmp_eq_u32                   vcc,      src0,     vsrc1
+v_cmp_eq_u64                   vcc,      src0,     vsrc1
+v_cmp_f_f32                    vcc,      src0,     vsrc1
+v_cmp_f_f64                    vcc,      src0,     vsrc1
+v_cmp_f_i32                    vcc,      src0,     vsrc1
+v_cmp_f_i64                    vcc,      src0,     vsrc1
+v_cmp_f_u32                    vcc,      src0,     vsrc1
+v_cmp_f_u64                    vcc,      src0,     vsrc1
+v_cmp_ge_f32                   vcc,      src0,     vsrc1
+v_cmp_ge_f64                   vcc,      src0,     vsrc1
+v_cmp_ge_i32                   vcc,      src0,     vsrc1
+v_cmp_ge_i64                   vcc,      src0,     vsrc1
+v_cmp_ge_u32                   vcc,      src0,     vsrc1
+v_cmp_ge_u64                   vcc,      src0,     vsrc1
+v_cmp_gt_f32                   vcc,      src0,     vsrc1
+v_cmp_gt_f64                   vcc,      src0,     vsrc1
+v_cmp_gt_i32                   vcc,      src0,     vsrc1
+v_cmp_gt_i64                   vcc,      src0,     vsrc1
+v_cmp_gt_u32                   vcc,      src0,     vsrc1
+v_cmp_gt_u64                   vcc,      src0,     vsrc1
+v_cmp_le_f32                   vcc,      src0,     vsrc1
+v_cmp_le_f64                   vcc,      src0,     vsrc1
+v_cmp_le_i32                   vcc,      src0,     vsrc1
+v_cmp_le_i64                   vcc,      src0,     vsrc1
+v_cmp_le_u32                   vcc,      src0,     vsrc1
+v_cmp_le_u64                   vcc,      src0,     vsrc1
+v_cmp_lg_f32                   vcc,      src0,     vsrc1
+v_cmp_lg_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_f32                   vcc,      src0,     vsrc1
+v_cmp_lt_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_i32                   vcc,      src0,     vsrc1
+v_cmp_lt_i64                   vcc,      src0,     vsrc1
+v_cmp_lt_u32                   vcc,      src0,     vsrc1
+v_cmp_lt_u64                   vcc,      src0,     vsrc1
+v_cmp_ne_i32                   vcc,      src0,     vsrc1
+v_cmp_ne_i64                   vcc,      src0,     vsrc1
+v_cmp_ne_u32                   vcc,      src0,     vsrc1
+v_cmp_ne_u64                   vcc,      src0,     vsrc1
+v_cmp_neq_f32                  vcc,      src0,     vsrc1
+v_cmp_neq_f64                  vcc,      src0,     vsrc1
+v_cmp_nge_f32                  vcc,      src0,     vsrc1
+v_cmp_nge_f64                  vcc,      src0,     vsrc1
+v_cmp_ngt_f32                  vcc,      src0,     vsrc1
+v_cmp_ngt_f64                  vcc,      src0,     vsrc1
+v_cmp_nle_f32                  vcc,      src0,     vsrc1
+v_cmp_nle_f64                  vcc,      src0,     vsrc1
+v_cmp_nlg_f32                  vcc,      src0,     vsrc1
+v_cmp_nlg_f64                  vcc,      src0,     vsrc1
+v_cmp_nlt_f32                  vcc,      src0,     vsrc1
+v_cmp_nlt_f64                  vcc,      src0,     vsrc1
+v_cmp_o_f32                    vcc,      src0,     vsrc1
+v_cmp_o_f64                    vcc,      src0,     vsrc1
+v_cmp_t_i32                    vcc,      src0,     vsrc1
+v_cmp_t_i64                    vcc,      src0,     vsrc1
+v_cmp_t_u32                    vcc,      src0,     vsrc1
+v_cmp_t_u64                    vcc,      src0,     vsrc1
+v_cmp_tru_f32                  vcc,      src0,     vsrc1
+v_cmp_tru_f64                  vcc,      src0,     vsrc1
+v_cmp_u_f32                    vcc,      src0,     vsrc1
+v_cmp_u_f64                    vcc,      src0,     vsrc1
+v_cmps_eq_f32                  vcc,      src0,     vsrc1
+v_cmps_eq_f64                  vcc,      src0,     vsrc1
+v_cmps_f_f32                   vcc,      src0,     vsrc1
+v_cmps_f_f64                   vcc,      src0,     vsrc1
+v_cmps_ge_f32                  vcc,      src0,     vsrc1
+v_cmps_ge_f64                  vcc,      src0,     vsrc1
+v_cmps_gt_f32                  vcc,      src0,     vsrc1
+v_cmps_gt_f64                  vcc,      src0,     vsrc1
+v_cmps_le_f32                  vcc,      src0,     vsrc1
+v_cmps_le_f64                  vcc,      src0,     vsrc1
+v_cmps_lg_f32                  vcc,      src0,     vsrc1
+v_cmps_lg_f64                  vcc,      src0,     vsrc1
+v_cmps_lt_f32                  vcc,      src0,     vsrc1
+v_cmps_lt_f64                  vcc,      src0,     vsrc1
+v_cmps_neq_f32                 vcc,      src0,     vsrc1
+v_cmps_neq_f64                 vcc,      src0,     vsrc1
+v_cmps_nge_f32                 vcc,      src0,     vsrc1
+v_cmps_nge_f64                 vcc,      src0,     vsrc1
+v_cmps_ngt_f32                 vcc,      src0,     vsrc1
+v_cmps_ngt_f64                 vcc,      src0,     vsrc1
+v_cmps_nle_f32                 vcc,      src0,     vsrc1
+v_cmps_nle_f64                 vcc,      src0,     vsrc1
+v_cmps_nlg_f32                 vcc,      src0,     vsrc1
+v_cmps_nlg_f64                 vcc,      src0,     vsrc1
+v_cmps_nlt_f32                 vcc,      src0,     vsrc1
+v_cmps_nlt_f64                 vcc,      src0,     vsrc1
+v_cmps_o_f32                   vcc,      src0,     vsrc1
+v_cmps_o_f64                   vcc,      src0,     vsrc1
+v_cmps_tru_f32                 vcc,      src0,     vsrc1
+v_cmps_tru_f64                 vcc,      src0,     vsrc1
+v_cmps_u_f32                   vcc,      src0,     vsrc1
+v_cmps_u_f64                   vcc,      src0,     vsrc1
+v_cmpsx_eq_f32                 vcc,      src0,     vsrc1
+v_cmpsx_eq_f64                 vcc,      src0,     vsrc1
+v_cmpsx_f_f32                  vcc,      src0,     vsrc1
+v_cmpsx_f_f64                  vcc,      src0,     vsrc1
+v_cmpsx_ge_f32                 vcc,      src0,     vsrc1
+v_cmpsx_ge_f64                 vcc,      src0,     vsrc1
+v_cmpsx_gt_f32                 vcc,      src0,     vsrc1
+v_cmpsx_gt_f64                 vcc,      src0,     vsrc1
+v_cmpsx_le_f32                 vcc,      src0,     vsrc1
+v_cmpsx_le_f64                 vcc,      src0,     vsrc1
+v_cmpsx_lg_f32                 vcc,      src0,     vsrc1
+v_cmpsx_lg_f64                 vcc,      src0,     vsrc1
+v_cmpsx_lt_f32                 vcc,      src0,     vsrc1
+v_cmpsx_lt_f64                 vcc,      src0,     vsrc1
+v_cmpsx_neq_f32                vcc,      src0,     vsrc1
+v_cmpsx_neq_f64                vcc,      src0,     vsrc1
+v_cmpsx_nge_f32                vcc,      src0,     vsrc1
+v_cmpsx_nge_f64                vcc,      src0,     vsrc1
+v_cmpsx_ngt_f32                vcc,      src0,     vsrc1
+v_cmpsx_ngt_f64                vcc,      src0,     vsrc1
+v_cmpsx_nle_f32                vcc,      src0,     vsrc1
+v_cmpsx_nle_f64                vcc,      src0,     vsrc1
+v_cmpsx_nlg_f32                vcc,      src0,     vsrc1
+v_cmpsx_nlg_f64                vcc,      src0,     vsrc1
+v_cmpsx_nlt_f32                vcc,      src0,     vsrc1
+v_cmpsx_nlt_f64                vcc,      src0,     vsrc1
+v_cmpsx_o_f32                  vcc,      src0,     vsrc1
+v_cmpsx_o_f64                  vcc,      src0,     vsrc1
+v_cmpsx_tru_f32                vcc,      src0,     vsrc1
+v_cmpsx_tru_f64                vcc,      src0,     vsrc1
+v_cmpsx_u_f32                  vcc,      src0,     vsrc1
+v_cmpsx_u_f64                  vcc,      src0,     vsrc1
+v_cmpx_class_f32               vcc,      src0,     vsrc1:b32
+v_cmpx_class_f64               vcc,      src0,     vsrc1:b32
+v_cmpx_eq_f32                  vcc,      src0,     vsrc1
+v_cmpx_eq_f64                  vcc,      src0,     vsrc1
+v_cmpx_eq_i32                  vcc,      src0,     vsrc1
+v_cmpx_eq_i64                  vcc,      src0,     vsrc1
+v_cmpx_eq_u32                  vcc,      src0,     vsrc1
+v_cmpx_eq_u64                  vcc,      src0,     vsrc1
+v_cmpx_f_f32                   vcc,      src0,     vsrc1
+v_cmpx_f_f64                   vcc,      src0,     vsrc1
+v_cmpx_f_i32                   vcc,      src0,     vsrc1
+v_cmpx_f_i64                   vcc,      src0,     vsrc1
+v_cmpx_f_u32                   vcc,      src0,     vsrc1
+v_cmpx_f_u64                   vcc,      src0,     vsrc1
+v_cmpx_ge_f32                  vcc,      src0,     vsrc1
+v_cmpx_ge_f64                  vcc,      src0,     vsrc1
+v_cmpx_ge_i32                  vcc,      src0,     vsrc1
+v_cmpx_ge_i64                  vcc,      src0,     vsrc1
+v_cmpx_ge_u32                  vcc,      src0,     vsrc1
+v_cmpx_ge_u64                  vcc,      src0,     vsrc1
+v_cmpx_gt_f32                  vcc,      src0,     vsrc1
+v_cmpx_gt_f64                  vcc,      src0,     vsrc1
+v_cmpx_gt_i32                  vcc,      src0,     vsrc1
+v_cmpx_gt_i64                  vcc,      src0,     vsrc1
+v_cmpx_gt_u32                  vcc,      src0,     vsrc1
+v_cmpx_gt_u64                  vcc,      src0,     vsrc1
+v_cmpx_le_f32                  vcc,      src0,     vsrc1
+v_cmpx_le_f64                  vcc,      src0,     vsrc1
+v_cmpx_le_i32                  vcc,      src0,     vsrc1
+v_cmpx_le_i64                  vcc,      src0,     vsrc1
+v_cmpx_le_u32                  vcc,      src0,     vsrc1
+v_cmpx_le_u64                  vcc,      src0,     vsrc1
+v_cmpx_lg_f32                  vcc,      src0,     vsrc1
+v_cmpx_lg_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_f32                  vcc,      src0,     vsrc1
+v_cmpx_lt_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_i32                  vcc,      src0,     vsrc1
+v_cmpx_lt_i64                  vcc,      src0,     vsrc1
+v_cmpx_lt_u32                  vcc,      src0,     vsrc1
+v_cmpx_lt_u64                  vcc,      src0,     vsrc1
+v_cmpx_ne_i32                  vcc,      src0,     vsrc1
+v_cmpx_ne_i64                  vcc,      src0,     vsrc1
+v_cmpx_ne_u32                  vcc,      src0,     vsrc1
+v_cmpx_ne_u64                  vcc,      src0,     vsrc1
+v_cmpx_neq_f32                 vcc,      src0,     vsrc1
+v_cmpx_neq_f64                 vcc,      src0,     vsrc1
+v_cmpx_nge_f32                 vcc,      src0,     vsrc1
+v_cmpx_nge_f64                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f32                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f64                 vcc,      src0,     vsrc1
+v_cmpx_nle_f32                 vcc,      src0,     vsrc1
+v_cmpx_nle_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f64                 vcc,      src0,     vsrc1
+v_cmpx_o_f32                   vcc,      src0,     vsrc1
+v_cmpx_o_f64                   vcc,      src0,     vsrc1
+v_cmpx_t_i32                   vcc,      src0,     vsrc1
+v_cmpx_t_i64                   vcc,      src0,     vsrc1
+v_cmpx_t_u32                   vcc,      src0,     vsrc1
+v_cmpx_t_u64                   vcc,      src0,     vsrc1
+v_cmpx_tru_f32                 vcc,      src0,     vsrc1
+v_cmpx_tru_f64                 vcc,      src0,     vsrc1
+v_cmpx_u_f32                   vcc,      src0,     vsrc1
+v_cmpx_u_f64                   vcc,      src0,     vsrc1
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX8.html 2021-09-19 16:16:17.000000000 +0000 @@ -0,0 +1,1869 @@ + + + + + + + + + Syntax of GFX8 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of GFX8 Instructions

+ +
+

Introduction

+

This document describes the syntax of GFX8 instructions.

+
+
+

Notation

+

Notation used in this document is explained in the "AMDGPU Instructions Notation" document of the LLVM documentation.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in the "AMDGPU Instruction Syntax" document of the LLVM documentation.

+
+
+

Instructions

+
+

DS

+
INSTRUCTION                    DST         SRC0      SRC1      SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————
+ds_add_f32                                 vaddr,    vdata                    offset gds
+ds_add_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_add_src2_f32                            vaddr                              offset gds
+ds_add_src2_u32                            vaddr                              offset gds
+ds_add_src2_u64                            vaddr                              offset gds
+ds_add_u32                                 vaddr,    vdata                    offset gds
+ds_add_u64                                 vaddr,    vdata                    offset gds
+ds_and_b32                                 vaddr,    vdata                    offset gds
+ds_and_b64                                 vaddr,    vdata                    offset gds
+ds_and_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_and_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_and_src2_b32                            vaddr                              offset gds
+ds_and_src2_b64                            vaddr                              offset gds
+ds_append                      vdst                                           offset gds
+ds_bpermute_b32                vdst,       vaddr,    vdata                    offset
+ds_cmpst_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_condxchg32_rtn_b64          vdst,       vaddr,    vdata                    offset gds
+ds_consume                     vdst                                           offset gds
+ds_dec_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_src2_u32                            vaddr                              offset gds
+ds_dec_src2_u64                            vaddr                              offset gds
+ds_dec_u32                                 vaddr,    vdata                    offset gds
+ds_dec_u64                                 vaddr,    vdata                    offset gds
+ds_gws_barrier                             vdata                              offset gds
+ds_gws_init                                vdata                              offset gds
+ds_gws_sema_br                             vdata                              offset gds
+ds_gws_sema_p                                                                 offset gds
+ds_gws_sema_release_all                                                       offset gds
+ds_gws_sema_v                                                                 offset gds
+ds_inc_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_src2_u32                            vaddr                              offset gds
+ds_inc_src2_u64                            vaddr                              offset gds
+ds_inc_u32                                 vaddr,    vdata                    offset gds
+ds_inc_u64                                 vaddr,    vdata                    offset gds
+ds_max_f32                                 vaddr,    vdata                    offset gds
+ds_max_f64                                 vaddr,    vdata                    offset gds
+ds_max_i32                                 vaddr,    vdata                    offset gds
+ds_max_i64                                 vaddr,    vdata                    offset gds
+ds_max_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_src2_f32                            vaddr                              offset gds
+ds_max_src2_f64                            vaddr                              offset gds
+ds_max_src2_i32                            vaddr                              offset gds
+ds_max_src2_i64                            vaddr                              offset gds
+ds_max_src2_u32                            vaddr                              offset gds
+ds_max_src2_u64                            vaddr                              offset gds
+ds_max_u32                                 vaddr,    vdata                    offset gds
+ds_max_u64                                 vaddr,    vdata                    offset gds
+ds_min_f32                                 vaddr,    vdata                    offset gds
+ds_min_f64                                 vaddr,    vdata                    offset gds
+ds_min_i32                                 vaddr,    vdata                    offset gds
+ds_min_i64                                 vaddr,    vdata                    offset gds
+ds_min_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_src2_f32                            vaddr                              offset gds
+ds_min_src2_f64                            vaddr                              offset gds
+ds_min_src2_i32                            vaddr                              offset gds
+ds_min_src2_i64                            vaddr                              offset gds
+ds_min_src2_u32                            vaddr                              offset gds
+ds_min_src2_u64                            vaddr                              offset gds
+ds_min_u32                                 vaddr,    vdata                    offset gds
+ds_min_u64                                 vaddr,    vdata                    offset gds
+ds_mskor_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_nop
+ds_or_b32                                  vaddr,    vdata                    offset gds
+ds_or_b64                                  vaddr,    vdata                    offset gds
+ds_or_rtn_b32                  vdst,       vaddr,    vdata                    offset gds
+ds_or_rtn_b64                  vdst,       vaddr,    vdata                    offset gds
+ds_or_src2_b32                             vaddr                              offset gds
+ds_or_src2_b64                             vaddr                              offset gds
+ds_ordered_count               vdst,       vaddr                              offset gds
+ds_permute_b32                 vdst,       vaddr,    vdata                    offset
+ds_read2_b32                   vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2_b64                   vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b32               vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b64               vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read_b128                   vdst,       vaddr                              offset gds
+ds_read_b32                    vdst,       vaddr                              offset gds
+ds_read_b64                    vdst,       vaddr                              offset gds
+ds_read_b96                    vdst,       vaddr                              offset gds
+ds_read_i16                    vdst,       vaddr                              offset gds
+ds_read_i8                     vdst,       vaddr                              offset gds
+ds_read_u16                    vdst,       vaddr                              offset gds
+ds_read_u8                     vdst,       vaddr                              offset gds
+ds_rsub_rtn_u32                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_rtn_u64                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_src2_u32                           vaddr                              offset gds
+ds_rsub_src2_u64                           vaddr                              offset gds
+ds_rsub_u32                                vaddr,    vdata                    offset gds
+ds_rsub_u64                                vaddr,    vdata                    offset gds
+ds_sub_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_src2_u32                            vaddr                              offset gds
+ds_sub_src2_u64                            vaddr                              offset gds
+ds_sub_u32                                 vaddr,    vdata                    offset gds
+ds_sub_u64                                 vaddr,    vdata                    offset gds
+ds_swizzle_b32                 vdst,       vaddr                              pattern gds
+ds_wrap_rtn_b32                vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_write2_b32                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2_b64                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b32                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b64                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write_b128                              vaddr,    vdata                    offset gds
+ds_write_b16                               vaddr,    vdata                    offset gds
+ds_write_b32                               vaddr,    vdata                    offset gds
+ds_write_b64                               vaddr,    vdata                    offset gds
+ds_write_b8                                vaddr,    vdata                    offset gds
+ds_write_b96                               vaddr,    vdata                    offset gds
+ds_write_src2_b32                          vaddr                              offset gds
+ds_write_src2_b64                          vaddr                              offset gds
+ds_wrxchg2_rtn_b32             vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2_rtn_b64             vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b32         vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b64         vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg_rtn_b32              vdst,       vaddr,    vdata                    offset gds
+ds_wrxchg_rtn_b64              vdst,       vaddr,    vdata                    offset gds
+ds_xor_b32                                 vaddr,    vdata                    offset gds
+ds_xor_b64                                 vaddr,    vdata                    offset gds
+ds_xor_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_src2_b32                            vaddr                              offset gds
+ds_xor_src2_b64                            vaddr                              offset gds
+
+
+

EXP

+
INSTRUCTION                    DST       SRC0      SRC1      SRC2      SRC3           MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————
+exp                            tgt,      vsrc0,    vsrc1,    vsrc2,    vsrc3          done compr vm
+
+
+

FLAT

+
INSTRUCTION                    DST           SRC0      SRC1             MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————
+flat_atomic_add                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_add_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_and                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_and_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_cmpswap            vdst:opt,     vaddr,    vdata:b32x2      glc slc
+flat_atomic_cmpswap_x2         vdst:opt,     vaddr,    vdata:b64x2      glc slc
+flat_atomic_dec                vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_dec_x2             vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_inc                vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_inc_x2             vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_or                 vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_or_x2              vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_smax               vdst:opt:i32, vaddr,    vdata:i32        glc slc
+flat_atomic_smax_x2            vdst:opt:i64, vaddr,    vdata:i64        glc slc
+flat_atomic_smin               vdst:opt:i32, vaddr,    vdata:i32        glc slc
+flat_atomic_smin_x2            vdst:opt:i64, vaddr,    vdata:i64        glc slc
+flat_atomic_sub                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_sub_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_swap               vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_swap_x2            vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_umax               vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_umax_x2            vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_umin               vdst:opt:u32, vaddr,    vdata:u32        glc slc
+flat_atomic_umin_x2            vdst:opt:u64, vaddr,    vdata:u64        glc slc
+flat_atomic_xor                vdst:opt,     vaddr,    vdata            glc slc
+flat_atomic_xor_x2             vdst:opt,     vaddr,    vdata            glc slc
+flat_load_dword                vdst,         vaddr                      glc slc
+flat_load_dwordx2              vdst,         vaddr                      glc slc
+flat_load_dwordx3              vdst,         vaddr                      glc slc
+flat_load_dwordx4              vdst,         vaddr                      glc slc
+flat_load_sbyte                vdst,         vaddr                      glc slc
+flat_load_sshort               vdst,         vaddr                      glc slc
+flat_load_ubyte                vdst,         vaddr                      glc slc
+flat_load_ushort               vdst,         vaddr                      glc slc
+flat_store_byte                              vaddr,    vdata            glc slc
+flat_store_dword                             vaddr,    vdata            glc slc
+flat_store_dwordx2                           vaddr,    vdata            glc slc
+flat_store_dwordx3                           vaddr,    vdata            glc slc
+flat_store_dwordx4                           vaddr,    vdata            glc slc
+flat_store_short                             vaddr,    vdata            glc slc
+
+
+

MIMG

+
INSTRUCTION                DST      SRC0       SRC1     SRC2          MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————————
+image_atomic_add                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_and                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_cmpswap                vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_dec                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_inc                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_or                     vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_smax                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_smin                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_sub                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_swap                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_umax                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_umin                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_atomic_xor                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc lwe da
+image_gather4              vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_b            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_b_cl         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_b_cl_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_b_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_b          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_b_cl       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_b_cl_o     vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_b_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_cl         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_cl_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_l          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_l_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_lz         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_lz_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_c_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_cl           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_cl_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_l            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_l_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_lz           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_lz_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_gather4_o            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_get_lod              vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da
+image_get_resinfo          vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load                 vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da d16
+image_load_mip             vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da d16
+image_load_mip_pck         vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_mip_pck_sgn     vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_pck             vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_load_pck_sgn         vdst,    vaddr,     srsrc                  dmask unorm glc slc tfe lwe da
+image_sample               vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_b             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_b_cl          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_b_cl_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_b_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_b           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_b_cl        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_b_cl_o      vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_b_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_cd          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_cd_cl       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_cd_cl_o     vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_cd_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_cl          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_cl_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_d           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_d_cl        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_d_cl_o      vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_d_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_l           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_l_o         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_lz          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_lz_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_c_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_cd            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_cd_cl         vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_cd_cl_o       vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_cd_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_cl            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_cl_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_d             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_d_cl          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_d_cl_o        vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_d_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_l             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_l_o           vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_lz            vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_lz_o          vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_sample_o             vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc tfe lwe da d16
+image_store                         vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da d16
+image_store_mip                     vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da d16
+image_store_mip_pck                 vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da
+image_store_pck                     vdata,     vaddr,   srsrc         dmask unorm glc slc lwe da
+
+
+

MTBUF

+
INSTRUCTION                     DST   SRC0   SRC1   SRC2    SRC3      MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————
+tbuffer_load_format_d16_x       vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xy      vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xyz     vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xyzw    vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_x           vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xy          vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xyz         vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xyzw        vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_x            vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xy           vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyz          vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyzw         vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_x                vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xy               vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xyz              vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xyzw             vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+
+
+

MUBUF

+
INSTRUCTION                   DST   SRC0             SRC1    SRC2    SRC3     MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————————————
+buffer_atomic_add                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_add_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap               vdata:dst:b32x2, vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap_x2            vdata:dst:b64x2, vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec                   vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec_x2                vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc                   vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc_x2                vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or                    vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or_x2                 vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax                  vdata:dst:i32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax_x2               vdata:dst:i64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin                  vdata:dst:i32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin_x2               vdata:dst:i64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap                  vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap_x2               vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax                  vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax_x2               vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin                  vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin_x2               vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_load_dword             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_dwordx2           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_dwordx3           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_dwordx4           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_x      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xy     vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xyz    vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xyzw   vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_x          vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_format_xy         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_xyz        vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_xyzw       vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_sbyte             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_sshort            vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_ubyte             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_ushort            vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_store_byte                   vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dword                  vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx2                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx3                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx4                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_x           vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xy          vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyz         vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyzw        vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_x               vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xy              vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyz             vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyzw            vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_lds_dword              srsrc,           soffset                  offset12 lds glc slc
+buffer_store_short                  vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_wbinvl1
+buffer_wbinvl1_vol
+
+
+

SMEM

+
INSTRUCTION                    DST       SRC0      SRC1      SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————
+s_atc_probe                              probe,    sbase,    soffset
+s_atc_probe_buffer                       probe,    sbase,    soffset
+s_buffer_load_dword            sdst,     sbase,    soffset                  glc
+s_buffer_load_dwordx16         sdst,     sbase,    soffset                  glc
+s_buffer_load_dwordx2          sdst,     sbase,    soffset                  glc
+s_buffer_load_dwordx4          sdst,     sbase,    soffset                  glc
+s_buffer_load_dwordx8          sdst,     sbase,    soffset                  glc
+s_buffer_store_dword                     sdata,    sbase,    soffset        glc
+s_buffer_store_dwordx2                   sdata,    sbase,    soffset        glc
+s_buffer_store_dwordx4                   sdata,    sbase,    soffset        glc
+s_dcache_inv
+s_dcache_inv_vol
+s_dcache_wb
+s_dcache_wb_vol
+s_load_dword                   sdst,     sbase,    soffset                  glc
+s_load_dwordx16                sdst,     sbase,    soffset                  glc
+s_load_dwordx2                 sdst,     sbase,    soffset                  glc
+s_load_dwordx4                 sdst,     sbase,    soffset                  glc
+s_load_dwordx8                 sdst,     sbase,    soffset                  glc
+s_memrealtime                  sdst:b64
+s_memtime                      sdst:b64
+s_store_dword                            sdata,    sbase,    soffset        glc
+s_store_dwordx2                          sdata,    sbase,    soffset        glc
+s_store_dwordx4                          sdata,    sbase,    soffset        glc
+
+
+

SOP1

+
INSTRUCTION                    DST       SRC
+———————————————————————————————————————————————————
+s_abs_i32                      sdst,     ssrc
+s_and_saveexec_b64             sdst,     ssrc
+s_andn2_saveexec_b64           sdst,     ssrc
+s_bcnt0_i32_b32                sdst,     ssrc
+s_bcnt0_i32_b64                sdst,     ssrc
+s_bcnt1_i32_b32                sdst,     ssrc
+s_bcnt1_i32_b64                sdst,     ssrc
+s_bitset0_b32                  sdst,     ssrc
+s_bitset0_b64                  sdst,     ssrc:b32
+s_bitset1_b32                  sdst,     ssrc
+s_bitset1_b64                  sdst,     ssrc:b32
+s_brev_b32                     sdst,     ssrc
+s_brev_b64                     sdst,     ssrc
+s_cbranch_join                           ssrc
+s_cmov_b32                     sdst,     ssrc
+s_cmov_b64                     sdst,     ssrc
+s_ff0_i32_b32                  sdst,     ssrc
+s_ff0_i32_b64                  sdst,     ssrc
+s_ff1_i32_b32                  sdst,     ssrc
+s_ff1_i32_b64                  sdst,     ssrc
+s_flbit_i32                    sdst,     ssrc
+s_flbit_i32_b32                sdst,     ssrc
+s_flbit_i32_b64                sdst,     ssrc
+s_flbit_i32_i64                sdst,     ssrc
+s_getpc_b64                    sdst
+s_mov_b32                      sdst,     ssrc
+s_mov_b64                      sdst,     ssrc
+s_movreld_b32                  sdst,     ssrc
+s_movreld_b64                  sdst,     ssrc
+s_movrels_b32                  sdst,     ssrc
+s_movrels_b64                  sdst,     ssrc
+s_nand_saveexec_b64            sdst,     ssrc
+s_nor_saveexec_b64             sdst,     ssrc
+s_not_b32                      sdst,     ssrc
+s_not_b64                      sdst,     ssrc
+s_or_saveexec_b64              sdst,     ssrc
+s_orn2_saveexec_b64            sdst,     ssrc
+s_quadmask_b32                 sdst,     ssrc
+s_quadmask_b64                 sdst,     ssrc
+s_rfe_b64                                ssrc
+s_set_gpr_idx_idx                        ssrc
+s_setpc_b64                              ssrc
+s_sext_i32_i16                 sdst,     ssrc
+s_sext_i32_i8                  sdst,     ssrc
+s_swappc_b64                   sdst,     ssrc
+s_wqm_b32                      sdst,     ssrc
+s_wqm_b64                      sdst,     ssrc
+s_xnor_saveexec_b64            sdst,     ssrc
+s_xor_saveexec_b64             sdst,     ssrc
+
+
+

SOP2

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+s_absdiff_i32                  sdst,     ssrc0,     ssrc1
+s_add_i32                      sdst,     ssrc0,     ssrc1
+s_add_u32                      sdst,     ssrc0,     ssrc1
+s_addc_u32                     sdst,     ssrc0,     ssrc1
+s_and_b32                      sdst,     ssrc0,     ssrc1
+s_and_b64                      sdst,     ssrc0,     ssrc1
+s_andn2_b32                    sdst,     ssrc0,     ssrc1
+s_andn2_b64                    sdst,     ssrc0,     ssrc1
+s_ashr_i32                     sdst,     ssrc0,     ssrc1:u32
+s_ashr_i64                     sdst,     ssrc0,     ssrc1:u32
+s_bfe_i32                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_i64                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_u32                      sdst,     ssrc0,     ssrc1
+s_bfe_u64                      sdst,     ssrc0,     ssrc1:u32
+s_bfm_b32                      sdst,     ssrc0,     ssrc1
+s_bfm_b64                      sdst,     ssrc0:b32, ssrc1:b32
+s_cbranch_g_fork                         ssrc0,     ssrc1
+s_cselect_b32                  sdst,     ssrc0,     ssrc1
+s_cselect_b64                  sdst,     ssrc0,     ssrc1
+s_lshl_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshl_b64                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b64                     sdst,     ssrc0,     ssrc1:u32
+s_max_i32                      sdst,     ssrc0,     ssrc1
+s_max_u32                      sdst,     ssrc0,     ssrc1
+s_min_i32                      sdst,     ssrc0,     ssrc1
+s_min_u32                      sdst,     ssrc0,     ssrc1
+s_mul_i32                      sdst,     ssrc0,     ssrc1
+s_nand_b32                     sdst,     ssrc0,     ssrc1
+s_nand_b64                     sdst,     ssrc0,     ssrc1
+s_nor_b32                      sdst,     ssrc0,     ssrc1
+s_nor_b64                      sdst,     ssrc0,     ssrc1
+s_or_b32                       sdst,     ssrc0,     ssrc1
+s_or_b64                       sdst,     ssrc0,     ssrc1
+s_orn2_b32                     sdst,     ssrc0,     ssrc1
+s_orn2_b64                     sdst,     ssrc0,     ssrc1
+s_rfe_restore_b64                        ssrc0,     ssrc1:b32
+s_sub_i32                      sdst,     ssrc0,     ssrc1
+s_sub_u32                      sdst,     ssrc0,     ssrc1
+s_subb_u32                     sdst,     ssrc0,     ssrc1
+s_xnor_b32                     sdst,     ssrc0,     ssrc1
+s_xnor_b64                     sdst,     ssrc0,     ssrc1
+s_xor_b32                      sdst,     ssrc0,     ssrc1
+s_xor_b64                      sdst,     ssrc0,     ssrc1
+
+
+

SOPC

+
INSTRUCTION                    SRC0      SRC1
+———————————————————————————————————————————————————
+s_bitcmp0_b32                  ssrc0,    ssrc1
+s_bitcmp0_b64                  ssrc0,    ssrc1:u32
+s_bitcmp1_b32                  ssrc0,    ssrc1
+s_bitcmp1_b64                  ssrc0,    ssrc1:u32
+s_cmp_eq_i32                   ssrc0,    ssrc1
+s_cmp_eq_u32                   ssrc0,    ssrc1
+s_cmp_eq_u64                   ssrc0,    ssrc1
+s_cmp_ge_i32                   ssrc0,    ssrc1
+s_cmp_ge_u32                   ssrc0,    ssrc1
+s_cmp_gt_i32                   ssrc0,    ssrc1
+s_cmp_gt_u32                   ssrc0,    ssrc1
+s_cmp_le_i32                   ssrc0,    ssrc1
+s_cmp_le_u32                   ssrc0,    ssrc1
+s_cmp_lg_i32                   ssrc0,    ssrc1
+s_cmp_lg_u32                   ssrc0,    ssrc1
+s_cmp_lg_u64                   ssrc0,    ssrc1
+s_cmp_lt_i32                   ssrc0,    ssrc1
+s_cmp_lt_u32                   ssrc0,    ssrc1
+s_set_gpr_idx_on               ssrc,     imask
+s_setvskip                     ssrc0,    ssrc1
+
+
+

SOPK

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+s_addk_i32                     sdst,     imm16
+s_cbranch_i_fork                         ssrc,     label
+s_cmovk_i32                    sdst,     imm16
+s_cmpk_eq_i32                            ssrc,     imm16
+s_cmpk_eq_u32                            ssrc,     imm16
+s_cmpk_ge_i32                            ssrc,     imm16
+s_cmpk_ge_u32                            ssrc,     imm16
+s_cmpk_gt_i32                            ssrc,     imm16
+s_cmpk_gt_u32                            ssrc,     imm16
+s_cmpk_le_i32                            ssrc,     imm16
+s_cmpk_le_u32                            ssrc,     imm16
+s_cmpk_lg_i32                            ssrc,     imm16
+s_cmpk_lg_u32                            ssrc,     imm16
+s_cmpk_lt_i32                            ssrc,     imm16
+s_cmpk_lt_u32                            ssrc,     imm16
+s_getreg_b32                   sdst,     hwreg
+s_movk_i32                     sdst,     imm16
+s_mulk_i32                     sdst,     imm16
+s_setreg_b32                   hwreg,    ssrc
+s_setreg_imm32_b32             hwreg,    simm32
+
+
+

SOPP

+
INSTRUCTION                    SRC
+—————————————————————————————————————————
+s_barrier
+s_branch                       label
+s_cbranch_cdbgsys              label
+s_cbranch_cdbgsys_and_user     label
+s_cbranch_cdbgsys_or_user      label
+s_cbranch_cdbguser             label
+s_cbranch_execnz               label
+s_cbranch_execz                label
+s_cbranch_scc0                 label
+s_cbranch_scc1                 label
+s_cbranch_vccnz                label
+s_cbranch_vccz                 label
+s_decperflevel                 imm16
+s_endpgm
+s_endpgm_saved
+s_icache_inv
+s_incperflevel                 imm16
+s_nop                          imm16
+s_sendmsg                      msg
+s_sendmsghalt                  msg
+s_set_gpr_idx_mode             imask
+s_set_gpr_idx_off
+s_sethalt                      imm16
+s_setkill                      imm16
+s_setprio                      imm16
+s_sleep                        imm16
+s_trap                         imm16
+s_ttracedata
+s_waitcnt                      waitcnt
+s_wakeup
+
+
+

VINTRP

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+v_interp_mov_f32               vdst,     param:b32, attr:b32
+v_interp_p1_f32                vdst,     vsrc,      attr:b32
+v_interp_p2_f32                vdst,     vsrc,      attr:b32
+
+
+

VOP1

+
INSTRUCTION                    DST       SRC            MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————
+v_bfrev_b32                    vdst,     src
+v_bfrev_b32_dpp                vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_bfrev_b32_sdwa               vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_ceil_f16                     vdst,     src
+v_ceil_f16_dpp                 vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_ceil_f16_sdwa                vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_ceil_f32                     vdst,     src
+v_ceil_f32_dpp                 vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_ceil_f32_sdwa                vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_ceil_f64                     vdst,     src
+v_clrexcp
+v_cos_f16                      vdst,     src
+v_cos_f16_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cos_f16_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cos_f32                      vdst,     src
+v_cos_f32_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cos_f32_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f16_f32                  vdst,     src
+v_cvt_f16_f32_dpp              vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_f32_sdwa             vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f16_i16                  vdst,     src
+v_cvt_f16_i16_dpp              vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_i16_sdwa             vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f16_u16                  vdst,     src
+v_cvt_f16_u16_dpp              vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_u16_sdwa             vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f32_f16                  vdst,     src
+v_cvt_f32_f16_dpp              vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_f16_sdwa             vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f32_f64                  vdst,     src
+v_cvt_f32_i32                  vdst,     src
+v_cvt_f32_i32_dpp              vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_i32_sdwa             vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f32_u32                  vdst,     src
+v_cvt_f32_u32_dpp              vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_u32_sdwa             vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte0               vdst,     src
+v_cvt_f32_ubyte0_dpp           vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte0_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte1               vdst,     src
+v_cvt_f32_ubyte1_dpp           vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte1_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte2               vdst,     src
+v_cvt_f32_ubyte2_dpp           vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte2_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte3               vdst,     src
+v_cvt_f32_ubyte3_dpp           vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte3_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_f64_f32                  vdst,     src
+v_cvt_f64_i32                  vdst,     src
+v_cvt_f64_u32                  vdst,     src
+v_cvt_flr_i32_f32              vdst,     src
+v_cvt_flr_i32_f32_dpp          vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_flr_i32_f32_sdwa         vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_cvt_i16_f16                  vdst,     src
+v_cvt_i16_f16_dpp              vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_i16_f16_sdwa             vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_cvt_i32_f32                  vdst,     src
+v_cvt_i32_f32_dpp              vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_i32_f32_sdwa             vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_cvt_i32_f64                  vdst,     src
+v_cvt_off_f32_i4               vdst,     src
+v_cvt_off_f32_i4_dpp           vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_off_f32_i4_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_cvt_rpi_i32_f32              vdst,     src
+v_cvt_rpi_i32_f32_dpp          vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_rpi_i32_f32_sdwa         vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_cvt_u16_f16                  vdst,     src
+v_cvt_u16_f16_dpp              vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_u16_f16_sdwa             vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_cvt_u32_f32                  vdst,     src
+v_cvt_u32_f32_dpp              vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_u32_f32_sdwa             vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_cvt_u32_f64                  vdst,     src
+v_exp_f16                      vdst,     src
+v_exp_f16_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_exp_f16_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_exp_f32                      vdst,     src
+v_exp_f32_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_exp_f32_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_exp_legacy_f32               vdst,     src
+v_exp_legacy_f32_dpp           vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_exp_legacy_f32_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_ffbh_i32                     vdst,     src
+v_ffbh_i32_dpp                 vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_ffbh_i32_sdwa                vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_ffbh_u32                     vdst,     src
+v_ffbh_u32_dpp                 vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_ffbh_u32_sdwa                vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_ffbl_b32                     vdst,     src
+v_ffbl_b32_dpp                 vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_ffbl_b32_sdwa                vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_floor_f16                    vdst,     src
+v_floor_f16_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_floor_f16_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_floor_f32                    vdst,     src
+v_floor_f32_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_floor_f32_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_floor_f64                    vdst,     src
+v_fract_f16                    vdst,     src
+v_fract_f16_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_fract_f16_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_fract_f32                    vdst,     src
+v_fract_f32_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_fract_f32_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_fract_f64                    vdst,     src
+v_frexp_exp_i16_f16            vdst,     src
+v_frexp_exp_i16_f16_dpp        vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_exp_i16_f16_sdwa       vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_frexp_exp_i32_f32            vdst,     src
+v_frexp_exp_i32_f32_dpp        vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_exp_i32_f32_sdwa       vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_frexp_exp_i32_f64            vdst,     src
+v_frexp_mant_f16               vdst,     src
+v_frexp_mant_f16_dpp           vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_mant_f16_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_frexp_mant_f32               vdst,     src
+v_frexp_mant_f32_dpp           vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_mant_f32_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_frexp_mant_f64               vdst,     src
+v_log_f16                      vdst,     src
+v_log_f16_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_log_f16_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_log_f32                      vdst,     src
+v_log_f32_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_log_f32_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_log_legacy_f32               vdst,     src
+v_log_legacy_f32_dpp           vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_log_legacy_f32_sdwa          vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_mov_b32                      vdst,     src
+v_mov_b32_dpp                  vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_mov_b32_sdwa                 vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_nop
+v_not_b32                      vdst,     src
+v_not_b32_dpp                  vdst,     vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_not_b32_sdwa                 vdst,     vsrc:m         dst_sel dst_unused src0_sel
+v_rcp_f16                      vdst,     src
+v_rcp_f16_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rcp_f16_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_rcp_f32                      vdst,     src
+v_rcp_f32_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rcp_f32_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_rcp_f64                      vdst,     src
+v_rcp_iflag_f32                vdst,     src
+v_rcp_iflag_f32_dpp            vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rcp_iflag_f32_sdwa           vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_readfirstlane_b32            sdst,     src
+v_rndne_f16                    vdst,     src
+v_rndne_f16_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rndne_f16_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_rndne_f32                    vdst,     src
+v_rndne_f32_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rndne_f32_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_rndne_f64                    vdst,     src
+v_rsq_f16                      vdst,     src
+v_rsq_f16_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rsq_f16_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_rsq_f32                      vdst,     src
+v_rsq_f32_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rsq_f32_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_rsq_f64                      vdst,     src
+v_sin_f16                      vdst,     src
+v_sin_f16_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sin_f16_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_sin_f32                      vdst,     src
+v_sin_f32_dpp                  vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sin_f32_sdwa                 vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_sqrt_f16                     vdst,     src
+v_sqrt_f16_dpp                 vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sqrt_f16_sdwa                vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_sqrt_f32                     vdst,     src
+v_sqrt_f32_dpp                 vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sqrt_f32_sdwa                vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_sqrt_f64                     vdst,     src
+v_trunc_f16                    vdst,     src
+v_trunc_f16_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_trunc_f16_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_trunc_f32                    vdst,     src
+v_trunc_f32_dpp                vdst,     vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_trunc_f32_sdwa               vdst,     vsrc:m         clamp dst_sel dst_unused src0_sel
+v_trunc_f64                    vdst,     src
+
+
+

+VOP2
+
+
+INSTRUCTION            DST0  DST1 SRC0         SRC1        SRC2    MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_f16              vdst,      src0,        vsrc1
+v_add_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_f16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_add_f32              vdst,      src0,        vsrc1
+v_add_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_f32_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_add_u16              vdst,      src0,        vsrc1
+v_add_u16_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_u16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_add_u32              vdst, vcc, src0,        vsrc1
+v_add_u32_dpp          vdst, vcc, vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_u32_sdwa         vdst, vcc, vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_addc_u32             vdst, vcc, src0,        vsrc1,      vcc
+v_addc_u32_dpp         vdst, vcc, vsrc0,       vsrc1,      vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_addc_u32_sdwa        vdst, vcc, vsrc0:m,     vsrc1:m,    vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_and_b32              vdst,      src0,        vsrc1
+v_and_b32_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_and_b32_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_ashrrev_i16          vdst,      src0:u16,    vsrc1
+v_ashrrev_i16_dpp      vdst,      vsrc0:u16,   vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_ashrrev_i16_sdwa     vdst,      vsrc0:m:u16, vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_ashrrev_i32          vdst,      src0:u32,    vsrc1
+v_ashrrev_i32_dpp      vdst,      vsrc0:u32,   vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_ashrrev_i32_sdwa     vdst,      vsrc0:m:u32, vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_cndmask_b32          vdst,      src0,        vsrc1,      vcc
+v_cndmask_b32_dpp      vdst,      vsrc0,       vsrc1,      vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_cndmask_b32_sdwa     vdst,      vsrc0:m,     vsrc1:m,    vcc     dst_sel dst_unused src0_sel src1_sel
+v_ldexp_f16            vdst,      src0,        vsrc1:i16
+v_ldexp_f16_dpp        vdst,      vsrc0:m,     vsrc1:i16           dpp_ctrl row_mask bank_mask bound_ctrl
+v_ldexp_f16_sdwa       vdst,      vsrc0:m,     vsrc1:m:i16         clamp dst_sel dst_unused src0_sel src1_sel
+v_lshlrev_b16          vdst,      src0:u16,    vsrc1
+v_lshlrev_b16_dpp      vdst,      vsrc0:u16,   vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshlrev_b16_sdwa     vdst,      vsrc0:m:u16, vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_lshlrev_b32          vdst,      src0:u32,    vsrc1
+v_lshlrev_b32_dpp      vdst,      vsrc0:u32,   vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshlrev_b32_sdwa     vdst,      vsrc0:m:u32, vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_lshrrev_b16          vdst,      src0:u16,    vsrc1
+v_lshrrev_b16_dpp      vdst,      vsrc0:u16,   vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshrrev_b16_sdwa     vdst,      vsrc0:m:u16, vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_lshrrev_b32          vdst,      src0:u32,    vsrc1
+v_lshrrev_b32_dpp      vdst,      vsrc0:u32,   vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshrrev_b32_sdwa     vdst,      vsrc0:m:u32, vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_mac_f16              vdst,      src0,        vsrc1
+v_mac_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_mac_f16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_mac_f32              vdst,      src0,        vsrc1
+v_mac_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_mac_f32_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_madak_f16            vdst,      src0,        vsrc1,      simm32
+v_madak_f32            vdst,      src0,        vsrc1,      simm32
+v_madmk_f16            vdst,      src0,        simm32,     vsrc2
+v_madmk_f32            vdst,      src0,        simm32,     vsrc2
+v_max_f16              vdst,      src0,        vsrc1
+v_max_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_f16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_max_f32              vdst,      src0,        vsrc1
+v_max_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_f32_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_max_i16              vdst,      src0,        vsrc1
+v_max_i16_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_i16_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_max_i32              vdst,      src0,        vsrc1
+v_max_i32_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_i32_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_max_u16              vdst,      src0,        vsrc1
+v_max_u16_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_u16_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_max_u32              vdst,      src0,        vsrc1
+v_max_u32_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_u32_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_f16              vdst,      src0,        vsrc1
+v_min_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_f16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_min_f32              vdst,      src0,        vsrc1
+v_min_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_f32_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_min_i16              vdst,      src0,        vsrc1
+v_min_i16_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_i16_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_i32              vdst,      src0,        vsrc1
+v_min_i32_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_i32_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_u16              vdst,      src0,        vsrc1
+v_min_u16_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_u16_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_u32              vdst,      src0,        vsrc1
+v_min_u32_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_u32_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_f16              vdst,      src0,        vsrc1
+v_mul_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_f16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_mul_f32              vdst,      src0,        vsrc1
+v_mul_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_f32_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_i32_i24       vdst,      src0,        vsrc1
+v_mul_hi_i32_i24_dpp   vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_hi_i32_i24_sdwa  vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_u32_u24       vdst,      src0,        vsrc1
+v_mul_hi_u32_u24_dpp   vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_hi_u32_u24_sdwa  vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_i32_i24          vdst,      src0,        vsrc1
+v_mul_i32_i24_dpp      vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_i32_i24_sdwa     vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_mul_legacy_f32       vdst,      src0,        vsrc1
+v_mul_legacy_f32_dpp   vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_legacy_f32_sdwa  vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_mul_lo_u16           vdst,      src0,        vsrc1
+v_mul_lo_u16_dpp       vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_lo_u16_sdwa      vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_u32_u24          vdst,      src0,        vsrc1
+v_mul_u32_u24_dpp      vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_u32_u24_sdwa     vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_or_b32               vdst,      src0,        vsrc1
+v_or_b32_dpp           vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_or_b32_sdwa          vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+v_sub_f16              vdst,      src0,        vsrc1
+v_sub_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_f16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_f32              vdst,      src0,        vsrc1
+v_sub_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_f32_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_u16              vdst,      src0,        vsrc1
+v_sub_u16_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_u16_sdwa         vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_u32              vdst, vcc, src0,        vsrc1
+v_sub_u32_dpp          vdst, vcc, vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_u32_sdwa         vdst, vcc, vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_subb_u32             vdst, vcc, src0,        vsrc1,      vcc
+v_subb_u32_dpp         vdst, vcc, vsrc0,       vsrc1,      vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_subb_u32_sdwa        vdst, vcc, vsrc0:m,     vsrc1:m,    vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_subbrev_u32          vdst, vcc, src0,        vsrc1,      vcc
+v_subbrev_u32_dpp      vdst, vcc, vsrc0,       vsrc1,      vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_subbrev_u32_sdwa     vdst, vcc, vsrc0:m,     vsrc1:m,    vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_f16           vdst,      src0,        vsrc1
+v_subrev_f16_dpp       vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_f16_sdwa      vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_f32           vdst,      src0,        vsrc1
+v_subrev_f32_dpp       vdst,      vsrc0:m,     vsrc1:m             dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_f32_sdwa      vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_u16           vdst,      src0,        vsrc1
+v_subrev_u16_dpp       vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_u16_sdwa      vdst,      vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_u32           vdst, vcc, src0,        vsrc1
+v_subrev_u32_dpp       vdst, vcc, vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_u32_sdwa      vdst, vcc, vsrc0:m,     vsrc1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_xor_b32              vdst,      src0,        vsrc1
+v_xor_b32_dpp          vdst,      vsrc0,       vsrc1               dpp_ctrl row_mask bank_mask bound_ctrl
+v_xor_b32_sdwa         vdst,      vsrc0:m,     vsrc1:m             dst_sel dst_unused src0_sel src1_sel
+
+
+

+VOP3
+
+
+INSTRUCTION              DST0        DST1     SRC0         SRC1        SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_add_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_add_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_add_u16_e64            vdst,                src0,        src1                       clamp
+v_add_u32_e64            vdst,       sdst,    src0,        src1                       clamp
+v_addc_u32_e64           vdst,       sdst,    src0,        src1,       ssrc2          clamp
+v_alignbit_b32           vdst,                src0,        src1,       src2
+v_alignbyte_b32          vdst,                src0,        src1,       src2
+v_and_b32_e64            vdst,                src0,        src1
+v_ashrrev_i16_e64        vdst,                src0:u16,    src1
+v_ashrrev_i32_e64        vdst,                src0:u32,    src1
+v_ashrrev_i64            vdst,                src0:u32,    src1
+v_bcnt_u32_b32           vdst,                src0,        src1
+v_bfe_i32                vdst,                src0,        src1:u32,   src2:u32
+v_bfe_u32                vdst,                src0,        src1,       src2
+v_bfi_b32                vdst,                src0,        src1,       src2
+v_bfm_b32                vdst,                src0,        src1
+v_bfrev_b32_e64          vdst,                src
+v_ceil_f16_e64           vdst,                src:m                                   clamp omod
+v_ceil_f32_e64           vdst,                src:m                                   clamp omod
+v_ceil_f64_e64           vdst,                src:m                                   clamp omod
+v_clrexcp_e64
+v_cmp_class_f16_e64      sdst,                src0:m,      src1:b32
+v_cmp_class_f32_e64      sdst,                src0:m,      src1:b32
+v_cmp_class_f64_e64      sdst,                src0:m,      src1:b32
+v_cmp_eq_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_eq_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_eq_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_eq_i16_e64         sdst,                src0,        src1
+v_cmp_eq_i32_e64         sdst,                src0,        src1
+v_cmp_eq_i64_e64         sdst,                src0,        src1
+v_cmp_eq_u16_e64         sdst,                src0,        src1
+v_cmp_eq_u32_e64         sdst,                src0,        src1
+v_cmp_eq_u64_e64         sdst,                src0,        src1
+v_cmp_f_f16_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_f_f32_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_f_f64_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_f_i16_e64          sdst,                src0,        src1
+v_cmp_f_i32_e64          sdst,                src0,        src1
+v_cmp_f_i64_e64          sdst,                src0,        src1
+v_cmp_f_u16_e64          sdst,                src0,        src1
+v_cmp_f_u32_e64          sdst,                src0,        src1
+v_cmp_f_u64_e64          sdst,                src0,        src1
+v_cmp_ge_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_ge_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_ge_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_ge_i16_e64         sdst,                src0,        src1
+v_cmp_ge_i32_e64         sdst,                src0,        src1
+v_cmp_ge_i64_e64         sdst,                src0,        src1
+v_cmp_ge_u16_e64         sdst,                src0,        src1
+v_cmp_ge_u32_e64         sdst,                src0,        src1
+v_cmp_ge_u64_e64         sdst,                src0,        src1
+v_cmp_gt_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_gt_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_gt_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_gt_i16_e64         sdst,                src0,        src1
+v_cmp_gt_i32_e64         sdst,                src0,        src1
+v_cmp_gt_i64_e64         sdst,                src0,        src1
+v_cmp_gt_u16_e64         sdst,                src0,        src1
+v_cmp_gt_u32_e64         sdst,                src0,        src1
+v_cmp_gt_u64_e64         sdst,                src0,        src1
+v_cmp_le_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_le_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_le_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_le_i16_e64         sdst,                src0,        src1
+v_cmp_le_i32_e64         sdst,                src0,        src1
+v_cmp_le_i64_e64         sdst,                src0,        src1
+v_cmp_le_u16_e64         sdst,                src0,        src1
+v_cmp_le_u32_e64         sdst,                src0,        src1
+v_cmp_le_u64_e64         sdst,                src0,        src1
+v_cmp_lg_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lg_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lg_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmp_lt_i16_e64         sdst,                src0,        src1
+v_cmp_lt_i32_e64         sdst,                src0,        src1
+v_cmp_lt_i64_e64         sdst,                src0,        src1
+v_cmp_lt_u16_e64         sdst,                src0,        src1
+v_cmp_lt_u32_e64         sdst,                src0,        src1
+v_cmp_lt_u64_e64         sdst,                src0,        src1
+v_cmp_ne_i16_e64         sdst,                src0,        src1
+v_cmp_ne_i32_e64         sdst,                src0,        src1
+v_cmp_ne_i64_e64         sdst,                src0,        src1
+v_cmp_ne_u16_e64         sdst,                src0,        src1
+v_cmp_ne_u32_e64         sdst,                src0,        src1
+v_cmp_ne_u64_e64         sdst,                src0,        src1
+v_cmp_neq_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_neq_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_neq_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nge_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nge_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nge_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_ngt_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_ngt_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_ngt_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nle_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nle_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nle_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlg_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlg_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlg_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlt_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlt_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_nlt_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_o_f16_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_o_f32_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_o_f64_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_t_i16_e64          sdst,                src0,        src1
+v_cmp_t_i32_e64          sdst,                src0,        src1
+v_cmp_t_i64_e64          sdst,                src0,        src1
+v_cmp_t_u16_e64          sdst,                src0,        src1
+v_cmp_t_u32_e64          sdst,                src0,        src1
+v_cmp_t_u64_e64          sdst,                src0,        src1
+v_cmp_tru_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_tru_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_tru_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmp_u_f16_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_u_f32_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmp_u_f64_e64          sdst,                src0:m,      src1:m                     clamp
+v_cmpx_class_f16_e64     sdst,                src0:m,      src1:b32
+v_cmpx_class_f32_e64     sdst,                src0:m,      src1:b32
+v_cmpx_class_f64_e64     sdst,                src0:m,      src1:b32
+v_cmpx_eq_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_eq_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_eq_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_eq_i16_e64        sdst,                src0,        src1
+v_cmpx_eq_i32_e64        sdst,                src0,        src1
+v_cmpx_eq_i64_e64        sdst,                src0,        src1
+v_cmpx_eq_u16_e64        sdst,                src0,        src1
+v_cmpx_eq_u32_e64        sdst,                src0,        src1
+v_cmpx_eq_u64_e64        sdst,                src0,        src1
+v_cmpx_f_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_f_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_f_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_f_i16_e64         sdst,                src0,        src1
+v_cmpx_f_i32_e64         sdst,                src0,        src1
+v_cmpx_f_i64_e64         sdst,                src0,        src1
+v_cmpx_f_u16_e64         sdst,                src0,        src1
+v_cmpx_f_u32_e64         sdst,                src0,        src1
+v_cmpx_f_u64_e64         sdst,                src0,        src1
+v_cmpx_ge_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_ge_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_ge_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_ge_i16_e64        sdst,                src0,        src1
+v_cmpx_ge_i32_e64        sdst,                src0,        src1
+v_cmpx_ge_i64_e64        sdst,                src0,        src1
+v_cmpx_ge_u16_e64        sdst,                src0,        src1
+v_cmpx_ge_u32_e64        sdst,                src0,        src1
+v_cmpx_ge_u64_e64        sdst,                src0,        src1
+v_cmpx_gt_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_gt_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_gt_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_gt_i16_e64        sdst,                src0,        src1
+v_cmpx_gt_i32_e64        sdst,                src0,        src1
+v_cmpx_gt_i64_e64        sdst,                src0,        src1
+v_cmpx_gt_u16_e64        sdst,                src0,        src1
+v_cmpx_gt_u32_e64        sdst,                src0,        src1
+v_cmpx_gt_u64_e64        sdst,                src0,        src1
+v_cmpx_le_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_le_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_le_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_le_i16_e64        sdst,                src0,        src1
+v_cmpx_le_i32_e64        sdst,                src0,        src1
+v_cmpx_le_i64_e64        sdst,                src0,        src1
+v_cmpx_le_u16_e64        sdst,                src0,        src1
+v_cmpx_le_u32_e64        sdst,                src0,        src1
+v_cmpx_le_u64_e64        sdst,                src0,        src1
+v_cmpx_lg_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_lg_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_lg_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_lt_f16_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_lt_f32_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_lt_f64_e64        sdst,                src0:m,      src1:m                     clamp
+v_cmpx_lt_i16_e64        sdst,                src0,        src1
+v_cmpx_lt_i32_e64        sdst,                src0,        src1
+v_cmpx_lt_i64_e64        sdst,                src0,        src1
+v_cmpx_lt_u16_e64        sdst,                src0,        src1
+v_cmpx_lt_u32_e64        sdst,                src0,        src1
+v_cmpx_lt_u64_e64        sdst,                src0,        src1
+v_cmpx_ne_i16_e64        sdst,                src0,        src1
+v_cmpx_ne_i32_e64        sdst,                src0,        src1
+v_cmpx_ne_i64_e64        sdst,                src0,        src1
+v_cmpx_ne_u16_e64        sdst,                src0,        src1
+v_cmpx_ne_u32_e64        sdst,                src0,        src1
+v_cmpx_ne_u64_e64        sdst,                src0,        src1
+v_cmpx_neq_f16_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_neq_f32_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_neq_f64_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nge_f16_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nge_f32_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nge_f64_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_ngt_f16_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_ngt_f32_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_ngt_f64_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nle_f16_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nle_f32_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nle_f64_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nlg_f16_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nlg_f32_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nlg_f64_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nlt_f16_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nlt_f32_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_nlt_f64_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_o_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_o_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_o_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_t_i16_e64         sdst,                src0,        src1
+v_cmpx_t_i32_e64         sdst,                src0,        src1
+v_cmpx_t_i64_e64         sdst,                src0,        src1
+v_cmpx_t_u16_e64         sdst,                src0,        src1
+v_cmpx_t_u32_e64         sdst,                src0,        src1
+v_cmpx_t_u64_e64         sdst,                src0,        src1
+v_cmpx_tru_f16_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_tru_f32_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_tru_f64_e64       sdst,                src0:m,      src1:m                     clamp
+v_cmpx_u_f16_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_u_f32_e64         sdst,                src0:m,      src1:m                     clamp
+v_cmpx_u_f64_e64         sdst,                src0:m,      src1:m                     clamp
+v_cndmask_b32_e64        vdst,                src0,        src1,       ssrc2
+v_cos_f16_e64            vdst,                src:m                                   clamp omod
+v_cos_f32_e64            vdst,                src:m                                   clamp omod
+v_cubeid_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cubema_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cubesc_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cubetc_f32             vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_cvt_f16_f32_e64        vdst,                src:m                                   clamp omod
+v_cvt_f16_i16_e64        vdst,                src                                     clamp omod
+v_cvt_f16_u16_e64        vdst,                src                                     clamp omod
+v_cvt_f32_f16_e64        vdst,                src:m                                   clamp omod
+v_cvt_f32_f64_e64        vdst,                src:m                                   clamp omod
+v_cvt_f32_i32_e64        vdst,                src                                     clamp omod
+v_cvt_f32_u32_e64        vdst,                src                                     clamp omod
+v_cvt_f32_ubyte0_e64     vdst,                src                                     clamp omod
+v_cvt_f32_ubyte1_e64     vdst,                src                                     clamp omod
+v_cvt_f32_ubyte2_e64     vdst,                src                                     clamp omod
+v_cvt_f32_ubyte3_e64     vdst,                src                                     clamp omod
+v_cvt_f64_f32_e64        vdst,                src:m                                   clamp omod
+v_cvt_f64_i32_e64        vdst,                src                                     clamp omod
+v_cvt_f64_u32_e64        vdst,                src                                     clamp omod
+v_cvt_flr_i32_f32_e64    vdst,                src:m
+v_cvt_i16_f16_e64        vdst,                src:m
+v_cvt_i32_f32_e64        vdst,                src:m
+v_cvt_i32_f64_e64        vdst,                src:m
+v_cvt_off_f32_i4_e64     vdst,                src                                     clamp omod
+v_cvt_pk_i16_i32         vdst,                src0:i32,    src1:i32
+v_cvt_pk_u16_u32         vdst,                src0:u32,    src1:u32
+v_cvt_pk_u8_f32          vdst:b32,            src0:m:f32,  src1:u32,   src2:u32
+v_cvt_pkaccum_u8_f32     vdst:b32,            src0:m:f32,  src1:u32
+v_cvt_pknorm_i16_f32     vdst,                src0:m:f32,  src1:m:f32
+v_cvt_pknorm_u16_f32     vdst,                src0:m:f32,  src1:m:f32
+v_cvt_pkrtz_f16_f32      vdst,                src0:m:f32,  src1:m:f32
+v_cvt_rpi_i32_f32_e64    vdst,                src:m
+v_cvt_u16_f16_e64        vdst,                src:m
+v_cvt_u32_f32_e64        vdst,                src:m
+v_cvt_u32_f64_e64        vdst,                src:m
+v_div_fixup_f16          vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_fixup_f32          vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_fixup_f64          vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_fmas_f32           vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_fmas_f64           vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_div_scale_f32          vdst,       vcc,     src0,        src1,       src2
+v_div_scale_f64          vdst,       vcc,     src0,        src1,       src2
+v_exp_f16_e64            vdst,                src:m                                   clamp omod
+v_exp_f32_e64            vdst,                src:m                                   clamp omod
+v_exp_legacy_f32_e64     vdst,                src:m                                   clamp omod
+v_ffbh_i32_e64           vdst,                src
+v_ffbh_u32_e64           vdst,                src
+v_ffbl_b32_e64           vdst,                src
+v_floor_f16_e64          vdst,                src:m                                   clamp omod
+v_floor_f32_e64          vdst,                src:m                                   clamp omod
+v_floor_f64_e64          vdst,                src:m                                   clamp omod
+v_fma_f16                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_fma_f32                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_fma_f64                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_fract_f16_e64          vdst,                src:m                                   clamp omod
+v_fract_f32_e64          vdst,                src:m                                   clamp omod
+v_fract_f64_e64          vdst,                src:m                                   clamp omod
+v_frexp_exp_i16_f16_e64  vdst,                src:m
+v_frexp_exp_i32_f32_e64  vdst,                src:m
+v_frexp_exp_i32_f64_e64  vdst,                src:m
+v_frexp_mant_f16_e64     vdst,                src:m                                   clamp omod
+v_frexp_mant_f32_e64     vdst,                src:m                                   clamp omod
+v_frexp_mant_f64_e64     vdst,                src:m                                   clamp omod
+v_interp_mov_f32_e64     vdst,                param:b32,   attr:b32                   clamp omod
+v_interp_p1_f32_e64      vdst,                vsrc:m,      attr:b32                   clamp omod
+v_interp_p1ll_f16        vdst:f32,            vsrc:m:f32,  attr:b32                   high clamp omod
+v_interp_p1lv_f16        vdst:f32,            vsrc0:m:f32, attr:b32,   vsrc2:m:f16x2  high clamp omod
+v_interp_p2_f16          vdst,                vsrc0:m:f32, attr:b32,   vsrc2:m:f32    high clamp
+v_interp_p2_f32_e64      vdst,                vsrc:m,      attr:b32                   clamp omod
+v_ldexp_f16_e64          vdst,                src0:m,      src1:i16                   clamp omod
+v_ldexp_f32              vdst,                src0:m,      src1:i32                   clamp omod
+v_ldexp_f64              vdst,                src0:m,      src1:i32                   clamp omod
+v_lerp_u8                vdst:u32,            src0:b32,    src1:b32,   src2:b32
+v_log_f16_e64            vdst,                src:m                                   clamp omod
+v_log_f32_e64            vdst,                src:m                                   clamp omod
+v_log_legacy_f32_e64     vdst,                src:m                                   clamp omod
+v_lshlrev_b16_e64        vdst,                src0:u16,    src1
+v_lshlrev_b32_e64        vdst,                src0:u32,    src1
+v_lshlrev_b64            vdst,                src0:u32,    src1
+v_lshrrev_b16_e64        vdst,                src0:u16,    src1
+v_lshrrev_b32_e64        vdst,                src0:u32,    src1
+v_lshrrev_b64            vdst,                src0:u32,    src1
+v_mac_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_mac_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_mad_f16                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_mad_f32                vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_mad_i16                vdst,                src0,        src1,       src2           clamp
+v_mad_i32_i24            vdst,                src0,        src1,       src2:i32       clamp
+v_mad_i64_i32            vdst,       sdst,    src0,        src1,       src2:i64       clamp
+v_mad_legacy_f32         vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_mad_u16                vdst,                src0,        src1,       src2           clamp
+v_mad_u32_u24            vdst,                src0,        src1,       src2:u32       clamp
+v_mad_u64_u32            vdst,       sdst,    src0,        src1,       src2:u64       clamp
+v_max3_f32               vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_max3_i32               vdst,                src0,        src1,       src2
+v_max3_u32               vdst,                src0,        src1,       src2
+v_max_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_max_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_max_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_max_i16_e64            vdst,                src0,        src1
+v_max_i32_e64            vdst,                src0,        src1
+v_max_u16_e64            vdst,                src0,        src1
+v_max_u32_e64            vdst,                src0,        src1
+v_mbcnt_hi_u32_b32       vdst,                src0,        src1
+v_mbcnt_lo_u32_b32       vdst,                src0,        src1
+v_med3_f32               vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_med3_i32               vdst,                src0,        src1,       src2
+v_med3_u32               vdst,                src0,        src1,       src2
+v_min3_f32               vdst,                src0:m,      src1:m,     src2:m         clamp omod
+v_min3_i32               vdst,                src0,        src1,       src2
+v_min3_u32               vdst,                src0,        src1,       src2
+v_min_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_min_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_min_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_min_i16_e64            vdst,                src0,        src1
+v_min_i32_e64            vdst,                src0,        src1
+v_min_u16_e64            vdst,                src0,        src1
+v_min_u32_e64            vdst,                src0,        src1
+v_mov_b32_e64            vdst,                src
+v_mqsad_pk_u16_u8        vdst:u16x4,          src0:u8x8,   src1:u8x4,  src2:u16x4     clamp
+v_mqsad_u32_u8           vdst:u32x4,          src0:u8x8,   src1:u8x4,  vsrc2:u32x4    clamp
+v_msad_u8                vdst:u32,            src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_mul_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_mul_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_mul_f64                vdst,                src0:m,      src1:m                     clamp omod
+v_mul_hi_i32             vdst,                src0,        src1
+v_mul_hi_i32_i24_e64     vdst,                src0,        src1
+v_mul_hi_u32             vdst,                src0,        src1
+v_mul_hi_u32_u24_e64     vdst,                src0,        src1
+v_mul_i32_i24_e64        vdst,                src0,        src1                       clamp
+v_mul_legacy_f32_e64     vdst,                src0:m,      src1:m                     clamp omod
+v_mul_lo_u16_e64         vdst,                src0,        src1
+v_mul_lo_u32             vdst,                src0,        src1
+v_mul_u32_u24_e64        vdst,                src0,        src1                       clamp
+v_nop_e64
+v_not_b32_e64            vdst,                src
+v_or_b32_e64             vdst,                src0,        src1
+v_perm_b32               vdst,                src0,        src1,       src2
+v_qsad_pk_u16_u8         vdst:u16x4,          src0:u8x8,   src1:u8x4,  src2:u16x4     clamp
+v_rcp_f16_e64            vdst,                src:m                                   clamp omod
+v_rcp_f32_e64            vdst,                src:m                                   clamp omod
+v_rcp_f64_e64            vdst,                src:m                                   clamp omod
+v_rcp_iflag_f32_e64      vdst,                src:m                                   clamp omod
+v_readlane_b32           sdst,                src0,        ssrc1
+v_rndne_f16_e64          vdst,                src:m                                   clamp omod
+v_rndne_f32_e64          vdst,                src:m                                   clamp omod
+v_rndne_f64_e64          vdst,                src:m                                   clamp omod
+v_rsq_f16_e64            vdst,                src:m                                   clamp omod
+v_rsq_f32_e64            vdst,                src:m                                   clamp omod
+v_rsq_f64_e64            vdst,                src:m                                   clamp omod
+v_sad_hi_u8              vdst:u32,            src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_sad_u16                vdst:u32,            src0:u16x2,  src1:u16x2, src2:u32       clamp
+v_sad_u32                vdst,                src0,        src1,       src2           clamp
+v_sad_u8                 vdst:u32,            src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_sin_f16_e64            vdst,                src:m                                   clamp omod
+v_sin_f32_e64            vdst,                src:m                                   clamp omod
+v_sqrt_f16_e64           vdst,                src:m                                   clamp omod
+v_sqrt_f32_e64           vdst,                src:m                                   clamp omod
+v_sqrt_f64_e64           vdst,                src:m                                   clamp omod
+v_sub_f16_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_sub_f32_e64            vdst,                src0:m,      src1:m                     clamp omod
+v_sub_u16_e64            vdst,                src0,        src1                       clamp
+v_sub_u32_e64            vdst,       sdst,    src0,        src1                       clamp
+v_subb_u32_e64           vdst,       sdst,    src0,        src1,       ssrc2          clamp
+v_subbrev_u32_e64        vdst,       sdst,    src0,        src1,       ssrc2          clamp
+v_subrev_f16_e64         vdst,                src0:m,      src1:m                     clamp omod
+v_subrev_f32_e64         vdst,                src0:m,      src1:m                     clamp omod
+v_subrev_u16_e64         vdst,                src0,        src1                       clamp
+v_subrev_u32_e64         vdst,       sdst,    src0,        src1                       clamp
+v_trig_preop_f64         vdst,                src0:m,      src1:u32                   clamp omod
+v_trunc_f16_e64          vdst,                src:m                                   clamp omod
+v_trunc_f32_e64          vdst,                src:m                                   clamp omod
+v_trunc_f64_e64          vdst,                src:m                                   clamp omod
+v_writelane_b32          vdst,                ssrc0,       ssrc1
+v_xor_b32_e64            vdst,                src0,        src1
+
+
+

VOPC

+
INSTRUCTION                    DST       SRC0      SRC1             MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————
+v_cmp_class_f16                vcc,      src0,     vsrc1:b32
+v_cmp_class_f16_sdwa           vcc,      vsrc0:m,  vsrc1:m:b32      src0_sel src1_sel
+v_cmp_class_f32                vcc,      src0,     vsrc1:b32
+v_cmp_class_f32_sdwa           vcc,      vsrc0:m,  vsrc1:m:b32      src0_sel src1_sel
+v_cmp_class_f64                vcc,      src0,     vsrc1:b32
+v_cmp_eq_f16                   vcc,      src0,     vsrc1
+v_cmp_eq_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_eq_f32                   vcc,      src0,     vsrc1
+v_cmp_eq_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_eq_f64                   vcc,      src0,     vsrc1
+v_cmp_eq_i16                   vcc,      src0,     vsrc1
+v_cmp_eq_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_eq_i32                   vcc,      src0,     vsrc1
+v_cmp_eq_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_eq_i64                   vcc,      src0,     vsrc1
+v_cmp_eq_u16                   vcc,      src0,     vsrc1
+v_cmp_eq_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_eq_u32                   vcc,      src0,     vsrc1
+v_cmp_eq_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_eq_u64                   vcc,      src0,     vsrc1
+v_cmp_f_f16                    vcc,      src0,     vsrc1
+v_cmp_f_f16_sdwa               vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_f_f32                    vcc,      src0,     vsrc1
+v_cmp_f_f32_sdwa               vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_f_f64                    vcc,      src0,     vsrc1
+v_cmp_f_i16                    vcc,      src0,     vsrc1
+v_cmp_f_i16_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_f_i32                    vcc,      src0,     vsrc1
+v_cmp_f_i32_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_f_i64                    vcc,      src0,     vsrc1
+v_cmp_f_u16                    vcc,      src0,     vsrc1
+v_cmp_f_u16_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_f_u32                    vcc,      src0,     vsrc1
+v_cmp_f_u32_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_f_u64                    vcc,      src0,     vsrc1
+v_cmp_ge_f16                   vcc,      src0,     vsrc1
+v_cmp_ge_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_ge_f32                   vcc,      src0,     vsrc1
+v_cmp_ge_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_ge_f64                   vcc,      src0,     vsrc1
+v_cmp_ge_i16                   vcc,      src0,     vsrc1
+v_cmp_ge_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ge_i32                   vcc,      src0,     vsrc1
+v_cmp_ge_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ge_i64                   vcc,      src0,     vsrc1
+v_cmp_ge_u16                   vcc,      src0,     vsrc1
+v_cmp_ge_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ge_u32                   vcc,      src0,     vsrc1
+v_cmp_ge_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ge_u64                   vcc,      src0,     vsrc1
+v_cmp_gt_f16                   vcc,      src0,     vsrc1
+v_cmp_gt_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_gt_f32                   vcc,      src0,     vsrc1
+v_cmp_gt_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_gt_f64                   vcc,      src0,     vsrc1
+v_cmp_gt_i16                   vcc,      src0,     vsrc1
+v_cmp_gt_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_gt_i32                   vcc,      src0,     vsrc1
+v_cmp_gt_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_gt_i64                   vcc,      src0,     vsrc1
+v_cmp_gt_u16                   vcc,      src0,     vsrc1
+v_cmp_gt_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_gt_u32                   vcc,      src0,     vsrc1
+v_cmp_gt_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_gt_u64                   vcc,      src0,     vsrc1
+v_cmp_le_f16                   vcc,      src0,     vsrc1
+v_cmp_le_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_le_f32                   vcc,      src0,     vsrc1
+v_cmp_le_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_le_f64                   vcc,      src0,     vsrc1
+v_cmp_le_i16                   vcc,      src0,     vsrc1
+v_cmp_le_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_le_i32                   vcc,      src0,     vsrc1
+v_cmp_le_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_le_i64                   vcc,      src0,     vsrc1
+v_cmp_le_u16                   vcc,      src0,     vsrc1
+v_cmp_le_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_le_u32                   vcc,      src0,     vsrc1
+v_cmp_le_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_le_u64                   vcc,      src0,     vsrc1
+v_cmp_lg_f16                   vcc,      src0,     vsrc1
+v_cmp_lg_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_lg_f32                   vcc,      src0,     vsrc1
+v_cmp_lg_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_lg_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_f16                   vcc,      src0,     vsrc1
+v_cmp_lt_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_lt_f32                   vcc,      src0,     vsrc1
+v_cmp_lt_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_lt_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_i16                   vcc,      src0,     vsrc1
+v_cmp_lt_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_lt_i32                   vcc,      src0,     vsrc1
+v_cmp_lt_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_lt_i64                   vcc,      src0,     vsrc1
+v_cmp_lt_u16                   vcc,      src0,     vsrc1
+v_cmp_lt_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_lt_u32                   vcc,      src0,     vsrc1
+v_cmp_lt_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_lt_u64                   vcc,      src0,     vsrc1
+v_cmp_ne_i16                   vcc,      src0,     vsrc1
+v_cmp_ne_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ne_i32                   vcc,      src0,     vsrc1
+v_cmp_ne_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ne_i64                   vcc,      src0,     vsrc1
+v_cmp_ne_u16                   vcc,      src0,     vsrc1
+v_cmp_ne_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ne_u32                   vcc,      src0,     vsrc1
+v_cmp_ne_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_ne_u64                   vcc,      src0,     vsrc1
+v_cmp_neq_f16                  vcc,      src0,     vsrc1
+v_cmp_neq_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_neq_f32                  vcc,      src0,     vsrc1
+v_cmp_neq_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_neq_f64                  vcc,      src0,     vsrc1
+v_cmp_nge_f16                  vcc,      src0,     vsrc1
+v_cmp_nge_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nge_f32                  vcc,      src0,     vsrc1
+v_cmp_nge_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nge_f64                  vcc,      src0,     vsrc1
+v_cmp_ngt_f16                  vcc,      src0,     vsrc1
+v_cmp_ngt_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_ngt_f32                  vcc,      src0,     vsrc1
+v_cmp_ngt_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_ngt_f64                  vcc,      src0,     vsrc1
+v_cmp_nle_f16                  vcc,      src0,     vsrc1
+v_cmp_nle_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nle_f32                  vcc,      src0,     vsrc1
+v_cmp_nle_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nle_f64                  vcc,      src0,     vsrc1
+v_cmp_nlg_f16                  vcc,      src0,     vsrc1
+v_cmp_nlg_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nlg_f32                  vcc,      src0,     vsrc1
+v_cmp_nlg_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nlg_f64                  vcc,      src0,     vsrc1
+v_cmp_nlt_f16                  vcc,      src0,     vsrc1
+v_cmp_nlt_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nlt_f32                  vcc,      src0,     vsrc1
+v_cmp_nlt_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_nlt_f64                  vcc,      src0,     vsrc1
+v_cmp_o_f16                    vcc,      src0,     vsrc1
+v_cmp_o_f16_sdwa               vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_o_f32                    vcc,      src0,     vsrc1
+v_cmp_o_f32_sdwa               vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_o_f64                    vcc,      src0,     vsrc1
+v_cmp_t_i16                    vcc,      src0,     vsrc1
+v_cmp_t_i16_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_t_i32                    vcc,      src0,     vsrc1
+v_cmp_t_i32_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_t_i64                    vcc,      src0,     vsrc1
+v_cmp_t_u16                    vcc,      src0,     vsrc1
+v_cmp_t_u16_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_t_u32                    vcc,      src0,     vsrc1
+v_cmp_t_u32_sdwa               vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmp_t_u64                    vcc,      src0,     vsrc1
+v_cmp_tru_f16                  vcc,      src0,     vsrc1
+v_cmp_tru_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_tru_f32                  vcc,      src0,     vsrc1
+v_cmp_tru_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_tru_f64                  vcc,      src0,     vsrc1
+v_cmp_u_f16                    vcc,      src0,     vsrc1
+v_cmp_u_f16_sdwa               vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_u_f32                    vcc,      src0,     vsrc1
+v_cmp_u_f32_sdwa               vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmp_u_f64                    vcc,      src0,     vsrc1
+v_cmpx_class_f16               vcc,      src0,     vsrc1:b32
+v_cmpx_class_f16_sdwa          vcc,      vsrc0:m,  vsrc1:m:b32      src0_sel src1_sel
+v_cmpx_class_f32               vcc,      src0,     vsrc1:b32
+v_cmpx_class_f32_sdwa          vcc,      vsrc0:m,  vsrc1:m:b32      src0_sel src1_sel
+v_cmpx_class_f64               vcc,      src0,     vsrc1:b32
+v_cmpx_eq_f16                  vcc,      src0,     vsrc1
+v_cmpx_eq_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_eq_f32                  vcc,      src0,     vsrc1
+v_cmpx_eq_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_eq_f64                  vcc,      src0,     vsrc1
+v_cmpx_eq_i16                  vcc,      src0,     vsrc1
+v_cmpx_eq_i16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_eq_i32                  vcc,      src0,     vsrc1
+v_cmpx_eq_i32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_eq_i64                  vcc,      src0,     vsrc1
+v_cmpx_eq_u16                  vcc,      src0,     vsrc1
+v_cmpx_eq_u16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_eq_u32                  vcc,      src0,     vsrc1
+v_cmpx_eq_u32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_eq_u64                  vcc,      src0,     vsrc1
+v_cmpx_f_f16                   vcc,      src0,     vsrc1
+v_cmpx_f_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_f_f32                   vcc,      src0,     vsrc1
+v_cmpx_f_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_f_f64                   vcc,      src0,     vsrc1
+v_cmpx_f_i16                   vcc,      src0,     vsrc1
+v_cmpx_f_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_f_i32                   vcc,      src0,     vsrc1
+v_cmpx_f_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_f_i64                   vcc,      src0,     vsrc1
+v_cmpx_f_u16                   vcc,      src0,     vsrc1
+v_cmpx_f_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_f_u32                   vcc,      src0,     vsrc1
+v_cmpx_f_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_f_u64                   vcc,      src0,     vsrc1
+v_cmpx_ge_f16                  vcc,      src0,     vsrc1
+v_cmpx_ge_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_ge_f32                  vcc,      src0,     vsrc1
+v_cmpx_ge_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_ge_f64                  vcc,      src0,     vsrc1
+v_cmpx_ge_i16                  vcc,      src0,     vsrc1
+v_cmpx_ge_i16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ge_i32                  vcc,      src0,     vsrc1
+v_cmpx_ge_i32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ge_i64                  vcc,      src0,     vsrc1
+v_cmpx_ge_u16                  vcc,      src0,     vsrc1
+v_cmpx_ge_u16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ge_u32                  vcc,      src0,     vsrc1
+v_cmpx_ge_u32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ge_u64                  vcc,      src0,     vsrc1
+v_cmpx_gt_f16                  vcc,      src0,     vsrc1
+v_cmpx_gt_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_gt_f32                  vcc,      src0,     vsrc1
+v_cmpx_gt_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_gt_f64                  vcc,      src0,     vsrc1
+v_cmpx_gt_i16                  vcc,      src0,     vsrc1
+v_cmpx_gt_i16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_gt_i32                  vcc,      src0,     vsrc1
+v_cmpx_gt_i32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_gt_i64                  vcc,      src0,     vsrc1
+v_cmpx_gt_u16                  vcc,      src0,     vsrc1
+v_cmpx_gt_u16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_gt_u32                  vcc,      src0,     vsrc1
+v_cmpx_gt_u32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_gt_u64                  vcc,      src0,     vsrc1
+v_cmpx_le_f16                  vcc,      src0,     vsrc1
+v_cmpx_le_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_le_f32                  vcc,      src0,     vsrc1
+v_cmpx_le_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_le_f64                  vcc,      src0,     vsrc1
+v_cmpx_le_i16                  vcc,      src0,     vsrc1
+v_cmpx_le_i16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_le_i32                  vcc,      src0,     vsrc1
+v_cmpx_le_i32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_le_i64                  vcc,      src0,     vsrc1
+v_cmpx_le_u16                  vcc,      src0,     vsrc1
+v_cmpx_le_u16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_le_u32                  vcc,      src0,     vsrc1
+v_cmpx_le_u32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_le_u64                  vcc,      src0,     vsrc1
+v_cmpx_lg_f16                  vcc,      src0,     vsrc1
+v_cmpx_lg_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_lg_f32                  vcc,      src0,     vsrc1
+v_cmpx_lg_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_lg_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_f16                  vcc,      src0,     vsrc1
+v_cmpx_lt_f16_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_lt_f32                  vcc,      src0,     vsrc1
+v_cmpx_lt_f32_sdwa             vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_lt_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_i16                  vcc,      src0,     vsrc1
+v_cmpx_lt_i16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_lt_i32                  vcc,      src0,     vsrc1
+v_cmpx_lt_i32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_lt_i64                  vcc,      src0,     vsrc1
+v_cmpx_lt_u16                  vcc,      src0,     vsrc1
+v_cmpx_lt_u16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_lt_u32                  vcc,      src0,     vsrc1
+v_cmpx_lt_u32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_lt_u64                  vcc,      src0,     vsrc1
+v_cmpx_ne_i16                  vcc,      src0,     vsrc1
+v_cmpx_ne_i16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ne_i32                  vcc,      src0,     vsrc1
+v_cmpx_ne_i32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ne_i64                  vcc,      src0,     vsrc1
+v_cmpx_ne_u16                  vcc,      src0,     vsrc1
+v_cmpx_ne_u16_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ne_u32                  vcc,      src0,     vsrc1
+v_cmpx_ne_u32_sdwa             vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_ne_u64                  vcc,      src0,     vsrc1
+v_cmpx_neq_f16                 vcc,      src0,     vsrc1
+v_cmpx_neq_f16_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_neq_f32                 vcc,      src0,     vsrc1
+v_cmpx_neq_f32_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_neq_f64                 vcc,      src0,     vsrc1
+v_cmpx_nge_f16                 vcc,      src0,     vsrc1
+v_cmpx_nge_f16_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nge_f32                 vcc,      src0,     vsrc1
+v_cmpx_nge_f32_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nge_f64                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f16                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f16_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_ngt_f32                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f32_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_ngt_f64                 vcc,      src0,     vsrc1
+v_cmpx_nle_f16                 vcc,      src0,     vsrc1
+v_cmpx_nle_f16_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nle_f32                 vcc,      src0,     vsrc1
+v_cmpx_nle_f32_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nle_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f16                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f16_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nlg_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f32_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nlg_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f16                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f16_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nlt_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f32_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_nlt_f64                 vcc,      src0,     vsrc1
+v_cmpx_o_f16                   vcc,      src0,     vsrc1
+v_cmpx_o_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_o_f32                   vcc,      src0,     vsrc1
+v_cmpx_o_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_o_f64                   vcc,      src0,     vsrc1
+v_cmpx_t_i16                   vcc,      src0,     vsrc1
+v_cmpx_t_i16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_t_i32                   vcc,      src0,     vsrc1
+v_cmpx_t_i32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_t_i64                   vcc,      src0,     vsrc1
+v_cmpx_t_u16                   vcc,      src0,     vsrc1
+v_cmpx_t_u16_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_t_u32                   vcc,      src0,     vsrc1
+v_cmpx_t_u32_sdwa              vcc,      vsrc0:m,  vsrc1:m          src0_sel src1_sel
+v_cmpx_t_u64                   vcc,      src0,     vsrc1
+v_cmpx_tru_f16                 vcc,      src0,     vsrc1
+v_cmpx_tru_f16_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_tru_f32                 vcc,      src0,     vsrc1
+v_cmpx_tru_f32_sdwa            vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_tru_f64                 vcc,      src0,     vsrc1
+v_cmpx_u_f16                   vcc,      src0,     vsrc1
+v_cmpx_u_f16_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_u_f32                   vcc,      src0,     vsrc1
+v_cmpx_u_f32_sdwa              vcc,      vsrc0:m,  vsrc1:m          clamp src0_sel src1_sel
+v_cmpx_u_f64                   vcc,      src0,     vsrc1
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX900.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX900.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX900.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX900.html 2021-09-19 16:16:20.000000000 +0000 @@ -0,0 +1,182 @@ + + + + + + + + + Syntax of gfx900, gfx902 and gfx909 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of gfx900, gfx902 and gfx909 Instructions

+ +
+

Introduction

+

This document describes the syntax of instructions specific to gfx900, gfx902 and gfx909.

+

For a description of other gfx900, gfx902 and gfx909 instructions, see Syntax of Core GFX9 Instructions.

+
+
+

Notation

+

The notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

VOP3P

+
INSTRUCTION                    DST       SRC0       SRC1       SRC2       MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+v_mad_mix_f32                  vdst,     src0:m:fx, src1:m:fx, src2:m:fx  m_op_sel m_op_sel_hi clamp
+v_mad_mixhi_f16                vdst,     src0:m:fx, src1:m:fx, src2:m:fx  m_op_sel m_op_sel_hi clamp
+v_mad_mixlo_f16                vdst,     src0:m:fx, src1:m:fx, src2:m:fx  m_op_sel m_op_sel_hi clamp
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX904.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX904.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX904.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX904.html 2021-09-19 16:16:20.000000000 +0000 @@ -0,0 +1,182 @@ + + + + + + + + + Syntax of gfx904 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of gfx904 Instructions

+ +
+

Introduction

+

This document describes the syntax of instructions specific to gfx904.

+

For a description of other gfx904 instructions, see Syntax of Core GFX9 Instructions.

+
+
+

Notation

+

The notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

VOP3P

+
INSTRUCTION                    DST       SRC0       SRC1       SRC2       MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+v_fma_mix_f32                  vdst,     src0:m:fx, src1:m:fx, src2:m:fx  m_op_sel m_op_sel_hi clamp
+v_fma_mixhi_f16                vdst,     src0:m:fx, src1:m:fx, src2:m:fx  m_op_sel m_op_sel_hi clamp
+v_fma_mixlo_f16                vdst,     src0:m:fx, src1:m:fx, src2:m:fx  m_op_sel m_op_sel_hi clamp
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX906.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX906.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX906.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX906.html 2021-09-19 16:16:20.000000000 +0000 @@ -0,0 +1,208 @@ + + + + + + + + + Syntax of gfx906 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of gfx906 Instructions

+ +
+

Introduction

+

This document describes the syntax of instructions specific to gfx906.

+

For a description of other gfx906 instructions, see Syntax of Core GFX9 Instructions.

+
+
+

Notation

+

The notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

VOP2

+
INSTRUCTION                    DST       SRC0      SRC1       MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+v_fmac_f32                     vdst,     src0,     vsrc1
+v_fmac_f32_dpp                 vdst,     vsrc0:m,  vsrc1:m    dpp_ctrl row_mask bank_mask bound_ctrl
+v_xnor_b32                     vdst,     src0,     vsrc1
+v_xnor_b32_dpp                 vdst,     vsrc0,    vsrc1      dpp_ctrl row_mask bank_mask bound_ctrl
+v_xnor_b32_sdwa                vdst,     src0:m,   src1:m     dst_sel dst_unused src0_sel src1_sel
+
+
+

VOP3

+
INSTRUCTION                    DST       SRC0      SRC1           MODIFIERS
+————————————————————————————————————————————————————————————————————————————
+v_fmac_f32_e64                 vdst,     src0:m,   src1:m         clamp omod
+v_xnor_b32_e64                 vdst,     src0,     src1
+
+
+

VOP3P

+
INSTRUCTION            DST      SRC0        SRC1        SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————
+v_dot2_f32_f16         vdst,    src0:f16x2, src1:f16x2, src2:f32       neg_lo neg_hi clamp
+v_dot2_i32_i16         vdst,    src0:i16x2, src1:i16x2, src2:i32       clamp
+v_dot2_u32_u16         vdst,    src0:u16x2, src1:u16x2, src2:u32       clamp
+v_dot4_i32_i8          vdst,    src0:i8x4,  src1:i8x4,  src2:i32       clamp
+v_dot4_u32_u8          vdst,    src0:u8x4,  src1:u8x4,  src2:u32       clamp
+v_dot8_i32_i4          vdst,    src0:i4x8,  src1:i4x8,  src2:i32       clamp
+v_dot8_u32_u4          vdst,    src0:u4x8,  src1:u4x8,  src2:u32       clamp
+v_fma_mix_f32          vdst,    src0:m:fx,  src1:m:fx,  src2:m:fx      m_op_sel m_op_sel_hi clamp
+v_fma_mixhi_f16        vdst,    src0:m:fx,  src1:m:fx,  src2:m:fx      m_op_sel m_op_sel_hi clamp
+v_fma_mixlo_f16        vdst,    src0:m:fx,  src1:m:fx,  src2:m:fx      m_op_sel m_op_sel_hi clamp
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX908.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX908.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX908.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX908.html 2021-09-19 16:16:20.000000000 +0000 @@ -0,0 +1,255 @@ + + + + + + + + + Syntax of gfx908 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of gfx908 Instructions

+ +
+

Introduction

+

This document describes the syntax of instructions specific to gfx908.

+

For a description of other gfx908 instructions, see Syntax of Core GFX9 Instructions.

+
+
+

Notation

+

The notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

FLAT

+
INSTRUCTION                    DST       SRC0      SRC1      SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————
+global_atomic_add_f32          vdst:opt, vaddr,    vdata,    saddr          offset13s slc
+global_atomic_pk_add_f16       vdst:opt, vaddr,    vdata,    saddr          offset13s slc
+
+
+

MUBUF

+
INSTRUCTION                    SRC0       SRC1      SRC2      SRC3        MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————
+buffer_atomic_add_f32          vdata:dst, vaddr,    srsrc,    soffset     idxen offen offset12 slc
+buffer_atomic_pk_add_f16       vdata:dst, vaddr,    srsrc,    soffset     idxen offen offset12 slc
+
+
+

VOP2

+
INSTRUCTION             DST      SRC0         SRC1             MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————
+v_dot2c_f32_f16         vdst,    src0:f16x2,  vsrc1:f16x2
+v_dot2c_f32_f16_dpp     vdst,    vsrc0:f16x2, vsrc1:f16x2      dpp_ctrl row_mask bank_mask bound_ctrl
+v_dot2c_i32_i16         vdst,    src0:i16x2,  vsrc1:i16x2
+v_dot2c_i32_i16_dpp     vdst,    vsrc0:i16x2, vsrc1:i16x2      dpp_ctrl row_mask bank_mask bound_ctrl
+v_dot4c_i32_i8          vdst,    src0:i8x4,   vsrc1:i8x4
+v_dot4c_i32_i8_dpp      vdst,    vsrc0:i8x4,  vsrc1:i8x4       dpp_ctrl row_mask bank_mask bound_ctrl
+v_dot8c_i32_i4          vdst,    src0:i4x8,   vsrc1:i4x8
+v_dot8c_i32_i4_dpp      vdst,    vsrc0:i4x8,  vsrc1:i4x8       dpp_ctrl row_mask bank_mask bound_ctrl
+v_fmac_f32              vdst,    src0,        vsrc1
+v_fmac_f32_dpp          vdst,    vsrc0:m,     vsrc1:m          dpp_ctrl row_mask bank_mask bound_ctrl
+v_pk_fmac_f16           vdst,    src0,        vsrc1
+v_xnor_b32              vdst,    src0,        vsrc1
+v_xnor_b32_dpp          vdst,    vsrc0,       vsrc1            dpp_ctrl row_mask bank_mask bound_ctrl
+v_xnor_b32_sdwa         vdst,    src0:m,      src1:m           dst_sel dst_unused src0_sel src1_sel
+
+
+

VOP3

+
INSTRUCTION                    DST       SRC0      SRC1           MODIFIERS
+————————————————————————————————————————————————————————————————————————————
+v_fmac_f32_e64                 vdst,     src0:m,   src1:m         clamp omod
+v_xnor_b32_e64                 vdst,     src0,     src1
+
+
+

VOP3P

+
INSTRUCTION             DST          SRC0          SRC1          SRC2          MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_accvgpr_read_b32      vdst,        vsrc
+v_accvgpr_write_b32     vdst,        src
+v_dot2_f32_f16          vdst,        src0:f16x2,   src1:f16x2,   src2:f32      neg_lo neg_hi clamp
+v_dot2_i32_i16          vdst,        src0:i16x2,   src1:i16x2,   src2:i32      clamp
+v_dot2_u32_u16          vdst,        src0:u16x2,   src1:u16x2,   src2:u32      clamp
+v_dot4_i32_i8           vdst,        src0:i8x4,    src1:i8x4,    src2:i32      clamp
+v_dot4_u32_u8           vdst,        src0:u8x4,    src1:u8x4,    src2:u32      clamp
+v_dot8_i32_i4           vdst,        src0:i4x8,    src1:i4x8,    src2:i32      clamp
+v_dot8_u32_u4           vdst,        src0:u4x8,    src1:u4x8,    src2:u32      clamp
+v_fma_mix_f32           vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx     m_op_sel m_op_sel_hi clamp
+v_fma_mixhi_f16         vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx     m_op_sel m_op_sel_hi clamp
+v_fma_mixlo_f16         vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx     m_op_sel m_op_sel_hi clamp
+v_mfma_f32_16x16x16f16  vdst:f32x4,  vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x4   cbsz abid blgp
+v_mfma_f32_16x16x1f32   vdst:f32x16, vsrc0:f32,    vsrc1:f32,    vsrc2:f32x16  cbsz abid blgp
+v_mfma_f32_16x16x2bf16  vdst:f32x16, vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x16  cbsz abid blgp
+v_mfma_f32_16x16x4f16   vdst:f32x16, vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x16  cbsz abid blgp
+v_mfma_f32_16x16x4f32   vdst:f32x4,  vsrc0:f32,    vsrc1:f32,    vsrc2:f32x4   cbsz abid blgp
+v_mfma_f32_16x16x8bf16  vdst:f32x4,  vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x4   cbsz abid blgp
+v_mfma_f32_32x32x1f32   vdst:f32x32, vsrc0:f32,    vsrc1:f32,    vsrc2:f32x32  cbsz abid blgp
+v_mfma_f32_32x32x2bf16  vdst:f32x32, vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x32  cbsz abid blgp
+v_mfma_f32_32x32x2f32   vdst:f32x16, vsrc0:f32,    vsrc1:f32,    vsrc2:f32x16  cbsz abid blgp
+v_mfma_f32_32x32x4bf16  vdst:f32x16, vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x16  cbsz abid blgp
+v_mfma_f32_32x32x4f16   vdst:f32x32, vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x32  cbsz abid blgp
+v_mfma_f32_32x32x8f16   vdst:f32x16, vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x16  cbsz abid blgp
+v_mfma_f32_4x4x1f32     vdst:f32x4,  vsrc0:f32,    vsrc1:f32,    vsrc2:f32x4   cbsz abid blgp
+v_mfma_f32_4x4x2bf16    vdst:f32x4,  vsrc0:bf16x2, vsrc1:bf16x2, vsrc2:f32x4   cbsz abid blgp
+v_mfma_f32_4x4x4f16     vdst:f32x4,  vsrc0:f16x4,  vsrc1:f16x4,  vsrc2:f32x4   cbsz abid blgp
+v_mfma_i32_16x16x16i8   vdst:i32x4,  vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x4   cbsz abid blgp
+v_mfma_i32_16x16x4i8    vdst:i32x16, vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x16  cbsz abid blgp
+v_mfma_i32_32x32x4i8    vdst:i32x32, vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x32  cbsz abid blgp
+v_mfma_i32_32x32x8i8    vdst:i32x16, vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x16  cbsz abid blgp
+v_mfma_i32_4x4x4i8      vdst:i32x4,  vsrc0:i8x4,   vsrc1:i8x4,   vsrc2:i32x4   cbsz abid blgp
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX90a.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX90a.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX90a.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX90a.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,2093 @@ + + + + + + + + + Syntax of gfx90a Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of gfx90a Instructions

+ +
+

Introduction

+

This document describes the syntax of gfx90a instructions.

+
+
+

Notation

+

The notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

DS

+
INSTRUCTION                    DST         SRC0      SRC1      SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————
+ds_add_f32                                 vaddr,    vdata                    offset gds
+ds_add_f64                                 vaddr,    vdata                    offset gds
+ds_add_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_add_u32                                 vaddr,    vdata                    offset gds
+ds_add_u64                                 vaddr,    vdata                    offset gds
+ds_and_b32                                 vaddr,    vdata                    offset gds
+ds_and_b64                                 vaddr,    vdata                    offset gds
+ds_and_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_and_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_append                      vdst                                           offset gds
+ds_bpermute_b32                vdst,       vaddr,    vdata                    offset
+ds_cmpst_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_condxchg32_rtn_b64          vdst,       vaddr,    vdata                    offset gds
+ds_consume                     vdst                                           offset gds
+ds_dec_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_u32                                 vaddr,    vdata                    offset gds
+ds_dec_u64                                 vaddr,    vdata                    offset gds
+ds_gws_barrier                             vdata                              offset gds
+ds_gws_init                                vdata                              offset gds
+ds_gws_sema_br                             vdata                              offset gds
+ds_gws_sema_p                                                                 offset gds
+ds_gws_sema_release_all                                                       offset gds
+ds_gws_sema_v                                                                 offset gds
+ds_inc_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_u32                                 vaddr,    vdata                    offset gds
+ds_inc_u64                                 vaddr,    vdata                    offset gds
+ds_max_f32                                 vaddr,    vdata                    offset gds
+ds_max_f64                                 vaddr,    vdata                    offset gds
+ds_max_i32                                 vaddr,    vdata                    offset gds
+ds_max_i64                                 vaddr,    vdata                    offset gds
+ds_max_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_u32                                 vaddr,    vdata                    offset gds
+ds_max_u64                                 vaddr,    vdata                    offset gds
+ds_min_f32                                 vaddr,    vdata                    offset gds
+ds_min_f64                                 vaddr,    vdata                    offset gds
+ds_min_i32                                 vaddr,    vdata                    offset gds
+ds_min_i64                                 vaddr,    vdata                    offset gds
+ds_min_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_u32                                 vaddr,    vdata                    offset gds
+ds_min_u64                                 vaddr,    vdata                    offset gds
+ds_mskor_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_nop
+ds_or_b32                                  vaddr,    vdata                    offset gds
+ds_or_b64                                  vaddr,    vdata                    offset gds
+ds_or_rtn_b32                  vdst,       vaddr,    vdata                    offset gds
+ds_or_rtn_b64                  vdst,       vaddr,    vdata                    offset gds
+ds_permute_b32                 vdst,       vaddr,    vdata                    offset
+ds_read2_b32                   vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2_b64                   vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b32               vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b64               vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read_addtid_b32             vdst                                           offset gds
+ds_read_b128                   vdst,       vaddr                              offset gds
+ds_read_b32                    vdst,       vaddr                              offset gds
+ds_read_b64                    vdst,       vaddr                              offset gds
+ds_read_b96                    vdst,       vaddr                              offset gds
+ds_read_i16                    vdst,       vaddr                              offset gds
+ds_read_i8                     vdst,       vaddr                              offset gds
+ds_read_i8_d16                 vdst,       vaddr                              offset gds
+ds_read_i8_d16_hi              vdst,       vaddr                              offset gds
+ds_read_u16                    vdst,       vaddr                              offset gds
+ds_read_u16_d16                vdst,       vaddr                              offset gds
+ds_read_u16_d16_hi             vdst,       vaddr                              offset gds
+ds_read_u8                     vdst,       vaddr                              offset gds
+ds_read_u8_d16                 vdst,       vaddr                              offset gds
+ds_read_u8_d16_hi              vdst,       vaddr                              offset gds
+ds_rsub_rtn_u32                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_rtn_u64                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_u32                                vaddr,    vdata                    offset gds
+ds_rsub_u64                                vaddr,    vdata                    offset gds
+ds_sub_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_u32                                 vaddr,    vdata                    offset gds
+ds_sub_u64                                 vaddr,    vdata                    offset gds
+ds_swizzle_b32                 vdst,       vaddr                              pattern gds
+ds_wrap_rtn_b32                vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_write2_b32                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2_b64                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b32                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b64                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write_addtid_b32                        vdata                              offset gds
+ds_write_b128                              vaddr,    vdata                    offset gds
+ds_write_b16                               vaddr,    vdata                    offset gds
+ds_write_b16_d16_hi                        vaddr,    vdata                    offset gds
+ds_write_b32                               vaddr,    vdata                    offset gds
+ds_write_b64                               vaddr,    vdata                    offset gds
+ds_write_b8                                vaddr,    vdata                    offset gds
+ds_write_b8_d16_hi                         vaddr,    vdata                    offset gds
+ds_write_b96                               vaddr,    vdata                    offset gds
+ds_wrxchg2_rtn_b32             vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2_rtn_b64             vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b32         vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b64         vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg_rtn_b32              vdst,       vaddr,    vdata                    offset gds
+ds_wrxchg_rtn_b64              vdst,       vaddr,    vdata                    offset gds
+ds_xor_b32                                 vaddr,    vdata                    offset gds
+ds_xor_b64                                 vaddr,    vdata                    offset gds
+ds_xor_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+
+
+

FLAT

+
+INSTRUCTION                    DST           SRC0      SRC1         SRC2           MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+flat_atomic_add                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_add_f64            vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_add_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_and                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_and_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_cmpswap            vdst:opt,     vaddr,    vdata:b32x2                 offset12 glc slc
+flat_atomic_cmpswap_x2         vdst:opt,     vaddr,    vdata:b64x2                 offset12 glc slc
+flat_atomic_dec                vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_dec_x2             vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_inc                vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_inc_x2             vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_max_f64            vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_min_f64            vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_or                 vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_or_x2              vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_smax               vdst:opt:i32, vaddr,    vdata:i32                   offset12 glc slc
+flat_atomic_smax_x2            vdst:opt:i64, vaddr,    vdata:i64                   offset12 glc slc
+flat_atomic_smin               vdst:opt:i32, vaddr,    vdata:i32                   offset12 glc slc
+flat_atomic_smin_x2            vdst:opt:i64, vaddr,    vdata:i64                   offset12 glc slc
+flat_atomic_sub                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_sub_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_swap               vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_swap_x2            vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_umax               vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_umax_x2            vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_umin               vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_umin_x2            vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_xor                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_xor_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_load_dword                vdst,         vaddr                                 offset12 glc slc
+flat_load_dwordx2              vdst,         vaddr                                 offset12 glc slc
+flat_load_dwordx3              vdst,         vaddr                                 offset12 glc slc
+flat_load_dwordx4              vdst,         vaddr                                 offset12 glc slc
+flat_load_sbyte                vdst,         vaddr                                 offset12 glc slc
+flat_load_sbyte_d16            vdst,         vaddr                                 offset12 glc slc
+flat_load_sbyte_d16_hi         vdst,         vaddr                                 offset12 glc slc
+flat_load_short_d16            vdst,         vaddr                                 offset12 glc slc
+flat_load_short_d16_hi         vdst,         vaddr                                 offset12 glc slc
+flat_load_sshort               vdst,         vaddr                                 offset12 glc slc
+flat_load_ubyte                vdst,         vaddr                                 offset12 glc slc
+flat_load_ubyte_d16            vdst,         vaddr                                 offset12 glc slc
+flat_load_ubyte_d16_hi         vdst,         vaddr                                 offset12 glc slc
+flat_load_ushort               vdst,         vaddr                                 offset12 glc slc
+flat_store_byte                              vaddr,    vdata                       offset12 glc slc
+flat_store_byte_d16_hi                       vaddr,    vdata                       offset12 glc slc
+flat_store_dword                             vaddr,    vdata                       offset12 glc slc
+flat_store_dwordx2                           vaddr,    vdata                       offset12 glc slc
+flat_store_dwordx3                           vaddr,    vdata                       offset12 glc slc
+flat_store_dwordx4                           vaddr,    vdata                       offset12 glc slc
+flat_store_short                             vaddr,    vdata                       offset12 glc slc
+flat_store_short_d16_hi                      vaddr,    vdata                       offset12 glc slc
+global_atomic_add              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_add_f32          vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_add_f64          vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_add_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_and              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_and_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_cmpswap          vdst:opt,     vaddr,    vdata:b32x2, saddr          offset13s glc slc
+global_atomic_cmpswap_x2       vdst:opt,     vaddr,    vdata:b64x2, saddr          offset13s glc slc
+global_atomic_dec              vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_dec_x2           vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_inc              vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_inc_x2           vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_max_f64          vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_min_f64          vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_or               vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_or_x2            vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_pk_add_f16       vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_smax             vdst:opt:i32, vaddr,    vdata:i32,   saddr          offset13s glc slc
+global_atomic_smax_x2          vdst:opt:i64, vaddr,    vdata:i64,   saddr          offset13s glc slc
+global_atomic_smin             vdst:opt:i32, vaddr,    vdata:i32,   saddr          offset13s glc slc
+global_atomic_smin_x2          vdst:opt:i64, vaddr,    vdata:i64,   saddr          offset13s glc slc
+global_atomic_sub              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_sub_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_swap             vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_swap_x2          vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_umax             vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_umax_x2          vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_umin             vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_umin_x2          vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_xor              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_xor_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_load_dword              vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_dwordx2            vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_dwordx3            vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_dwordx4            vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sbyte              vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sbyte_d16          vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sbyte_d16_hi       vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_short_d16          vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_short_d16_hi       vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sshort             vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ubyte              vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ubyte_d16          vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ubyte_d16_hi       vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ushort             vdst,         vaddr,    saddr                       offset13s glc slc
+global_store_byte                            vaddr,    vdata,       saddr          offset13s glc slc
+global_store_byte_d16_hi                     vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dword                           vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dwordx2                         vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dwordx3                         vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dwordx4                         vaddr,    vdata,       saddr          offset13s glc slc
+global_store_short                           vaddr,    vdata,       saddr          offset13s glc slc
+global_store_short_d16_hi                    vaddr,    vdata,       saddr          offset13s glc slc
+scratch_load_dword             vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_dwordx2           vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_dwordx3           vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_dwordx4           vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sbyte             vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sbyte_d16         vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sbyte_d16_hi      vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_short_d16         vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_short_d16_hi      vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sshort            vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ubyte             vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ubyte_d16         vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ubyte_d16_hi      vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ushort            vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_store_byte                           vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_byte_d16_hi                    vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dword                          vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dwordx2                        vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dwordx3                        vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dwordx4                        vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_short                          vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_short_d16_hi                   vaddr,    vdata,       saddr          offset13s glc slc
+
+
+

MIMG

+
+INSTRUCTION                DST      SRC0       SRC1     SRC2          MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————————
+image_atomic_add                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_and                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_cmpswap                vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_dec                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_inc                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_or                     vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_smax                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_smin                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_sub                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_swap                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_umax                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_umin                   vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_atomic_xor                    vdata:dst, vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_get_resinfo          vdst,    vaddr,     srsrc                  dmask unorm glc slc a16 lwe da
+image_load                 vdst,    vaddr,     srsrc                  dmask unorm glc slc a16 lwe da d16
+image_load_mip             vdst,    vaddr,     srsrc                  dmask unorm glc slc a16 lwe da d16
+image_load_mip_pck         vdst,    vaddr,     srsrc                  dmask unorm glc slc a16 lwe da
+image_load_mip_pck_sgn     vdst,    vaddr,     srsrc                  dmask unorm glc slc a16 lwe da
+image_load_pck             vdst,    vaddr,     srsrc                  dmask unorm glc slc a16 lwe da
+image_load_pck_sgn         vdst,    vaddr,     srsrc                  dmask unorm glc slc a16 lwe da
+image_sample               vdst,    vaddr,     srsrc,   ssamp         dmask unorm glc slc a16 lwe da d16
+image_store                         vdata,     vaddr,   srsrc         dmask unorm glc slc a16 lwe da d16
+image_store_mip                     vdata,     vaddr,   srsrc         dmask unorm glc slc a16 lwe da d16
+image_store_mip_pck                 vdata,     vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+image_store_pck                     vdata,     vaddr,   srsrc         dmask unorm glc slc a16 lwe da
+
+
+

MTBUF

+
+INSTRUCTION                     DST   SRC0   SRC1   SRC2    SRC3      MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————
+tbuffer_load_format_d16_x       vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xy      vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xyz     vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xyzw    vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_x           vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xy          vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xyz         vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xyzw        vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_x            vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xy           vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyz          vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyzw         vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_x                vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xy               vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xyz              vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xyzw             vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+
+
+

MUBUF

+
+INSTRUCTION                   DST   SRC0             SRC1    SRC2    SRC3     MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————————————
+buffer_atomic_add                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_add_f32               vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_add_f64               vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_add_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap               vdata:dst:b32x2, vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap_x2            vdata:dst:b64x2, vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec                   vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec_x2                vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc                   vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc_x2                vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_max_f64               vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_min_f64               vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or                    vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or_x2                 vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_pk_add_f16            vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax                  vdata:dst:i32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax_x2               vdata:dst:i64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin                  vdata:dst:i32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin_x2               vdata:dst:i64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap                  vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap_x2               vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax                  vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax_x2               vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin                  vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin_x2               vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_invl2
+buffer_load_dword             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_dwordx2           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_dwordx3           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_dwordx4           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_hi_x   vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_x      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xy     vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xyz    vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xyzw   vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_x          vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_format_xy         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_xyz        vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_xyzw       vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_sbyte             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_sbyte_d16         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_sbyte_d16_hi      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_short_d16         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_short_d16_hi      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_sshort            vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_ubyte             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_ubyte_d16         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_ubyte_d16_hi      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_ushort            vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_store_byte                   vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_byte_d16_hi            vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dword                  vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx2                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx3                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx4                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_hi_x        vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_x           vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xy          vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyz         vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyzw        vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_x               vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xy              vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyz             vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyzw            vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_lds_dword              srsrc,           soffset                  offset12 lds
+buffer_store_short                  vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_short_d16_hi           vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_wbinvl1
+buffer_wbinvl1_vol
+buffer_wbl2
+
+
+

SMEM

+
+INSTRUCTION                    DST       SRC0             SRC1      SRC2           MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————
+s_atc_probe                              probe,           sbase,    soffset
+s_atc_probe_buffer                       probe,           sbase,    soffset
+s_atomic_add                             sdata:dst,       sbase,    soffset        glc
+s_atomic_add_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_and                             sdata:dst,       sbase,    soffset        glc
+s_atomic_and_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_cmpswap                         sdata:dst:b32x2, sbase,    soffset        glc
+s_atomic_cmpswap_x2                      sdata:dst:b64x2, sbase,    soffset        glc
+s_atomic_dec                             sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_dec_x2                          sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_inc                             sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_inc_x2                          sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_or                              sdata:dst,       sbase,    soffset        glc
+s_atomic_or_x2                           sdata:dst,       sbase,    soffset        glc
+s_atomic_smax                            sdata:dst:i32,   sbase,    soffset        glc
+s_atomic_smax_x2                         sdata:dst:i64,   sbase,    soffset        glc
+s_atomic_smin                            sdata:dst:i32,   sbase,    soffset        glc
+s_atomic_smin_x2                         sdata:dst:i64,   sbase,    soffset        glc
+s_atomic_sub                             sdata:dst,       sbase,    soffset        glc
+s_atomic_sub_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_swap                            sdata:dst,       sbase,    soffset        glc
+s_atomic_swap_x2                         sdata:dst,       sbase,    soffset        glc
+s_atomic_umax                            sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_umax_x2                         sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_umin                            sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_umin_x2                         sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_xor                             sdata:dst,       sbase,    soffset        glc
+s_atomic_xor_x2                          sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_add                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_add_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_and                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_and_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_cmpswap                  sdata:dst:b32x2, sbase,    soffset        glc
+s_buffer_atomic_cmpswap_x2               sdata:dst:b64x2, sbase,    soffset        glc
+s_buffer_atomic_dec                      sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_dec_x2                   sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_inc                      sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_inc_x2                   sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_or                       sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_or_x2                    sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_smax                     sdata:dst:i32,   sbase,    soffset        glc
+s_buffer_atomic_smax_x2                  sdata:dst:i64,   sbase,    soffset        glc
+s_buffer_atomic_smin                     sdata:dst:i32,   sbase,    soffset        glc
+s_buffer_atomic_smin_x2                  sdata:dst:i64,   sbase,    soffset        glc
+s_buffer_atomic_sub                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_sub_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_swap                     sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_swap_x2                  sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_umax                     sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_umax_x2                  sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_umin                     sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_umin_x2                  sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_xor                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_xor_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_load_dword            sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx16         sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx2          sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx4          sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx8          sdst,     sbase,           soffset                  glc
+s_buffer_store_dword                     sdata,           sbase,    soffset        glc
+s_buffer_store_dwordx2                   sdata,           sbase,    soffset        glc
+s_buffer_store_dwordx4                   sdata,           sbase,    soffset        glc
+s_dcache_discard                         sbase,           soffset
+s_dcache_discard_x2                      sbase,           soffset
+s_dcache_inv
+s_dcache_inv_vol
+s_dcache_wb
+s_dcache_wb_vol
+s_load_dword                   sdst,     sbase,           soffset                  glc
+s_load_dwordx16                sdst,     sbase,           soffset                  glc
+s_load_dwordx2                 sdst,     sbase,           soffset                  glc
+s_load_dwordx4                 sdst,     sbase,           soffset                  glc
+s_load_dwordx8                 sdst,     sbase,           soffset                  glc
+s_memrealtime                  sdst:b64
+s_memtime                      sdst:b64
+s_scratch_load_dword           sdst,     sbase,           soffset                  glc
+s_scratch_load_dwordx2         sdst,     sbase,           soffset                  glc
+s_scratch_load_dwordx4         sdst,     sbase,           soffset                  glc
+s_scratch_store_dword                    sdata,           sbase,    soffset        glc
+s_scratch_store_dwordx2                  sdata,           sbase,    soffset        glc
+s_scratch_store_dwordx4                  sdata,           sbase,    soffset        glc
+s_store_dword                            sdata,           sbase,    soffset        glc
+s_store_dwordx2                          sdata,           sbase,    soffset        glc
+s_store_dwordx4                          sdata,           sbase,    soffset        glc
+
+
+

SOP1

+
INSTRUCTION                    DST       SRC
+———————————————————————————————————————————————————
+s_abs_i32                      sdst,     ssrc
+s_and_saveexec_b64             sdst,     ssrc
+s_andn1_saveexec_b64           sdst,     ssrc
+s_andn1_wrexec_b64             sdst,     ssrc
+s_andn2_saveexec_b64           sdst,     ssrc
+s_andn2_wrexec_b64             sdst,     ssrc
+s_bcnt0_i32_b32                sdst,     ssrc
+s_bcnt0_i32_b64                sdst,     ssrc
+s_bcnt1_i32_b32                sdst,     ssrc
+s_bcnt1_i32_b64                sdst,     ssrc
+s_bitreplicate_b64_b32         sdst,     ssrc
+s_bitset0_b32                  sdst,     ssrc
+s_bitset0_b64                  sdst,     ssrc:b32
+s_bitset1_b32                  sdst,     ssrc
+s_bitset1_b64                  sdst,     ssrc:b32
+s_brev_b32                     sdst,     ssrc
+s_brev_b64                     sdst,     ssrc
+s_cbranch_join                           ssrc
+s_cmov_b32                     sdst,     ssrc
+s_cmov_b64                     sdst,     ssrc
+s_ff0_i32_b32                  sdst,     ssrc
+s_ff0_i32_b64                  sdst,     ssrc
+s_ff1_i32_b32                  sdst,     ssrc
+s_ff1_i32_b64                  sdst,     ssrc
+s_flbit_i32                    sdst,     ssrc
+s_flbit_i32_b32                sdst,     ssrc
+s_flbit_i32_b64                sdst,     ssrc
+s_flbit_i32_i64                sdst,     ssrc
+s_getpc_b64                    sdst
+s_mov_b32                      sdst,     ssrc
+s_mov_b64                      sdst,     ssrc
+s_movreld_b32                  sdst,     ssrc
+s_movreld_b64                  sdst,     ssrc
+s_movrels_b32                  sdst,     ssrc
+s_movrels_b64                  sdst,     ssrc
+s_nand_saveexec_b64            sdst,     ssrc
+s_nor_saveexec_b64             sdst,     ssrc
+s_not_b32                      sdst,     ssrc
+s_not_b64                      sdst,     ssrc
+s_or_saveexec_b64              sdst,     ssrc
+s_orn1_saveexec_b64            sdst,     ssrc
+s_orn2_saveexec_b64            sdst,     ssrc
+s_quadmask_b32                 sdst,     ssrc
+s_quadmask_b64                 sdst,     ssrc
+s_rfe_b64                                ssrc
+s_set_gpr_idx_idx                        ssrc
+s_setpc_b64                              ssrc
+s_sext_i32_i16                 sdst,     ssrc
+s_sext_i32_i8                  sdst,     ssrc
+s_swappc_b64                   sdst,     ssrc
+s_wqm_b32                      sdst,     ssrc
+s_wqm_b64                      sdst,     ssrc
+s_xnor_saveexec_b64            sdst,     ssrc
+s_xor_saveexec_b64             sdst,     ssrc
+
+
+

SOP2

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+s_absdiff_i32                  sdst,     ssrc0,     ssrc1
+s_add_i32                      sdst,     ssrc0,     ssrc1
+s_add_u32                      sdst,     ssrc0,     ssrc1
+s_addc_u32                     sdst,     ssrc0,     ssrc1
+s_and_b32                      sdst,     ssrc0,     ssrc1
+s_and_b64                      sdst,     ssrc0,     ssrc1
+s_andn2_b32                    sdst,     ssrc0,     ssrc1
+s_andn2_b64                    sdst,     ssrc0,     ssrc1
+s_ashr_i32                     sdst,     ssrc0,     ssrc1:u32
+s_ashr_i64                     sdst,     ssrc0,     ssrc1:u32
+s_bfe_i32                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_i64                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_u32                      sdst,     ssrc0,     ssrc1
+s_bfe_u64                      sdst,     ssrc0,     ssrc1:u32
+s_bfm_b32                      sdst,     ssrc0,     ssrc1
+s_bfm_b64                      sdst,     ssrc0:b32, ssrc1:b32
+s_cbranch_g_fork                         ssrc0,     ssrc1
+s_cselect_b32                  sdst,     ssrc0,     ssrc1
+s_cselect_b64                  sdst,     ssrc0,     ssrc1
+s_lshl1_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl2_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl3_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl4_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshl_b64                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b64                     sdst,     ssrc0,     ssrc1:u32
+s_max_i32                      sdst,     ssrc0,     ssrc1
+s_max_u32                      sdst,     ssrc0,     ssrc1
+s_min_i32                      sdst,     ssrc0,     ssrc1
+s_min_u32                      sdst,     ssrc0,     ssrc1
+s_mul_hi_i32                   sdst,     ssrc0,     ssrc1
+s_mul_hi_u32                   sdst,     ssrc0,     ssrc1
+s_mul_i32                      sdst,     ssrc0,     ssrc1
+s_nand_b32                     sdst,     ssrc0,     ssrc1
+s_nand_b64                     sdst,     ssrc0,     ssrc1
+s_nor_b32                      sdst,     ssrc0,     ssrc1
+s_nor_b64                      sdst,     ssrc0,     ssrc1
+s_or_b32                       sdst,     ssrc0,     ssrc1
+s_or_b64                       sdst,     ssrc0,     ssrc1
+s_orn2_b32                     sdst,     ssrc0,     ssrc1
+s_orn2_b64                     sdst,     ssrc0,     ssrc1
+s_pack_hh_b32_b16              sdst,     ssrc0:b32, ssrc1:b32
+s_pack_lh_b32_b16              sdst,     ssrc0,     ssrc1:b32
+s_pack_ll_b32_b16              sdst,     ssrc0,     ssrc1
+s_rfe_restore_b64                        ssrc0,     ssrc1:b32
+s_sub_i32                      sdst,     ssrc0,     ssrc1
+s_sub_u32                      sdst,     ssrc0,     ssrc1
+s_subb_u32                     sdst,     ssrc0,     ssrc1
+s_xnor_b32                     sdst,     ssrc0,     ssrc1
+s_xnor_b64                     sdst,     ssrc0,     ssrc1
+s_xor_b32                      sdst,     ssrc0,     ssrc1
+s_xor_b64                      sdst,     ssrc0,     ssrc1
+
+
+

SOPC

+
INSTRUCTION                    SRC0      SRC1
+———————————————————————————————————————————————————
+s_bitcmp0_b32                  ssrc0,    ssrc1
+s_bitcmp0_b64                  ssrc0,    ssrc1:u32
+s_bitcmp1_b32                  ssrc0,    ssrc1
+s_bitcmp1_b64                  ssrc0,    ssrc1:u32
+s_cmp_eq_i32                   ssrc0,    ssrc1
+s_cmp_eq_u32                   ssrc0,    ssrc1
+s_cmp_eq_u64                   ssrc0,    ssrc1
+s_cmp_ge_i32                   ssrc0,    ssrc1
+s_cmp_ge_u32                   ssrc0,    ssrc1
+s_cmp_gt_i32                   ssrc0,    ssrc1
+s_cmp_gt_u32                   ssrc0,    ssrc1
+s_cmp_le_i32                   ssrc0,    ssrc1
+s_cmp_le_u32                   ssrc0,    ssrc1
+s_cmp_lg_i32                   ssrc0,    ssrc1
+s_cmp_lg_u32                   ssrc0,    ssrc1
+s_cmp_lg_u64                   ssrc0,    ssrc1
+s_cmp_lt_i32                   ssrc0,    ssrc1
+s_cmp_lt_u32                   ssrc0,    ssrc1
+s_set_gpr_idx_on               ssrc,     imask
+s_setvskip                     ssrc0,    ssrc1
+
+
+

SOPK

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+s_addk_i32                     sdst,     imm16
+s_call_b64                     sdst,     label
+s_cbranch_i_fork                         ssrc,     label
+s_cmovk_i32                    sdst,     imm16
+s_cmpk_eq_i32                            ssrc,     imm16
+s_cmpk_eq_u32                            ssrc,     imm16
+s_cmpk_ge_i32                            ssrc,     imm16
+s_cmpk_ge_u32                            ssrc,     imm16
+s_cmpk_gt_i32                            ssrc,     imm16
+s_cmpk_gt_u32                            ssrc,     imm16
+s_cmpk_le_i32                            ssrc,     imm16
+s_cmpk_le_u32                            ssrc,     imm16
+s_cmpk_lg_i32                            ssrc,     imm16
+s_cmpk_lg_u32                            ssrc,     imm16
+s_cmpk_lt_i32                            ssrc,     imm16
+s_cmpk_lt_u32                            ssrc,     imm16
+s_getreg_b32                   sdst,     hwreg
+s_movk_i32                     sdst,     imm16
+s_mulk_i32                     sdst,     imm16
+s_setreg_b32                   hwreg,    ssrc
+s_setreg_imm32_b32             hwreg,    simm32
+
+
+

SOPP

+
INSTRUCTION                    SRC
+—————————————————————————————————————————
+s_barrier
+s_branch                       label
+s_cbranch_cdbgsys              label
+s_cbranch_cdbgsys_and_user     label
+s_cbranch_cdbgsys_or_user      label
+s_cbranch_cdbguser             label
+s_cbranch_execnz               label
+s_cbranch_execz                label
+s_cbranch_scc0                 label
+s_cbranch_scc1                 label
+s_cbranch_vccnz                label
+s_cbranch_vccz                 label
+s_decperflevel                 imm16
+s_endpgm
+s_endpgm_ordered_ps_done
+s_endpgm_saved
+s_icache_inv
+s_incperflevel                 imm16
+s_nop                          imm16
+s_sendmsg                      msg
+s_sendmsghalt                  msg
+s_set_gpr_idx_mode             imask
+s_set_gpr_idx_off
+s_sethalt                      imm16
+s_setkill                      imm16
+s_setprio                      imm16
+s_sleep                        imm16
+s_trap                         imm16
+s_ttracedata
+s_waitcnt                      waitcnt
+s_wakeup
+
+
+

VOP1

+
INSTRUCTION                         DST        SRC            MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————
+v_accvgpr_mov_b32                   vdst,      vsrc
+v_bfrev_b32                         vdst,      src
+v_bfrev_b32_dpp                     vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_bfrev_b32_sdwa                    vdst,      src:m          dst_sel dst_unused src0_sel
+v_ceil_f16                          vdst,      src
+v_ceil_f16_dpp                      vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ceil_f16_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_ceil_f32                          vdst,      src
+v_ceil_f32_dpp                      vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ceil_f32_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_ceil_f64                          vdst,      src
+v_ceil_f64_dpp                      vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_clrexcp
+v_cos_f16                           vdst,      src
+v_cos_f16_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cos_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cos_f32                           vdst,      src
+v_cos_f32_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cos_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_f32                       vdst,      src
+v_cvt_f16_f32_dpp                   vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_f32_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_i16                       vdst,      src
+v_cvt_f16_i16_dpp                   vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_i16_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_u16                       vdst,      src
+v_cvt_f16_u16_dpp                   vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_u16_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_f16                       vdst,      src
+v_cvt_f32_f16_dpp                   vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_f16_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_f64                       vdst,      src
+v_cvt_f32_f64_dpp                   vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_i32                       vdst,      src
+v_cvt_f32_i32_dpp                   vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_i32_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_u32                       vdst,      src
+v_cvt_f32_u32_dpp                   vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_u32_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte0                    vdst,      src
+v_cvt_f32_ubyte0_dpp                vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte0_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte1                    vdst,      src
+v_cvt_f32_ubyte1_dpp                vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte1_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte2                    vdst,      src
+v_cvt_f32_ubyte2_dpp                vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte2_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte3                    vdst,      src
+v_cvt_f32_ubyte3_dpp                vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte3_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f64_f32                       vdst,      src
+v_cvt_f64_i32                       vdst,      src
+v_cvt_f64_u32                       vdst,      src
+v_cvt_flr_i32_f32                   vdst,      src
+v_cvt_flr_i32_f32_dpp               vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_flr_i32_f32_sdwa              vdst,      src:m          dst_sel dst_unused src0_sel
+v_cvt_i16_f16                       vdst,      src
+v_cvt_i16_f16_dpp                   vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_i16_f16_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_i32_f32                       vdst,      src
+v_cvt_i32_f32_dpp                   vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_i32_f32_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_i32_f64                       vdst,      src
+v_cvt_i32_f64_dpp                   vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_cvt_norm_i16_f16                  vdst,      src
+v_cvt_norm_i16_f16_dpp              vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_norm_i16_f16_sdwa             vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_norm_u16_f16                  vdst,      src
+v_cvt_norm_u16_f16_dpp              vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_norm_u16_f16_sdwa             vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_off_f32_i4                    vdst,      src
+v_cvt_off_f32_i4_dpp                vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_off_f32_i4_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_rpi_i32_f32                   vdst,      src
+v_cvt_rpi_i32_f32_dpp               vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_rpi_i32_f32_sdwa              vdst,      src:m          dst_sel dst_unused src0_sel
+v_cvt_u16_f16                       vdst,      src
+v_cvt_u16_f16_dpp                   vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_u16_f16_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_u32_f32                       vdst,      src
+v_cvt_u32_f32_dpp                   vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cvt_u32_f32_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_u32_f64                       vdst,      src
+v_cvt_u32_f64_dpp                   vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_exp_f16                           vdst,      src
+v_exp_f16_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_exp_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_exp_f32                           vdst,      src
+v_exp_f32_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_exp_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_exp_legacy_f32                    vdst,      src
+v_exp_legacy_f32_dpp                vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_exp_legacy_f32_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_ffbh_i32                          vdst,      src
+v_ffbh_i32_dpp                      vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ffbh_i32_sdwa                     vdst,      src:m          dst_sel dst_unused src0_sel
+v_ffbh_u32                          vdst,      src
+v_ffbh_u32_dpp                      vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ffbh_u32_sdwa                     vdst,      src:m          dst_sel dst_unused src0_sel
+v_ffbl_b32                          vdst,      src
+v_ffbl_b32_dpp                      vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ffbl_b32_sdwa                     vdst,      src:m          dst_sel dst_unused src0_sel
+v_floor_f16                         vdst,      src
+v_floor_f16_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_floor_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_floor_f32                         vdst,      src
+v_floor_f32_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_floor_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_floor_f64                         vdst,      src
+v_floor_f64_dpp                     vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_fract_f16                         vdst,      src
+v_fract_f16_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_fract_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_fract_f32                         vdst,      src
+v_fract_f32_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_fract_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_fract_f64                         vdst,      src
+v_fract_f64_dpp                     vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_frexp_exp_i16_f16                 vdst,      src
+v_frexp_exp_i16_f16_dpp             vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_frexp_exp_i16_f16_sdwa            vdst,      src:m          dst_sel dst_unused src0_sel
+v_frexp_exp_i32_f32                 vdst,      src
+v_frexp_exp_i32_f32_dpp             vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_frexp_exp_i32_f32_sdwa            vdst,      src:m          dst_sel dst_unused src0_sel
+v_frexp_exp_i32_f64                 vdst,      src
+v_frexp_exp_i32_f64_dpp             vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_frexp_mant_f16                    vdst,      src
+v_frexp_mant_f16_dpp                vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_frexp_mant_f16_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_frexp_mant_f32                    vdst,      src
+v_frexp_mant_f32_dpp                vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_frexp_mant_f32_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_frexp_mant_f64                    vdst,      src
+v_frexp_mant_f64_dpp                vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+v_log_f16                           vdst,      src
+v_log_f16_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_log_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_log_f32                           vdst,      src
+v_log_f32_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_log_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_log_legacy_f32                    vdst,      src
+v_log_legacy_f32_dpp                vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_log_legacy_f32_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_mov_b32                           vdst,      src
+v_mov_b32_dpp                       vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mov_b32_sdwa                      vdst,      src:m          dst_sel dst_unused src0_sel
+v_nop
+v_not_b32                           vdst,      src
+v_not_b32_dpp                       vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_not_b32_sdwa                      vdst,      src:m          dst_sel dst_unused src0_sel
+v_rcp_f16                           vdst,      src
+v_rcp_f16_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_rcp_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rcp_f32                           vdst,      src
+v_rcp_f32_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_rcp_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rcp_f64                           vdst,      src
+v_rcp_iflag_f32                     vdst,      src
+v_rcp_iflag_f32_dpp                 vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_rcp_iflag_f32_sdwa                vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_readfirstlane_b32                 sdst,      vsrc
+v_rndne_f16                         vdst,      src
+v_rndne_f16_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_rndne_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rndne_f32                         vdst,      src
+v_rndne_f32_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_rndne_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rndne_f64                         vdst,      src
+v_rsq_f16                           vdst,      src
+v_rsq_f16_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_rsq_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rsq_f32                           vdst,      src
+v_rsq_f32_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_rsq_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rsq_f64                           vdst,      src
+v_sat_pk_u8_i16                     vdst:u8x4, src
+v_sat_pk_u8_i16_dpp                 vdst:u8x4, vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sat_pk_u8_i16_sdwa                vdst:u8x4, src:m          dst_sel dst_unused src0_sel
+v_screen_partition_4se_b32          vdst,      src
+v_screen_partition_4se_b32_dpp      vdst,      vsrc           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_screen_partition_4se_b32_sdwa     vdst,      src:m          dst_sel dst_unused src0_sel
+v_sin_f16                           vdst,      src
+v_sin_f16_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sin_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sin_f32                           vdst,      src
+v_sin_f32_dpp                       vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sin_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f16                          vdst,      src
+v_sqrt_f16_dpp                      vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sqrt_f16_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f32                          vdst,      src
+v_sqrt_f32_dpp                      vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sqrt_f32_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f64                          vdst,      src
+v_swap_b32                          vdst,      vsrc
+v_trunc_f16                         vdst,      src
+v_trunc_f16_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_trunc_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_trunc_f32                         vdst,      src
+v_trunc_f32_dpp                     vdst,      vsrc:m         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_trunc_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_trunc_f64                         vdst,      src
+v_trunc_f64_dpp                     vdst,      vsrc:m         dpp64_ctrl row_mask bank_mask bound_ctrl
+
+
+

VOP2

+
INSTRUCTION            DST0  DST1 SRC0         SRC1        SRC2    MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_co_u32           vdst, vcc, src0,        vsrc1
+v_add_co_u32_dpp       vdst, vcc, vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_add_co_u32_sdwa      vdst, vcc, src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_add_f16              vdst,      src0,        vsrc1
+v_add_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_add_f16_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_add_f32              vdst,      src0,        vsrc1
+v_add_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_add_f32_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_add_u16              vdst,      src0,        vsrc1
+v_add_u16_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_add_u16_sdwa         vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_add_u32              vdst,      src0,        vsrc1
+v_add_u32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_add_u32_sdwa         vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_addc_co_u32          vdst, vcc, src0,        vsrc1,      vcc
+v_addc_co_u32_dpp      vdst, vcc, vsrc0,       vsrc1,      vcc     dpp32_ctrl row_mask bank_mask bound_ctrl
+v_addc_co_u32_sdwa     vdst, vcc, src0:m,      src1:m,     vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_and_b32              vdst,      src0,        vsrc1
+v_and_b32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_and_b32_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_ashrrev_i16          vdst,      src0:u16,    vsrc1
+v_ashrrev_i16_dpp      vdst,      vsrc0:u16,   vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ashrrev_i16_sdwa     vdst,      src0:m:u16,  src1:m              dst_sel dst_unused src0_sel src1_sel
+v_ashrrev_i32          vdst,      src0:u32,    vsrc1
+v_ashrrev_i32_dpp      vdst,      vsrc0:u32,   vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ashrrev_i32_sdwa     vdst,      src0:m:u32,  src1:m              dst_sel dst_unused src0_sel src1_sel
+v_cndmask_b32          vdst,      src0,        vsrc1,      vcc
+v_cndmask_b32_dpp      vdst,      vsrc0,       vsrc1,      vcc     dpp32_ctrl row_mask bank_mask bound_ctrl
+v_cndmask_b32_sdwa     vdst,      src0:m,      src1:m,     vcc     dst_sel dst_unused src0_sel src1_sel
+v_dot2c_f32_f16        vdst,      src0:f16x2,  vsrc1:f16x2
+v_dot2c_f32_f16_dpp    vdst,      vsrc0:f16x2, vsrc1:f16x2         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_dot2c_i32_i16        vdst,      src0:i16x2,  vsrc1:i16x2
+v_dot2c_i32_i16_dpp    vdst,      vsrc0:i16x2, vsrc1:i16x2         dpp32_ctrl row_mask bank_mask bound_ctrl
+v_dot4c_i32_i8         vdst,      src0:i8x4,   vsrc1:i8x4
+v_dot4c_i32_i8_dpp     vdst,      vsrc0:i8x4,  vsrc1:i8x4          dpp32_ctrl row_mask bank_mask bound_ctrl
+v_dot8c_i32_i4         vdst,      src0:i4x8,   vsrc1:i4x8
+v_dot8c_i32_i4_dpp     vdst,      vsrc0:i4x8,  vsrc1:i4x8          dpp32_ctrl row_mask bank_mask bound_ctrl
+v_fmac_f32             vdst,      src0,        vsrc1
+v_fmac_f32_dpp         vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_fmac_f64             vdst,      src0,        vsrc1
+v_fmac_f64_dpp         vdst,      vsrc0:m,     vsrc1:m             dpp64_ctrl row_mask bank_mask bound_ctrl
+v_ldexp_f16            vdst,      src0,        vsrc1:i16
+v_ldexp_f16_dpp        vdst,      vsrc0:m,     vsrc1:i16           dpp32_ctrl row_mask bank_mask bound_ctrl
+v_ldexp_f16_sdwa       vdst,      src0:m,      src1:m:i16          clamp omod dst_sel dst_unused src0_sel src1_sel
+v_lshlrev_b16          vdst,      src0:u16,    vsrc1
+v_lshlrev_b16_dpp      vdst,      vsrc0:u16,   vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_lshlrev_b16_sdwa     vdst,      src0:m:u16,  src1:m              dst_sel dst_unused src0_sel src1_sel
+v_lshlrev_b32          vdst,      src0:u32,    vsrc1
+v_lshlrev_b32_dpp      vdst,      vsrc0:u32,   vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_lshlrev_b32_sdwa     vdst,      src0:m:u32,  src1:m              dst_sel dst_unused src0_sel src1_sel
+v_lshrrev_b16          vdst,      src0:u16,    vsrc1
+v_lshrrev_b16_dpp      vdst,      vsrc0:u16,   vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_lshrrev_b16_sdwa     vdst,      src0:m:u16,  src1:m              dst_sel dst_unused src0_sel src1_sel
+v_lshrrev_b32          vdst,      src0:u32,    vsrc1
+v_lshrrev_b32_dpp      vdst,      vsrc0:u32,   vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_lshrrev_b32_sdwa     vdst,      src0:m:u32,  src1:m              dst_sel dst_unused src0_sel src1_sel
+v_mac_f16              vdst,      src0,        vsrc1
+v_mac_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mac_f32              vdst,      src0,        vsrc1
+v_mac_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_madak_f16            vdst,      src0,        vsrc1,      simm32
+v_madak_f32            vdst,      src0,        vsrc1,      simm32
+v_madmk_f16            vdst,      src0,        simm32,     vsrc2
+v_madmk_f32            vdst,      src0,        simm32,     vsrc2
+v_max_f16              vdst,      src0,        vsrc1
+v_max_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_max_f16_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_max_f32              vdst,      src0,        vsrc1
+v_max_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_max_f32_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_max_i16              vdst,      src0,        vsrc1
+v_max_i16_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_max_i16_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_max_i32              vdst,      src0,        vsrc1
+v_max_i32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_max_i32_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_max_u16              vdst,      src0,        vsrc1
+v_max_u16_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_max_u16_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_max_u32              vdst,      src0,        vsrc1
+v_max_u32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_max_u32_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_min_f16              vdst,      src0,        vsrc1
+v_min_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_min_f16_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_min_f32              vdst,      src0,        vsrc1
+v_min_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_min_f32_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_min_i16              vdst,      src0,        vsrc1
+v_min_i16_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_min_i16_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_min_i32              vdst,      src0,        vsrc1
+v_min_i32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_min_i32_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_min_u16              vdst,      src0,        vsrc1
+v_min_u16_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_min_u16_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_min_u32              vdst,      src0,        vsrc1
+v_min_u32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_min_u32_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_mul_f16              vdst,      src0,        vsrc1
+v_mul_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mul_f16_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_f32              vdst,      src0,        vsrc1
+v_mul_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mul_f32_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_i32_i24       vdst,      src0,        vsrc1
+v_mul_hi_i32_i24_dpp   vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mul_hi_i32_i24_sdwa  vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_u32_u24       vdst,      src0,        vsrc1
+v_mul_hi_u32_u24_dpp   vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mul_hi_u32_u24_sdwa  vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_mul_i32_i24          vdst,      src0,        vsrc1
+v_mul_i32_i24_dpp      vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mul_i32_i24_sdwa     vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_mul_lo_u16           vdst,      src0,        vsrc1
+v_mul_lo_u16_dpp       vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mul_lo_u16_sdwa      vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_mul_u32_u24          vdst,      src0,        vsrc1
+v_mul_u32_u24_dpp      vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_mul_u32_u24_sdwa     vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_or_b32               vdst,      src0,        vsrc1
+v_or_b32_dpp           vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_or_b32_sdwa          vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_pk_fmac_f16          vdst,      src0,        vsrc1
+v_sub_co_u32           vdst, vcc, src0,        vsrc1
+v_sub_co_u32_dpp       vdst, vcc, vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sub_co_u32_sdwa      vdst, vcc, src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_f16              vdst,      src0,        vsrc1
+v_sub_f16_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sub_f16_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_sub_f32              vdst,      src0,        vsrc1
+v_sub_f32_dpp          vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sub_f32_sdwa         vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_sub_u16              vdst,      src0,        vsrc1
+v_sub_u16_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sub_u16_sdwa         vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_u32              vdst,      src0,        vsrc1
+v_sub_u32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_sub_u32_sdwa         vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_subb_co_u32          vdst, vcc, src0,        vsrc1,      vcc
+v_subb_co_u32_dpp      vdst, vcc, vsrc0,       vsrc1,      vcc     dpp32_ctrl row_mask bank_mask bound_ctrl
+v_subb_co_u32_sdwa     vdst, vcc, src0:m,      src1:m,     vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_subbrev_co_u32       vdst, vcc, src0,        vsrc1,      vcc
+v_subbrev_co_u32_dpp   vdst, vcc, vsrc0,       vsrc1,      vcc     dpp32_ctrl row_mask bank_mask bound_ctrl
+v_subbrev_co_u32_sdwa  vdst, vcc, src0:m,      src1:m,     vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_co_u32        vdst, vcc, src0,        vsrc1
+v_subrev_co_u32_dpp    vdst, vcc, vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_subrev_co_u32_sdwa   vdst, vcc, src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_f16           vdst,      src0,        vsrc1
+v_subrev_f16_dpp       vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_subrev_f16_sdwa      vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_subrev_f32           vdst,      src0,        vsrc1
+v_subrev_f32_dpp       vdst,      vsrc0:m,     vsrc1:m             dpp32_ctrl row_mask bank_mask bound_ctrl
+v_subrev_f32_sdwa      vdst,      src0:m,      src1:m              clamp omod dst_sel dst_unused src0_sel src1_sel
+v_subrev_u16           vdst,      src0,        vsrc1
+v_subrev_u16_dpp       vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_subrev_u16_sdwa      vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_u32           vdst,      src0,        vsrc1
+v_subrev_u32_dpp       vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_subrev_u32_sdwa      vdst,      src0:m,      src1:m              clamp dst_sel dst_unused src0_sel src1_sel
+v_xnor_b32             vdst,      src0,        vsrc1
+v_xnor_b32_dpp         vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_xnor_b32_sdwa        vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+v_xor_b32              vdst,      src0,        vsrc1
+v_xor_b32_dpp          vdst,      vsrc0,       vsrc1               dpp32_ctrl row_mask bank_mask bound_ctrl
+v_xor_b32_sdwa         vdst,      src0:m,      src1:m              dst_sel dst_unused src0_sel src1_sel
+
+
+

VOP3

+
INSTRUCTION                     DST0        DST1     SRC0        SRC1        SRC2         MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add3_u32                      vdst,                src0,       src1,       src2
+v_add_co_u32_e64                vdst,       sdst,    src0,       src1                     clamp
+v_add_f16_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_add_f32_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_add_f64                       vdst,                src0:m,     src1:m                   clamp omod
+v_add_i16                       vdst,                src0,       src1                     op_sel clamp
+v_add_i32                       vdst,                src0,       src1                     clamp
+v_add_lshl_u32                  vdst,                src0,       src1,       src2
+v_add_u16_e64                   vdst,                src0,       src1                     clamp
+v_add_u32_e64                   vdst,                src0,       src1                     clamp
+v_addc_co_u32_e64               vdst,       sdst,    src0,       src1,       ssrc2        clamp
+v_alignbit_b32                  vdst,                src0,       src1,       src2:b16
+v_alignbyte_b32                 vdst,                src0,       src1,       src2:b16
+v_and_b32_e64                   vdst,                src0,       src1
+v_and_or_b32                    vdst,                src0,       src1,       src2
+v_ashrrev_i16_e64               vdst,                src0:u16,   src1
+v_ashrrev_i32_e64               vdst,                src0:u32,   src1
+v_ashrrev_i64                   vdst,                src0:u32,   src1
+v_bcnt_u32_b32                  vdst,                src0,       src1
+v_bfe_i32                       vdst,                src0,       src1:u32,   src2:u32
+v_bfe_u32                       vdst,                src0,       src1,       src2
+v_bfi_b32                       vdst,                src0,       src1,       src2
+v_bfm_b32                       vdst,                src0,       src1
+v_bfrev_b32_e64                 vdst,                src
+v_ceil_f16_e64                  vdst,                src:m                                clamp omod
+v_ceil_f32_e64                  vdst,                src:m                                clamp omod
+v_ceil_f64_e64                  vdst,                src:m                                clamp omod
+v_clrexcp_e64
+v_cmp_class_f16_e64             sdst,                src0:m,     src1:b32
+v_cmp_class_f32_e64             sdst,                src0:m,     src1:b32
+v_cmp_class_f64_e64             sdst,                src0:m,     src1:b32
+v_cmp_eq_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_eq_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_eq_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_eq_i16_e64                sdst,                src0,       src1
+v_cmp_eq_i32_e64                sdst,                src0,       src1
+v_cmp_eq_i64_e64                sdst,                src0,       src1
+v_cmp_eq_u16_e64                sdst,                src0,       src1
+v_cmp_eq_u32_e64                sdst,                src0,       src1
+v_cmp_eq_u64_e64                sdst,                src0,       src1
+v_cmp_f_f16_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_f_f32_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_f_f64_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_f_i16_e64                 sdst,                src0,       src1
+v_cmp_f_i32_e64                 sdst,                src0,       src1
+v_cmp_f_i64_e64                 sdst,                src0,       src1
+v_cmp_f_u16_e64                 sdst,                src0,       src1
+v_cmp_f_u32_e64                 sdst,                src0,       src1
+v_cmp_f_u64_e64                 sdst,                src0,       src1
+v_cmp_ge_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_ge_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_ge_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_ge_i16_e64                sdst,                src0,       src1
+v_cmp_ge_i32_e64                sdst,                src0,       src1
+v_cmp_ge_i64_e64                sdst,                src0,       src1
+v_cmp_ge_u16_e64                sdst,                src0,       src1
+v_cmp_ge_u32_e64                sdst,                src0,       src1
+v_cmp_ge_u64_e64                sdst,                src0,       src1
+v_cmp_gt_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_gt_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_gt_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_gt_i16_e64                sdst,                src0,       src1
+v_cmp_gt_i32_e64                sdst,                src0,       src1
+v_cmp_gt_i64_e64                sdst,                src0,       src1
+v_cmp_gt_u16_e64                sdst,                src0,       src1
+v_cmp_gt_u32_e64                sdst,                src0,       src1
+v_cmp_gt_u64_e64                sdst,                src0,       src1
+v_cmp_le_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_le_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_le_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_le_i16_e64                sdst,                src0,       src1
+v_cmp_le_i32_e64                sdst,                src0,       src1
+v_cmp_le_i64_e64                sdst,                src0,       src1
+v_cmp_le_u16_e64                sdst,                src0,       src1
+v_cmp_le_u32_e64                sdst,                src0,       src1
+v_cmp_le_u64_e64                sdst,                src0,       src1
+v_cmp_lg_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_lg_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_lg_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_lt_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_lt_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_lt_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmp_lt_i16_e64                sdst,                src0,       src1
+v_cmp_lt_i32_e64                sdst,                src0,       src1
+v_cmp_lt_i64_e64                sdst,                src0,       src1
+v_cmp_lt_u16_e64                sdst,                src0,       src1
+v_cmp_lt_u32_e64                sdst,                src0,       src1
+v_cmp_lt_u64_e64                sdst,                src0,       src1
+v_cmp_ne_i16_e64                sdst,                src0,       src1
+v_cmp_ne_i32_e64                sdst,                src0,       src1
+v_cmp_ne_i64_e64                sdst,                src0,       src1
+v_cmp_ne_u16_e64                sdst,                src0,       src1
+v_cmp_ne_u32_e64                sdst,                src0,       src1
+v_cmp_ne_u64_e64                sdst,                src0,       src1
+v_cmp_neq_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_neq_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_neq_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nge_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nge_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nge_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_ngt_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_ngt_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_ngt_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nle_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nle_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nle_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nlg_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nlg_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nlg_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nlt_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nlt_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_nlt_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_o_f16_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_o_f32_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_o_f64_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_t_i16_e64                 sdst,                src0,       src1
+v_cmp_t_i32_e64                 sdst,                src0,       src1
+v_cmp_t_i64_e64                 sdst,                src0,       src1
+v_cmp_t_u16_e64                 sdst,                src0,       src1
+v_cmp_t_u32_e64                 sdst,                src0,       src1
+v_cmp_t_u64_e64                 sdst,                src0,       src1
+v_cmp_tru_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_tru_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_tru_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmp_u_f16_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_u_f32_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmp_u_f64_e64                 sdst,                src0:m,     src1:m                   clamp
+v_cmpx_class_f16_e64            sdst,                src0:m,     src1:b32
+v_cmpx_class_f32_e64            sdst,                src0:m,     src1:b32
+v_cmpx_class_f64_e64            sdst,                src0:m,     src1:b32
+v_cmpx_eq_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_eq_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_eq_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_eq_i16_e64               sdst,                src0,       src1
+v_cmpx_eq_i32_e64               sdst,                src0,       src1
+v_cmpx_eq_i64_e64               sdst,                src0,       src1
+v_cmpx_eq_u16_e64               sdst,                src0,       src1
+v_cmpx_eq_u32_e64               sdst,                src0,       src1
+v_cmpx_eq_u64_e64               sdst,                src0,       src1
+v_cmpx_f_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_f_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_f_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_f_i16_e64                sdst,                src0,       src1
+v_cmpx_f_i32_e64                sdst,                src0,       src1
+v_cmpx_f_i64_e64                sdst,                src0,       src1
+v_cmpx_f_u16_e64                sdst,                src0,       src1
+v_cmpx_f_u32_e64                sdst,                src0,       src1
+v_cmpx_f_u64_e64                sdst,                src0,       src1
+v_cmpx_ge_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_ge_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_ge_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_ge_i16_e64               sdst,                src0,       src1
+v_cmpx_ge_i32_e64               sdst,                src0,       src1
+v_cmpx_ge_i64_e64               sdst,                src0,       src1
+v_cmpx_ge_u16_e64               sdst,                src0,       src1
+v_cmpx_ge_u32_e64               sdst,                src0,       src1
+v_cmpx_ge_u64_e64               sdst,                src0,       src1
+v_cmpx_gt_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_gt_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_gt_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_gt_i16_e64               sdst,                src0,       src1
+v_cmpx_gt_i32_e64               sdst,                src0,       src1
+v_cmpx_gt_i64_e64               sdst,                src0,       src1
+v_cmpx_gt_u16_e64               sdst,                src0,       src1
+v_cmpx_gt_u32_e64               sdst,                src0,       src1
+v_cmpx_gt_u64_e64               sdst,                src0,       src1
+v_cmpx_le_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_le_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_le_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_le_i16_e64               sdst,                src0,       src1
+v_cmpx_le_i32_e64               sdst,                src0,       src1
+v_cmpx_le_i64_e64               sdst,                src0,       src1
+v_cmpx_le_u16_e64               sdst,                src0,       src1
+v_cmpx_le_u32_e64               sdst,                src0,       src1
+v_cmpx_le_u64_e64               sdst,                src0,       src1
+v_cmpx_lg_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_lg_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_lg_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_lt_f16_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_lt_f32_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_lt_f64_e64               sdst,                src0:m,     src1:m                   clamp
+v_cmpx_lt_i16_e64               sdst,                src0,       src1
+v_cmpx_lt_i32_e64               sdst,                src0,       src1
+v_cmpx_lt_i64_e64               sdst,                src0,       src1
+v_cmpx_lt_u16_e64               sdst,                src0,       src1
+v_cmpx_lt_u32_e64               sdst,                src0,       src1
+v_cmpx_lt_u64_e64               sdst,                src0,       src1
+v_cmpx_ne_i16_e64               sdst,                src0,       src1
+v_cmpx_ne_i32_e64               sdst,                src0,       src1
+v_cmpx_ne_i64_e64               sdst,                src0,       src1
+v_cmpx_ne_u16_e64               sdst,                src0,       src1
+v_cmpx_ne_u32_e64               sdst,                src0,       src1
+v_cmpx_ne_u64_e64               sdst,                src0,       src1
+v_cmpx_neq_f16_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_neq_f32_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_neq_f64_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nge_f16_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nge_f32_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nge_f64_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_ngt_f16_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_ngt_f32_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_ngt_f64_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nle_f16_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nle_f32_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nle_f64_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nlg_f16_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nlg_f32_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nlg_f64_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nlt_f16_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nlt_f32_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_nlt_f64_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_o_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_o_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_o_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_t_i16_e64                sdst,                src0,       src1
+v_cmpx_t_i32_e64                sdst,                src0,       src1
+v_cmpx_t_i64_e64                sdst,                src0,       src1
+v_cmpx_t_u16_e64                sdst,                src0,       src1
+v_cmpx_t_u32_e64                sdst,                src0,       src1
+v_cmpx_t_u64_e64                sdst,                src0,       src1
+v_cmpx_tru_f16_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_tru_f32_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_tru_f64_e64              sdst,                src0:m,     src1:m                   clamp
+v_cmpx_u_f16_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_u_f32_e64                sdst,                src0:m,     src1:m                   clamp
+v_cmpx_u_f64_e64                sdst,                src0:m,     src1:m                   clamp
+v_cndmask_b32_e64               vdst,                src0,       src1,       ssrc2
+v_cos_f16_e64                   vdst,                src:m                                clamp omod
+v_cos_f32_e64                   vdst,                src:m                                clamp omod
+v_cubeid_f32                    vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_cubema_f32                    vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_cubesc_f32                    vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_cubetc_f32                    vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_cvt_f16_f32_e64               vdst,                src:m                                clamp omod
+v_cvt_f16_i16_e64               vdst,                src                                  clamp omod
+v_cvt_f16_u16_e64               vdst,                src                                  clamp omod
+v_cvt_f32_f16_e64               vdst,                src:m                                clamp omod
+v_cvt_f32_f64_e64               vdst,                src:m                                clamp omod
+v_cvt_f32_i32_e64               vdst,                src                                  clamp omod
+v_cvt_f32_u32_e64               vdst,                src                                  clamp omod
+v_cvt_f32_ubyte0_e64            vdst,                src                                  clamp omod
+v_cvt_f32_ubyte1_e64            vdst,                src                                  clamp omod
+v_cvt_f32_ubyte2_e64            vdst,                src                                  clamp omod
+v_cvt_f32_ubyte3_e64            vdst,                src                                  clamp omod
+v_cvt_f64_f32_e64               vdst,                src:m                                clamp omod
+v_cvt_f64_i32_e64               vdst,                src                                  clamp omod
+v_cvt_f64_u32_e64               vdst,                src                                  clamp omod
+v_cvt_flr_i32_f32_e64           vdst,                src:m
+v_cvt_i16_f16_e64               vdst,                src:m                                clamp
+v_cvt_i32_f32_e64               vdst,                src:m                                clamp
+v_cvt_i32_f64_e64               vdst,                src:m                                clamp
+v_cvt_norm_i16_f16_e64          vdst,                src:m                                clamp
+v_cvt_norm_u16_f16_e64          vdst,                src:m                                clamp
+v_cvt_off_f32_i4_e64            vdst,                src                                  clamp omod
+v_cvt_pk_i16_i32                vdst,                src0:i32,   src1:i32
+v_cvt_pk_u16_u32                vdst,                src0:u32,   src1:u32
+v_cvt_pk_u8_f32                 vdst:b32,            src0:m:f32, src1:u32,   src2:u32
+v_cvt_pkaccum_u8_f32            vdst:b32,            src0:m:f32, src1:u32
+v_cvt_pknorm_i16_f16            vdst,                src0:m:f16, src1:m:f16               op_sel
+v_cvt_pknorm_i16_f32            vdst,                src0:m:f32, src1:m:f32
+v_cvt_pknorm_u16_f16            vdst,                src0:m:f16, src1:m:f16               op_sel
+v_cvt_pknorm_u16_f32            vdst,                src0:m:f32, src1:m:f32
+v_cvt_pkrtz_f16_f32             vdst,                src0:m:f32, src1:m:f32
+v_cvt_rpi_i32_f32_e64           vdst,                src:m
+v_cvt_u16_f16_e64               vdst,                src:m                                clamp
+v_cvt_u32_f32_e64               vdst,                src:m                                clamp
+v_cvt_u32_f64_e64               vdst,                src:m                                clamp
+v_div_fixup_f16                 vdst,                src0:m,     src1:m,     src2:m       op_sel clamp
+v_div_fixup_f32                 vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_div_fixup_f64                 vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_div_fixup_legacy_f16          vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_div_fmas_f32                  vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_div_fmas_f64                  vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_div_scale_f32                 vdst,       vcc,     src0,       src1,       src2
+v_div_scale_f64                 vdst,       vcc,     src0,       src1,       src2
+v_exp_f16_e64                   vdst,                src:m                                clamp omod
+v_exp_f32_e64                   vdst,                src:m                                clamp omod
+v_exp_legacy_f32_e64            vdst,                src:m                                clamp omod
+v_ffbh_i32_e64                  vdst,                src
+v_ffbh_u32_e64                  vdst,                src
+v_ffbl_b32_e64                  vdst,                src
+v_floor_f16_e64                 vdst,                src:m                                clamp omod
+v_floor_f32_e64                 vdst,                src:m                                clamp omod
+v_floor_f64_e64                 vdst,                src:m                                clamp omod
+v_fma_f16                       vdst,                src0:m,     src1:m,     src2:m       op_sel clamp
+v_fma_f32                       vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_fma_f64                       vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_fma_legacy_f16                vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_fmac_f32_e64                  vdst,                src0:m,     src1:m                   clamp omod
+v_fmac_f64_e64                  vdst,                src0:m,     src1:m                   clamp omod
+v_fract_f16_e64                 vdst,                src:m                                clamp omod
+v_fract_f32_e64                 vdst,                src:m                                clamp omod
+v_fract_f64_e64                 vdst,                src:m                                clamp omod
+v_frexp_exp_i16_f16_e64         vdst,                src:m
+v_frexp_exp_i32_f32_e64         vdst,                src:m
+v_frexp_exp_i32_f64_e64         vdst,                src:m
+v_frexp_mant_f16_e64            vdst,                src:m                                clamp omod
+v_frexp_mant_f32_e64            vdst,                src:m                                clamp omod
+v_frexp_mant_f64_e64            vdst,                src:m                                clamp omod
+v_ldexp_f16_e64                 vdst,                src0:m,     src1:i16                 clamp omod
+v_ldexp_f32                     vdst,                src0:m,     src1:i32                 clamp omod
+v_ldexp_f64                     vdst,                src0:m,     src1:i32                 clamp omod
+v_lerp_u8                       vdst:u32,            src0:b32,   src1:b32,   src2:b32
+v_log_f16_e64                   vdst,                src:m                                clamp omod
+v_log_f32_e64                   vdst,                src:m                                clamp omod
+v_log_legacy_f32_e64            vdst,                src:m                                clamp omod
+v_lshl_add_u32                  vdst,                src0,       src1,       src2
+v_lshl_or_b32                   vdst,                src0,       src1:u32,   src2
+v_lshlrev_b16_e64               vdst,                src0:u16,   src1
+v_lshlrev_b32_e64               vdst,                src0:u32,   src1
+v_lshlrev_b64                   vdst,                src0:u32,   src1
+v_lshrrev_b16_e64               vdst,                src0:u16,   src1
+v_lshrrev_b32_e64               vdst,                src0:u32,   src1
+v_lshrrev_b64                   vdst,                src0:u32,   src1
+v_mac_f16_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_mac_f32_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_mad_f16                       vdst,                src0:m,     src1:m,     src2:m       op_sel clamp
+v_mad_f32                       vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_mad_i16                       vdst,                src0,       src1,       src2         op_sel clamp
+v_mad_i32_i16                   vdst,                src0,       src1,       src2:i32     op_sel clamp
+v_mad_i32_i24                   vdst,                src0,       src1,       src2:i32     clamp
+v_mad_i64_i32                   vdst,       sdst,    src0,       src1,       src2:i64     clamp
+v_mad_legacy_f16                vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_mad_legacy_f32                vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_mad_legacy_i16                vdst,                src0,       src1,       src2         clamp
+v_mad_legacy_u16                vdst,                src0,       src1,       src2         clamp
+v_mad_u16                       vdst,                src0,       src1,       src2         op_sel clamp
+v_mad_u32_u16                   vdst,                src0,       src1,       src2:u32     op_sel clamp
+v_mad_u32_u24                   vdst,                src0,       src1,       src2:u32     clamp
+v_mad_u64_u32                   vdst,       sdst,    src0,       src1,       src2:u64     clamp
+v_max3_f16                      vdst,                src0:m,     src1:m,     src2:m       op_sel clamp
+v_max3_f32                      vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_max3_i16                      vdst,                src0,       src1,       src2         op_sel
+v_max3_i32                      vdst,                src0,       src1,       src2
+v_max3_u16                      vdst,                src0,       src1,       src2         op_sel
+v_max3_u32                      vdst,                src0,       src1,       src2
+v_max_f16_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_max_f32_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_max_f64                       vdst,                src0:m,     src1:m                   clamp omod
+v_max_i16_e64                   vdst,                src0,       src1
+v_max_i32_e64                   vdst,                src0,       src1
+v_max_u16_e64                   vdst,                src0,       src1
+v_max_u32_e64                   vdst,                src0,       src1
+v_mbcnt_hi_u32_b32              vdst,                src0,       src1
+v_mbcnt_lo_u32_b32              vdst,                src0,       src1
+v_med3_f16                      vdst,                src0:m,     src1:m,     src2:m       op_sel clamp
+v_med3_f32                      vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_med3_i16                      vdst,                src0,       src1,       src2         op_sel
+v_med3_i32                      vdst,                src0,       src1,       src2
+v_med3_u16                      vdst,                src0,       src1,       src2         op_sel
+v_med3_u32                      vdst,                src0,       src1,       src2
+v_min3_f16                      vdst,                src0:m,     src1:m,     src2:m       op_sel clamp
+v_min3_f32                      vdst,                src0:m,     src1:m,     src2:m       clamp omod
+v_min3_i16                      vdst,                src0,       src1,       src2         op_sel
+v_min3_i32                      vdst,                src0,       src1,       src2
+v_min3_u16                      vdst,                src0,       src1,       src2         op_sel
+v_min3_u32                      vdst,                src0,       src1,       src2
+v_min_f16_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_min_f32_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_min_f64                       vdst,                src0:m,     src1:m                   clamp omod
+v_min_i16_e64                   vdst,                src0,       src1
+v_min_i32_e64                   vdst,                src0,       src1
+v_min_u16_e64                   vdst,                src0,       src1
+v_min_u32_e64                   vdst,                src0,       src1
+v_mov_b32_e64                   vdst,                src
+v_mqsad_pk_u16_u8               vdst:u16x4,          src0:u8x8,  src1:u8x4,  src2:u16x4   clamp
+v_mqsad_u32_u8                  vdst:u32x4,          src0:u8x8,  src1:u8x4,  vsrc2:u32x4  clamp
+v_msad_u8                       vdst:u32,            src0:u8x4,  src1:u8x4,  src2:u32     clamp
+v_mul_f16_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_mul_f32_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_mul_f64                       vdst,                src0:m,     src1:m                   clamp omod
+v_mul_hi_i32                    vdst,                src0,       src1
+v_mul_hi_i32_i24_e64            vdst,                src0,       src1
+v_mul_hi_u32                    vdst,                src0,       src1
+v_mul_hi_u32_u24_e64            vdst,                src0,       src1
+v_mul_i32_i24_e64               vdst,                src0,       src1                     clamp
+v_mul_legacy_f32                vdst,                src0:m,     src1:m                   clamp omod
+v_mul_lo_u16_e64                vdst,                src0,       src1
+v_mul_lo_u32                    vdst,                src0,       src1
+v_mul_u32_u24_e64               vdst,                src0,       src1                     clamp
+v_nop_e64
+v_not_b32_e64                   vdst,                src
+v_or3_b32                       vdst,                src0,       src1,       src2
+v_or_b32_e64                    vdst,                src0,       src1
+v_pack_b32_f16                  vdst,                src0:m,     src1:m                   op_sel
+v_perm_b32                      vdst,                src0,       src1,       src2
+v_qsad_pk_u16_u8                vdst:u16x4,          src0:u8x8,  src1:u8x4,  src2:u16x4   clamp
+v_rcp_f16_e64                   vdst,                src:m                                clamp omod
+v_rcp_f32_e64                   vdst,                src:m                                clamp omod
+v_rcp_f64_e64                   vdst,                src:m                                clamp omod
+v_rcp_iflag_f32_e64             vdst,                src:m                                clamp omod
+v_readlane_b32                  sdst,                vsrc0,      ssrc1
+v_rndne_f16_e64                 vdst,                src:m                                clamp omod
+v_rndne_f32_e64                 vdst,                src:m                                clamp omod
+v_rndne_f64_e64                 vdst,                src:m                                clamp omod
+v_rsq_f16_e64                   vdst,                src:m                                clamp omod
+v_rsq_f32_e64                   vdst,                src:m                                clamp omod
+v_rsq_f64_e64                   vdst,                src:m                                clamp omod
+v_sad_hi_u8                     vdst:u32,            src0:u8x4,  src1:u8x4,  src2:u32     clamp
+v_sad_u16                       vdst:u32,            src0:u16x2, src1:u16x2, src2:u32     clamp
+v_sad_u32                       vdst,                src0,       src1,       src2         clamp
+v_sad_u8                        vdst:u32,            src0:u8x4,  src1:u8x4,  src2:u32     clamp
+v_sat_pk_u8_i16_e64             vdst:u8x4,           src
+v_screen_partition_4se_b32_e64  vdst,                src
+v_sin_f16_e64                   vdst,                src:m                                clamp omod
+v_sin_f32_e64                   vdst,                src:m                                clamp omod
+v_sqrt_f16_e64                  vdst,                src:m                                clamp omod
+v_sqrt_f32_e64                  vdst,                src:m                                clamp omod
+v_sqrt_f64_e64                  vdst,                src:m                                clamp omod
+v_sub_co_u32_e64                vdst,       sdst,    src0,       src1                     clamp
+v_sub_f16_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_sub_f32_e64                   vdst,                src0:m,     src1:m                   clamp omod
+v_sub_i16                       vdst,                src0,       src1                     op_sel clamp
+v_sub_i32                       vdst,                src0,       src1                     clamp
+v_sub_u16_e64                   vdst,                src0,       src1                     clamp
+v_sub_u32_e64                   vdst,                src0,       src1                     clamp
+v_subb_co_u32_e64               vdst,       sdst,    src0,       src1,       ssrc2        clamp
+v_subbrev_co_u32_e64            vdst,       sdst,    src0,       src1,       ssrc2        clamp
+v_subrev_co_u32_e64             vdst,       sdst,    src0,       src1                     clamp
+v_subrev_f16_e64                vdst,                src0:m,     src1:m                   clamp omod
+v_subrev_f32_e64                vdst,                src0:m,     src1:m                   clamp omod
+v_subrev_u16_e64                vdst,                src0,       src1                     clamp
+v_subrev_u32_e64                vdst,                src0,       src1                     clamp
+v_trig_preop_f64                vdst,                src0:m,     src1:u32                 clamp omod
+v_trunc_f16_e64                 vdst,                src:m                                clamp omod
+v_trunc_f32_e64                 vdst,                src:m                                clamp omod
+v_trunc_f64_e64                 vdst,                src:m                                clamp omod
+v_writelane_b32                 vdst,                ssrc0,      ssrc1
+v_xad_u32                       vdst,                src0,       src1,       src2
+v_xnor_b32_e64                  vdst,                src0,       src1
+v_xor_b32_e64                   vdst,                src0,       src1
+
+
+

VOP3P

+
INSTRUCTION                 DST          SRC0          SRC1          SRC2         MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_accvgpr_read_b32          vdst,        vsrc
+v_accvgpr_write_b32         vdst,        src
+v_dot2_f32_f16              vdst,        src0:f16x2,   src1:f16x2,   src2:f32     neg_lo neg_hi clamp
+v_dot2_i32_i16              vdst,        src0:i16x2,   src1:i16x2,   src2:i32     clamp
+v_dot2_u32_u16              vdst,        src0:u16x2,   src1:u16x2,   src2:u32     clamp
+v_dot4_i32_i8               vdst,        src0:i8x4,    src1:i8x4,    src2:i32     clamp
+v_dot4_u32_u8               vdst,        src0:u8x4,    src1:u8x4,    src2:u32     clamp
+v_dot8_i32_i4               vdst,        src0:i4x8,    src1:i4x8,    src2:i32     clamp
+v_dot8_u32_u4               vdst,        src0:u4x8,    src1:u4x8,    src2:u32     clamp
+v_fma_mix_f32               vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx    m_op_sel m_op_sel_hi clamp
+v_fma_mixhi_f16             vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx    m_op_sel m_op_sel_hi clamp
+v_fma_mixlo_f16             vdst,        src0:m:fx,    src1:m:fx,    src2:m:fx    m_op_sel m_op_sel_hi clamp
+v_mfma_f32_16x16x16bf16_1k  vdst:f32x4,  vsrc0:bf16x4, vsrc1:bf16x4, src2:f32x4   cbsz abid blgp
+v_mfma_f32_16x16x16f16      vdst:f32x4,  vsrc0:f16x4,  vsrc1:f16x4,  src2:f32x4   cbsz abid blgp
+v_mfma_f32_16x16x1f32       vdst:f32x16, vsrc0:f32,    vsrc1:f32,    src2:f32x16  cbsz abid blgp
+v_mfma_f32_16x16x2bf16      vdst:f32x16, vsrc0:bf16x2, vsrc1:bf16x2, src2:f32x16  cbsz abid blgp
+v_mfma_f32_16x16x4bf16_1k   vdst:f32x16, vsrc0:bf16x4, vsrc1:bf16x4, src2:f32x16  cbsz abid blgp
+v_mfma_f32_16x16x4f16       vdst:f32x16, vsrc0:f16x4,  vsrc1:f16x4,  src2:f32x16  cbsz abid blgp
+v_mfma_f32_16x16x4f32       vdst:f32x4,  vsrc0:f32,    vsrc1:f32,    src2:f32x4   cbsz abid blgp
+v_mfma_f32_16x16x8bf16      vdst:f32x4,  vsrc0:bf16x2, vsrc1:bf16x2, src2:f32x4   cbsz abid blgp
+v_mfma_f32_32x32x1f32       vdst:f32x32, vsrc0:f32,    vsrc1:f32,    src2:f32x32  cbsz abid blgp
+v_mfma_f32_32x32x2bf16      vdst:f32x32, vsrc0:bf16x2, vsrc1:bf16x2, src2:f32x32  cbsz abid blgp
+v_mfma_f32_32x32x2f32       vdst:f32x16, vsrc0:f32,    vsrc1:f32,    src2:f32x16  cbsz abid blgp
+v_mfma_f32_32x32x4bf16      vdst:f32x16, vsrc0:bf16x2, vsrc1:bf16x2, src2:f32x16  cbsz abid blgp
+v_mfma_f32_32x32x4bf16_1k   vdst:f32x32, vsrc0:bf16x4, vsrc1:bf16x4, src2:f32x32  cbsz abid blgp
+v_mfma_f32_32x32x4f16       vdst:f32x32, vsrc0:f16x4,  vsrc1:f16x4,  src2:f32x32  cbsz abid blgp
+v_mfma_f32_32x32x8bf16_1k   vdst:f32x16, vsrc0:bf16x4, vsrc1:bf16x4, src2:f32x16  cbsz abid blgp
+v_mfma_f32_32x32x8f16       vdst:f32x16, vsrc0:f16x4,  vsrc1:f16x4,  src2:f32x16  cbsz abid blgp
+v_mfma_f32_4x4x1f32         vdst:f32x4,  vsrc0:f32,    vsrc1:f32,    src2:f32x4   cbsz abid blgp
+v_mfma_f32_4x4x2bf16        vdst:f32x4,  vsrc0:bf16x2, vsrc1:bf16x2, src2:f32x4   cbsz abid blgp
+v_mfma_f32_4x4x4bf16_1k     vdst:f32x4,  vsrc0:bf16x4, vsrc1:bf16x4, src2:f32x4   cbsz abid blgp
+v_mfma_f32_4x4x4f16         vdst:f32x4,  vsrc0:f16x4,  vsrc1:f16x4,  src2:f32x4   cbsz abid blgp
+v_mfma_f64_16x16x4f64       vdst:f64x4,  vsrc0:f64,    vsrc1:f64,    src2:f64x4   cbsz abid blgp
+v_mfma_f64_4x4x4f64         vdst:f64,    vsrc0:f64,    vsrc1:f64,    src2:f64     cbsz abid blgp
+v_mfma_i32_16x16x16i8       vdst:i32x4,  vsrc0:i8x4,   vsrc1:i8x4,   src2:i32x4   cbsz abid blgp
+v_mfma_i32_16x16x4i8        vdst:i32x16, vsrc0:i8x4,   vsrc1:i8x4,   src2:i32x16  cbsz abid blgp
+v_mfma_i32_32x32x4i8        vdst:i32x32, vsrc0:i8x4,   vsrc1:i8x4,   src2:i32x32  cbsz abid blgp
+v_mfma_i32_32x32x8i8        vdst:i32x16, vsrc0:i8x4,   vsrc1:i8x4,   src2:i32x16  cbsz abid blgp
+v_mfma_i32_4x4x4i8          vdst:i32x4,  vsrc0:i8x4,   vsrc1:i8x4,   src2:i32x4   cbsz abid blgp
+v_pk_add_f16                vdst,        src0,         src1                       op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_add_f32                vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_add_i16                vdst,        src0,         src1                       op_sel op_sel_hi clamp
+v_pk_add_u16                vdst,        src0,         src1                       op_sel op_sel_hi clamp
+v_pk_ashrrev_i16            vdst,        src0:u16x2,   src1                       op_sel op_sel_hi
+v_pk_fma_f16                vdst,        src0,         src1,         src2         op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_fma_f32                vdst,        src0,         src1,         src2         op_sel op_sel_hi
+v_pk_lshlrev_b16            vdst,        src0:u16x2,   src1                       op_sel op_sel_hi
+v_pk_lshrrev_b16            vdst,        src0:u16x2,   src1                       op_sel op_sel_hi
+v_pk_mad_i16                vdst,        src0,         src1,         src2         op_sel op_sel_hi clamp
+v_pk_mad_u16                vdst,        src0,         src1,         src2         op_sel op_sel_hi clamp
+v_pk_max_f16                vdst,        src0,         src1                       op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_max_i16                vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_max_u16                vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_min_f16                vdst,        src0,         src1                       op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_min_i16                vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_min_u16                vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_mov_b32                vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_mul_f16                vdst,        src0,         src1                       op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_mul_f32                vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_mul_lo_u16             vdst,        src0,         src1                       op_sel op_sel_hi
+v_pk_sub_i16                vdst,        src0,         src1                       op_sel op_sel_hi clamp
+v_pk_sub_u16                vdst,        src0,         src1                       op_sel op_sel_hi clamp
+
+
+

VOPC

+
INSTRUCTION                    DST       SRC0      SRC1            MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————
+v_cmp_class_f16                vcc,      src0,     vsrc1:b32
+v_cmp_class_f16_sdwa           sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmp_class_f32                vcc,      src0,     vsrc1:b32
+v_cmp_class_f32_sdwa           sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmp_class_f64                vcc,      src0,     vsrc1:b32
+v_cmp_eq_f16                   vcc,      src0,     vsrc1
+v_cmp_eq_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_f32                   vcc,      src0,     vsrc1
+v_cmp_eq_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_f64                   vcc,      src0,     vsrc1
+v_cmp_eq_i16                   vcc,      src0,     vsrc1
+v_cmp_eq_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_i32                   vcc,      src0,     vsrc1
+v_cmp_eq_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_i64                   vcc,      src0,     vsrc1
+v_cmp_eq_u16                   vcc,      src0,     vsrc1
+v_cmp_eq_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_u32                   vcc,      src0,     vsrc1
+v_cmp_eq_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_u64                   vcc,      src0,     vsrc1
+v_cmp_f_f16                    vcc,      src0,     vsrc1
+v_cmp_f_f16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_f32                    vcc,      src0,     vsrc1
+v_cmp_f_f32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_f64                    vcc,      src0,     vsrc1
+v_cmp_f_i16                    vcc,      src0,     vsrc1
+v_cmp_f_i16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_i32                    vcc,      src0,     vsrc1
+v_cmp_f_i32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_i64                    vcc,      src0,     vsrc1
+v_cmp_f_u16                    vcc,      src0,     vsrc1
+v_cmp_f_u16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_u32                    vcc,      src0,     vsrc1
+v_cmp_f_u32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_u64                    vcc,      src0,     vsrc1
+v_cmp_ge_f16                   vcc,      src0,     vsrc1
+v_cmp_ge_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_f32                   vcc,      src0,     vsrc1
+v_cmp_ge_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_f64                   vcc,      src0,     vsrc1
+v_cmp_ge_i16                   vcc,      src0,     vsrc1
+v_cmp_ge_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_i32                   vcc,      src0,     vsrc1
+v_cmp_ge_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_i64                   vcc,      src0,     vsrc1
+v_cmp_ge_u16                   vcc,      src0,     vsrc1
+v_cmp_ge_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_u32                   vcc,      src0,     vsrc1
+v_cmp_ge_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_u64                   vcc,      src0,     vsrc1
+v_cmp_gt_f16                   vcc,      src0,     vsrc1
+v_cmp_gt_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_f32                   vcc,      src0,     vsrc1
+v_cmp_gt_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_f64                   vcc,      src0,     vsrc1
+v_cmp_gt_i16                   vcc,      src0,     vsrc1
+v_cmp_gt_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_i32                   vcc,      src0,     vsrc1
+v_cmp_gt_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_i64                   vcc,      src0,     vsrc1
+v_cmp_gt_u16                   vcc,      src0,     vsrc1
+v_cmp_gt_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_u32                   vcc,      src0,     vsrc1
+v_cmp_gt_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_u64                   vcc,      src0,     vsrc1
+v_cmp_le_f16                   vcc,      src0,     vsrc1
+v_cmp_le_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_f32                   vcc,      src0,     vsrc1
+v_cmp_le_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_f64                   vcc,      src0,     vsrc1
+v_cmp_le_i16                   vcc,      src0,     vsrc1
+v_cmp_le_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_i32                   vcc,      src0,     vsrc1
+v_cmp_le_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_i64                   vcc,      src0,     vsrc1
+v_cmp_le_u16                   vcc,      src0,     vsrc1
+v_cmp_le_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_u32                   vcc,      src0,     vsrc1
+v_cmp_le_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_u64                   vcc,      src0,     vsrc1
+v_cmp_lg_f16                   vcc,      src0,     vsrc1
+v_cmp_lg_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lg_f32                   vcc,      src0,     vsrc1
+v_cmp_lg_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lg_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_f16                   vcc,      src0,     vsrc1
+v_cmp_lt_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_f32                   vcc,      src0,     vsrc1
+v_cmp_lt_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_i16                   vcc,      src0,     vsrc1
+v_cmp_lt_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_i32                   vcc,      src0,     vsrc1
+v_cmp_lt_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_i64                   vcc,      src0,     vsrc1
+v_cmp_lt_u16                   vcc,      src0,     vsrc1
+v_cmp_lt_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_u32                   vcc,      src0,     vsrc1
+v_cmp_lt_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_u64                   vcc,      src0,     vsrc1
+v_cmp_ne_i16                   vcc,      src0,     vsrc1
+v_cmp_ne_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_i32                   vcc,      src0,     vsrc1
+v_cmp_ne_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_i64                   vcc,      src0,     vsrc1
+v_cmp_ne_u16                   vcc,      src0,     vsrc1
+v_cmp_ne_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_u32                   vcc,      src0,     vsrc1
+v_cmp_ne_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_u64                   vcc,      src0,     vsrc1
+v_cmp_neq_f16                  vcc,      src0,     vsrc1
+v_cmp_neq_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_neq_f32                  vcc,      src0,     vsrc1
+v_cmp_neq_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_neq_f64                  vcc,      src0,     vsrc1
+v_cmp_nge_f16                  vcc,      src0,     vsrc1
+v_cmp_nge_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nge_f32                  vcc,      src0,     vsrc1
+v_cmp_nge_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nge_f64                  vcc,      src0,     vsrc1
+v_cmp_ngt_f16                  vcc,      src0,     vsrc1
+v_cmp_ngt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ngt_f32                  vcc,      src0,     vsrc1
+v_cmp_ngt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ngt_f64                  vcc,      src0,     vsrc1
+v_cmp_nle_f16                  vcc,      src0,     vsrc1
+v_cmp_nle_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nle_f32                  vcc,      src0,     vsrc1
+v_cmp_nle_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nle_f64                  vcc,      src0,     vsrc1
+v_cmp_nlg_f16                  vcc,      src0,     vsrc1
+v_cmp_nlg_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlg_f32                  vcc,      src0,     vsrc1
+v_cmp_nlg_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlg_f64                  vcc,      src0,     vsrc1
+v_cmp_nlt_f16                  vcc,      src0,     vsrc1
+v_cmp_nlt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlt_f32                  vcc,      src0,     vsrc1
+v_cmp_nlt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlt_f64                  vcc,      src0,     vsrc1
+v_cmp_o_f16                    vcc,      src0,     vsrc1
+v_cmp_o_f16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_o_f32                    vcc,      src0,     vsrc1
+v_cmp_o_f32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_o_f64                    vcc,      src0,     vsrc1
+v_cmp_t_i16                    vcc,      src0,     vsrc1
+v_cmp_t_i16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_i32                    vcc,      src0,     vsrc1
+v_cmp_t_i32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_i64                    vcc,      src0,     vsrc1
+v_cmp_t_u16                    vcc,      src0,     vsrc1
+v_cmp_t_u16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_u32                    vcc,      src0,     vsrc1
+v_cmp_t_u32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_u64                    vcc,      src0,     vsrc1
+v_cmp_tru_f16                  vcc,      src0,     vsrc1
+v_cmp_tru_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_tru_f32                  vcc,      src0,     vsrc1
+v_cmp_tru_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_tru_f64                  vcc,      src0,     vsrc1
+v_cmp_u_f16                    vcc,      src0,     vsrc1
+v_cmp_u_f16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_u_f32                    vcc,      src0,     vsrc1
+v_cmp_u_f32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_u_f64                    vcc,      src0,     vsrc1
+v_cmpx_class_f16               vcc,      src0,     vsrc1:b32
+v_cmpx_class_f16_sdwa          sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmpx_class_f32               vcc,      src0,     vsrc1:b32
+v_cmpx_class_f32_sdwa          sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmpx_class_f64               vcc,      src0,     vsrc1:b32
+v_cmpx_eq_f16                  vcc,      src0,     vsrc1
+v_cmpx_eq_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_f32                  vcc,      src0,     vsrc1
+v_cmpx_eq_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_f64                  vcc,      src0,     vsrc1
+v_cmpx_eq_i16                  vcc,      src0,     vsrc1
+v_cmpx_eq_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_i32                  vcc,      src0,     vsrc1
+v_cmpx_eq_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_i64                  vcc,      src0,     vsrc1
+v_cmpx_eq_u16                  vcc,      src0,     vsrc1
+v_cmpx_eq_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_u32                  vcc,      src0,     vsrc1
+v_cmpx_eq_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_u64                  vcc,      src0,     vsrc1
+v_cmpx_f_f16                   vcc,      src0,     vsrc1
+v_cmpx_f_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_f32                   vcc,      src0,     vsrc1
+v_cmpx_f_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_f64                   vcc,      src0,     vsrc1
+v_cmpx_f_i16                   vcc,      src0,     vsrc1
+v_cmpx_f_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_i32                   vcc,      src0,     vsrc1
+v_cmpx_f_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_i64                   vcc,      src0,     vsrc1
+v_cmpx_f_u16                   vcc,      src0,     vsrc1
+v_cmpx_f_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_u32                   vcc,      src0,     vsrc1
+v_cmpx_f_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_u64                   vcc,      src0,     vsrc1
+v_cmpx_ge_f16                  vcc,      src0,     vsrc1
+v_cmpx_ge_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_f32                  vcc,      src0,     vsrc1
+v_cmpx_ge_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_f64                  vcc,      src0,     vsrc1
+v_cmpx_ge_i16                  vcc,      src0,     vsrc1
+v_cmpx_ge_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_i32                  vcc,      src0,     vsrc1
+v_cmpx_ge_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_i64                  vcc,      src0,     vsrc1
+v_cmpx_ge_u16                  vcc,      src0,     vsrc1
+v_cmpx_ge_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_u32                  vcc,      src0,     vsrc1
+v_cmpx_ge_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_u64                  vcc,      src0,     vsrc1
+v_cmpx_gt_f16                  vcc,      src0,     vsrc1
+v_cmpx_gt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_f32                  vcc,      src0,     vsrc1
+v_cmpx_gt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_f64                  vcc,      src0,     vsrc1
+v_cmpx_gt_i16                  vcc,      src0,     vsrc1
+v_cmpx_gt_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_i32                  vcc,      src0,     vsrc1
+v_cmpx_gt_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_i64                  vcc,      src0,     vsrc1
+v_cmpx_gt_u16                  vcc,      src0,     vsrc1
+v_cmpx_gt_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_u32                  vcc,      src0,     vsrc1
+v_cmpx_gt_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_u64                  vcc,      src0,     vsrc1
+v_cmpx_le_f16                  vcc,      src0,     vsrc1
+v_cmpx_le_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_f32                  vcc,      src0,     vsrc1
+v_cmpx_le_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_f64                  vcc,      src0,     vsrc1
+v_cmpx_le_i16                  vcc,      src0,     vsrc1
+v_cmpx_le_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_i32                  vcc,      src0,     vsrc1
+v_cmpx_le_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_i64                  vcc,      src0,     vsrc1
+v_cmpx_le_u16                  vcc,      src0,     vsrc1
+v_cmpx_le_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_u32                  vcc,      src0,     vsrc1
+v_cmpx_le_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_u64                  vcc,      src0,     vsrc1
+v_cmpx_lg_f16                  vcc,      src0,     vsrc1
+v_cmpx_lg_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lg_f32                  vcc,      src0,     vsrc1
+v_cmpx_lg_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lg_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_f16                  vcc,      src0,     vsrc1
+v_cmpx_lt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_f32                  vcc,      src0,     vsrc1
+v_cmpx_lt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_i16                  vcc,      src0,     vsrc1
+v_cmpx_lt_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_i32                  vcc,      src0,     vsrc1
+v_cmpx_lt_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_i64                  vcc,      src0,     vsrc1
+v_cmpx_lt_u16                  vcc,      src0,     vsrc1
+v_cmpx_lt_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_u32                  vcc,      src0,     vsrc1
+v_cmpx_lt_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_u64                  vcc,      src0,     vsrc1
+v_cmpx_ne_i16                  vcc,      src0,     vsrc1
+v_cmpx_ne_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_i32                  vcc,      src0,     vsrc1
+v_cmpx_ne_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_i64                  vcc,      src0,     vsrc1
+v_cmpx_ne_u16                  vcc,      src0,     vsrc1
+v_cmpx_ne_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_u32                  vcc,      src0,     vsrc1
+v_cmpx_ne_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_u64                  vcc,      src0,     vsrc1
+v_cmpx_neq_f16                 vcc,      src0,     vsrc1
+v_cmpx_neq_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_neq_f32                 vcc,      src0,     vsrc1
+v_cmpx_neq_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_neq_f64                 vcc,      src0,     vsrc1
+v_cmpx_nge_f16                 vcc,      src0,     vsrc1
+v_cmpx_nge_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nge_f32                 vcc,      src0,     vsrc1
+v_cmpx_nge_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nge_f64                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f16                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ngt_f32                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ngt_f64                 vcc,      src0,     vsrc1
+v_cmpx_nle_f16                 vcc,      src0,     vsrc1
+v_cmpx_nle_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nle_f32                 vcc,      src0,     vsrc1
+v_cmpx_nle_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nle_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f16                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlg_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlg_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f16                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlt_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlt_f64                 vcc,      src0,     vsrc1
+v_cmpx_o_f16                   vcc,      src0,     vsrc1
+v_cmpx_o_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_o_f32                   vcc,      src0,     vsrc1
+v_cmpx_o_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_o_f64                   vcc,      src0,     vsrc1
+v_cmpx_t_i16                   vcc,      src0,     vsrc1
+v_cmpx_t_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_i32                   vcc,      src0,     vsrc1
+v_cmpx_t_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_i64                   vcc,      src0,     vsrc1
+v_cmpx_t_u16                   vcc,      src0,     vsrc1
+v_cmpx_t_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_u32                   vcc,      src0,     vsrc1
+v_cmpx_t_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_u64                   vcc,      src0,     vsrc1
+v_cmpx_tru_f16                 vcc,      src0,     vsrc1
+v_cmpx_tru_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_tru_f32                 vcc,      src0,     vsrc1
+v_cmpx_tru_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_tru_f64                 vcc,      src0,     vsrc1
+v_cmpx_u_f16                   vcc,      src0,     vsrc1
+v_cmpx_u_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_u_f32                   vcc,      src0,     vsrc1
+v_cmpx_u_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_u_f64                   vcc,      src0,     vsrc1
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/AMDGPUAsmGFX9.html 2021-09-19 16:16:20.000000000 +0000 @@ -0,0 +1,2126 @@ + + + + + + + + + Syntax of Core GFX9 Instructions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of Core GFX9 Instructions

+ +
+

Introduction

+

This document describes the syntax of core GFX9 instructions.

+
+
+

Notation

+

Notation used in this document is explained here.

+
+
+

Overview

+

An overview of generic syntax and other features of AMDGPU instructions may be found in this document.

+
+
+

Instructions

+
+

DS

+
INSTRUCTION                    DST         SRC0      SRC1      SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————
+ds_add_f32                                 vaddr,    vdata                    offset gds
+ds_add_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_add_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_add_src2_f32                            vaddr                              offset gds
+ds_add_src2_u32                            vaddr                              offset gds
+ds_add_src2_u64                            vaddr                              offset gds
+ds_add_u32                                 vaddr,    vdata                    offset gds
+ds_add_u64                                 vaddr,    vdata                    offset gds
+ds_and_b32                                 vaddr,    vdata                    offset gds
+ds_and_b64                                 vaddr,    vdata                    offset gds
+ds_and_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_and_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_and_src2_b32                            vaddr                              offset gds
+ds_and_src2_b64                            vaddr                              offset gds
+ds_append                      vdst                                           offset gds
+ds_bpermute_b32                vdst,       vaddr,    vdata                    offset
+ds_cmpst_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f32                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_f64                               vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_cmpst_rtn_f64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_condxchg32_rtn_b64          vdst,       vaddr,    vdata                    offset gds
+ds_consume                     vdst                                           offset gds
+ds_dec_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_dec_src2_u32                            vaddr                              offset gds
+ds_dec_src2_u64                            vaddr                              offset gds
+ds_dec_u32                                 vaddr,    vdata                    offset gds
+ds_dec_u64                                 vaddr,    vdata                    offset gds
+ds_gws_barrier                             vdata                              offset gds
+ds_gws_init                                vdata                              offset gds
+ds_gws_sema_br                             vdata                              offset gds
+ds_gws_sema_p                                                                 offset gds
+ds_gws_sema_release_all                                                       offset gds
+ds_gws_sema_v                                                                 offset gds
+ds_inc_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_inc_src2_u32                            vaddr                              offset gds
+ds_inc_src2_u64                            vaddr                              offset gds
+ds_inc_u32                                 vaddr,    vdata                    offset gds
+ds_inc_u64                                 vaddr,    vdata                    offset gds
+ds_max_f32                                 vaddr,    vdata                    offset gds
+ds_max_f64                                 vaddr,    vdata                    offset gds
+ds_max_i32                                 vaddr,    vdata                    offset gds
+ds_max_i64                                 vaddr,    vdata                    offset gds
+ds_max_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_max_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_max_src2_f32                            vaddr                              offset gds
+ds_max_src2_f64                            vaddr                              offset gds
+ds_max_src2_i32                            vaddr                              offset gds
+ds_max_src2_i64                            vaddr                              offset gds
+ds_max_src2_u32                            vaddr                              offset gds
+ds_max_src2_u64                            vaddr                              offset gds
+ds_max_u32                                 vaddr,    vdata                    offset gds
+ds_max_u64                                 vaddr,    vdata                    offset gds
+ds_min_f32                                 vaddr,    vdata                    offset gds
+ds_min_f64                                 vaddr,    vdata                    offset gds
+ds_min_i32                                 vaddr,    vdata                    offset gds
+ds_min_i64                                 vaddr,    vdata                    offset gds
+ds_min_rtn_f32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_f64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_i64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_min_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_min_src2_f32                            vaddr                              offset gds
+ds_min_src2_f64                            vaddr                              offset gds
+ds_min_src2_i32                            vaddr                              offset gds
+ds_min_src2_i64                            vaddr                              offset gds
+ds_min_src2_u32                            vaddr                              offset gds
+ds_min_src2_u64                            vaddr                              offset gds
+ds_min_u32                                 vaddr,    vdata                    offset gds
+ds_min_u64                                 vaddr,    vdata                    offset gds
+ds_mskor_b32                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_b64                               vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b32               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_mskor_rtn_b64               vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_nop
+ds_or_b32                                  vaddr,    vdata                    offset gds
+ds_or_b64                                  vaddr,    vdata                    offset gds
+ds_or_rtn_b32                  vdst,       vaddr,    vdata                    offset gds
+ds_or_rtn_b64                  vdst,       vaddr,    vdata                    offset gds
+ds_or_src2_b32                             vaddr                              offset gds
+ds_or_src2_b64                             vaddr                              offset gds
+ds_ordered_count               vdst,       vaddr                              offset gds
+ds_permute_b32                 vdst,       vaddr,    vdata                    offset
+ds_read2_b32                   vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2_b64                   vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b32               vdst:b32x2, vaddr                              offset0 offset1 gds
+ds_read2st64_b64               vdst:b64x2, vaddr                              offset0 offset1 gds
+ds_read_addtid_b32             vdst                                           offset gds
+ds_read_b128                   vdst,       vaddr                              offset gds
+ds_read_b32                    vdst,       vaddr                              offset gds
+ds_read_b64                    vdst,       vaddr                              offset gds
+ds_read_b96                    vdst,       vaddr                              offset gds
+ds_read_i16                    vdst,       vaddr                              offset gds
+ds_read_i8                     vdst,       vaddr                              offset gds
+ds_read_i8_d16                 vdst,       vaddr                              offset gds
+ds_read_i8_d16_hi              vdst,       vaddr                              offset gds
+ds_read_u16                    vdst,       vaddr                              offset gds
+ds_read_u16_d16                vdst,       vaddr                              offset gds
+ds_read_u16_d16_hi             vdst,       vaddr                              offset gds
+ds_read_u8                     vdst,       vaddr                              offset gds
+ds_read_u8_d16                 vdst,       vaddr                              offset gds
+ds_read_u8_d16_hi              vdst,       vaddr                              offset gds
+ds_rsub_rtn_u32                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_rtn_u64                vdst,       vaddr,    vdata                    offset gds
+ds_rsub_src2_u32                           vaddr                              offset gds
+ds_rsub_src2_u64                           vaddr                              offset gds
+ds_rsub_u32                                vaddr,    vdata                    offset gds
+ds_rsub_u64                                vaddr,    vdata                    offset gds
+ds_sub_rtn_u32                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_rtn_u64                 vdst,       vaddr,    vdata                    offset gds
+ds_sub_src2_u32                            vaddr                              offset gds
+ds_sub_src2_u64                            vaddr                              offset gds
+ds_sub_u32                                 vaddr,    vdata                    offset gds
+ds_sub_u64                                 vaddr,    vdata                    offset gds
+ds_swizzle_b32                 vdst,       vaddr                              pattern gds
+ds_wrap_rtn_b32                vdst,       vaddr,    vdata0,   vdata1         offset gds
+ds_write2_b32                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2_b64                              vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b32                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write2st64_b64                          vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_write_addtid_b32                        vdata                              offset gds
+ds_write_b128                              vaddr,    vdata                    offset gds
+ds_write_b16                               vaddr,    vdata                    offset gds
+ds_write_b16_d16_hi                        vaddr,    vdata                    offset gds
+ds_write_b32                               vaddr,    vdata                    offset gds
+ds_write_b64                               vaddr,    vdata                    offset gds
+ds_write_b8                                vaddr,    vdata                    offset gds
+ds_write_b8_d16_hi                         vaddr,    vdata                    offset gds
+ds_write_b96                               vaddr,    vdata                    offset gds
+ds_write_src2_b32                          vaddr                              offset gds
+ds_write_src2_b64                          vaddr                              offset gds
+ds_wrxchg2_rtn_b32             vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2_rtn_b64             vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b32         vdst:b32x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg2st64_rtn_b64         vdst:b64x2, vaddr,    vdata0,   vdata1         offset0 offset1 gds
+ds_wrxchg_rtn_b32              vdst,       vaddr,    vdata                    offset gds
+ds_wrxchg_rtn_b64              vdst,       vaddr,    vdata                    offset gds
+ds_xor_b32                                 vaddr,    vdata                    offset gds
+ds_xor_b64                                 vaddr,    vdata                    offset gds
+ds_xor_rtn_b32                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_rtn_b64                 vdst,       vaddr,    vdata                    offset gds
+ds_xor_src2_b32                            vaddr                              offset gds
+ds_xor_src2_b64                            vaddr                              offset gds
+
+
+

EXP

+
INSTRUCTION                    DST       SRC0      SRC1      SRC2      SRC3           MODIFIERS
+———————————————————————————————————————————————————————————————————————————————————————————————————
+exp                            tgt,      vsrc0,    vsrc1,    vsrc2,    vsrc3          done compr vm
+
+
+

FLAT

+
INSTRUCTION                    DST           SRC0      SRC1         SRC2           MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+flat_atomic_add                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_add_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_and                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_and_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_cmpswap            vdst:opt,     vaddr,    vdata:b32x2                 offset12 glc slc
+flat_atomic_cmpswap_x2         vdst:opt,     vaddr,    vdata:b64x2                 offset12 glc slc
+flat_atomic_dec                vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_dec_x2             vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_inc                vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_inc_x2             vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_or                 vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_or_x2              vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_smax               vdst:opt:i32, vaddr,    vdata:i32                   offset12 glc slc
+flat_atomic_smax_x2            vdst:opt:i64, vaddr,    vdata:i64                   offset12 glc slc
+flat_atomic_smin               vdst:opt:i32, vaddr,    vdata:i32                   offset12 glc slc
+flat_atomic_smin_x2            vdst:opt:i64, vaddr,    vdata:i64                   offset12 glc slc
+flat_atomic_sub                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_sub_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_swap               vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_swap_x2            vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_umax               vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_umax_x2            vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_umin               vdst:opt:u32, vaddr,    vdata:u32                   offset12 glc slc
+flat_atomic_umin_x2            vdst:opt:u64, vaddr,    vdata:u64                   offset12 glc slc
+flat_atomic_xor                vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_atomic_xor_x2             vdst:opt,     vaddr,    vdata                       offset12 glc slc
+flat_load_dword                vdst,         vaddr                                 offset12 glc slc
+flat_load_dwordx2              vdst,         vaddr                                 offset12 glc slc
+flat_load_dwordx3              vdst,         vaddr                                 offset12 glc slc
+flat_load_dwordx4              vdst,         vaddr                                 offset12 glc slc
+flat_load_sbyte                vdst,         vaddr                                 offset12 glc slc
+flat_load_sbyte_d16            vdst,         vaddr                                 offset12 glc slc
+flat_load_sbyte_d16_hi         vdst,         vaddr                                 offset12 glc slc
+flat_load_short_d16            vdst,         vaddr                                 offset12 glc slc
+flat_load_short_d16_hi         vdst,         vaddr                                 offset12 glc slc
+flat_load_sshort               vdst,         vaddr                                 offset12 glc slc
+flat_load_ubyte                vdst,         vaddr                                 offset12 glc slc
+flat_load_ubyte_d16            vdst,         vaddr                                 offset12 glc slc
+flat_load_ubyte_d16_hi         vdst,         vaddr                                 offset12 glc slc
+flat_load_ushort               vdst,         vaddr                                 offset12 glc slc
+flat_store_byte                              vaddr,    vdata                       offset12 glc slc
+flat_store_byte_d16_hi                       vaddr,    vdata                       offset12 glc slc
+flat_store_dword                             vaddr,    vdata                       offset12 glc slc
+flat_store_dwordx2                           vaddr,    vdata                       offset12 glc slc
+flat_store_dwordx3                           vaddr,    vdata                       offset12 glc slc
+flat_store_dwordx4                           vaddr,    vdata                       offset12 glc slc
+flat_store_short                             vaddr,    vdata                       offset12 glc slc
+flat_store_short_d16_hi                      vaddr,    vdata                       offset12 glc slc
+global_atomic_add              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_add_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_and              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_and_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_cmpswap          vdst:opt,     vaddr,    vdata:b32x2, saddr          offset13s glc slc
+global_atomic_cmpswap_x2       vdst:opt,     vaddr,    vdata:b64x2, saddr          offset13s glc slc
+global_atomic_dec              vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_dec_x2           vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_inc              vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_inc_x2           vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_or               vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_or_x2            vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_smax             vdst:opt:i32, vaddr,    vdata:i32,   saddr          offset13s glc slc
+global_atomic_smax_x2          vdst:opt:i64, vaddr,    vdata:i64,   saddr          offset13s glc slc
+global_atomic_smin             vdst:opt:i32, vaddr,    vdata:i32,   saddr          offset13s glc slc
+global_atomic_smin_x2          vdst:opt:i64, vaddr,    vdata:i64,   saddr          offset13s glc slc
+global_atomic_sub              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_sub_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_swap             vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_swap_x2          vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_umax             vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_umax_x2          vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_umin             vdst:opt:u32, vaddr,    vdata:u32,   saddr          offset13s glc slc
+global_atomic_umin_x2          vdst:opt:u64, vaddr,    vdata:u64,   saddr          offset13s glc slc
+global_atomic_xor              vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_atomic_xor_x2           vdst:opt,     vaddr,    vdata,       saddr          offset13s glc slc
+global_load_dword              vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_dwordx2            vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_dwordx3            vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_dwordx4            vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sbyte              vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sbyte_d16          vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sbyte_d16_hi       vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_short_d16          vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_short_d16_hi       vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_sshort             vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ubyte              vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ubyte_d16          vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ubyte_d16_hi       vdst,         vaddr,    saddr                       offset13s glc slc
+global_load_ushort             vdst,         vaddr,    saddr                       offset13s glc slc
+global_store_byte                            vaddr,    vdata,       saddr          offset13s glc slc
+global_store_byte_d16_hi                     vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dword                           vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dwordx2                         vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dwordx3                         vaddr,    vdata,       saddr          offset13s glc slc
+global_store_dwordx4                         vaddr,    vdata,       saddr          offset13s glc slc
+global_store_short                           vaddr,    vdata,       saddr          offset13s glc slc
+global_store_short_d16_hi                    vaddr,    vdata,       saddr          offset13s glc slc
+scratch_load_dword             vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_dwordx2           vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_dwordx3           vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_dwordx4           vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sbyte             vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sbyte_d16         vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sbyte_d16_hi      vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_short_d16         vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_short_d16_hi      vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_sshort            vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ubyte             vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ubyte_d16         vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ubyte_d16_hi      vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_load_ushort            vdst,         vaddr,    saddr                       offset13s glc slc
+scratch_store_byte                           vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_byte_d16_hi                    vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dword                          vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dwordx2                        vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dwordx3                        vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_dwordx4                        vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_short                          vaddr,    vdata,       saddr          offset13s glc slc
+scratch_store_short_d16_hi                   vaddr,    vdata,       saddr          offset13s glc slc
+
+
+

MIMG

+
INSTRUCTION                DST      SRC0       SRC1     SRC2      MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————————
+image_atomic_add                    vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_and                    vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_cmpswap                vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_dec                    vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_inc                    vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_or                     vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_smax                   vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_smin                   vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_sub                    vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_swap                   vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_umax                   vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_umin                   vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_atomic_xor                    vdata:dst, vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_gather4              vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_b            vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_b_cl         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_b_cl_o       vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_b_o          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c            vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_b          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_b_cl       vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_b_cl_o     vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_b_o        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_cl         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_cl_o       vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_l          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_l_o        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_lz         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_lz_o       vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_c_o          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_cl           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_cl_o         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_l            vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_l_o          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_lz           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_lz_o         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_gather4_o            vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 lwe da d16
+image_get_lod              vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da
+image_get_resinfo          vdst,    vaddr,     srsrc              dmask unorm glc slc a16 tfe lwe da
+image_load                 vdst,    vaddr,     srsrc              dmask unorm glc slc a16 tfe lwe da d16
+image_load_mip             vdst,    vaddr,     srsrc              dmask unorm glc slc a16 tfe lwe da d16
+image_load_mip_pck         vdst,    vaddr,     srsrc              dmask unorm glc slc a16 tfe lwe da
+image_load_mip_pck_sgn     vdst,    vaddr,     srsrc              dmask unorm glc slc a16 tfe lwe da
+image_load_pck             vdst,    vaddr,     srsrc              dmask unorm glc slc a16 tfe lwe da
+image_load_pck_sgn         vdst,    vaddr,     srsrc              dmask unorm glc slc a16 tfe lwe da
+image_sample               vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_b             vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc tfe lwe da d16
+image_sample_b_cl          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_b_cl_o        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_b_o           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc tfe lwe da d16
+image_sample_c             vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_b           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_b_cl        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_b_cl_o      vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_b_o         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_cd          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_cd_cl       vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_cd_cl_o     vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_cd_o        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_cl          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_cl_o        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_d           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_d_cl        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_d_cl_o      vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_d_o         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_l           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_l_o         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_lz          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_lz_o        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_c_o           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_cd            vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_cd_cl         vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_cd_cl_o       vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_cd_o          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_cl            vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_cl_o          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_d             vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_d_cl          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_d_cl_o        vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_d_o           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_l             vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_l_o           vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_lz            vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_lz_o          vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_sample_o             vdst,    vaddr,     srsrc,   ssamp     dmask unorm glc slc a16 tfe lwe da d16
+image_store                         vdata,     vaddr,   srsrc     dmask unorm glc slc a16 lwe da d16
+image_store_mip                     vdata,     vaddr,   srsrc     dmask unorm glc slc a16 lwe da d16
+image_store_mip_pck                 vdata,     vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+image_store_pck                     vdata,     vaddr,   srsrc     dmask unorm glc slc a16 lwe da
+
+
+

MTBUF

+
INSTRUCTION                     DST   SRC0   SRC1   SRC2    SRC3      MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————
+tbuffer_load_format_d16_x       vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xy      vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xyz     vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_d16_xyzw    vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_x           vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xy          vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xyz         vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_load_format_xyzw        vdst, vaddr, srsrc, soffset           fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_x            vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xy           vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyz          vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_d16_xyzw         vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_x                vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xy               vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xyz              vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+tbuffer_store_format_xyzw             vdata, vaddr, srsrc,  soffset   fmt idxen offen offset12 glc slc
+
+
+

MUBUF

+
INSTRUCTION                   DST   SRC0             SRC1    SRC2    SRC3     MODIFIERS
+——————————————————————————————————————————————————————————————————————————————————————————————————————————————
+buffer_atomic_add                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_add_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_and_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap               vdata:dst:b32x2, vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_cmpswap_x2            vdata:dst:b64x2, vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec                   vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_dec_x2                vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc                   vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_inc_x2                vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or                    vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_or_x2                 vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax                  vdata:dst:i32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smax_x2               vdata:dst:i64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin                  vdata:dst:i32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_smin_x2               vdata:dst:i64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_sub_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap                  vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_swap_x2               vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax                  vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umax_x2               vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin                  vdata:dst:u32,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_umin_x2               vdata:dst:u64,   vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor                   vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_atomic_xor_x2                vdata:dst,       vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_load_dword             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_dwordx2           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_dwordx3           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_dwordx4           vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_hi_x   vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_x      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xy     vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xyz    vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_d16_xyzw   vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_x          vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_format_xy         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_xyz        vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_format_xyzw       vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_sbyte             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_sbyte_d16         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_sbyte_d16_hi      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_short_d16         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_short_d16_hi      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_sshort            vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_ubyte             vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_load_ubyte_d16         vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_ubyte_d16_hi      vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc
+buffer_load_ushort            vdst, vaddr,           srsrc,  soffset          idxen offen offset12 glc slc lds
+buffer_store_byte                   vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_byte_d16_hi            vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dword                  vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx2                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx3                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_dwordx4                vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_hi_x        vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_x           vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xy          vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyz         vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_d16_xyzw        vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_x               vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xy              vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyz             vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_format_xyzw            vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_lds_dword              srsrc,           soffset                  offset12 lds
+buffer_store_short                  vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_store_short_d16_hi           vdata,           vaddr,  srsrc,  soffset  idxen offen offset12 glc slc
+buffer_wbinvl1
+buffer_wbinvl1_vol
+
+
+

SMEM

+
INSTRUCTION                    DST       SRC0             SRC1      SRC2           MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————
+s_atc_probe                              probe,           sbase,    soffset
+s_atc_probe_buffer                       probe,           sbase,    soffset
+s_atomic_add                             sdata:dst,       sbase,    soffset        glc
+s_atomic_add_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_and                             sdata:dst,       sbase,    soffset        glc
+s_atomic_and_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_cmpswap                         sdata:dst:b32x2, sbase,    soffset        glc
+s_atomic_cmpswap_x2                      sdata:dst:b64x2, sbase,    soffset        glc
+s_atomic_dec                             sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_dec_x2                          sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_inc                             sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_inc_x2                          sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_or                              sdata:dst,       sbase,    soffset        glc
+s_atomic_or_x2                           sdata:dst,       sbase,    soffset        glc
+s_atomic_smax                            sdata:dst:i32,   sbase,    soffset        glc
+s_atomic_smax_x2                         sdata:dst:i64,   sbase,    soffset        glc
+s_atomic_smin                            sdata:dst:i32,   sbase,    soffset        glc
+s_atomic_smin_x2                         sdata:dst:i64,   sbase,    soffset        glc
+s_atomic_sub                             sdata:dst,       sbase,    soffset        glc
+s_atomic_sub_x2                          sdata:dst,       sbase,    soffset        glc
+s_atomic_swap                            sdata:dst,       sbase,    soffset        glc
+s_atomic_swap_x2                         sdata:dst,       sbase,    soffset        glc
+s_atomic_umax                            sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_umax_x2                         sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_umin                            sdata:dst:u32,   sbase,    soffset        glc
+s_atomic_umin_x2                         sdata:dst:u64,   sbase,    soffset        glc
+s_atomic_xor                             sdata:dst,       sbase,    soffset        glc
+s_atomic_xor_x2                          sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_add                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_add_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_and                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_and_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_cmpswap                  sdata:dst:b32x2, sbase,    soffset        glc
+s_buffer_atomic_cmpswap_x2               sdata:dst:b64x2, sbase,    soffset        glc
+s_buffer_atomic_dec                      sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_dec_x2                   sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_inc                      sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_inc_x2                   sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_or                       sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_or_x2                    sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_smax                     sdata:dst:i32,   sbase,    soffset        glc
+s_buffer_atomic_smax_x2                  sdata:dst:i64,   sbase,    soffset        glc
+s_buffer_atomic_smin                     sdata:dst:i32,   sbase,    soffset        glc
+s_buffer_atomic_smin_x2                  sdata:dst:i64,   sbase,    soffset        glc
+s_buffer_atomic_sub                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_sub_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_swap                     sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_swap_x2                  sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_umax                     sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_umax_x2                  sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_umin                     sdata:dst:u32,   sbase,    soffset        glc
+s_buffer_atomic_umin_x2                  sdata:dst:u64,   sbase,    soffset        glc
+s_buffer_atomic_xor                      sdata:dst,       sbase,    soffset        glc
+s_buffer_atomic_xor_x2                   sdata:dst,       sbase,    soffset        glc
+s_buffer_load_dword            sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx16         sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx2          sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx4          sdst,     sbase,           soffset                  glc
+s_buffer_load_dwordx8          sdst,     sbase,           soffset                  glc
+s_buffer_store_dword                     sdata,           sbase,    soffset        glc
+s_buffer_store_dwordx2                   sdata,           sbase,    soffset        glc
+s_buffer_store_dwordx4                   sdata,           sbase,    soffset        glc
+s_dcache_discard                         sbase,           soffset
+s_dcache_discard_x2                      sbase,           soffset
+s_dcache_inv
+s_dcache_inv_vol
+s_dcache_wb
+s_dcache_wb_vol
+s_load_dword                   sdst,     sbase,           soffset                  glc
+s_load_dwordx16                sdst,     sbase,           soffset                  glc
+s_load_dwordx2                 sdst,     sbase,           soffset                  glc
+s_load_dwordx4                 sdst,     sbase,           soffset                  glc
+s_load_dwordx8                 sdst,     sbase,           soffset                  glc
+s_memrealtime                  sdst:b64
+s_memtime                      sdst:b64
+s_scratch_load_dword           sdst,     sbase,           soffset                  glc
+s_scratch_load_dwordx2         sdst,     sbase,           soffset                  glc
+s_scratch_load_dwordx4         sdst,     sbase,           soffset                  glc
+s_scratch_store_dword                    sdata,           sbase,    soffset        glc
+s_scratch_store_dwordx2                  sdata,           sbase,    soffset        glc
+s_scratch_store_dwordx4                  sdata,           sbase,    soffset        glc
+s_store_dword                            sdata,           sbase,    soffset        glc
+s_store_dwordx2                          sdata,           sbase,    soffset        glc
+s_store_dwordx4                          sdata,           sbase,    soffset        glc
+
+
+

SOP1

+
INSTRUCTION                    DST       SRC
+———————————————————————————————————————————————————
+s_abs_i32                      sdst,     ssrc
+s_and_saveexec_b64             sdst,     ssrc
+s_andn1_saveexec_b64           sdst,     ssrc
+s_andn1_wrexec_b64             sdst,     ssrc
+s_andn2_saveexec_b64           sdst,     ssrc
+s_andn2_wrexec_b64             sdst,     ssrc
+s_bcnt0_i32_b32                sdst,     ssrc
+s_bcnt0_i32_b64                sdst,     ssrc
+s_bcnt1_i32_b32                sdst,     ssrc
+s_bcnt1_i32_b64                sdst,     ssrc
+s_bitreplicate_b64_b32         sdst,     ssrc
+s_bitset0_b32                  sdst,     ssrc
+s_bitset0_b64                  sdst,     ssrc:b32
+s_bitset1_b32                  sdst,     ssrc
+s_bitset1_b64                  sdst,     ssrc:b32
+s_brev_b32                     sdst,     ssrc
+s_brev_b64                     sdst,     ssrc
+s_cbranch_join                           ssrc
+s_cmov_b32                     sdst,     ssrc
+s_cmov_b64                     sdst,     ssrc
+s_ff0_i32_b32                  sdst,     ssrc
+s_ff0_i32_b64                  sdst,     ssrc
+s_ff1_i32_b32                  sdst,     ssrc
+s_ff1_i32_b64                  sdst,     ssrc
+s_flbit_i32                    sdst,     ssrc
+s_flbit_i32_b32                sdst,     ssrc
+s_flbit_i32_b64                sdst,     ssrc
+s_flbit_i32_i64                sdst,     ssrc
+s_getpc_b64                    sdst
+s_mov_b32                      sdst,     ssrc
+s_mov_b64                      sdst,     ssrc
+s_movreld_b32                  sdst,     ssrc
+s_movreld_b64                  sdst,     ssrc
+s_movrels_b32                  sdst,     ssrc
+s_movrels_b64                  sdst,     ssrc
+s_nand_saveexec_b64            sdst,     ssrc
+s_nor_saveexec_b64             sdst,     ssrc
+s_not_b32                      sdst,     ssrc
+s_not_b64                      sdst,     ssrc
+s_or_saveexec_b64              sdst,     ssrc
+s_orn1_saveexec_b64            sdst,     ssrc
+s_orn2_saveexec_b64            sdst,     ssrc
+s_quadmask_b32                 sdst,     ssrc
+s_quadmask_b64                 sdst,     ssrc
+s_rfe_b64                                ssrc
+s_set_gpr_idx_idx                        ssrc
+s_setpc_b64                              ssrc
+s_sext_i32_i16                 sdst,     ssrc
+s_sext_i32_i8                  sdst,     ssrc
+s_swappc_b64                   sdst,     ssrc
+s_wqm_b32                      sdst,     ssrc
+s_wqm_b64                      sdst,     ssrc
+s_xnor_saveexec_b64            sdst,     ssrc
+s_xor_saveexec_b64             sdst,     ssrc
+
+
+

SOP2

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+s_absdiff_i32                  sdst,     ssrc0,     ssrc1
+s_add_i32                      sdst,     ssrc0,     ssrc1
+s_add_u32                      sdst,     ssrc0,     ssrc1
+s_addc_u32                     sdst,     ssrc0,     ssrc1
+s_and_b32                      sdst,     ssrc0,     ssrc1
+s_and_b64                      sdst,     ssrc0,     ssrc1
+s_andn2_b32                    sdst,     ssrc0,     ssrc1
+s_andn2_b64                    sdst,     ssrc0,     ssrc1
+s_ashr_i32                     sdst,     ssrc0,     ssrc1:u32
+s_ashr_i64                     sdst,     ssrc0,     ssrc1:u32
+s_bfe_i32                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_i64                      sdst,     ssrc0,     ssrc1:u32
+s_bfe_u32                      sdst,     ssrc0,     ssrc1
+s_bfe_u64                      sdst,     ssrc0,     ssrc1:u32
+s_bfm_b32                      sdst,     ssrc0,     ssrc1
+s_bfm_b64                      sdst,     ssrc0:b32, ssrc1:b32
+s_cbranch_g_fork                         ssrc0,     ssrc1
+s_cselect_b32                  sdst,     ssrc0,     ssrc1
+s_cselect_b64                  sdst,     ssrc0,     ssrc1
+s_lshl1_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl2_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl3_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl4_add_u32                sdst,     ssrc0,     ssrc1
+s_lshl_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshl_b64                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b32                     sdst,     ssrc0,     ssrc1:u32
+s_lshr_b64                     sdst,     ssrc0,     ssrc1:u32
+s_max_i32                      sdst,     ssrc0,     ssrc1
+s_max_u32                      sdst,     ssrc0,     ssrc1
+s_min_i32                      sdst,     ssrc0,     ssrc1
+s_min_u32                      sdst,     ssrc0,     ssrc1
+s_mul_hi_i32                   sdst,     ssrc0,     ssrc1
+s_mul_hi_u32                   sdst,     ssrc0,     ssrc1
+s_mul_i32                      sdst,     ssrc0,     ssrc1
+s_nand_b32                     sdst,     ssrc0,     ssrc1
+s_nand_b64                     sdst,     ssrc0,     ssrc1
+s_nor_b32                      sdst,     ssrc0,     ssrc1
+s_nor_b64                      sdst,     ssrc0,     ssrc1
+s_or_b32                       sdst,     ssrc0,     ssrc1
+s_or_b64                       sdst,     ssrc0,     ssrc1
+s_orn2_b32                     sdst,     ssrc0,     ssrc1
+s_orn2_b64                     sdst,     ssrc0,     ssrc1
+s_pack_hh_b32_b16              sdst,     ssrc0:b32, ssrc1:b32
+s_pack_lh_b32_b16              sdst,     ssrc0,     ssrc1:b32
+s_pack_ll_b32_b16              sdst,     ssrc0,     ssrc1
+s_rfe_restore_b64                        ssrc0,     ssrc1:b32
+s_sub_i32                      sdst,     ssrc0,     ssrc1
+s_sub_u32                      sdst,     ssrc0,     ssrc1
+s_subb_u32                     sdst,     ssrc0,     ssrc1
+s_xnor_b32                     sdst,     ssrc0,     ssrc1
+s_xnor_b64                     sdst,     ssrc0,     ssrc1
+s_xor_b32                      sdst,     ssrc0,     ssrc1
+s_xor_b64                      sdst,     ssrc0,     ssrc1
+
+
+

SOPC

+
INSTRUCTION                    SRC0      SRC1
+———————————————————————————————————————————————————
+s_bitcmp0_b32                  ssrc0,    ssrc1
+s_bitcmp0_b64                  ssrc0,    ssrc1:u32
+s_bitcmp1_b32                  ssrc0,    ssrc1
+s_bitcmp1_b64                  ssrc0,    ssrc1:u32
+s_cmp_eq_i32                   ssrc0,    ssrc1
+s_cmp_eq_u32                   ssrc0,    ssrc1
+s_cmp_eq_u64                   ssrc0,    ssrc1
+s_cmp_ge_i32                   ssrc0,    ssrc1
+s_cmp_ge_u32                   ssrc0,    ssrc1
+s_cmp_gt_i32                   ssrc0,    ssrc1
+s_cmp_gt_u32                   ssrc0,    ssrc1
+s_cmp_le_i32                   ssrc0,    ssrc1
+s_cmp_le_u32                   ssrc0,    ssrc1
+s_cmp_lg_i32                   ssrc0,    ssrc1
+s_cmp_lg_u32                   ssrc0,    ssrc1
+s_cmp_lg_u64                   ssrc0,    ssrc1
+s_cmp_lt_i32                   ssrc0,    ssrc1
+s_cmp_lt_u32                   ssrc0,    ssrc1
+s_set_gpr_idx_on               ssrc,     imask
+s_setvskip                     ssrc0,    ssrc1
+
+
+

SOPK

+
INSTRUCTION                    DST       SRC0      SRC1
+—————————————————————————————————————————————————————————————
+s_addk_i32                     sdst,     imm16
+s_call_b64                     sdst,     label
+s_cbranch_i_fork                         ssrc,     label
+s_cmovk_i32                    sdst,     imm16
+s_cmpk_eq_i32                            ssrc,     imm16
+s_cmpk_eq_u32                            ssrc,     imm16
+s_cmpk_ge_i32                            ssrc,     imm16
+s_cmpk_ge_u32                            ssrc,     imm16
+s_cmpk_gt_i32                            ssrc,     imm16
+s_cmpk_gt_u32                            ssrc,     imm16
+s_cmpk_le_i32                            ssrc,     imm16
+s_cmpk_le_u32                            ssrc,     imm16
+s_cmpk_lg_i32                            ssrc,     imm16
+s_cmpk_lg_u32                            ssrc,     imm16
+s_cmpk_lt_i32                            ssrc,     imm16
+s_cmpk_lt_u32                            ssrc,     imm16
+s_getreg_b32                   sdst,     hwreg
+s_movk_i32                     sdst,     imm16
+s_mulk_i32                     sdst,     imm16
+s_setreg_b32                   hwreg,    ssrc
+s_setreg_imm32_b32             hwreg,    simm32
+
+
+

SOPP

+
INSTRUCTION                    SRC
+—————————————————————————————————————————
+s_barrier
+s_branch                       label
+s_cbranch_cdbgsys              label
+s_cbranch_cdbgsys_and_user     label
+s_cbranch_cdbgsys_or_user      label
+s_cbranch_cdbguser             label
+s_cbranch_execnz               label
+s_cbranch_execz                label
+s_cbranch_scc0                 label
+s_cbranch_scc1                 label
+s_cbranch_vccnz                label
+s_cbranch_vccz                 label
+s_decperflevel                 imm16
+s_endpgm
+s_endpgm_ordered_ps_done
+s_endpgm_saved
+s_icache_inv
+s_incperflevel                 imm16
+s_nop                          imm16
+s_sendmsg                      msg
+s_sendmsghalt                  msg
+s_set_gpr_idx_mode             imask
+s_set_gpr_idx_off
+s_sethalt                      imm16
+s_setkill                      imm16
+s_setprio                      imm16
+s_sleep                        imm16
+s_trap                         imm16
+s_ttracedata
+s_waitcnt                      waitcnt
+s_wakeup
+
+
+

VINTRP

+
INSTRUCTION                    DST       SRC0       SRC1
+——————————————————————————————————————————————————————————————
+v_interp_mov_f32               vdst,     param:b32, attr:b32
+v_interp_p1_f32                vdst,     vsrc,      attr:b32
+v_interp_p2_f32                vdst,     vsrc,      attr:b32
+
+
+

VOP1

+
INSTRUCTION                         DST        SRC            MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+v_bfrev_b32                         vdst,      src
+v_bfrev_b32_dpp                     vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_bfrev_b32_sdwa                    vdst,      src:m          dst_sel dst_unused src0_sel
+v_ceil_f16                          vdst,      src
+v_ceil_f16_dpp                      vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_ceil_f16_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_ceil_f32                          vdst,      src
+v_ceil_f32_dpp                      vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_ceil_f32_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_ceil_f64                          vdst,      src
+v_clrexcp
+v_cos_f16                           vdst,      src
+v_cos_f16_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cos_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cos_f32                           vdst,      src
+v_cos_f32_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cos_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_f32                       vdst,      src
+v_cvt_f16_f32_dpp                   vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_f32_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_i16                       vdst,      src
+v_cvt_f16_i16_dpp                   vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_i16_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f16_u16                       vdst,      src
+v_cvt_f16_u16_dpp                   vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f16_u16_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_f16                       vdst,      src
+v_cvt_f32_f16_dpp                   vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_f16_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_f64                       vdst,      src
+v_cvt_f32_i32                       vdst,      src
+v_cvt_f32_i32_dpp                   vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_i32_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_u32                       vdst,      src
+v_cvt_f32_u32_dpp                   vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_u32_sdwa                  vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte0                    vdst,      src
+v_cvt_f32_ubyte0_dpp                vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte0_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte1                    vdst,      src
+v_cvt_f32_ubyte1_dpp                vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte1_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte2                    vdst,      src
+v_cvt_f32_ubyte2_dpp                vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte2_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f32_ubyte3                    vdst,      src
+v_cvt_f32_ubyte3_dpp                vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_f32_ubyte3_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_f64_f32                       vdst,      src
+v_cvt_f64_i32                       vdst,      src
+v_cvt_f64_u32                       vdst,      src
+v_cvt_flr_i32_f32                   vdst,      src
+v_cvt_flr_i32_f32_dpp               vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_flr_i32_f32_sdwa              vdst,      src:m          dst_sel dst_unused src0_sel
+v_cvt_i16_f16                       vdst,      src
+v_cvt_i16_f16_dpp                   vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_i16_f16_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_i32_f32                       vdst,      src
+v_cvt_i32_f32_dpp                   vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_i32_f32_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_i32_f64                       vdst,      src
+v_cvt_norm_i16_f16                  vdst,      src
+v_cvt_norm_i16_f16_dpp              vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_norm_i16_f16_sdwa             vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_norm_u16_f16                  vdst,      src
+v_cvt_norm_u16_f16_dpp              vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_norm_u16_f16_sdwa             vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_off_f32_i4                    vdst,      src
+v_cvt_off_f32_i4_dpp                vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_off_f32_i4_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_cvt_rpi_i32_f32                   vdst,      src
+v_cvt_rpi_i32_f32_dpp               vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_rpi_i32_f32_sdwa              vdst,      src:m          dst_sel dst_unused src0_sel
+v_cvt_u16_f16                       vdst,      src
+v_cvt_u16_f16_dpp                   vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_u16_f16_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_u32_f32                       vdst,      src
+v_cvt_u32_f32_dpp                   vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_cvt_u32_f32_sdwa                  vdst,      src:m          clamp dst_sel dst_unused src0_sel
+v_cvt_u32_f64                       vdst,      src
+v_exp_f16                           vdst,      src
+v_exp_f16_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_exp_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_exp_f32                           vdst,      src
+v_exp_f32_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_exp_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_exp_legacy_f32                    vdst,      src
+v_exp_legacy_f32_dpp                vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_exp_legacy_f32_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_ffbh_i32                          vdst,      src
+v_ffbh_i32_dpp                      vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_ffbh_i32_sdwa                     vdst,      src:m          dst_sel dst_unused src0_sel
+v_ffbh_u32                          vdst,      src
+v_ffbh_u32_dpp                      vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_ffbh_u32_sdwa                     vdst,      src:m          dst_sel dst_unused src0_sel
+v_ffbl_b32                          vdst,      src
+v_ffbl_b32_dpp                      vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_ffbl_b32_sdwa                     vdst,      src:m          dst_sel dst_unused src0_sel
+v_floor_f16                         vdst,      src
+v_floor_f16_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_floor_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_floor_f32                         vdst,      src
+v_floor_f32_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_floor_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_floor_f64                         vdst,      src
+v_fract_f16                         vdst,      src
+v_fract_f16_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_fract_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_fract_f32                         vdst,      src
+v_fract_f32_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_fract_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_fract_f64                         vdst,      src
+v_frexp_exp_i16_f16                 vdst,      src
+v_frexp_exp_i16_f16_dpp             vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_exp_i16_f16_sdwa            vdst,      src:m          dst_sel dst_unused src0_sel
+v_frexp_exp_i32_f32                 vdst,      src
+v_frexp_exp_i32_f32_dpp             vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_exp_i32_f32_sdwa            vdst,      src:m          dst_sel dst_unused src0_sel
+v_frexp_exp_i32_f64                 vdst,      src
+v_frexp_mant_f16                    vdst,      src
+v_frexp_mant_f16_dpp                vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_mant_f16_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_frexp_mant_f32                    vdst,      src
+v_frexp_mant_f32_dpp                vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_frexp_mant_f32_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_frexp_mant_f64                    vdst,      src
+v_log_f16                           vdst,      src
+v_log_f16_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_log_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_log_f32                           vdst,      src
+v_log_f32_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_log_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_log_legacy_f32                    vdst,      src
+v_log_legacy_f32_dpp                vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_log_legacy_f32_sdwa               vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_mov_b32                           vdst,      src
+v_mov_b32_dpp                       vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_mov_b32_sdwa                      vdst,      src:m          dst_sel dst_unused src0_sel
+v_nop
+v_not_b32                           vdst,      src
+v_not_b32_dpp                       vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_not_b32_sdwa                      vdst,      src:m          dst_sel dst_unused src0_sel
+v_rcp_f16                           vdst,      src
+v_rcp_f16_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rcp_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rcp_f32                           vdst,      src
+v_rcp_f32_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rcp_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rcp_f64                           vdst,      src
+v_rcp_iflag_f32                     vdst,      src
+v_rcp_iflag_f32_dpp                 vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rcp_iflag_f32_sdwa                vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_readfirstlane_b32                 sdst,      src
+v_rndne_f16                         vdst,      src
+v_rndne_f16_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rndne_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rndne_f32                         vdst,      src
+v_rndne_f32_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rndne_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rndne_f64                         vdst,      src
+v_rsq_f16                           vdst,      src
+v_rsq_f16_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rsq_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rsq_f32                           vdst,      src
+v_rsq_f32_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_rsq_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_rsq_f64                           vdst,      src
+v_sat_pk_u8_i16                     vdst:u8x4, src
+v_sat_pk_u8_i16_dpp                 vdst:u8x4, vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_sat_pk_u8_i16_sdwa                vdst:u8x4, src:m          dst_sel dst_unused src0_sel
+v_screen_partition_4se_b32          vdst,      src
+v_screen_partition_4se_b32_dpp      vdst,      vsrc           dpp_ctrl row_mask bank_mask bound_ctrl
+v_screen_partition_4se_b32_sdwa     vdst,      src:m          dst_sel dst_unused src0_sel
+v_sin_f16                           vdst,      src
+v_sin_f16_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sin_f16_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sin_f32                           vdst,      src
+v_sin_f32_dpp                       vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sin_f32_sdwa                      vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f16                          vdst,      src
+v_sqrt_f16_dpp                      vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sqrt_f16_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f32                          vdst,      src
+v_sqrt_f32_dpp                      vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_sqrt_f32_sdwa                     vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_sqrt_f64                          vdst,      src
+v_swap_b32                          vdst,      vsrc
+v_trunc_f16                         vdst,      src
+v_trunc_f16_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_trunc_f16_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_trunc_f32                         vdst,      src
+v_trunc_f32_dpp                     vdst,      vsrc:m         dpp_ctrl row_mask bank_mask bound_ctrl
+v_trunc_f32_sdwa                    vdst,      src:m          clamp omod dst_sel dst_unused src0_sel
+v_trunc_f64                         vdst,      src
+
+
+

+VOP2

+
+INSTRUCTION            DST0  DST1 SRC0        SRC1       SRC2    MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add_co_u32           vdst, vcc, src0,       vsrc1
+v_add_co_u32_dpp       vdst, vcc, vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_co_u32_sdwa      vdst, vcc, src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_add_f16              vdst,      src0,       vsrc1
+v_add_f16_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_f16_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_add_f32              vdst,      src0,       vsrc1
+v_add_f32_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_f32_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_add_u16              vdst,      src0,       vsrc1
+v_add_u16_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_u16_sdwa         vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_add_u32              vdst,      src0,       vsrc1
+v_add_u32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_add_u32_sdwa         vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_addc_co_u32          vdst, vcc, src0,       vsrc1,     vcc
+v_addc_co_u32_dpp      vdst, vcc, vsrc0,      vsrc1,     vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_addc_co_u32_sdwa     vdst, vcc, src0:m,     src1:m,    vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_and_b32              vdst,      src0,       vsrc1
+v_and_b32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_and_b32_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_ashrrev_i16          vdst,      src0:u16,   vsrc1
+v_ashrrev_i16_dpp      vdst,      vsrc0:u16,  vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_ashrrev_i16_sdwa     vdst,      src0:m:u16, src1:m             dst_sel dst_unused src0_sel src1_sel
+v_ashrrev_i32          vdst,      src0:u32,   vsrc1
+v_ashrrev_i32_dpp      vdst,      vsrc0:u32,  vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_ashrrev_i32_sdwa     vdst,      src0:m:u32, src1:m             dst_sel dst_unused src0_sel src1_sel
+v_cndmask_b32          vdst,      src0,       vsrc1,     vcc
+v_cndmask_b32_dpp      vdst,      vsrc0,      vsrc1,     vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_cndmask_b32_sdwa     vdst,      src0:m,     src1:m,    vcc     dst_sel dst_unused src0_sel src1_sel
+v_ldexp_f16            vdst,      src0,       vsrc1:i16
+v_ldexp_f16_dpp        vdst,      vsrc0:m,    vsrc1:i16          dpp_ctrl row_mask bank_mask bound_ctrl
+v_ldexp_f16_sdwa       vdst,      src0:m,     src1:m:i16         clamp omod dst_sel dst_unused src0_sel src1_sel
+v_lshlrev_b16          vdst,      src0:u16,   vsrc1
+v_lshlrev_b16_dpp      vdst,      vsrc0:u16,  vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshlrev_b16_sdwa     vdst,      src0:m:u16, src1:m             dst_sel dst_unused src0_sel src1_sel
+v_lshlrev_b32          vdst,      src0:u32,   vsrc1
+v_lshlrev_b32_dpp      vdst,      vsrc0:u32,  vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshlrev_b32_sdwa     vdst,      src0:m:u32, src1:m             dst_sel dst_unused src0_sel src1_sel
+v_lshrrev_b16          vdst,      src0:u16,   vsrc1
+v_lshrrev_b16_dpp      vdst,      vsrc0:u16,  vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshrrev_b16_sdwa     vdst,      src0:m:u16, src1:m             dst_sel dst_unused src0_sel src1_sel
+v_lshrrev_b32          vdst,      src0:u32,   vsrc1
+v_lshrrev_b32_dpp      vdst,      vsrc0:u32,  vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_lshrrev_b32_sdwa     vdst,      src0:m:u32, src1:m             dst_sel dst_unused src0_sel src1_sel
+v_mac_f16              vdst,      src0,       vsrc1
+v_mac_f16_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_mac_f32              vdst,      src0,       vsrc1
+v_mac_f32_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_madak_f16            vdst,      src0,       vsrc1,     simm32
+v_madak_f32            vdst,      src0,       vsrc1,     simm32
+v_madmk_f16            vdst,      src0,       simm32,    vsrc2
+v_madmk_f32            vdst,      src0,       simm32,    vsrc2
+v_max_f16              vdst,      src0,       vsrc1
+v_max_f16_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_f16_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_max_f32              vdst,      src0,       vsrc1
+v_max_f32_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_f32_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_max_i16              vdst,      src0,       vsrc1
+v_max_i16_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_i16_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_max_i32              vdst,      src0,       vsrc1
+v_max_i32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_i32_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_max_u16              vdst,      src0,       vsrc1
+v_max_u16_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_u16_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_max_u32              vdst,      src0,       vsrc1
+v_max_u32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_max_u32_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_f16              vdst,      src0,       vsrc1
+v_min_f16_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_f16_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_min_f32              vdst,      src0,       vsrc1
+v_min_f32_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_f32_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_min_i16              vdst,      src0,       vsrc1
+v_min_i16_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_i16_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_i32              vdst,      src0,       vsrc1
+v_min_i32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_i32_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_u16              vdst,      src0,       vsrc1
+v_min_u16_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_u16_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_min_u32              vdst,      src0,       vsrc1
+v_min_u32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_min_u32_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_f16              vdst,      src0,       vsrc1
+v_mul_f16_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_f16_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_f32              vdst,      src0,       vsrc1
+v_mul_f32_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_f32_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_i32_i24       vdst,      src0,       vsrc1
+v_mul_hi_i32_i24_dpp   vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_hi_i32_i24_sdwa  vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_hi_u32_u24       vdst,      src0,       vsrc1
+v_mul_hi_u32_u24_dpp   vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_hi_u32_u24_sdwa  vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_i32_i24          vdst,      src0,       vsrc1
+v_mul_i32_i24_dpp      vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_i32_i24_sdwa     vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_mul_legacy_f32       vdst,      src0,       vsrc1
+v_mul_legacy_f32_dpp   vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_legacy_f32_sdwa  vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_mul_lo_u16           vdst,      src0,       vsrc1
+v_mul_lo_u16_dpp       vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_lo_u16_sdwa      vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_mul_u32_u24          vdst,      src0,       vsrc1
+v_mul_u32_u24_dpp      vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_mul_u32_u24_sdwa     vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_or_b32               vdst,      src0,       vsrc1
+v_or_b32_dpp           vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_or_b32_sdwa          vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+v_sub_co_u32           vdst, vcc, src0,       vsrc1
+v_sub_co_u32_dpp       vdst, vcc, vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_co_u32_sdwa      vdst, vcc, src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_f16              vdst,      src0,       vsrc1
+v_sub_f16_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_f16_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_sub_f32              vdst,      src0,       vsrc1
+v_sub_f32_dpp          vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_f32_sdwa         vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_sub_u16              vdst,      src0,       vsrc1
+v_sub_u16_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_u16_sdwa         vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_sub_u32              vdst,      src0,       vsrc1
+v_sub_u32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_sub_u32_sdwa         vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_subb_co_u32          vdst, vcc, src0,       vsrc1,     vcc
+v_subb_co_u32_dpp      vdst, vcc, vsrc0,      vsrc1,     vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_subb_co_u32_sdwa     vdst, vcc, src0:m,     src1:m,    vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_subbrev_co_u32       vdst, vcc, src0,       vsrc1,     vcc
+v_subbrev_co_u32_dpp   vdst, vcc, vsrc0,      vsrc1,     vcc     dpp_ctrl row_mask bank_mask bound_ctrl
+v_subbrev_co_u32_sdwa  vdst, vcc, src0:m,     src1:m,    vcc     clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_co_u32        vdst, vcc, src0,       vsrc1
+v_subrev_co_u32_dpp    vdst, vcc, vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_co_u32_sdwa   vdst, vcc, src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_f16           vdst,      src0,       vsrc1
+v_subrev_f16_dpp       vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_f16_sdwa      vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_subrev_f32           vdst,      src0,       vsrc1
+v_subrev_f32_dpp       vdst,      vsrc0:m,    vsrc1:m            dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_f32_sdwa      vdst,      src0:m,     src1:m             clamp omod dst_sel dst_unused src0_sel src1_sel
+v_subrev_u16           vdst,      src0,       vsrc1
+v_subrev_u16_dpp       vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_u16_sdwa      vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_subrev_u32           vdst,      src0,       vsrc1
+v_subrev_u32_dpp       vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_subrev_u32_sdwa      vdst,      src0:m,     src1:m             clamp dst_sel dst_unused src0_sel src1_sel
+v_xor_b32              vdst,      src0,       vsrc1
+v_xor_b32_dpp          vdst,      vsrc0,      vsrc1              dpp_ctrl row_mask bank_mask bound_ctrl
+v_xor_b32_sdwa         vdst,      src0:m,     src1:m             dst_sel dst_unused src0_sel src1_sel
+
+
+

+VOP3

+
+INSTRUCTION                     DST0        DST1  SRC0         SRC1        SRC2           MODIFIERS
+—————————————————————————————————————————————————————————————————————————————————————————————————————————
+v_add3_u32                      vdst,             src0,        src1,       src2
+v_add_co_u32_e64                vdst,       sdst, src0,        src1                       clamp
+v_add_f16_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_add_f32_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_add_f64                       vdst,             src0:m,      src1:m                     clamp omod
+v_add_i16                       vdst,             src0,        src1                       op_sel clamp
+v_add_i32                       vdst,             src0,        src1                       clamp
+v_add_lshl_u32                  vdst,             src0,        src1,       src2
+v_add_u16_e64                   vdst,             src0,        src1                       clamp
+v_add_u32_e64                   vdst,             src0,        src1                       clamp
+v_addc_co_u32_e64               vdst,       sdst, src0,        src1,       ssrc2          clamp
+v_alignbit_b32                  vdst,             src0,        src1,       src2:b16
+v_alignbyte_b32                 vdst,             src0,        src1,       src2:b16
+v_and_b32_e64                   vdst,             src0,        src1
+v_and_or_b32                    vdst,             src0,        src1,       src2
+v_ashrrev_i16_e64               vdst,             src0:u16,    src1
+v_ashrrev_i32_e64               vdst,             src0:u32,    src1
+v_ashrrev_i64                   vdst,             src0:u32,    src1
+v_bcnt_u32_b32                  vdst,             src0,        src1
+v_bfe_i32                       vdst,             src0,        src1:u32,   src2:u32
+v_bfe_u32                       vdst,             src0,        src1,       src2
+v_bfi_b32                       vdst,             src0,        src1,       src2
+v_bfm_b32                       vdst,             src0,        src1
+v_bfrev_b32_e64                 vdst,             src
+v_ceil_f16_e64                  vdst,             src:m                                   clamp omod
+v_ceil_f32_e64                  vdst,             src:m                                   clamp omod
+v_ceil_f64_e64                  vdst,             src:m                                   clamp omod
+v_clrexcp_e64
+v_cmp_class_f16_e64             sdst,             src0:m,      src1:b32
+v_cmp_class_f32_e64             sdst,             src0:m,      src1:b32
+v_cmp_class_f64_e64             sdst,             src0:m,      src1:b32
+v_cmp_eq_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_eq_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_eq_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_eq_i16_e64                sdst,             src0,        src1
+v_cmp_eq_i32_e64                sdst,             src0,        src1
+v_cmp_eq_i64_e64                sdst,             src0,        src1
+v_cmp_eq_u16_e64                sdst,             src0,        src1
+v_cmp_eq_u32_e64                sdst,             src0,        src1
+v_cmp_eq_u64_e64                sdst,             src0,        src1
+v_cmp_f_f16_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_f_f32_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_f_f64_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_f_i16_e64                 sdst,             src0,        src1
+v_cmp_f_i32_e64                 sdst,             src0,        src1
+v_cmp_f_i64_e64                 sdst,             src0,        src1
+v_cmp_f_u16_e64                 sdst,             src0,        src1
+v_cmp_f_u32_e64                 sdst,             src0,        src1
+v_cmp_f_u64_e64                 sdst,             src0,        src1
+v_cmp_ge_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_ge_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_ge_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_ge_i16_e64                sdst,             src0,        src1
+v_cmp_ge_i32_e64                sdst,             src0,        src1
+v_cmp_ge_i64_e64                sdst,             src0,        src1
+v_cmp_ge_u16_e64                sdst,             src0,        src1
+v_cmp_ge_u32_e64                sdst,             src0,        src1
+v_cmp_ge_u64_e64                sdst,             src0,        src1
+v_cmp_gt_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_gt_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_gt_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_gt_i16_e64                sdst,             src0,        src1
+v_cmp_gt_i32_e64                sdst,             src0,        src1
+v_cmp_gt_i64_e64                sdst,             src0,        src1
+v_cmp_gt_u16_e64                sdst,             src0,        src1
+v_cmp_gt_u32_e64                sdst,             src0,        src1
+v_cmp_gt_u64_e64                sdst,             src0,        src1
+v_cmp_le_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_le_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_le_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_le_i16_e64                sdst,             src0,        src1
+v_cmp_le_i32_e64                sdst,             src0,        src1
+v_cmp_le_i64_e64                sdst,             src0,        src1
+v_cmp_le_u16_e64                sdst,             src0,        src1
+v_cmp_le_u32_e64                sdst,             src0,        src1
+v_cmp_le_u64_e64                sdst,             src0,        src1
+v_cmp_lg_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_lg_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_lg_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_lt_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_lt_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_lt_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmp_lt_i16_e64                sdst,             src0,        src1
+v_cmp_lt_i32_e64                sdst,             src0,        src1
+v_cmp_lt_i64_e64                sdst,             src0,        src1
+v_cmp_lt_u16_e64                sdst,             src0,        src1
+v_cmp_lt_u32_e64                sdst,             src0,        src1
+v_cmp_lt_u64_e64                sdst,             src0,        src1
+v_cmp_ne_i16_e64                sdst,             src0,        src1
+v_cmp_ne_i32_e64                sdst,             src0,        src1
+v_cmp_ne_i64_e64                sdst,             src0,        src1
+v_cmp_ne_u16_e64                sdst,             src0,        src1
+v_cmp_ne_u32_e64                sdst,             src0,        src1
+v_cmp_ne_u64_e64                sdst,             src0,        src1
+v_cmp_neq_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_neq_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_neq_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nge_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nge_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nge_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_ngt_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_ngt_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_ngt_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nle_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nle_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nle_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nlg_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nlg_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nlg_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nlt_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nlt_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_nlt_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_o_f16_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_o_f32_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_o_f64_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_t_i16_e64                 sdst,             src0,        src1
+v_cmp_t_i32_e64                 sdst,             src0,        src1
+v_cmp_t_i64_e64                 sdst,             src0,        src1
+v_cmp_t_u16_e64                 sdst,             src0,        src1
+v_cmp_t_u32_e64                 sdst,             src0,        src1
+v_cmp_t_u64_e64                 sdst,             src0,        src1
+v_cmp_tru_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_tru_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_tru_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmp_u_f16_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_u_f32_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmp_u_f64_e64                 sdst,             src0:m,      src1:m                     clamp
+v_cmpx_class_f16_e64            sdst,             src0:m,      src1:b32
+v_cmpx_class_f32_e64            sdst,             src0:m,      src1:b32
+v_cmpx_class_f64_e64            sdst,             src0:m,      src1:b32
+v_cmpx_eq_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_eq_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_eq_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_eq_i16_e64               sdst,             src0,        src1
+v_cmpx_eq_i32_e64               sdst,             src0,        src1
+v_cmpx_eq_i64_e64               sdst,             src0,        src1
+v_cmpx_eq_u16_e64               sdst,             src0,        src1
+v_cmpx_eq_u32_e64               sdst,             src0,        src1
+v_cmpx_eq_u64_e64               sdst,             src0,        src1
+v_cmpx_f_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_f_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_f_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_f_i16_e64                sdst,             src0,        src1
+v_cmpx_f_i32_e64                sdst,             src0,        src1
+v_cmpx_f_i64_e64                sdst,             src0,        src1
+v_cmpx_f_u16_e64                sdst,             src0,        src1
+v_cmpx_f_u32_e64                sdst,             src0,        src1
+v_cmpx_f_u64_e64                sdst,             src0,        src1
+v_cmpx_ge_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_ge_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_ge_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_ge_i16_e64               sdst,             src0,        src1
+v_cmpx_ge_i32_e64               sdst,             src0,        src1
+v_cmpx_ge_i64_e64               sdst,             src0,        src1
+v_cmpx_ge_u16_e64               sdst,             src0,        src1
+v_cmpx_ge_u32_e64               sdst,             src0,        src1
+v_cmpx_ge_u64_e64               sdst,             src0,        src1
+v_cmpx_gt_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_gt_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_gt_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_gt_i16_e64               sdst,             src0,        src1
+v_cmpx_gt_i32_e64               sdst,             src0,        src1
+v_cmpx_gt_i64_e64               sdst,             src0,        src1
+v_cmpx_gt_u16_e64               sdst,             src0,        src1
+v_cmpx_gt_u32_e64               sdst,             src0,        src1
+v_cmpx_gt_u64_e64               sdst,             src0,        src1
+v_cmpx_le_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_le_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_le_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_le_i16_e64               sdst,             src0,        src1
+v_cmpx_le_i32_e64               sdst,             src0,        src1
+v_cmpx_le_i64_e64               sdst,             src0,        src1
+v_cmpx_le_u16_e64               sdst,             src0,        src1
+v_cmpx_le_u32_e64               sdst,             src0,        src1
+v_cmpx_le_u64_e64               sdst,             src0,        src1
+v_cmpx_lg_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_lg_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_lg_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_lt_f16_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_lt_f32_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_lt_f64_e64               sdst,             src0:m,      src1:m                     clamp
+v_cmpx_lt_i16_e64               sdst,             src0,        src1
+v_cmpx_lt_i32_e64               sdst,             src0,        src1
+v_cmpx_lt_i64_e64               sdst,             src0,        src1
+v_cmpx_lt_u16_e64               sdst,             src0,        src1
+v_cmpx_lt_u32_e64               sdst,             src0,        src1
+v_cmpx_lt_u64_e64               sdst,             src0,        src1
+v_cmpx_ne_i16_e64               sdst,             src0,        src1
+v_cmpx_ne_i32_e64               sdst,             src0,        src1
+v_cmpx_ne_i64_e64               sdst,             src0,        src1
+v_cmpx_ne_u16_e64               sdst,             src0,        src1
+v_cmpx_ne_u32_e64               sdst,             src0,        src1
+v_cmpx_ne_u64_e64               sdst,             src0,        src1
+v_cmpx_neq_f16_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_neq_f32_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_neq_f64_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nge_f16_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nge_f32_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nge_f64_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_ngt_f16_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_ngt_f32_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_ngt_f64_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nle_f16_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nle_f32_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nle_f64_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nlg_f16_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nlg_f32_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nlg_f64_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nlt_f16_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nlt_f32_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_nlt_f64_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_o_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_o_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_o_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_t_i16_e64                sdst,             src0,        src1
+v_cmpx_t_i32_e64                sdst,             src0,        src1
+v_cmpx_t_i64_e64                sdst,             src0,        src1
+v_cmpx_t_u16_e64                sdst,             src0,        src1
+v_cmpx_t_u32_e64                sdst,             src0,        src1
+v_cmpx_t_u64_e64                sdst,             src0,        src1
+v_cmpx_tru_f16_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_tru_f32_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_tru_f64_e64              sdst,             src0:m,      src1:m                     clamp
+v_cmpx_u_f16_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_u_f32_e64                sdst,             src0:m,      src1:m                     clamp
+v_cmpx_u_f64_e64                sdst,             src0:m,      src1:m                     clamp
+v_cndmask_b32_e64               vdst,             src0,        src1,       ssrc2
+v_cos_f16_e64                   vdst,             src:m                                   clamp omod
+v_cos_f32_e64                   vdst,             src:m                                   clamp omod
+v_cubeid_f32                    vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_cubema_f32                    vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_cubesc_f32                    vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_cubetc_f32                    vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_cvt_f16_f32_e64               vdst,             src:m                                   clamp omod
+v_cvt_f16_i16_e64               vdst,             src                                     clamp omod
+v_cvt_f16_u16_e64               vdst,             src                                     clamp omod
+v_cvt_f32_f16_e64               vdst,             src:m                                   clamp omod
+v_cvt_f32_f64_e64               vdst,             src:m                                   clamp omod
+v_cvt_f32_i32_e64               vdst,             src                                     clamp omod
+v_cvt_f32_u32_e64               vdst,             src                                     clamp omod
+v_cvt_f32_ubyte0_e64            vdst,             src                                     clamp omod
+v_cvt_f32_ubyte1_e64            vdst,             src                                     clamp omod
+v_cvt_f32_ubyte2_e64            vdst,             src                                     clamp omod
+v_cvt_f32_ubyte3_e64            vdst,             src                                     clamp omod
+v_cvt_f64_f32_e64               vdst,             src:m                                   clamp omod
+v_cvt_f64_i32_e64               vdst,             src                                     clamp omod
+v_cvt_f64_u32_e64               vdst,             src                                     clamp omod
+v_cvt_flr_i32_f32_e64           vdst,             src:m
+v_cvt_i16_f16_e64               vdst,             src:m                                   clamp
+v_cvt_i32_f32_e64               vdst,             src:m                                   clamp
+v_cvt_i32_f64_e64               vdst,             src:m                                   clamp
+v_cvt_norm_i16_f16_e64          vdst,             src:m                                   clamp
+v_cvt_norm_u16_f16_e64          vdst,             src:m                                   clamp
+v_cvt_off_f32_i4_e64            vdst,             src                                     clamp omod
+v_cvt_pk_i16_i32                vdst,             src0:i32,    src1:i32
+v_cvt_pk_u16_u32                vdst,             src0:u32,    src1:u32
+v_cvt_pk_u8_f32                 vdst:b32,         src0:m:f32,  src1:u32,   src2:u32
+v_cvt_pkaccum_u8_f32            vdst:b32,         src0:m:f32,  src1:u32
+v_cvt_pknorm_i16_f16            vdst,             src0:m:f16,  src1:m:f16                 op_sel
+v_cvt_pknorm_i16_f32            vdst,             src0:m:f32,  src1:m:f32
+v_cvt_pknorm_u16_f16            vdst,             src0:m:f16,  src1:m:f16                 op_sel
+v_cvt_pknorm_u16_f32            vdst,             src0:m:f32,  src1:m:f32
+v_cvt_pkrtz_f16_f32             vdst,             src0:m:f32,  src1:m:f32
+v_cvt_rpi_i32_f32_e64           vdst,             src:m
+v_cvt_u16_f16_e64               vdst,             src:m                                   clamp
+v_cvt_u32_f32_e64               vdst,             src:m                                   clamp
+v_cvt_u32_f64_e64               vdst,             src:m                                   clamp
+v_div_fixup_f16                 vdst,             src0:m,      src1:m,     src2:m         op_sel clamp
+v_div_fixup_f32                 vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_div_fixup_f64                 vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_div_fixup_legacy_f16          vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_div_fmas_f32                  vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_div_fmas_f64                  vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_div_scale_f32                 vdst,       vcc,  src0,        src1,       src2
+v_div_scale_f64                 vdst,       vcc,  src0,        src1,       src2
+v_exp_f16_e64                   vdst,             src:m                                   clamp omod
+v_exp_f32_e64                   vdst,             src:m                                   clamp omod
+v_exp_legacy_f32_e64            vdst,             src:m                                   clamp omod
+v_ffbh_i32_e64                  vdst,             src
+v_ffbh_u32_e64                  vdst,             src
+v_ffbl_b32_e64                  vdst,             src
+v_floor_f16_e64                 vdst,             src:m                                   clamp omod
+v_floor_f32_e64                 vdst,             src:m                                   clamp omod
+v_floor_f64_e64                 vdst,             src:m                                   clamp omod
+v_fma_f16                       vdst,             src0:m,      src1:m,     src2:m         op_sel clamp
+v_fma_f32                       vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_fma_f64                       vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_fma_legacy_f16                vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_fract_f16_e64                 vdst,             src:m                                   clamp omod
+v_fract_f32_e64                 vdst,             src:m                                   clamp omod
+v_fract_f64_e64                 vdst,             src:m                                   clamp omod
+v_frexp_exp_i16_f16_e64         vdst,             src:m
+v_frexp_exp_i32_f32_e64         vdst,             src:m
+v_frexp_exp_i32_f64_e64         vdst,             src:m
+v_frexp_mant_f16_e64            vdst,             src:m                                   clamp omod
+v_frexp_mant_f32_e64            vdst,             src:m                                   clamp omod
+v_frexp_mant_f64_e64            vdst,             src:m                                   clamp omod
+v_interp_mov_f32_e64            vdst,             param:b32,   attr:b32                   clamp omod
+v_interp_p1_f32_e64             vdst,             vsrc:m,      attr:b32                   clamp omod
+v_interp_p1ll_f16               vdst:f32,         vsrc:m:f32,  attr:b32                   high clamp omod
+v_interp_p1lv_f16               vdst:f32,         vsrc0:m:f32, attr:b32,   vsrc2:m:f16x2  high clamp omod
+v_interp_p2_f16                 vdst,             vsrc0:m:f32, attr:b32,   vsrc2:m:f32    high clamp
+v_interp_p2_f32_e64             vdst,             vsrc:m,      attr:b32                   clamp omod
+v_interp_p2_legacy_f16          vdst,             vsrc0:m:f32, attr:b32,   vsrc2:m:f32    high clamp
+v_ldexp_f16_e64                 vdst,             src0:m,      src1:i16                   clamp omod
+v_ldexp_f32                     vdst,             src0:m,      src1:i32                   clamp omod
+v_ldexp_f64                     vdst,             src0:m,      src1:i32                   clamp omod
+v_lerp_u8                       vdst:u32,         src0:b32,    src1:b32,   src2:b32
+v_log_f16_e64                   vdst,             src:m                                   clamp omod
+v_log_f32_e64                   vdst,             src:m                                   clamp omod
+v_log_legacy_f32_e64            vdst,             src:m                                   clamp omod
+v_lshl_add_u32                  vdst,             src0,        src1,       src2
+v_lshl_or_b32                   vdst,             src0,        src1:u32,   src2
+v_lshlrev_b16_e64               vdst,             src0:u16,    src1
+v_lshlrev_b32_e64               vdst,             src0:u32,    src1
+v_lshlrev_b64                   vdst,             src0:u32,    src1
+v_lshrrev_b16_e64               vdst,             src0:u16,    src1
+v_lshrrev_b32_e64               vdst,             src0:u32,    src1
+v_lshrrev_b64                   vdst,             src0:u32,    src1
+v_mac_f16_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_mac_f32_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_mad_f16                       vdst,             src0:m,      src1:m,     src2:m         op_sel clamp
+v_mad_f32                       vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_mad_i16                       vdst,             src0,        src1,       src2           op_sel clamp
+v_mad_i32_i16                   vdst,             src0,        src1,       src2:i32       op_sel clamp
+v_mad_i32_i24                   vdst,             src0,        src1,       src2:i32       clamp
+v_mad_i64_i32                   vdst,       sdst, src0,        src1,       src2:i64       clamp
+v_mad_legacy_f16                vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_mad_legacy_f32                vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_mad_legacy_i16                vdst,             src0,        src1,       src2           clamp
+v_mad_legacy_u16                vdst,             src0,        src1,       src2           clamp
+v_mad_u16                       vdst,             src0,        src1,       src2           op_sel clamp
+v_mad_u32_u16                   vdst,             src0,        src1,       src2:u32       op_sel clamp
+v_mad_u32_u24                   vdst,             src0,        src1,       src2:u32       clamp
+v_mad_u64_u32                   vdst,       sdst, src0,        src1,       src2:u64       clamp
+v_max3_f16                      vdst,             src0:m,      src1:m,     src2:m         op_sel clamp
+v_max3_f32                      vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_max3_i16                      vdst,             src0,        src1,       src2           op_sel
+v_max3_i32                      vdst,             src0,        src1,       src2
+v_max3_u16                      vdst,             src0,        src1,       src2           op_sel
+v_max3_u32                      vdst,             src0,        src1,       src2
+v_max_f16_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_max_f32_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_max_f64                       vdst,             src0:m,      src1:m                     clamp omod
+v_max_i16_e64                   vdst,             src0,        src1
+v_max_i32_e64                   vdst,             src0,        src1
+v_max_u16_e64                   vdst,             src0,        src1
+v_max_u32_e64                   vdst,             src0,        src1
+v_mbcnt_hi_u32_b32              vdst,             src0,        src1
+v_mbcnt_lo_u32_b32              vdst,             src0,        src1
+v_med3_f16                      vdst,             src0:m,      src1:m,     src2:m         op_sel clamp
+v_med3_f32                      vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_med3_i16                      vdst,             src0,        src1,       src2           op_sel
+v_med3_i32                      vdst,             src0,        src1,       src2
+v_med3_u16                      vdst,             src0,        src1,       src2           op_sel
+v_med3_u32                      vdst,             src0,        src1,       src2
+v_min3_f16                      vdst,             src0:m,      src1:m,     src2:m         op_sel clamp
+v_min3_f32                      vdst,             src0:m,      src1:m,     src2:m         clamp omod
+v_min3_i16                      vdst,             src0,        src1,       src2           op_sel
+v_min3_i32                      vdst,             src0,        src1,       src2
+v_min3_u16                      vdst,             src0,        src1,       src2           op_sel
+v_min3_u32                      vdst,             src0,        src1,       src2
+v_min_f16_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_min_f32_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_min_f64                       vdst,             src0:m,      src1:m                     clamp omod
+v_min_i16_e64                   vdst,             src0,        src1
+v_min_i32_e64                   vdst,             src0,        src1
+v_min_u16_e64                   vdst,             src0,        src1
+v_min_u32_e64                   vdst,             src0,        src1
+v_mov_b32_e64                   vdst,             src
+v_mqsad_pk_u16_u8               vdst:u16x4,       src0:u8x8,   src1:u8x4,  src2:u16x4     clamp
+v_mqsad_u32_u8                  vdst:u32x4,       src0:u8x8,   src1:u8x4,  vsrc2:u32x4    clamp
+v_msad_u8                       vdst:u32,         src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_mul_f16_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_mul_f32_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_mul_f64                       vdst,             src0:m,      src1:m                     clamp omod
+v_mul_hi_i32                    vdst,             src0,        src1
+v_mul_hi_i32_i24_e64            vdst,             src0,        src1
+v_mul_hi_u32                    vdst,             src0,        src1
+v_mul_hi_u32_u24_e64            vdst,             src0,        src1
+v_mul_i32_i24_e64               vdst,             src0,        src1                       clamp
+v_mul_legacy_f32_e64            vdst,             src0:m,      src1:m                     clamp omod
+v_mul_lo_u16_e64                vdst,             src0,        src1
+v_mul_lo_u32                    vdst,             src0,        src1
+v_mul_u32_u24_e64               vdst,             src0,        src1                       clamp
+v_nop_e64
+v_not_b32_e64                   vdst,             src
+v_or3_b32                       vdst,             src0,        src1,       src2
+v_or_b32_e64                    vdst,             src0,        src1
+v_pack_b32_f16                  vdst,             src0:m,      src1:m                     op_sel
+v_perm_b32                      vdst,             src0,        src1,       src2
+v_qsad_pk_u16_u8                vdst:u16x4,       src0:u8x8,   src1:u8x4,  src2:u16x4     clamp
+v_rcp_f16_e64                   vdst,             src:m                                   clamp omod
+v_rcp_f32_e64                   vdst,             src:m                                   clamp omod
+v_rcp_f64_e64                   vdst,             src:m                                   clamp omod
+v_rcp_iflag_f32_e64             vdst,             src:m                                   clamp omod
+v_readlane_b32                  sdst,             src0,        ssrc1
+v_rndne_f16_e64                 vdst,             src:m                                   clamp omod
+v_rndne_f32_e64                 vdst,             src:m                                   clamp omod
+v_rndne_f64_e64                 vdst,             src:m                                   clamp omod
+v_rsq_f16_e64                   vdst,             src:m                                   clamp omod
+v_rsq_f32_e64                   vdst,             src:m                                   clamp omod
+v_rsq_f64_e64                   vdst,             src:m                                   clamp omod
+v_sad_hi_u8                     vdst:u32,         src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_sad_u16                       vdst:u32,         src0:u16x2,  src1:u16x2, src2:u32       clamp
+v_sad_u32                       vdst,             src0,        src1,       src2           clamp
+v_sad_u8                        vdst:u32,         src0:u8x4,   src1:u8x4,  src2:u32       clamp
+v_sat_pk_u8_i16_e64             vdst:u8x4,        src
+v_screen_partition_4se_b32_e64  vdst,             src
+v_sin_f16_e64                   vdst,             src:m                                   clamp omod
+v_sin_f32_e64                   vdst,             src:m                                   clamp omod
+v_sqrt_f16_e64                  vdst,             src:m                                   clamp omod
+v_sqrt_f32_e64                  vdst,             src:m                                   clamp omod
+v_sqrt_f64_e64                  vdst,             src:m                                   clamp omod
+v_sub_co_u32_e64                vdst,       sdst, src0,        src1                       clamp
+v_sub_f16_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_sub_f32_e64                   vdst,             src0:m,      src1:m                     clamp omod
+v_sub_i16                       vdst,             src0,        src1                       op_sel clamp
+v_sub_i32                       vdst,             src0,        src1                       clamp
+v_sub_u16_e64                   vdst,             src0,        src1                       clamp
+v_sub_u32_e64                   vdst,             src0,        src1                       clamp
+v_subb_co_u32_e64               vdst,       sdst, src0,        src1,       ssrc2          clamp
+v_subbrev_co_u32_e64            vdst,       sdst, src0,        src1,       ssrc2          clamp
+v_subrev_co_u32_e64             vdst,       sdst, src0,        src1                       clamp
+v_subrev_f16_e64                vdst,             src0:m,      src1:m                     clamp omod
+v_subrev_f32_e64                vdst,             src0:m,      src1:m                     clamp omod
+v_subrev_u16_e64                vdst,             src0,        src1                       clamp
+v_subrev_u32_e64                vdst,             src0,        src1                       clamp
+v_trig_preop_f64                vdst,             src0:m,      src1:u32                   clamp omod
+v_trunc_f16_e64                 vdst,             src:m                                   clamp omod
+v_trunc_f32_e64                 vdst,             src:m                                   clamp omod
+v_trunc_f64_e64                 vdst,             src:m                                   clamp omod
+v_writelane_b32                 vdst,             ssrc0,       ssrc1
+v_xad_u32                       vdst,             src0,        src1,       src2
+v_xor_b32_e64                   vdst,             src0,        src1
+
+
+

VOP3P

+
INSTRUCTION            DST      SRC0        SRC1     SRC2       MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————————————————————
+v_pk_add_f16           vdst,    src0,       src1                op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_add_i16           vdst,    src0,       src1                op_sel op_sel_hi clamp
+v_pk_add_u16           vdst,    src0,       src1                op_sel op_sel_hi clamp
+v_pk_ashrrev_i16       vdst,    src0:u16x2, src1                op_sel op_sel_hi
+v_pk_fma_f16           vdst,    src0,       src1,    src2       op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_lshlrev_b16       vdst,    src0:u16x2, src1                op_sel op_sel_hi
+v_pk_lshrrev_b16       vdst,    src0:u16x2, src1                op_sel op_sel_hi
+v_pk_mad_i16           vdst,    src0,       src1,    src2       op_sel op_sel_hi clamp
+v_pk_mad_u16           vdst,    src0,       src1,    src2       op_sel op_sel_hi clamp
+v_pk_max_f16           vdst,    src0,       src1                op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_max_i16           vdst,    src0,       src1                op_sel op_sel_hi
+v_pk_max_u16           vdst,    src0,       src1                op_sel op_sel_hi
+v_pk_min_f16           vdst,    src0,       src1                op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_min_i16           vdst,    src0,       src1                op_sel op_sel_hi
+v_pk_min_u16           vdst,    src0,       src1                op_sel op_sel_hi
+v_pk_mul_f16           vdst,    src0,       src1                op_sel op_sel_hi neg_lo neg_hi clamp
+v_pk_mul_lo_u16        vdst,    src0,       src1                op_sel op_sel_hi
+v_pk_sub_i16           vdst,    src0,       src1                op_sel op_sel_hi clamp
+v_pk_sub_u16           vdst,    src0,       src1                op_sel op_sel_hi clamp
+
+
+

VOPC

+
INSTRUCTION                    DST       SRC0      SRC1            MODIFIERS
+————————————————————————————————————————————————————————————————————————————————————
+v_cmp_class_f16                vcc,      src0,     vsrc1:b32
+v_cmp_class_f16_sdwa           sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmp_class_f32                vcc,      src0,     vsrc1:b32
+v_cmp_class_f32_sdwa           sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmp_class_f64                vcc,      src0,     vsrc1:b32
+v_cmp_eq_f16                   vcc,      src0,     vsrc1
+v_cmp_eq_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_f32                   vcc,      src0,     vsrc1
+v_cmp_eq_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_f64                   vcc,      src0,     vsrc1
+v_cmp_eq_i16                   vcc,      src0,     vsrc1
+v_cmp_eq_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_i32                   vcc,      src0,     vsrc1
+v_cmp_eq_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_i64                   vcc,      src0,     vsrc1
+v_cmp_eq_u16                   vcc,      src0,     vsrc1
+v_cmp_eq_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_u32                   vcc,      src0,     vsrc1
+v_cmp_eq_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_eq_u64                   vcc,      src0,     vsrc1
+v_cmp_f_f16                    vcc,      src0,     vsrc1
+v_cmp_f_f16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_f32                    vcc,      src0,     vsrc1
+v_cmp_f_f32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_f64                    vcc,      src0,     vsrc1
+v_cmp_f_i16                    vcc,      src0,     vsrc1
+v_cmp_f_i16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_i32                    vcc,      src0,     vsrc1
+v_cmp_f_i32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_i64                    vcc,      src0,     vsrc1
+v_cmp_f_u16                    vcc,      src0,     vsrc1
+v_cmp_f_u16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_u32                    vcc,      src0,     vsrc1
+v_cmp_f_u32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_f_u64                    vcc,      src0,     vsrc1
+v_cmp_ge_f16                   vcc,      src0,     vsrc1
+v_cmp_ge_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_f32                   vcc,      src0,     vsrc1
+v_cmp_ge_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_f64                   vcc,      src0,     vsrc1
+v_cmp_ge_i16                   vcc,      src0,     vsrc1
+v_cmp_ge_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_i32                   vcc,      src0,     vsrc1
+v_cmp_ge_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_i64                   vcc,      src0,     vsrc1
+v_cmp_ge_u16                   vcc,      src0,     vsrc1
+v_cmp_ge_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_u32                   vcc,      src0,     vsrc1
+v_cmp_ge_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ge_u64                   vcc,      src0,     vsrc1
+v_cmp_gt_f16                   vcc,      src0,     vsrc1
+v_cmp_gt_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_f32                   vcc,      src0,     vsrc1
+v_cmp_gt_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_f64                   vcc,      src0,     vsrc1
+v_cmp_gt_i16                   vcc,      src0,     vsrc1
+v_cmp_gt_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_i32                   vcc,      src0,     vsrc1
+v_cmp_gt_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_i64                   vcc,      src0,     vsrc1
+v_cmp_gt_u16                   vcc,      src0,     vsrc1
+v_cmp_gt_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_u32                   vcc,      src0,     vsrc1
+v_cmp_gt_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_gt_u64                   vcc,      src0,     vsrc1
+v_cmp_le_f16                   vcc,      src0,     vsrc1
+v_cmp_le_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_f32                   vcc,      src0,     vsrc1
+v_cmp_le_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_f64                   vcc,      src0,     vsrc1
+v_cmp_le_i16                   vcc,      src0,     vsrc1
+v_cmp_le_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_i32                   vcc,      src0,     vsrc1
+v_cmp_le_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_i64                   vcc,      src0,     vsrc1
+v_cmp_le_u16                   vcc,      src0,     vsrc1
+v_cmp_le_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_u32                   vcc,      src0,     vsrc1
+v_cmp_le_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_le_u64                   vcc,      src0,     vsrc1
+v_cmp_lg_f16                   vcc,      src0,     vsrc1
+v_cmp_lg_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lg_f32                   vcc,      src0,     vsrc1
+v_cmp_lg_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lg_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_f16                   vcc,      src0,     vsrc1
+v_cmp_lt_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_f32                   vcc,      src0,     vsrc1
+v_cmp_lt_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_f64                   vcc,      src0,     vsrc1
+v_cmp_lt_i16                   vcc,      src0,     vsrc1
+v_cmp_lt_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_i32                   vcc,      src0,     vsrc1
+v_cmp_lt_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_i64                   vcc,      src0,     vsrc1
+v_cmp_lt_u16                   vcc,      src0,     vsrc1
+v_cmp_lt_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_u32                   vcc,      src0,     vsrc1
+v_cmp_lt_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_lt_u64                   vcc,      src0,     vsrc1
+v_cmp_ne_i16                   vcc,      src0,     vsrc1
+v_cmp_ne_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_i32                   vcc,      src0,     vsrc1
+v_cmp_ne_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_i64                   vcc,      src0,     vsrc1
+v_cmp_ne_u16                   vcc,      src0,     vsrc1
+v_cmp_ne_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_u32                   vcc,      src0,     vsrc1
+v_cmp_ne_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ne_u64                   vcc,      src0,     vsrc1
+v_cmp_neq_f16                  vcc,      src0,     vsrc1
+v_cmp_neq_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_neq_f32                  vcc,      src0,     vsrc1
+v_cmp_neq_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_neq_f64                  vcc,      src0,     vsrc1
+v_cmp_nge_f16                  vcc,      src0,     vsrc1
+v_cmp_nge_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nge_f32                  vcc,      src0,     vsrc1
+v_cmp_nge_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nge_f64                  vcc,      src0,     vsrc1
+v_cmp_ngt_f16                  vcc,      src0,     vsrc1
+v_cmp_ngt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ngt_f32                  vcc,      src0,     vsrc1
+v_cmp_ngt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_ngt_f64                  vcc,      src0,     vsrc1
+v_cmp_nle_f16                  vcc,      src0,     vsrc1
+v_cmp_nle_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nle_f32                  vcc,      src0,     vsrc1
+v_cmp_nle_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nle_f64                  vcc,      src0,     vsrc1
+v_cmp_nlg_f16                  vcc,      src0,     vsrc1
+v_cmp_nlg_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlg_f32                  vcc,      src0,     vsrc1
+v_cmp_nlg_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlg_f64                  vcc,      src0,     vsrc1
+v_cmp_nlt_f16                  vcc,      src0,     vsrc1
+v_cmp_nlt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlt_f32                  vcc,      src0,     vsrc1
+v_cmp_nlt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_nlt_f64                  vcc,      src0,     vsrc1
+v_cmp_o_f16                    vcc,      src0,     vsrc1
+v_cmp_o_f16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_o_f32                    vcc,      src0,     vsrc1
+v_cmp_o_f32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_o_f64                    vcc,      src0,     vsrc1
+v_cmp_t_i16                    vcc,      src0,     vsrc1
+v_cmp_t_i16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_i32                    vcc,      src0,     vsrc1
+v_cmp_t_i32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_i64                    vcc,      src0,     vsrc1
+v_cmp_t_u16                    vcc,      src0,     vsrc1
+v_cmp_t_u16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_u32                    vcc,      src0,     vsrc1
+v_cmp_t_u32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_t_u64                    vcc,      src0,     vsrc1
+v_cmp_tru_f16                  vcc,      src0,     vsrc1
+v_cmp_tru_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_tru_f32                  vcc,      src0,     vsrc1
+v_cmp_tru_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_tru_f64                  vcc,      src0,     vsrc1
+v_cmp_u_f16                    vcc,      src0,     vsrc1
+v_cmp_u_f16_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_u_f32                    vcc,      src0,     vsrc1
+v_cmp_u_f32_sdwa               sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmp_u_f64                    vcc,      src0,     vsrc1
+v_cmpx_class_f16               vcc,      src0,     vsrc1:b32
+v_cmpx_class_f16_sdwa          sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmpx_class_f32               vcc,      src0,     vsrc1:b32
+v_cmpx_class_f32_sdwa          sdst,     src0:m,   src1:m:b32      src0_sel src1_sel
+v_cmpx_class_f64               vcc,      src0,     vsrc1:b32
+v_cmpx_eq_f16                  vcc,      src0,     vsrc1
+v_cmpx_eq_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_f32                  vcc,      src0,     vsrc1
+v_cmpx_eq_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_f64                  vcc,      src0,     vsrc1
+v_cmpx_eq_i16                  vcc,      src0,     vsrc1
+v_cmpx_eq_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_i32                  vcc,      src0,     vsrc1
+v_cmpx_eq_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_i64                  vcc,      src0,     vsrc1
+v_cmpx_eq_u16                  vcc,      src0,     vsrc1
+v_cmpx_eq_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_u32                  vcc,      src0,     vsrc1
+v_cmpx_eq_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_eq_u64                  vcc,      src0,     vsrc1
+v_cmpx_f_f16                   vcc,      src0,     vsrc1
+v_cmpx_f_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_f32                   vcc,      src0,     vsrc1
+v_cmpx_f_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_f64                   vcc,      src0,     vsrc1
+v_cmpx_f_i16                   vcc,      src0,     vsrc1
+v_cmpx_f_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_i32                   vcc,      src0,     vsrc1
+v_cmpx_f_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_i64                   vcc,      src0,     vsrc1
+v_cmpx_f_u16                   vcc,      src0,     vsrc1
+v_cmpx_f_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_u32                   vcc,      src0,     vsrc1
+v_cmpx_f_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_f_u64                   vcc,      src0,     vsrc1
+v_cmpx_ge_f16                  vcc,      src0,     vsrc1
+v_cmpx_ge_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_f32                  vcc,      src0,     vsrc1
+v_cmpx_ge_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_f64                  vcc,      src0,     vsrc1
+v_cmpx_ge_i16                  vcc,      src0,     vsrc1
+v_cmpx_ge_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_i32                  vcc,      src0,     vsrc1
+v_cmpx_ge_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_i64                  vcc,      src0,     vsrc1
+v_cmpx_ge_u16                  vcc,      src0,     vsrc1
+v_cmpx_ge_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_u32                  vcc,      src0,     vsrc1
+v_cmpx_ge_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ge_u64                  vcc,      src0,     vsrc1
+v_cmpx_gt_f16                  vcc,      src0,     vsrc1
+v_cmpx_gt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_f32                  vcc,      src0,     vsrc1
+v_cmpx_gt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_f64                  vcc,      src0,     vsrc1
+v_cmpx_gt_i16                  vcc,      src0,     vsrc1
+v_cmpx_gt_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_i32                  vcc,      src0,     vsrc1
+v_cmpx_gt_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_i64                  vcc,      src0,     vsrc1
+v_cmpx_gt_u16                  vcc,      src0,     vsrc1
+v_cmpx_gt_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_u32                  vcc,      src0,     vsrc1
+v_cmpx_gt_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_gt_u64                  vcc,      src0,     vsrc1
+v_cmpx_le_f16                  vcc,      src0,     vsrc1
+v_cmpx_le_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_f32                  vcc,      src0,     vsrc1
+v_cmpx_le_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_f64                  vcc,      src0,     vsrc1
+v_cmpx_le_i16                  vcc,      src0,     vsrc1
+v_cmpx_le_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_i32                  vcc,      src0,     vsrc1
+v_cmpx_le_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_i64                  vcc,      src0,     vsrc1
+v_cmpx_le_u16                  vcc,      src0,     vsrc1
+v_cmpx_le_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_u32                  vcc,      src0,     vsrc1
+v_cmpx_le_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_le_u64                  vcc,      src0,     vsrc1
+v_cmpx_lg_f16                  vcc,      src0,     vsrc1
+v_cmpx_lg_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lg_f32                  vcc,      src0,     vsrc1
+v_cmpx_lg_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lg_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_f16                  vcc,      src0,     vsrc1
+v_cmpx_lt_f16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_f32                  vcc,      src0,     vsrc1
+v_cmpx_lt_f32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_f64                  vcc,      src0,     vsrc1
+v_cmpx_lt_i16                  vcc,      src0,     vsrc1
+v_cmpx_lt_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_i32                  vcc,      src0,     vsrc1
+v_cmpx_lt_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_i64                  vcc,      src0,     vsrc1
+v_cmpx_lt_u16                  vcc,      src0,     vsrc1
+v_cmpx_lt_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_u32                  vcc,      src0,     vsrc1
+v_cmpx_lt_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_lt_u64                  vcc,      src0,     vsrc1
+v_cmpx_ne_i16                  vcc,      src0,     vsrc1
+v_cmpx_ne_i16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_i32                  vcc,      src0,     vsrc1
+v_cmpx_ne_i32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_i64                  vcc,      src0,     vsrc1
+v_cmpx_ne_u16                  vcc,      src0,     vsrc1
+v_cmpx_ne_u16_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_u32                  vcc,      src0,     vsrc1
+v_cmpx_ne_u32_sdwa             sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ne_u64                  vcc,      src0,     vsrc1
+v_cmpx_neq_f16                 vcc,      src0,     vsrc1
+v_cmpx_neq_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_neq_f32                 vcc,      src0,     vsrc1
+v_cmpx_neq_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_neq_f64                 vcc,      src0,     vsrc1
+v_cmpx_nge_f16                 vcc,      src0,     vsrc1
+v_cmpx_nge_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nge_f32                 vcc,      src0,     vsrc1
+v_cmpx_nge_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nge_f64                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f16                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ngt_f32                 vcc,      src0,     vsrc1
+v_cmpx_ngt_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_ngt_f64                 vcc,      src0,     vsrc1
+v_cmpx_nle_f16                 vcc,      src0,     vsrc1
+v_cmpx_nle_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nle_f32                 vcc,      src0,     vsrc1
+v_cmpx_nle_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nle_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f16                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlg_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlg_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlg_f64                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f16                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlt_f32                 vcc,      src0,     vsrc1
+v_cmpx_nlt_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_nlt_f64                 vcc,      src0,     vsrc1
+v_cmpx_o_f16                   vcc,      src0,     vsrc1
+v_cmpx_o_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_o_f32                   vcc,      src0,     vsrc1
+v_cmpx_o_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_o_f64                   vcc,      src0,     vsrc1
+v_cmpx_t_i16                   vcc,      src0,     vsrc1
+v_cmpx_t_i16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_i32                   vcc,      src0,     vsrc1
+v_cmpx_t_i32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_i64                   vcc,      src0,     vsrc1
+v_cmpx_t_u16                   vcc,      src0,     vsrc1
+v_cmpx_t_u16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_u32                   vcc,      src0,     vsrc1
+v_cmpx_t_u32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_t_u64                   vcc,      src0,     vsrc1
+v_cmpx_tru_f16                 vcc,      src0,     vsrc1
+v_cmpx_tru_f16_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_tru_f32                 vcc,      src0,     vsrc1
+v_cmpx_tru_f32_sdwa            sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_tru_f64                 vcc,      src0,     vsrc1
+v_cmpx_u_f16                   vcc,      src0,     vsrc1
+v_cmpx_u_f16_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_u_f32                   vcc,      src0,     vsrc1
+v_cmpx_u_f32_sdwa              sdst,     src0:m,   src1:m          src0_sel src1_sel
+v_cmpx_u_f64                   vcc,      src0,     vsrc1
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src_1.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src_2.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, iconst, ival, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src_3.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_src.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_type_deviation.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

The type of this operand differs from the type implied by the opcode. This tag specifies the actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_vdst.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx1011_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx1011_vsrc.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_attr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_attr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_attr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_attr.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,179 @@ + + + + + + + + + attr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

attr

+

Interpolation attribute and channel:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

attr{0..32}.x

Attribute 0..32 with x channel.

attr{0..32}.y

Attribute 0..32 with y channel.

attr{0..32}.z

Attribute 0..32 with z channel.

attr{0..32}.w

Attribute 0..32 with w channel.

+
+

Examples:

+
v_interp_p1_f32 v1, v0, attr0.x
+v_interp_p1_f32 v1, v0, attr32.w
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_dst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_dst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_dst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_dst.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + dst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

dst

+

This is an input operand. It may optionally serve as a destination if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_fx_operand.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_fx_operand.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_fx_operand.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_fx_operand.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,151 @@ + + + + + + + + + FX Operand — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FX Operand

+

This is an f32 or f16 operand depending on instruction modifiers:

+
    +
  • Operand size is controlled by m_op_sel_hi.

  • +
  • Location of 16-bit operand is controlled by m_op_sel.

  • +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_hwreg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_hwreg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_hwreg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_hwreg.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,292 @@ + + + + + + + + + hwreg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

hwreg

+

Bits of a hardware register being accessed.

+

The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

5:0

Register id.

0..63

10:6

First bit offset.

0..31

15:11

Size in bits (encoded as size - 1).

1..32

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • An hwreg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + +

    Hwreg Value Syntax

    Description

    hwreg({0..63})

    All bits of a register indicated by its id.

    hwreg(<name>)

    All bits of a register indicated by its name.

    hwreg({0..63}, {0..31}, {1..32})

    Register bits indicated by register id, first bit offset and size.

    hwreg(<name>, {0..31}, {1..32})

    Register bits indicated by register name, first bit offset and size.

    +
    +
  • +
+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Defined register names include:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Name

Description

HW_REG_MODE

Shader writeable mode bits.

HW_REG_STATUS

Shader read-only status.

HW_REG_TRAPSTS

Trap status.

HW_REG_HW_ID

Id of wave, simd, compute unit, etc.

HW_REG_GPR_ALLOC

Per-wave SGPR and VGPR allocation.

HW_REG_LDS_ALLOC

Per-wave LDS allocation.

HW_REG_IB_STS

Counters of outstanding instructions.

HW_REG_SH_MEM_BASES

Memory aperture.

HW_REG_TBA_LO

tba_lo register.

HW_REG_TBA_HI

tba_hi register.

HW_REG_TMA_LO

tma_lo register.

HW_REG_TMA_HI

tma_hi register.

HW_REG_FLAT_SCR_LO

flat_scratch_lo register.

HW_REG_FLAT_SCR_HI

flat_scratch_hi register.

HW_REG_XNACK_MASK

xnack_mask register.

HW_REG_POPS_PACKER

pops_packer register.

+
+

Examples:

+
reg = 1
+offset = 2
+size = 4
+hwreg_enc = reg | (offset << 6) | ((size - 1) << 11)
+
+s_getreg_b32 s2, 0x1881
+s_getreg_b32 s2, hwreg_enc                     // the same as above
+s_getreg_b32 s2, hwreg(1, 2, 4)                // the same as above
+s_getreg_b32 s2, hwreg(reg, offset, size)      // the same as above
+
+s_getreg_b32 s2, hwreg(15)
+s_getreg_b32 s2, hwreg(51, 1, 31)
+s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_imm16_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_imm16_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_imm16_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_imm16_1.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range 0..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_imm16_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_imm16_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_imm16_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_imm16_2.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

A 16-bit integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_imm16.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_imm16.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_imm16.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_imm16.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_label.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_label.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_label.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_label.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,168 @@ + + + + + + + + + label — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

label

+

A branch target which is a 16-bit signed integer treated as a PC-relative dword offset.

+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range -32768..65535.

  • +
  • A symbol (for example, a label) representing a relocatable address in the same compilation unit where it is referred from. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker.

  • +
+

Examples:

+
offset = 30
+label_1:
+label_2 = . + 4
+
+s_branch 32
+s_branch offset + 2
+s_branch label_1
+s_branch label_2
+s_branch label_3
+s_branch label_4
+
+label_3 = label_2 + 4
+label_4:
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_m_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_m_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_m_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_m_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with integer operand modifier sext.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_m.html 2021-09-19 16:16:22.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_msg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_msg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_msg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_msg.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,383 @@ + + + + + + + + + msg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

msg

+

A 16-bit message code. The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

3:0

Message type.

0..15

6:4

Optional operation.

0..7

7:7

Unused.

-

9:8

Optional stream.

0..3

15:10

Unused.

-

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A sendmsg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + +

    Sendmsg Value Syntax

    Description

    sendmsg(<type>)

    A message identified by its type.

    sendmsg(<type>,<op>)

    A message identified by its type and operation.

    sendmsg(<type>,<op>,<stream>)

    A message identified by its type and operation +with a stream id.

    +
    +
  • +
+

Type may be specified using message name or message id.

+

Op may be specified using operation name or operation id.

+

Stream id is an integer in the range 0..3.

+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Each message type supports specific operations:

+
+
+++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Message name

Message Id

Supported Operations

Operation Id

Stream Id

MSG_INTERRUPT

1

-

-

-

MSG_GS

2

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_GS_DONE

3

GS_OP_NOP

0

-

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_SAVEWAVE

4

-

-

-

MSG_STALL_WAVE_GEN

5

-

-

-

MSG_HALT_WAVES

6

-

-

-

MSG_ORDERED_PS_DONE

7

-

-

-

MSG_GS_ALLOC_REQ

9

-

-

-

MSG_GET_DOORBELL

10

-

-

-

MSG_GET_DDID

11

-

-

-

MSG_SYSMSG

15

SYSMSG_OP_ECC_ERR_INTERRUPT

1

-

SYSMSG_OP_REG_RD

2

-

SYSMSG_OP_HOST_TRAP_ACK

3

-

SYSMSG_OP_TTRACE_PC

4

-

+
+

Sendmsg arguments are validated depending on how the type value is specified:

+
    +
  • If the message type is specified by name, argument values must satisfy the limitations detailed in the table above.

  • +
  • If the message type is specified as a number, each argument must not exceed the corresponding value range (see the first table).

  • +
+

Examples:

+
// numeric message code
+msg = 0x10
+s_sendmsg 0x12
+s_sendmsg msg + 2
+
+// sendmsg with strict arguments validation
+s_sendmsg sendmsg(MSG_INTERRUPT)
+s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT)
+s_sendmsg sendmsg(MSG_GS, 2)
+s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1)
+s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC)
+s_sendmsg sendmsg(MSG_GET_DOORBELL)
+
+// sendmsg with validation of value range only
+msg = 2
+op = 3
+stream = 1
+s_sendmsg sendmsg(msg, op, stream)
+s_sendmsg sendmsg(2, GS_OP_CUT)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_opt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_opt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_opt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_opt.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + opt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

opt

+

This is an optional operand. It must be used if and only if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_param.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_param.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_param.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_param.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,171 @@ + + + + + + + + + param — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

param

+

Interpolation parameter to read:

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

p0

Parameter P0.

p10

Parameter P10.

p20

Parameter P20.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_probe.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_probe.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_probe.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_probe.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,173 @@ + + + + + + + + + probe — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

probe

+

A bit mask which indicates request permissions.

+

This operand must be specified as an integer_number or an absolute_expression. The value is truncated to 7 bits, but only 3 low bits are significant.

+
+
++++ + + + + + + + + + + + + + + + + +

Bit Number

Description

0

Request read permission.

1

Request write permission.

2

Request execute permission.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_saddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_saddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_saddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_saddr_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + saddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

saddr

+

An optional 32-bit flat scratch offset. Must be specified as off if not used.

+

Either this operand or vaddr must be set to off.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_saddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_saddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_saddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_saddr.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + saddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

saddr

+

An optional 64-bit flat global address. Must be specified as off if not used.

+

See vaddr for description of available addressing modes.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sbase_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sbase_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sbase_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sbase_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sbase_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sbase_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sbase_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sbase_2.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

This operand is ignored by H/W and flat_scratch is supplied instead.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sbase.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sbase.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sbase.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sbase.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 64-bit base address for scalar memory operations.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_2.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_3.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_4.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata_5.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdata.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_2.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 16 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_3.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_4.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_5.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 8 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_6.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_7.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp, null, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst_8.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_sdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_sdst.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword if wavefront size is 32, otherwise 2 dwords.

+

Operands: s, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_simm32_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_simm32_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_simm32_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_simm32_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. The value is converted to f16 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_simm32_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_simm32_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_simm32_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_simm32_2.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. The value is converted to f32 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_simm32.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_simm32.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_simm32.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_simm32.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

An integer_number or an absolute_expression. The value is truncated to 32 bits.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_soffset_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_soffset_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_soffset_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_soffset_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An offset added to the base address to get memory address.

+
    +
  • If offset is specified as a register, it supplies an unsigned byte offset.

  • +
  • If offset is specified as a 21-bit immediate, it supplies a signed byte offset.

  • +
+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null, m0, simm21

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_soffset_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_soffset_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_soffset_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_soffset_2.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned 20-bit offset added to the base address to get memory address.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null, m0, uimm20

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_soffset.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_soffset.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_soffset.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_soffset.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null, m0, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_2.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_3.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, vcc, ttmp, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_4.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, iconst, ival, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_5.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, lds_direct

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_6.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_7.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src_8.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_src.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_srsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_srsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_srsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_srsrc_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Buffer resource constant which defines the address and characteristics of the buffer in memory.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_srsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_srsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_srsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_srsrc.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format.

+

Size: 8 dwords by default, 4 dwords if r128 is specified.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssamp.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssamp.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssamp.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssamp.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssamp — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssamp

+

Sampler constant used to specify filtering options applied to the image data after it is read.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_1.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp, null, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_2.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_3.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, vcc, ttmp, null

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_4.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_5.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword if wavefront size is 32, otherwise 2 dwords.

+

Operands: s, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_6.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_7.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, m0, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc_8.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_ssrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_ssrc.html 2021-09-19 16:16:23.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, vcc, ttmp, null, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_tgt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_tgt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_tgt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_tgt.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,180 @@ + + + + + + + + + tgt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

tgt

+

An export target:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

pos{0..4}

Copy vertex position 0..4.

param{0..31}

Copy vertex parameter 0..31.

mrt{0..7}

Copy pixel color to the MRTs 0..7.

mrtz

Copy pixel depth (Z) data.

prim

Copy primitive (connectivity) data.

null

Copy nothing.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_type_deviation.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

The type of this operand differs from the type implied by the opcode. This tag specifies the actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat address.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat global address or a 32-bit offset depending on addressing mode:

+ +

Size: 1 or 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_3.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An optional 32-bit flat scratch offset. Must be specified as off if not used.

+

Either this operand or saddr must be set to off.

+

Size: 1 dword.

+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_4.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An image address, which includes one to four dimensional coordinates and other data used to locate a position in the image.

+

This operand may be specified using either standard VGPR syntax or special NSA VGPR syntax.

+

Size: 1-13 dwords. Actual size depends on syntax, opcode, dim and a16.

+
    +
  • If specified using NSA VGPR syntax, the size is 1-13 dwords.

  • +
  • If specified using standard VGPR syntax, the size is 1-8 dwords. Opcodes which require more than 8 dwords for address size must specify 16 dwords due to a limited range of supported register sequences.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr_5.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,155 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

This is an optional operand which may specify offset and/or index.

+

Size: 0, 1 or 2 dwords. Size is controlled by modifiers offen and idxen:

+
    +
  • If only idxen is specified, this operand supplies an index. Size is 1 dword.

  • +
  • If only offen is specified, this operand supplies an offset. Size is 1 dword.

  • +
  • If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords.

  • +
  • If none of these modifiers are specified, this operand must be set to off.

  • +
+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vaddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vaddr.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An offset from the start of GDS/LDS memory.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vcc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vcc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vcc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vcc.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,151 @@ + + + + + + + + + vcc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vcc

+

Vector condition code. This operand depends on wavefront size:

+
    +
  • Should be vcc_lo if wavefront size is 32.

  • +
  • Should be vcc if wavefront size is 64.

  • +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata0_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata0_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata0_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata0_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata0.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata0.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata0.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata0.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_10.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata1_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata1_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata1_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata1_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_3.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_4.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_5.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_6.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to be stored by an image_store instruction.

+

Size: depends on dmask and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 specifies that data in registers are packed; each value occupies 16 bits.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_7.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to be stored by an image_store instruction.

+

Size: depends on dmask which may specify from 1 to 4 data elements. Each data element occupies 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_8.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata_9.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdata.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_10.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_11.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_11.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_11.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_11.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 3 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_12.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_12.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_12.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_12.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_13.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_13.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_13.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_13.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

If lds is specified, this operand is ignored by the hardware and the data are stored directly into LDS.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+
+

Note that tfe and lds cannot be used together.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_3.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_4.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 32-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_5.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 64-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_6.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data loaded by an image_gather4 instruction.

+

Size: 4 data elements by default. Each data element occupies either 32 bits or 16 bits depending on d16.

+

d16 and tfe affect operand size as follows:

+
    +
  • d16 specifies that data elements in registers are packed; each value occupies 16 bits.

  • +
  • tfe adds one dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_7.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data loaded by an image instruction.

+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_8.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data loaded by an image instruction.

+

Size: depends on dmask, tfe and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 specifies that data elements in registers are packed; each value occupies 16 bits.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst_9.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vdst.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Data to copy to export buffers. This is an optional operand. Must be specified as off if not used.

+

compr modifier indicates the use of compressed (16-bit) data. This reduces the number of source operands from 4 to 2:

+
    +
  • src0 and src1 must specify the first register (or off).

  • +
  • src2 and src3 must specify the second register (or off).

  • +
+

An example:

+
exp mrtz v3, v3, off, off compr
+
+
+

Size: 1 dword.

+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc_3.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_vsrc.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_waitcnt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_waitcnt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx10_waitcnt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx10_waitcnt.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,242 @@ + + + + + + + + + waitcnt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

waitcnt

+

Counts of outstanding instructions to wait for.

+

The bits of this operand have the following meaning:

+
+
++++++ + + + + + + + + + + + + + + + + + + + + + + + + +

High Bits

Low Bits

Description

Value Range

15:14

3:0

VM_CNT: vector memory operations count.

0..63

-

6:4

EXP_CNT: export count.

0..7

-

11:8

LGKM_CNT: LDS, GDS, Constant and Message count.

0..15

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A combination of vmcnt, expcnt, lgkmcnt and other values described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

    Syntax

    Description

    vmcnt(<N>)

    A VM_CNT value. N must not exceed the largest VM_CNT value.

    expcnt(<N>)

    An EXP_CNT value. N must not exceed the largest EXP_CNT value.

    lgkmcnt(<N>)

    An LGKM_CNT value. N must not exceed the largest LGKM_CNT value.

    vmcnt_sat(<N>)

    A VM_CNT value computed as min(N, the largest VM_CNT value).

    expcnt_sat(<N>)

    An EXP_CNT value computed as min(N, the largest EXP_CNT value).

    lgkmcnt_sat(<N>)

    An LGKM_CNT value computed as min(N, the largest LGKM_CNT value).

    +
    +
  • +
+

These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators.

+

N is either an +integer number or an +absolute expression.

+

Examples:

+
vm_cnt = 1
+exp_cnt = 2
+lgkm_cnt = 3
+cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8)
+
+s_waitcnt cnt
+s_waitcnt 1 | (2 << 4) | (3 << 8)                          // the same as above
+s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3)                    // the same as above
+s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt)  // the same as above
+
+s_waitcnt vmcnt(1)
+s_waitcnt expcnt(2) lgkmcnt(3)
+s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3)
+s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_attr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_attr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_attr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_attr.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,179 @@ + + + + + + + + + attr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

attr

+

Interpolation attribute and channel:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

attr{0..32}.x

Attribute 0..32 with x channel.

attr{0..32}.y

Attribute 0..32 with y channel.

attr{0..32}.z

Attribute 0..32 with z channel.

attr{0..32}.w

Attribute 0..32 with w channel.

+
+

Examples:

+
v_interp_p1_f32 v1, v0, attr0.x
+v_interp_p1_f32 v1, v0, attr32.w
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_dst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_dst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_dst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_dst.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + dst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

dst

+

This is an input operand. It may optionally serve as a destination if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_hwreg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_hwreg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_hwreg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_hwreg.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,265 @@ + + + + + + + + + hwreg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

hwreg

+

Bits of a hardware register being accessed.

+

The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

5:0

Register id.

0..63

10:6

First bit offset.

0..31

15:11

Size in bits.

1..32

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • An hwreg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + +

    Hwreg Value Syntax

    Description

    hwreg({0..63})

    All bits of a register indicated by its id.

    hwreg(<name>)

    All bits of a register indicated by its name.

    hwreg({0..63}, {0..31}, {1..32})

    Register bits indicated by register id, first bit offset and size.

    hwreg(<name>, {0..31}, {1..32})

    Register bits indicated by register name, first bit offset and size.

    +
    +
  • +
+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Defined register names include:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Name

Description

HW_REG_MODE

Shader writeable mode bits.

HW_REG_STATUS

Shader read-only status.

HW_REG_TRAPSTS

Trap status.

HW_REG_HW_ID

Id of wave, simd, compute unit, etc.

HW_REG_GPR_ALLOC

Per-wave SGPR and VGPR allocation.

HW_REG_LDS_ALLOC

Per-wave LDS allocation.

HW_REG_IB_STS

Counters of outstanding instructions.

+
+

Examples:

+
reg = 1
+offset = 2
+size = 4
+hwreg_enc = reg | (offset << 6) | ((size - 1) << 11)
+
+s_getreg_b32 s2, 0x1881
+s_getreg_b32 s2, hwreg_enc                     // the same as above
+s_getreg_b32 s2, hwreg(1, 2, 4)                // the same as above
+s_getreg_b32 s2, hwreg(reg, offset, size)      // the same as above
+
+s_getreg_b32 s2, hwreg(15)
+s_getreg_b32 s2, hwreg(51, 1, 31)
+s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_imm16_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_imm16_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_imm16_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_imm16_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range 0..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_imm16_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_imm16_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_imm16_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_imm16_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

A 16-bit integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_imm16.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_imm16.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_imm16.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_imm16.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_label.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_label.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_label.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_label.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,168 @@ + + + + + + + + + label — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

label

+

A branch target which is a 16-bit signed integer treated as a PC-relative dword offset.

+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range -32768..65535.

  • +
  • A symbol (for example, a label) representing a relocatable address in the same compilation unit where it is referenced. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker.

  • +
+

Examples:

+
offset = 30
+label_1:
+label_2 = . + 4
+
+s_branch 32
+s_branch offset + 2
+s_branch label_1
+s_branch label_2
+s_branch label_3
+s_branch label_4
+
+label_3 = label_2 + 4
+label_4:
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_m.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_msg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_msg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_msg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_msg.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,340 @@ + + + + + + + + + msg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

msg

+

A 16-bit message code. The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

3:0

Message type.

0..15

6:4

Optional operation.

0..7

7:7

Unused.

-

9:8

Optional stream.

0..3

15:10

Unused.

-

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A sendmsg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + +

    Sendmsg Value Syntax

    Description

    sendmsg(<type>)

    A message identified by its type.

    sendmsg(<type>,<op>)

    A message identified by its type and operation.

    sendmsg(<type>,<op>,<stream>)

    A message identified by its type and operation +with a stream id.

    +
    +
  • +
+

Type may be specified using message name or message id.

+

Op may be specified using operation name or operation id.

+

Stream id is an integer in the range 0..3.

+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Each message type supports specific operations:

+
+
+++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Message name

Message Id

Supported Operations

Operation Id

Stream Id

MSG_INTERRUPT

1

-

-

-

MSG_GS

2

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_GS_DONE

3

GS_OP_NOP

0

-

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_SYSMSG

15

SYSMSG_OP_ECC_ERR_INTERRUPT

1

-

SYSMSG_OP_REG_RD

2

-

SYSMSG_OP_HOST_TRAP_ACK

3

-

SYSMSG_OP_TTRACE_PC

4

-

+
+

Sendmsg arguments are validated depending on how the type value is specified:

+
    +
  • If the message type is specified by name, argument values must satisfy the limitations detailed in the table above.

  • +
  • If the message type is specified as a number, each argument must not exceed the corresponding value range (see the first table).

  • +
+

Examples:

+
// numeric message code
+msg = 0x10
+s_sendmsg 0x12
+s_sendmsg msg + 2
+
+// sendmsg with strict arguments validation
+s_sendmsg sendmsg(MSG_INTERRUPT)
+s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT)
+s_sendmsg sendmsg(MSG_GS, 2)
+s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1)
+s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC)
+
+// sendmsg with validation of value range only
+msg = 2
+op = 3
+stream = 1
+s_sendmsg sendmsg(msg, op, stream)
+s_sendmsg sendmsg(2, GS_OP_CUT)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_opt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_opt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_opt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_opt.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + opt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

opt

+

This is an optional operand. It must be used if and only if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_param.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_param.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_param.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_param.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,171 @@ + + + + + + + + + param — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

param

+

Interpolation parameter to read:

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

p0

Parameter P0.

p10

Parameter P10.

p20

Parameter P20.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sbase_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sbase_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sbase_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sbase_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 64-bit base address for scalar memory operations.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sbase.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sbase.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sbase.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sbase.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 16 dwords.

+

Operands: s

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_3.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_4.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 8 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_5.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_6.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, vcc, trap, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst_7.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_sdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_sdst.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_simm32_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_simm32_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_simm32_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_simm32_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. +The value is converted to f32 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_simm32.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_simm32.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_simm32.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_simm32.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

An integer_number or an absolute_expression. The value is truncated to 32 bits.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_soffset_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_soffset_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_soffset_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_soffset_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned offset added to the base address to get the memory address.

+
    +
  • If offset is specified as a register, it supplies an unsigned byte offset, but the 2 least significant bits are ignored.

  • +
  • If offset is specified as an uimm32, it supplies a 32-bit unsigned byte offset, but the 2 least significant bits are ignored.

  • +
  • If offset is specified as an uimm8, it supplies an 8-bit unsigned dword offset.

  • +
+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, uimm8, uimm32

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_soffset.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_soffset.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_soffset.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_soffset.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_10.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, vcc, trap, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, iconst, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_3.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, lds_direct

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_4.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_5.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_6.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_7.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, vcc, trap, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_8.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, lds_direct

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src_9.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_src.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_srsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_srsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_srsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_srsrc_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Buffer resource constant which defines the address and characteristics of the buffer in memory.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_srsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_srsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_srsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_srsrc.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format.

+

Size: 8 dwords by default, 4 dwords if r128 is specified.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssamp.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssamp.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssamp.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssamp.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssamp — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssamp

+

Sampler constant used to specify filtering options applied to the image data after it is read.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_10.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_1.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, vcc, trap, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_2.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_3.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_4.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_5.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, iconst, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_6.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, vcc, trap, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_7.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, vcc, trap, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_8.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc_9.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_ssrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_ssrc.html 2021-09-19 16:16:24.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, vcc, trap, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_tgt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_tgt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_tgt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_tgt.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,177 @@ + + + + + + + + + tgt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

tgt

+

An export target:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

pos{0..3}

Copy vertex position 0..3.

param{0..31}

Copy vertex parameter 0..31.

mrt{0..7}

Copy pixel color to the MRTs 0..7.

mrtz

Copy pixel depth (Z) data.

null

Copy nothing.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_type_deviation.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

The type of this operand differs from the type implied by the opcode. This tag specifies the actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat address.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_2.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image.

+

Size: 1, 2, 3, 4, 8 or 16 dwords. Actual size depends on opcode and specific image being handled.

+
+

Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction.

+

Note 2. The actual image address size may vary from 1 to 13 dwords, but the assembler currently supports only a limited range of register sequences.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr_3.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,157 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

This is an optional operand which may specify a 64-bit address, offset and/or index.

+

Size: 0, 1 or 2 dwords. Size is controlled by modifiers addr64, offen and idxen:

+
    +
  • If only addr64 is specified, this operand supplies a 64-bit address. Size is 2 dwords.

  • +
  • If only idxen is specified, this operand supplies an index. Size is 1 dword.

  • +
  • If only offen is specified, this operand supplies an offset. Size is 1 dword.

  • +
  • If both idxen and offen are specified, index is in the first register and offset is in the second. Size is 2 dwords.

  • +
  • If none of these modifiers are specified, this operand must be set to off.

  • +
  • All other combinations of these modifiers are illegal.

  • +
+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vaddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vaddr.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An offset from the start of GDS/LDS memory.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vcc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vcc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vcc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vcc.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vcc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vcc

+

Vector condition code.

+

Size: 2 dwords.

+

Operands: vcc

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata0_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata0_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata0_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata0_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata0.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata0.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata0.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata0.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata1_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata1_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata1_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata1_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_2.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_3.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_4.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally, it may also serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_5.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally, it may also serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_6.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to store by an image_store instruction.

+

Size: depends on dmask which may specify from 1 to 4 data elements. Each data element occupies 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_7.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally, it may also serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_8.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally, it may also serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata_9.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally, it may also serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdata.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_10.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 3 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_11.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_11.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_11.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_11.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_12.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_12.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_12.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_12.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

If lds is specified, this operand is ignored by H/W and data are stored directly into LDS.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+
+

Note that tfe and lds cannot be used together.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_2.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_3.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_4.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 32-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_5.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 64-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_6.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image_gather4 instruction.

+

Size: 4 data elements by default. Each data element occupies 1 dword. tfe adds one more dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_7.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image instruction.

+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_8.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst_9.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vdst.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_2.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc_3.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_vsrc.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Data to copy to export buffers. This is an optional operand. Must be specified as off if not used.

+

The compr modifier indicates the use of compressed (16-bit) data. This limits the number of source operands from 4 to 2:

+
    +
  • src0 and src1 must specify the first register (or off).

  • +
  • src2 and src3 must specify the second register (or off).

  • +
+

An example:

+
exp mrtz v3, v3, off, off compr
+
+
+

Size: 1 dword.

+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_waitcnt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_waitcnt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx7_waitcnt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx7_waitcnt.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,237 @@ + + + + + + + + + waitcnt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

waitcnt

+

Counts of outstanding instructions to wait for.

+

The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

3:0

VM_CNT: vector memory operations count.

0..15

6:4

EXP_CNT: export count.

0..7

12:8

LGKM_CNT: LDS, GDS, Constant and Message count.

0..31

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A combination of vmcnt, expcnt, lgkmcnt and other values described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

    Syntax

    Description

    vmcnt(<N>)

    A VM_CNT value. N must not exceed the largest VM_CNT value.

    expcnt(<N>)

    An EXP_CNT value. N must not exceed the largest EXP_CNT value.

    lgkmcnt(<N>)

    An LGKM_CNT value. N must not exceed the largest LGKM_CNT value.

    vmcnt_sat(<N>)

    A VM_CNT value computed as min(N, the largest VM_CNT value).

    expcnt_sat(<N>)

    An EXP_CNT value computed as min(N, the largest EXP_CNT value).

    lgkmcnt_sat(<N>)

    An LGKM_CNT value computed as min(N, the largest LGKM_CNT value).

    +
    +
  • +
+

These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators.

+

N is either an +integer number or an +absolute expression.

+

Examples:

+
vm_cnt = 1
+exp_cnt = 2
+lgkm_cnt = 3
+cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8)
+
+s_waitcnt cnt
+s_waitcnt 1 | (2 << 4) | (3 << 8)                          // the same as above
+s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3)                    // the same as above
+s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt)  // the same as above
+
+s_waitcnt vmcnt(1)
+s_waitcnt expcnt(2) lgkmcnt(3)
+s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3)
+s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_attr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_attr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_attr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_attr.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,179 @@ + + + + + + + + + attr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

attr

+

Interpolation attribute and channel:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

attr{0..32}.x

Attribute 0..32 with x channel.

attr{0..32}.y

Attribute 0..32 with y channel.

attr{0..32}.z

Attribute 0..32 with z channel.

attr{0..32}.w

Attribute 0..32 with w channel.

+
+

Examples:

+
v_interp_p1_f32 v1, v0, attr0.x
+v_interp_p1_f32 v1, v0, attr32.w
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_dst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_dst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_dst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_dst.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + dst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

dst

+

This is an input operand. It may optionally serve as a destination if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_hwreg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_hwreg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_hwreg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_hwreg.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,265 @@ + + + + + + + + + hwreg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

hwreg

+

Bits of a hardware register being accessed.

+

The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

5:0

Register id.

0..63

10:6

First bit offset.

0..31

15:11

Size in bits.

1..32

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • An hwreg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + +

    Hwreg Value Syntax

    Description

    hwreg({0..63})

    All bits of a register indicated by its id.

    hwreg(<name>)

    All bits of a register indicated by its name.

    hwreg({0..63}, {0..31}, {1..32})

    Register bits indicated by register id, first bit offset and size.

    hwreg(<name>, {0..31}, {1..32})

    Register bits indicated by register name, first bit offset and size.

    +
    +
  • +
+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Defined register names include:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Name

Description

HW_REG_MODE

Shader writeable mode bits.

HW_REG_STATUS

Shader read-only status.

HW_REG_TRAPSTS

Trap status.

HW_REG_HW_ID

Id of wave, simd, compute unit, etc.

HW_REG_GPR_ALLOC

Per-wave SGPR and VGPR allocation.

HW_REG_LDS_ALLOC

Per-wave LDS allocation.

HW_REG_IB_STS

Counters of outstanding instructions.

+
+

Examples:

+
reg = 1
+offset = 2
+size = 4
+hwreg_enc = reg | (offset << 6) | ((size - 1) << 11)
+
+s_getreg_b32 s2, 0x1881
+s_getreg_b32 s2, hwreg_enc                     // the same as above
+s_getreg_b32 s2, hwreg(1, 2, 4)                // the same as above
+s_getreg_b32 s2, hwreg(reg, offset, size)      // the same as above
+
+s_getreg_b32 s2, hwreg(15)
+s_getreg_b32 s2, hwreg(51, 1, 31)
+s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imask.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imask.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imask.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imask.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,225 @@ + + + + + + + + + imask — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imask

+

This operand is a mask which controls indexing mode for operands of subsequent instructions. +Bits 0, 1 and 2 control indexing of src0, src1 and src2, while bit 3 controls indexing of dst. +Value 1 enables indexing and value 0 disables it.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Bit

Meaning

0

Enables or disables src0 indexing.

1

Enables or disables src1 indexing.

2

Enables or disables src2 indexing.

3

Enables or disables dst indexing.

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..15.

  • +
  • A gpr_idx value described below.

    +
    +
    ++++ + + + + + + + + + + +

    Gpr_idx Value Syntax

    Description

    gpr_idx(<operands>)

    Enable indexing for specified operands +and disable it for the rest. +Operands is a comma-separated list of +values which may include:

    +
      +
    • “SRC0” - enable src0 indexing.

    • +
    • “SRC1” - enable src1 indexing.

    • +
    • “SRC2” - enable src2 indexing.

    • +
    • “DST” - enable dst indexing.

    • +
    +

    Each of these values may be specified only +once.

    +

    Operands list may be empty; this syntax +disables indexing for all operands.

    +
    +
    +
  • +
+

Examples:

+
s_set_gpr_idx_mode 0
+s_set_gpr_idx_mode gpr_idx()                        // the same as above
+
+s_set_gpr_idx_mode 15
+s_set_gpr_idx_mode gpr_idx(DST,SRC0,SRC1,SRC2)      // the same as above
+s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST)      // the same as above
+
+s_set_gpr_idx_mode gpr_idx(DST,SRC1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imm16_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imm16_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imm16_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imm16_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range 0..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imm16_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imm16_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imm16_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imm16_2.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

A 16-bit integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imm16.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imm16.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_imm16.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_imm16.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_label.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_label.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_label.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_label.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,168 @@ + + + + + + + + + label — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

label

+

A branch target which is a 16-bit signed integer treated as a PC-relative dword offset.

+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range -32768..65535.

  • +
  • A symbol (for example, a label) representing a relocatable address in the same compilation unit from which it is referenced. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker.

  • +
+

Examples:

+
offset = 30
+label_1:
+label_2 = . + 4
+
+s_branch 32
+s_branch offset + 2
+s_branch label_1
+s_branch label_2
+s_branch label_3
+s_branch label_4
+
+label_3 = label_2 + 4
+label_4:
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_m_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_m_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_m_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_m_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_m.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with integer operand modifier sext.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_msg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_msg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_msg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_msg.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,346 @@ + + + + + + + + + msg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

msg

+

A 16-bit message code. The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

3:0

Message type.

0..15

6:4

Optional operation.

0..7

7:7

Unused.

-

9:8

Optional stream.

0..3

15:10

Unused.

-

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A sendmsg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + +

    Sendmsg Value Syntax

    Description

    sendmsg(<type>)

    A message identified by its type.

    sendmsg(<type>,<op>)

    A message identified by its type and operation.

    sendmsg(<type>,<op>,<stream>)

    A message identified by its type and operation +with a stream id.

    +
    +
  • +
+

Type may be specified using message name or message id.

+

Op may be specified using operation name or operation id.

+

Stream id is an integer in the range 0..3.

+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Each message type supports specific operations:

+
+
+++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Message name

Message Id

Supported Operations

Operation Id

Stream Id

MSG_INTERRUPT

1

-

-

-

MSG_GS

2

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_GS_DONE

3

GS_OP_NOP

0

-

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_SAVEWAVE

4

-

-

-

MSG_SYSMSG

15

SYSMSG_OP_ECC_ERR_INTERRUPT

1

-

SYSMSG_OP_REG_RD

2

-

SYSMSG_OP_HOST_TRAP_ACK

3

-

SYSMSG_OP_TTRACE_PC

4

-

+
+

Sendmsg arguments are validated depending on how the type value is specified:

+
    +
  • If the message type is specified by name, argument values must satisfy the limitations detailed in the table above.

  • +
  • If the message type is specified as a number, each argument must not exceed the corresponding value range (see the first table).

  • +
+

Examples:

+
// numeric message code
+msg = 0x10
+s_sendmsg 0x12
+s_sendmsg msg + 2
+
+// sendmsg with strict arguments validation
+s_sendmsg sendmsg(MSG_INTERRUPT)
+s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT)
+s_sendmsg sendmsg(MSG_GS, 2)
+s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1)
+s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC)
+
+// sendmsg with validation of value range only
+msg = 2
+op = 3
+stream = 1
+s_sendmsg sendmsg(msg, op, stream)
+s_sendmsg sendmsg(2, GS_OP_CUT)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_opt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_opt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_opt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_opt.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + opt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

opt

+

This is an optional operand. It must be used if and only if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_param.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_param.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_param.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_param.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,171 @@ + + + + + + + + + param — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

param

+

Interpolation parameter to read:

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

p0

Parameter P0.

p10

Parameter P10.

p20

Parameter P20.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_probe.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_probe.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_probe.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_probe.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,173 @@ + + + + + + + + + probe — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

probe

+

A bit mask which indicates request permissions.

+

This operand must be specified as an integer_number or an absolute_expression. +The value is truncated to 7 bits, but only 3 low bits are significant.

+
+
++++ + + + + + + + + + + + + + + + + +

Bit Number

Description

0

Request read permission.

1

Request write permission.

2

Request execute permission.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sbase_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sbase_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sbase_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sbase_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sbase.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sbase.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sbase.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sbase.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 64-bit base address for scalar memory operations.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdata_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdata_2.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdata.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_1.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 16 dwords.

+

Operands: s

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_2.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_3.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_4.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 8 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_5.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_6.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst_7.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_sdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_sdst.html 2021-09-19 16:16:25.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_simm32_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_simm32_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_simm32_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_simm32_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. +The value is converted to f16 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_simm32_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_simm32_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_simm32_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_simm32_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. +The value is converted to f32 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_simm32.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_simm32.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_simm32.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_simm32.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

An integer_number or an absolute_expression. The value is truncated to 32 bits.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_soffset_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_soffset_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_soffset_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_soffset_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset added to the base address to get the memory address.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, m0, uimm20

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_soffset_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_soffset_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_soffset_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_soffset_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset added to the base address to get the memory address.

+

Size: 1 dword.

+

Operands: m0, uimm20

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_soffset.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_soffset.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_soffset.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_soffset.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_10.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, iconst, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_3.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, lds_direct

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_4.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, iconst, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_5.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_6.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_7.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_8.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src_9.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_src.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_srsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_srsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_srsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_srsrc_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Buffer resource constant which defines the address and characteristics of the buffer in memory.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_srsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_srsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_srsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_srsrc.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format.

+

Size: 8 dwords by default, 4 dwords if r128 is specified.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssamp.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssamp.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssamp.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssamp.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssamp — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssamp

+

Sampler constant used to specify filtering options applied to the image data after it is read.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_3.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_4.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_5.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_6.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_7.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, m0, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc_8.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_ssrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_ssrc.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, trap, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_tgt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_tgt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_tgt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_tgt.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,177 @@ + + + + + + + + + tgt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

tgt

+

An export target:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

pos{0..3}

Copy vertex position 0..3.

param{0..31}

Copy vertex parameter 0..31.

mrt{0..7}

Copy pixel color to the MRTs 0..7.

mrtz

Copy pixel depth (Z) data.

null

Copy nothing.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_type_deviation.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

The type of this operand differs from the type implied by the opcode. This tag specifies the actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat address.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

Image address, which includes one- to four-dimensional coordinates and other data used to locate a position in the image.

+

Size: 1, 2, 3, 4, 8 or 16 dwords. Actual size depends on opcode and specific image being handled.

+
+

Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction.

+

Note 2. The actual image address size may vary from 1 to 13 dwords, but the assembler currently supports only a limited range of register sequences.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr_3.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,155 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

This is an optional operand which may specify offset and/or index.

+

Size: 0, 1 or 2 dwords. Size is controlled by modifiers offen and idxen:

+
    +
  • If only idxen is specified, this operand supplies an index. Size is 1 dword.

  • +
  • If only offen is specified, this operand supplies an offset. Size is 1 dword.

  • +
  • If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords.

  • +
  • If none of these modifiers are specified, this operand must be set to off.

  • +
+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vaddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vaddr.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An offset from the start of GDS/LDS memory.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vcc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vcc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vcc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vcc.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vcc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vcc

+

Vector condition code.

+

Size: 2 dwords.

+

Operands: vcc

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata0_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata0_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata0_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata0_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata0.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata0.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata0.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata0.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_10.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

16-bit data to store by a buffer instruction.

+

Size: depends on GFX8 GPU revision:

+
    +
  • 3 dwords for GFX8.0. This H/W supports no packing.

  • +
  • 2 dwords for GFX8.1+. This H/W supports data packing.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_11.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_11.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_11.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_11.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

16-bit data to store by a buffer instruction.

+

Size: depends on GFX8 GPU revision:

+
    +
  • 4 dwords for GFX8.0. This H/W supports no packing.

  • +
  • 2 dwords for GFX8.1+. This H/W supports data packing.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata1_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata1_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata1_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata1_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_12.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_12.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_12.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_12.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_13.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_13.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_13.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_13.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_14.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_14.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_14.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_14.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_3.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_4.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_5.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_6.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to store by an image_store instruction.

+

Size: depends on dmask and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 has a different meaning for GFX8.0 and GFX8.1:

    +
      +
    • For GFX8.0 this modifier does not affect the size of data elements in registers. Data in registers are stored in the low 16 bits; the high 16 bits are unused. There is no packing.

    • +
    • Starting from GFX8.1 this modifier specifies that data elements in registers are packed; each value occupies 16 bits.

    • +
    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_7.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to store by an image_store instruction.

+

Size: depends on dmask which may specify from 1 to 4 data elements. Each data element occupies 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_8.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

16-bit data to store by a buffer instruction.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata_9.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

16-bit data to store by a buffer instruction.

+

Size: depends on GFX8 GPU revision:

+
    +
  • 2 dwords for GFX8.0. This H/W supports no packing.

  • +
  • 1 dword for GFX8.1+. This H/W supports data packing.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdata.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_10.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer and converted to a 16-bit format.

+

Size: depends on GFX8 GPU revision and tfe:

+
    +
  • 2 dwords for GFX8.0. This H/W supports no packing.

  • +
  • 1 dword for GFX8.1+. This H/W supports data packing.

  • +
  • tfe adds one dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_11.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_11.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_11.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_11.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer and converted to a 16-bit format.

+

Size: depends on GFX8 GPU revision and tfe:

+
    +
  • 3 dwords for GFX8.0. This H/W supports no packing.

  • +
  • 2 dwords for GFX8.1+. This H/W supports data packing.

  • +
  • tfe adds one dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_12.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_12.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_12.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_12.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer and converted to a 16-bit format.

+

Size: depends on GFX8 GPU revision and tfe:

+
    +
  • 4 dwords for GFX8.0. This H/W supports no packing.

  • +
  • 2 dwords for GFX8.1+. This H/W supports data packing.

  • +
  • tfe adds one dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_13.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_13.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_13.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_13.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_14.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_14.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_14.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_14.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_15.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_15.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_15.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_15.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 3 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_16.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_16.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_16.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_16.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_17.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_17.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_17.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_17.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

If lds is specified, this operand is ignored by H/W and data are stored directly into LDS.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+
+

Note that tfe and lds cannot be used together.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_3.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_4.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 32-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_5.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 64-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_6.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image_gather4 instruction.

+

Size: 4 data elements by default. Each data element occupies either 32 bits or 16 bits depending on d16.

+

d16 and tfe affect operand size as follows:

+
    +
  • d16 has a different meaning for GFX8.0 and GFX8.1:

    +
      +
    • For GFX8.0 this modifier does not affect the size of data elements in registers. Data in registers are stored in the low 16 bits; the high 16 bits are unused. There is no packing.

    • +
    • Starting from GFX8.1 this modifier specifies that data elements in registers are packed; each value occupies 16 bits.

    • +
    +
  • +
  • tfe adds one dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_7.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image instruction.

+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_8.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image instruction.

+

Size: depends on dmask, tfe and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 has a different meaning for GFX8.0 and GFX8.1:

    +
      +
    • For GFX8.0 this modifier does not affect the size of data elements in registers. Data in registers are stored in the low 16 bits; the high 16 bits are unused. There is no packing.

    • +
    • Starting from GFX8.1 this modifier specifies that data elements in registers are packed; each value occupies 16 bits.

    • +
    +
  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst_9.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer and converted to a 16-bit format.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vdst.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_1.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_2.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc_3.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_vsrc.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Data to copy to export buffers. This is an optional operand. Must be specified as off if not used.

+

The compr modifier indicates the use of compressed (16-bit) data. This limits the number of source operands from 4 to 2:

+
    +
  • src0 and src1 must specify the first register (or off).

  • +
  • src2 and src3 must specify the second register (or off).

  • +
+

An example:

+
exp mrtz v3, v3, off, off compr
+
+
+

Size: 1 dword.

+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_waitcnt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_waitcnt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx8_waitcnt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx8_waitcnt.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,237 @@ + + + + + + + + + waitcnt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

waitcnt

+

Counts of outstanding instructions to wait for.

+

The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

3:0

VM_CNT: vector memory operations count.

0..15

6:4

EXP_CNT: export count.

0..7

11:8

LGKM_CNT: LDS, GDS, Constant and Message count.

0..15

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A combination of vmcnt, expcnt, lgkmcnt and other values described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

    Syntax

    Description

    vmcnt(<N>)

    A VM_CNT value. N must not exceed the largest VM_CNT value.

    expcnt(<N>)

    An EXP_CNT value. N must not exceed the largest EXP_CNT value.

    lgkmcnt(<N>)

    An LGKM_CNT value. N must not exceed the largest LGKM_CNT value.

    vmcnt_sat(<N>)

    A VM_CNT value computed as min(N, the largest VM_CNT value).

    expcnt_sat(<N>)

    An EXP_CNT value computed as min(N, the largest EXP_CNT value).

    lgkmcnt_sat(<N>)

    An LGKM_CNT value computed as min(N, the largest LGKM_CNT value).

    +
    +
  • +
+

These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators.

+

N is either an integer number or an absolute expression.

+

Examples:

+
vm_cnt = 1
+exp_cnt = 2
+lgkm_cnt = 3
+cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8)
+
+s_waitcnt cnt
+s_waitcnt 1 | (2 << 4) | (3 << 8)                          // the same as above
+s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3)                    // the same as above
+s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt)  // the same as above
+
+s_waitcnt vmcnt(1)
+s_waitcnt expcnt(2) lgkmcnt(3)
+s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3)
+s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_fx_operand.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_fx_operand.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_fx_operand.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_fx_operand.html 2021-09-19 16:16:26.000000000 +0000 @@ -0,0 +1,151 @@ + + + + + + + + + FX Operand — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FX Operand

+

This is an f32 or f16 operand depending on instruction modifiers:

+
    +
  • Operand size is controlled by m_op_sel_hi.

  • +
  • Location of 16-bit operand is controlled by m_op_sel.

  • +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_m.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_src_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_src.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx900_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx900_vdst.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_fx_operand.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_fx_operand.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_fx_operand.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_fx_operand.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,151 @@ + + + + + + + + + FX Operand — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FX Operand

+

This is an f32 or f16 operand depending on instruction modifiers:

+
    +
  • Operand size is controlled by m_op_sel_hi.

  • +
  • Location of 16-bit operand is controlled by m_op_sel.

  • +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_m.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_src_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_src.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx904_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx904_vdst.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_fx_operand.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_fx_operand.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_fx_operand.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_fx_operand.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,151 @@ + + + + + + + + + FX Operand — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FX Operand

+

This is an f32 or f16 operand depending on instruction modifiers:

+
    +
  • Operand size is controlled by m_op_sel_hi.

  • +
  • Location of 16-bit operand is controlled by m_op_sel.

  • +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_m_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_m_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_m_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_m_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with the integer operand modifier sext.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_m.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_3.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src_4.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_src.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_type_deviation.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

The type of this operand differs from the type implied by the opcode. This tag specifies the actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_vdst.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx906_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx906_vsrc.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_dst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_dst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_dst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_dst.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + dst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

dst

+

This is an input operand. It may optionally serve as a destination if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_fx_operand.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_fx_operand.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_fx_operand.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_fx_operand.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,151 @@ + + + + + + + + + FX Operand — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FX Operand

+

This is an f32 or f16 operand depending on instruction modifiers:

+
    +
  • Operand size is controlled by m_op_sel_hi.

  • +
  • Location of 16-bit operand is controlled by m_op_sel.

  • +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_m_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_m_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_m_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_m_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with the integer operand modifier sext.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_m.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_opt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_opt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_opt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_opt.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + opt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

opt

+

This is an optional operand. It must be used if and only if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_saddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_saddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_saddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_saddr.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + saddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

saddr

+

An optional 64-bit flat global address. Must be specified as off if not used.

+

See vaddr for description of available addressing modes.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_soffset.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_soffset.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_soffset.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_soffset.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_3.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, iconst, fconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_4.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src_5.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_src.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_srsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_srsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_srsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_srsrc.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Buffer resource constant which defines the address and characteristics of the buffer in memory.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_type_deviation.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

The type of this operand differs from the type implied by the opcode. This tag specifies the actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vaddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vaddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vaddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vaddr_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,155 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

This is an optional operand which may specify an offset and/or an index.

+

Size: 0, 1 or 2 dwords. Size is controlled by modifiers offen and idxen:

+
    +
  • If only idxen is specified, this operand supplies an index. Size is 1 dword.

  • +
  • If only offen is specified, this operand supplies an offset. Size is 1 dword.

  • +
  • If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords.

  • +
  • If none of these modifiers are specified, this operand must be set to off.

  • +
+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vaddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vaddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vaddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vaddr.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat global address or a 32-bit offset depending on addressing mode:

+ +

Size: 1 or 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdata_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdata.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_3.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_4.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 16 dwords.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst_5.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 32 dwords.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vdst.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 32-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_3.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 4 dwords.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_4.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_5.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 16 dwords.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc_6.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 32 dwords.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx908_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx908_vsrc.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_dst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_dst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_dst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_dst.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + dst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

dst

+

This is an input operand. It may optionally serve as a destination if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_fx_operand.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_fx_operand.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_fx_operand.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_fx_operand.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,151 @@ + + + + + + + + + FX Operand — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FX Operand

+

This is an f32 or f16 operand depending on instruction modifiers:

+
    +
  • Operand size is controlled by m_op_sel_hi.

  • +
  • Location of 16-bit operand is controlled by m_op_sel.

  • +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_hwreg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_hwreg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_hwreg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_hwreg.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,268 @@ + + + + + + + + + hwreg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

hwreg

+

Bits of a hardware register being accessed.

+

The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

5:0

Register id.

0..63

10:6

First bit offset.

0..31

15:11

Size in bits.

1..32

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • An hwreg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + +

    Hwreg Value Syntax

    Description

    hwreg({0..63})

    All bits of a register indicated by its id.

    hwreg(<name>)

    All bits of a register indicated by its name.

    hwreg({0..63}, {0..31}, {1..32})

    Register bits indicated by register id, first bit offset and size.

    hwreg(<name>, {0..31}, {1..32})

    Register bits indicated by register name, first bit offset and size.

    +
    +
  • +
+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Defined register names include:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Name

Description

HW_REG_MODE

Shader writeable mode bits.

HW_REG_STATUS

Shader read-only status.

HW_REG_TRAPSTS

Trap status.

HW_REG_HW_ID

Id of wave, simd, compute unit, etc.

HW_REG_GPR_ALLOC

Per-wave SGPR and VGPR allocation.

HW_REG_LDS_ALLOC

Per-wave LDS allocation.

HW_REG_IB_STS

Counters of outstanding instructions.

HW_REG_SH_MEM_BASES

Memory aperture.

+
+

Examples:

+
reg = 1
+offset = 2
+size = 4
+hwreg_enc = reg | (offset << 6) | ((size - 1) << 11)
+
+s_getreg_b32 s2, 0x1881
+s_getreg_b32 s2, hwreg_enc                     // the same as above
+s_getreg_b32 s2, hwreg(1, 2, 4)                // the same as above
+s_getreg_b32 s2, hwreg(reg, offset, size)      // the same as above
+
+s_getreg_b32 s2, hwreg(15)
+s_getreg_b32 s2, hwreg(51, 1, 31)
+s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imask.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imask.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imask.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imask.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,225 @@ + + + + + + + + + imask — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imask

+

This operand is a mask which controls indexing mode for operands of subsequent instructions. +Bits 0, 1 and 2 control indexing of src0, src1 and src2, while bit 3 controls indexing of dst. +Value 1 enables indexing and value 0 disables it.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Bit

Meaning

0

Enables or disables src0 indexing.

1

Enables or disables src1 indexing.

2

Enables or disables src2 indexing.

3

Enables or disables dst indexing.

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..15.

  • +
  • A gpr_idx value described below.

    +
    +
    ++++ + + + + + + + + + + +

    Gpr_idx Value Syntax

    Description

    gpr_idx(<operands>)

    Enable indexing for specified operands +and disable it for the rest. +Operands is a comma-separated list of +values which may include:

    +
      +
    • “SRC0” - enable src0 indexing.

    • +
    • “SRC1” - enable src1 indexing.

    • +
    • “SRC2” - enable src2 indexing.

    • +
    • “DST” - enable dst indexing.

    • +
    +

    Each of these values may be specified only +once.

    +

    Operands list may be empty; this syntax +disables indexing for all operands.

    +
    +
    +
  • +
+

Examples:

+
s_set_gpr_idx_mode 0
+s_set_gpr_idx_mode gpr_idx()                        // the same as above
+
+s_set_gpr_idx_mode 15
+s_set_gpr_idx_mode gpr_idx(DST,SRC0,SRC1,SRC2)      // the same as above
+s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST)      // the same as above
+
+s_set_gpr_idx_mode gpr_idx(DST,SRC1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range 0..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imm16_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

A 16-bit integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imm16.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imm16.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_imm16.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_imm16.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_label.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_label.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_label.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_label.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,168 @@ + + + + + + + + + label — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

label

+

A branch target which is a 16-bit signed integer treated as a PC-relative dword offset.

+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range -32768..65535.

  • +
  • A symbol (for example, a label) representing a relocatable address in the same compilation unit from which it is referenced. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker.

  • +
+

Examples:

+
offset = 30
+label_1:
+label_2 = . + 4
+
+s_branch 32
+s_branch offset + 2
+s_branch label_1
+s_branch label_2
+s_branch label_3
+s_branch label_4
+
+label_3 = label_2 + 4
+label_4:
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_m_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_m_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_m_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_m_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_m.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with integer operand modifier sext.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_msg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_msg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_msg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_msg.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,383 @@ + + + + + + + + + msg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

msg

+

A 16-bit message code. The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

3:0

Message type.

0..15

6:4

Optional operation.

0..7

7:7

Unused.

-

9:8

Optional stream.

0..3

15:10

Unused.

-

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A sendmsg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + +

    Sendmsg Value Syntax

    Description

    sendmsg(<type>)

    A message identified by its type.

    sendmsg(<type>,<op>)

    A message identified by its type and operation.

    sendmsg(<type>,<op>,<stream>)

    A message identified by its type and operation +with a stream id.

    +
    +
  • +
+

Type may be specified using message name or message id.

+

Op may be specified using operation name or operation id.

+

Stream id is an integer in the range 0..3.

+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Each message type supports specific operations:

+
+
+++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Message name

Message Id

Supported Operations

Operation Id

Stream Id

MSG_INTERRUPT

1

-

-

-

MSG_GS

2

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_GS_DONE

3

GS_OP_NOP

0

-

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_SAVEWAVE

4

-

-

-

MSG_STALL_WAVE_GEN

5

-

-

-

MSG_HALT_WAVES

6

-

-

-

MSG_ORDERED_PS_DONE

7

-

-

-

MSG_EARLY_PRIM_DEALLOC

8

-

-

-

MSG_GS_ALLOC_REQ

9

-

-

-

MSG_GET_DOORBELL

10

-

-

-

MSG_SYSMSG

15

SYSMSG_OP_ECC_ERR_INTERRUPT

1

-

SYSMSG_OP_REG_RD

2

-

SYSMSG_OP_HOST_TRAP_ACK

3

-

SYSMSG_OP_TTRACE_PC

4

-

+
+

Sendmsg arguments are validated depending on how the type value is specified:

+
    +
  • If the message type is specified by name, argument values must satisfy the limitations detailed in the table above.

  • +
  • If the message type is specified as a number, each argument must not exceed the corresponding value range (see the first table).

  • +
+

Examples:

+
// numeric message code
+msg = 0x10
+s_sendmsg 0x12
+s_sendmsg msg + 2
+
+// sendmsg with strict arguments validation
+s_sendmsg sendmsg(MSG_INTERRUPT)
+s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT)
+s_sendmsg sendmsg(MSG_GS, 2)
+s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1)
+s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC)
+s_sendmsg sendmsg(MSG_GET_DOORBELL)
+
+// sendmsg with validation of value range only
+msg = 2
+op = 3
+stream = 1
+s_sendmsg sendmsg(msg, op, stream)
+s_sendmsg sendmsg(2, GS_OP_CUT)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_opt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_opt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_opt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_opt.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + opt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

opt

+

This is an optional operand. It must be used if and only if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_probe.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_probe.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_probe.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_probe.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,173 @@ + + + + + + + + + probe — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

probe

+

A bit mask which indicates request permissions.

+

This operand must be specified as an integer_number or an absolute_expression. +The value is truncated to 7 bits, but only the 3 low bits are significant.

+
+
++++ + + + + + + + + + + + + + + + + +

Bit Number

Description

0

Request read permission.

1

Request write permission.

2

Request execute permission.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_saddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_saddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_saddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_saddr_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + saddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

saddr

+

An optional 32-bit flat scratch offset. Must be specified as off if not used.

+

Either this operand or vaddr must be set to off.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_saddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_saddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_saddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_saddr.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + saddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

saddr

+

An optional 64-bit flat global address. Must be specified as off if not used.

+

See vaddr for description of available addressing modes.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sbase_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

This operand is ignored by H/W and flat_scratch is supplied instead.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sbase.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sbase.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sbase.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sbase.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 64-bit base address for scalar memory operations.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_3.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_4.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata_5.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdata.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 16 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_3.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_4.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 8 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_5.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_6.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst_7.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_sdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_sdst.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_1.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. +The value is converted to f16 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_simm32_2.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. +The value is converted to f32 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_simm32.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_simm32.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_simm32.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_simm32.html 2021-09-19 16:16:27.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

An integer_number or an absolute_expression. The value is truncated to 32 bits.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An offset added to the base address to get the memory address.

+
    +
  • If offset is specified as a register, it supplies an unsigned byte offset.

  • +
  • If offset is specified as a 21-bit immediate, it supplies a signed byte offset.

  • +
+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, simm21

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_soffset_2.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned 20-bit offset added to the base address to get the memory address.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, uimm20

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_soffset.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_soffset.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_soffset.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_soffset.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_10.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 8 dwords.

+

Operands: v, a, iconst, fconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_11.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_11.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_11.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_11.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, a, iconst, fconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_2.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_3.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_4.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_5.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_6.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, iconst, fconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_7.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v, a, iconst, fconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_8.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 16 dwords.

+

Operands: v, a, iconst, fconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src_9.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 32 dwords.

+

Operands: v, a, iconst, fconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_src.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Buffer resource constant which defines the address and characteristics of the buffer in memory.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_srsrc.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format.

+

Size: 8 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssamp.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssamp.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssamp.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssamp.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssamp — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssamp

+

Sampler constant used to specify filtering options applied to the image data after it is read.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_2.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_3.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_4.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_5.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_6.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_7.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc_8.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_ssrc.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_type_deviation.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

The type of this operand differs from the type implied by the opcode. This tag specifies the actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat address.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_2.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat global address or a 32-bit offset, depending on the addressing mode:

+ +

Size: 1 or 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_3.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An optional 32-bit flat scratch offset. Must be specified as off if not used.

+

Either this operand or saddr must be set to off.

+

Size: 1 dword.

+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_4.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image.

+

Size: 1, 2, 3, 4, 8 or 16 dwords. The actual size depends on the opcode, the specific image being handled, and a16.

+
+

Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction.

+

Note 2. The actual image address size may vary from 1 to 13 dwords, but the assembler currently supports only a limited range of register sequences.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr_5.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,155 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

This is an optional operand which may specify offset and/or index.

+

Size: 0, 1 or 2 dwords. Size is controlled by modifiers offen and idxen:

+
    +
  • If only idxen is specified, this operand supplies an index. Size is 1 dword.

  • +
  • If only offen is specified, this operand supplies an offset. Size is 1 dword.

  • +
  • If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords.

  • +
  • If none of these modifiers are specified, this operand must be set to off.

  • +
+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vaddr.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An offset from the start of GDS/LDS memory.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vcc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vcc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vcc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vcc.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vcc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vcc

+

Vector condition code.

+

Size: 2 dwords.

+

Operands: vcc

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata0.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_10.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_2.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_3.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 3 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_4.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_5.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_6.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to be stored by an image_store instruction.

+

Size: depends on dmask and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 specifies that data elements in registers are packed; each value occupies 16 bits.

  • +
+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_7.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to be stored by an image_store instruction.

+

Size: depends on dmask which may specify from 1 to 4 data elements. Each data element occupies 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_8.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata_9.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdata.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_10.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 3 dwords by default. tfe adds 1 dword if specified.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_11.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_11.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_11.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_11.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_12.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_12.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_12.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_12.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

If lds is specified, this operand is ignored by H/W and data are stored directly into LDS.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+
+

Note that tfe and lds cannot be used together.

+
+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_13.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_13.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_13.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_13.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_14.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_14.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_14.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_14.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_15.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_15.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_15.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_15.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_16.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_16.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_16.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_16.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_17.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_17.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_17.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_17.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 16 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_18.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_18.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_18.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_18.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 32 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_19.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_19.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_19.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_19.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 8 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_1.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_2.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_3.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 3 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_4.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 32-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_5.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 64-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_6.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to be loaded by an image instruction.

+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_7.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to be loaded by an image instruction.

+

Size: depends on dmask, tfe and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 specifies that data elements in registers are packed; each value occupies 16 bits.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_8.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst_9.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vdst.html 2021-09-19 16:16:28.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_3.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_4.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc_5.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_vsrc.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: a

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_waitcnt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_waitcnt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx90a_waitcnt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx90a_waitcnt.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,242 @@ + + + + + + + + + waitcnt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

waitcnt

+

Counts of outstanding instructions to wait for.

+

The bits of this operand have the following meaning:

+
+
++++++ + + + + + + + + + + + + + + + + + + + + + + + + +

High Bits

Low Bits

Description

Value Range

15:14

3:0

VM_CNT: vector memory operations count.

0..63

-

6:4

EXP_CNT: export count.

0..7

-

11:8

LGKM_CNT: LDS, GDS, Constant and Message count.

0..15

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A combination of vmcnt, expcnt, lgkmcnt and other values described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

    Syntax

    Description

    vmcnt(<N>)

    A VM_CNT value. N must not exceed the largest VM_CNT value.

    expcnt(<N>)

    An EXP_CNT value. N must not exceed the largest EXP_CNT value.

    lgkmcnt(<N>)

    An LGKM_CNT value. N must not exceed the largest LGKM_CNT value.

    vmcnt_sat(<N>)

    A VM_CNT value computed as min(N, the largest VM_CNT value).

    expcnt_sat(<N>)

    An EXP_CNT value computed as min(N, the largest EXP_CNT value).

    lgkmcnt_sat(<N>)

    An LGKM_CNT value computed as min(N, the largest LGKM_CNT value).

    +
    +
  • +
+

These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators.

+

N is either an +integer number or an +absolute expression.

+

Examples:

+
vm_cnt = 1
+exp_cnt = 2
+lgkm_cnt = 3
+cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8)
+
+s_waitcnt cnt
+s_waitcnt 1 | (2 << 4) | (3 << 8)                          // the same as above
+s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3)                    // the same as above
+s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt)  // the same as above
+
+s_waitcnt vmcnt(1)
+s_waitcnt expcnt(2) lgkmcnt(3)
+s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3)
+s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_attr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_attr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_attr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_attr.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,179 @@ + + + + + + + + + attr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

attr

+

Interpolation attribute and channel:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

attr{0..32}.x

Attribute 0..32 with x channel.

attr{0..32}.y

Attribute 0..32 with y channel.

attr{0..32}.z

Attribute 0..32 with z channel.

attr{0..32}.w

Attribute 0..32 with w channel.

+
+

Examples:

+
v_interp_p1_f32 v1, v0, attr0.x
+v_interp_p1_f32 v1, v0, attr32.w
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_dst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_dst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_dst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_dst.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + dst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

dst

+

This is an input operand. It may optionally serve as a destination if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_hwreg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_hwreg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_hwreg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_hwreg.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,268 @@ + + + + + + + + + hwreg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

hwreg

+

Bits of a hardware register being accessed.

+

The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

5:0

Register id.

0..63

10:6

First bit offset.

0..31

15:11

Size in bits.

1..32

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • An hwreg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + +

    Hwreg Value Syntax

    Description

    hwreg({0..63})

    All bits of a register indicated by its id.

    hwreg(<name>)

    All bits of a register indicated by its name.

    hwreg({0..63}, {0..31}, {1..32})

    Register bits indicated by register id, first bit offset and size.

    hwreg(<name>, {0..31}, {1..32})

    Register bits indicated by register name, first bit offset and size.

    +
    +
  • +
+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Defined register names include:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Name

Description

HW_REG_MODE

Shader writeable mode bits.

HW_REG_STATUS

Shader read-only status.

HW_REG_TRAPSTS

Trap status.

HW_REG_HW_ID

Id of wave, simd, compute unit, etc.

HW_REG_GPR_ALLOC

Per-wave SGPR and VGPR allocation.

HW_REG_LDS_ALLOC

Per-wave LDS allocation.

HW_REG_IB_STS

Counters of outstanding instructions.

HW_REG_SH_MEM_BASES

Memory aperture.

+
+

Examples:

+
reg = 1
+offset = 2
+size = 4
+hwreg_enc = reg | (offset << 6) | ((size - 1) << 11)
+
+s_getreg_b32 s2, 0x1881
+s_getreg_b32 s2, hwreg_enc                     // the same as above
+s_getreg_b32 s2, hwreg(1, 2, 4)                // the same as above
+s_getreg_b32 s2, hwreg(reg, offset, size)      // the same as above
+
+s_getreg_b32 s2, hwreg(15)
+s_getreg_b32 s2, hwreg(51, 1, 31)
+s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imask.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imask.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imask.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imask.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,225 @@ + + + + + + + + + imask — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imask

+

This operand is a mask which controls indexing mode for operands of subsequent instructions. +Bits 0, 1 and 2 control indexing of src0, src1 and src2, while bit 3 controls indexing of dst. +Value 1 enables indexing and value 0 disables it.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Bit

Meaning

0

Enables or disables src0 indexing.

1

Enables or disables src1 indexing.

2

Enables or disables src2 indexing.

3

Enables or disables dst indexing.

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..15.

  • +
  • A gpr_idx value described below.

    +
    +
    ++++ + + + + + + + + + + +

    Gpr_idx Value Syntax

    Description

    gpr_idx(<operands>)

    Enable indexing for specified operands +and disable it for the rest. +Operands is a comma-separated list of +values which may include:

    +
      +
    • “SRC0” - enable src0 indexing.

    • +
    • “SRC1” - enable src1 indexing.

    • +
    • “SRC2” - enable src2 indexing.

    • +
    • “DST” - enable dst indexing.

    • +
    +

    Each of these values may be specified only +once.

    +

    The operands list may be empty; this syntax +disables indexing for all operands.

    +
    +
    +
  • +
+

Examples:

+
s_set_gpr_idx_mode 0
+s_set_gpr_idx_mode gpr_idx()                        // the same as above
+
+s_set_gpr_idx_mode 15
+s_set_gpr_idx_mode gpr_idx(DST,SRC0,SRC1,SRC2)      // the same as above
+s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST)      // the same as above
+
+s_set_gpr_idx_mode gpr_idx(DST,SRC1)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imm16_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imm16_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imm16_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imm16_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range 0..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imm16_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imm16_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imm16_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imm16_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

A 16-bit integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imm16.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imm16.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_imm16.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_imm16.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + imm16 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

imm16

+

An integer_number or an absolute_expression. The value must be in the range -32768..65535.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_label.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_label.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_label.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_label.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,168 @@ + + + + + + + + + label — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

label

+

A branch target which is a 16-bit signed integer treated as a PC-relative dword offset.

+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range -32768..65535.

  • +
  • A symbol (for example, a label) representing a relocatable address in the same compilation unit where it is referred from. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker.

  • +
+

Examples:

+
offset = 30
+label_1:
+label_2 = . + 4
+
+s_branch 32
+s_branch offset + 2
+s_branch label_1
+s_branch label_2
+s_branch label_3
+s_branch label_4
+
+label_3 = label_2 + 4
+label_4:
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_m_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_m_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_m_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_m_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with floating point operand modifiers abs and neg.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_m.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_m.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_m.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_m.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + m — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

m

+

This operand may be used with integer operand modifier sext.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_msg.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_msg.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_msg.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_msg.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,383 @@ + + + + + + + + + msg — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

msg

+

A 16-bit message code. The bits of this operand have the following meaning:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Bits

Description

Value Range

3:0

Message type.

0..15

6:4

Optional operation.

0..7

7:7

Unused.

-

9:8

Optional stream.

0..3

15:10

Unused.

-

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A sendmsg value described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + +

    Sendmsg Value Syntax

    Description

    sendmsg(<type>)

    A message identified by its type.

    sendmsg(<type>,<op>)

    A message identified by its type and operation.

    sendmsg(<type>,<op>,<stream>)

    A message identified by its type and operation +with a stream id.

    +
    +
  • +
+

Type may be specified using message name or message id.

+

Op may be specified using operation name or operation id.

+

Stream id is an integer in the range 0..3.

+

Numeric values may be specified as positive integer numbers +or absolute expressions.

+

Each message type supports specific operations:

+
+
+++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Message name

Message Id

Supported Operations

Operation Id

Stream Id

MSG_INTERRUPT

1

-

-

-

MSG_GS

2

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_GS_DONE

3

GS_OP_NOP

0

-

GS_OP_CUT

1

Optional

GS_OP_EMIT

2

Optional

GS_OP_EMIT_CUT

3

Optional

MSG_SAVEWAVE

4

-

-

-

MSG_STALL_WAVE_GEN

5

-

-

-

MSG_HALT_WAVES

6

-

-

-

MSG_ORDERED_PS_DONE

7

-

-

-

MSG_EARLY_PRIM_DEALLOC

8

-

-

-

MSG_GS_ALLOC_REQ

9

-

-

-

MSG_GET_DOORBELL

10

-

-

-

MSG_SYSMSG

15

SYSMSG_OP_ECC_ERR_INTERRUPT

1

-

SYSMSG_OP_REG_RD

2

-

SYSMSG_OP_HOST_TRAP_ACK

3

-

SYSMSG_OP_TTRACE_PC

4

-

+
+

Sendmsg arguments are validated depending on how the type value is specified:

+
    +
  • If the message type is specified by name, argument values must satisfy the limitations detailed in the table above.

  • +
  • If the message type is specified as a number, each argument must not exceed the corresponding value range (see the first table).

  • +
+

Examples:

+
// numeric message code
+msg = 0x10
+s_sendmsg 0x12
+s_sendmsg msg + 2
+
+// sendmsg with strict arguments validation
+s_sendmsg sendmsg(MSG_INTERRUPT)
+s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT)
+s_sendmsg sendmsg(MSG_GS, 2)
+s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1)
+s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC)
+s_sendmsg sendmsg(MSG_GET_DOORBELL)
+
+// sendmsg with validation of value range only
+msg = 2
+op = 3
+stream = 1
+s_sendmsg sendmsg(msg, op, stream)
+s_sendmsg sendmsg(2, GS_OP_CUT)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_opt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_opt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_opt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_opt.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + opt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

opt

+

This is an optional operand. It must be used if and only if glc is specified.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_param.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_param.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_param.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_param.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,171 @@ + + + + + + + + + param — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

param

+

Interpolation parameter to read:

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

p0

Parameter P0.

p10

Parameter P10.

p20

Parameter P20.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_probe.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_probe.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_probe.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_probe.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,173 @@ + + + + + + + + + probe — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

probe

+

A bit mask which indicates request permissions.

+

This operand must be specified as an integer_number or an absolute_expression. +The value is truncated to 7 bits, but only 3 low bits are significant.

+
+
++++ + + + + + + + + + + + + + + + + +

Bit Number

Description

0

Request read permission.

1

Request write permission.

2

Request execute permission.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_saddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_saddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_saddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_saddr_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + saddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

saddr

+

An optional 32-bit flat scratch offset. Must be specified as off if not used.

+

Either this operand or vaddr must be set to off.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_saddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_saddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_saddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_saddr.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + saddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

saddr

+

An optional 64-bit flat global address. Must be specified as off if not used.

+

See vaddr for description of available addressing modes.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sbase_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sbase_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sbase_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sbase_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sbase_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sbase_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sbase_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sbase_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

This operand is ignored by H/W and flat_scratch is supplied instead.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sbase.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sbase.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sbase.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sbase.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sbase — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sbase

+

A 64-bit base address for scalar memory operations.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_3.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_4.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata_5.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdata.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + sdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdata

+

Input data for an atomic instruction.

+

May optionally serve as output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 16 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_3.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_4.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 8 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_5.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_6.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst_7.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_sdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_sdst.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + sdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

sdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_simm32_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_simm32_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_simm32_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_simm32_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. +The value is converted to f16 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_simm32_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_simm32_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_simm32_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_simm32_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

A floating-point_number, an integer_number, or an absolute_expression. +The value is converted to f32 as described here.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_simm32.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_simm32.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_simm32.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_simm32.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + simm32 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

simm32

+

An integer_number or an absolute_expression. The value is truncated to 32 bits.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_soffset_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_soffset_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_soffset_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_soffset_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An offset added to the base address to get the memory address.

+
    +
  • If offset is specified as a register, it supplies an unsigned byte offset.

  • +
  • If offset is specified as a 21-bit immediate, it supplies a signed byte offset.

  • +
+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, simm21

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_soffset_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_soffset_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_soffset_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_soffset_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned 20-bit offset added to the base address to get the memory address.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, uimm20

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_soffset.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_soffset.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_soffset.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_soffset.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + soffset — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

soffset

+

An unsigned byte offset.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_10.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_3.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, iconst, ival, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_4.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_5.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, lds_direct

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_6.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, iconst, ival, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_7.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_8.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src_9.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_src.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_src.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + src — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

src

+

Instruction input.

+

Size: 1 dword.

+

Operands: v, s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, lds_direct, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_srsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_srsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_srsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_srsrc_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Buffer resource constant which defines the address and characteristics of the buffer in memory.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_srsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_srsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_srsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_srsrc.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + srsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

srsrc

+

Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format.

+

Size: 8 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssamp.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssamp.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssamp.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssamp.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssamp — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssamp

+

Sampler constant used to specify filtering options applied to the image data after it is read.

+

Size: 4 dwords.

+

Operands: s, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_3.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_4.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_5.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_6.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_7.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, iconst

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc_8.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_ssrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_ssrc.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + ssrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ssrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: s, flat_scratch, xnack_mask, vcc, ttmp, m0, exec, vccz, execz, scc, constant, literal

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_tgt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_tgt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_tgt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_tgt.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,177 @@ + + + + + + + + + tgt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

tgt

+

An export target:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

pos{0..3}

Copy vertex position 0..3.

param{0..31}

Copy vertex parameter 0..31.

mrt{0..7}

Copy pixel color to the MRTs 0..7.

mrtz

Copy pixel depth (Z) data.

null

Copy nothing.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_type_deviation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_type_deviation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_type_deviation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_type_deviation.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + Type Deviation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Deviation

+

Type of this operand differs from type implied by the opcode. This tag specifies actual operand type.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_1.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat address.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_2.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

A 64-bit flat global address or a 32-bit offset depending on addressing mode:

+ +

Size: 1 or 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_3.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An optional 32-bit flat scratch offset. Must be specified as off if not used.

+

Either this operand or saddr must be set to off.

+

Size: 1 dword.

+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_4.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image.

+

Size: 1, 2, 3, 4, 8 or 16 dwords. Actual size depends on opcode, specific image being handled and a16.

+
+

Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction.

+

Note 2. Actually image address size may vary from 1 to 13 dwords, but assembler currently supports a limited range of register sequences.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr_5.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,155 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

This is an optional operand which may specify offset and/or index.

+

Size: 0, 1 or 2 dwords. Size is controlled by modifiers offen and idxen:

+
    +
  • If only idxen is specified, this operand supplies an index. Size is 1 dword.

  • +
  • If only offen is specified, this operand supplies an offset. Size is 1 dword.

  • +
  • If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords.

  • +
  • If none of these modifiers are specified, this operand must be set to off.

  • +
+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vaddr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vaddr.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vaddr — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vaddr

+

An offset from the start of GDS/LDS memory.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vcc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vcc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vcc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vcc.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vcc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vcc

+

Vector condition code.

+

Size: 2 dwords.

+

Operands: vcc

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata0_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata0_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata0_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata0_1.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata0.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata0.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata0.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata0.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata0 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata0

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_10.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as an output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata1_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata1_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata1_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata1_1.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_1.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata1.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata1 — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata1

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_2.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_3.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_4.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as an output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_5.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as an output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

    +

    Note: the surface data format is indicated in the image resource constant but not in the instruction.

    +
  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_6.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to store by an image_store instruction.

+

Size: depends on dmask and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 specifies that data in registers are packed; each value occupies 16 bits.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_7.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Image data to store by an image_store instruction.

+

Size: depends on dmask which may specify from 1 to 4 data elements. Each data element occupies 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_8.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as an output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata_9.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Input data for an atomic instruction.

+

Optionally may serve as an output data:

+
    +
  • If glc is specified, gets the memory value before the operation.

  • +
+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdata.html 2021-09-19 16:16:29.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdata

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_10.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 2 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_11.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_11.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_11.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_11.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 3 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_12.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_12.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_12.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_12.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 4 dwords by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_13.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_13.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_13.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_13.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

If lds is specified, this operand is ignored by H/W and data are stored directly into LDS.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+
+

Note that tfe and lds cannot be used together.

+
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_1.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_2.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_3.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 3 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_4.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 32-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_5.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_5.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_5.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_5.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,150 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Data returned by a 64-bit atomic flat instruction.

+

This is an optional operand. It must be used if and only if glc is specified.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_6.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_6.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_6.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_6.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image_gather4 instruction.

+

Size: 4 data elements by default. Each data element occupies either 32 bits or 16 bits depending on d16.

+

d16 and tfe affect operand size as follows:

+
    +
  • d16 specifies that data elements in registers are packed; each value occupies 16 bits.

  • +
  • tfe adds one dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_7.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_7.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_7.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_7.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,153 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image instruction.

+

Size: depends on dmask and tfe:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies 1 dword.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_8.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_8.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_8.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_8.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,154 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Image data to load by an image instruction.

+

Size: depends on dmask, tfe and d16:

+
    +
  • dmask may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on d16.

  • +
  • d16 specifies that data elements in registers are packed; each value occupies 16 bits.

  • +
  • tfe adds 1 dword if specified.

  • +
+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_9.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_9.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst_9.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst_9.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output: data read from a memory buffer.

+

Size: 1 dword by default. tfe adds 1 dword if specified.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vdst.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vdst.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vdst — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vdst

+

Instruction output.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_1.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 1 dword.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_2.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 4 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc_3.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,149 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Instruction input.

+

Size: 2 dwords.

+

Operands: v

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_vsrc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_vsrc.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + vsrc — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

vsrc

+

Data to copy to export buffers. This is an optional operand. Must be specified as off if not used.

+

compr modifier indicates use of compressed (16-bit) data. This limits the number of source operands from 4 to 2:

+
    +
  • src0 and src1 must specify the first register (or off).

  • +
  • src2 and src3 must specify the second register (or off).

  • +
+

An example:

+
exp mrtz v3, v3, off, off compr
+
+
+

Size: 1 dword.

+

Operands: v, off

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_waitcnt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_waitcnt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPU/gfx9_waitcnt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPU/gfx9_waitcnt.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,242 @@ + + + + + + + + + waitcnt — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

waitcnt

+

Counts of outstanding instructions to wait for.

+

The bits of this operand have the following meaning:

+
+
++++++ + + + + + + + + + + + + + + + + + + + + + + + + +

High Bits

Low Bits

Description

Value Range

15:14

3:0

VM_CNT: vector memory operations count.

0..63

-

6:4

EXP_CNT: export count.

0..7

-

11:8

LGKM_CNT: LDS, GDS, Constant and Message count.

0..15

+
+

This operand may be specified as one of the following:

+
    +
  • An integer_number or an absolute_expression. The value must be in the range 0..0xFFFF.

  • +
  • A combination of vmcnt, expcnt, lgkmcnt and other values described below.

    +
    +
    ++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

    Syntax

    Description

    vmcnt(<N>)

    A VM_CNT value. N must not exceed the largest VM_CNT value.

    expcnt(<N>)

    An EXP_CNT value. N must not exceed the largest EXP_CNT value.

    lgkmcnt(<N>)

    An LGKM_CNT value. N must not exceed the largest LGKM_CNT value.

    vmcnt_sat(<N>)

    A VM_CNT value computed as min(N, the largest VM_CNT value).

    expcnt_sat(<N>)

    An EXP_CNT value computed as min(N, the largest EXP_CNT value).

    lgkmcnt_sat(<N>)

    An LGKM_CNT value computed as min(N, the largest LGKM_CNT value).

    +
    +
  • +
+

These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators.

+

N is either an +integer number or an +absolute expression.

+

Examples:

+
vm_cnt = 1
+exp_cnt = 2
+lgkm_cnt = 3
+cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8)
+
+s_waitcnt cnt
+s_waitcnt 1 | (2 << 4) | (3 << 8)                          // the same as above
+s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3)                    // the same as above
+s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt)  // the same as above
+
+s_waitcnt vmcnt(1)
+s_waitcnt expcnt(2) lgkmcnt(3)
+s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3)
+s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2)
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUDwarfExtensionsForHeterogeneousDebugging.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUDwarfExtensionsForHeterogeneousDebugging.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUDwarfExtensionsForHeterogeneousDebugging.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUDwarfExtensionsForHeterogeneousDebugging.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,4162 @@ + + + + + + + + + DWARF Extensions For Heterogeneous Debugging — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

DWARF Extensions For Heterogeneous Debugging

+
+ +
+
+

Warning

+

This document describes provisional extensions to DWARF Version 5 +[DWARF] to support heterogeneous debugging. It is +not currently fully implemented and is subject to change.

+
+
+

Introduction

+

AMD [AMD] has been working on supporting heterogeneous +computing through the AMD Radeon Open Compute Platform (ROCm) [AMD-ROCm]. A heterogeneous computing program can be written in a +high level language such as C++ or Fortran with OpenMP pragmas, OpenCL, or HIP +(a portable C++ programming environment for heterogeneous computing [HIP]). A heterogeneous compiler and runtime allows a program to +execute on multiple devices within the same native process. Devices could +include CPUs, GPUs, DSPs, FPGAs, or other special purpose accelerators. +Currently HIP programs execute on systems with CPUs and GPUs.

+

ROCm is fully open sourced and includes contributions to open source projects +such as LLVM for compilation [LLVM] and GDB for +debugging [GDB], as well as collaboration with other +third party projects such as the GCC compiler [GCC] +and the Perforce TotalView HPC debugger [Perforce-TotalView].

+

To support debugging heterogeneous programs several features that are not +provided by current DWARF Version 5 [DWARF] have +been identified. This document contains a collection of extensions to address +providing those features.

+

The Motivation section describes the issues that are being +addressed for heterogeneous computing. That is followed by the +Changes Relative to DWARF Version 5 section containing the +textual changes for the extensions relative to the DWARF Version 5 standard. +Then there is an Examples section that links to the AMD GPU +specific usage of the extensions that includes an example. Finally, there is a +References section. There are a number of notes included +that raise open questions, or provide alternative approaches considered. The +extensions seek to be general in nature and backwards compatible with DWARF +Version 5. The goal is to be applicable to meeting the needs of any +heterogeneous system and not be vendor or architecture specific.

+

A fundamental aspect of the extensions is that they allow DWARF expression +location descriptions as stack elements. The extensions are based on DWARF +Version 5 and maintain compatibility with DWARF Version 5. After attempting +several alternatives, the current thinking is that such extensions to DWARF +Version 5 are the simplest and cleanest ways to support debugging optimized GPU +code. It also appears to be generally useful and may be able to address other +reported DWARF issues, as well as being helpful in providing better optimization +support for non-GPU code.

+

General feedback on these extensions is sought, together with suggestions on how +to clarify, simplify, or organize them. If there is general interest then some +or all of these extensions could be submitted as future DWARF proposals.

+

We are in the process of modifying LLVM and GDB to support these extensions +which is providing experience and insights. We plan to upstream the changes to +those projects for any final form of the extensions.

+

The author very much appreciates the input provided so far by many others which +has been incorporated into this current version.

+
+
+

Motivation

+

This document presents a set of backwards compatible extensions to DWARF Version +5 [DWARF] to support heterogeneous debugging.

+

The remainder of this section provides motivation for each extension in +terms of heterogeneous debugging on commercially available AMD GPU hardware +(AMDGPU). The goal is to add support to the AMD [AMD] +open source Radeon Open Compute Platform (ROCm) [AMD-ROCm] which is an implementation of the industry standard +for heterogeneous computing devices defined by the Heterogeneous System +Architecture (HSA) Foundation [HSA]. ROCm includes the +LLVM compiler [LLVM] with upstreamed support for +AMDGPU [AMDGPU-LLVM]. The goal is to also add +the GDB debugger [GDB] with upstreamed support for +AMDGPU [AMD-ROCgdb]. In addition, the goal is +to work with third parties to enable support for AMDGPU debugging in the GCC +compiler [GCC] and the Perforce TotalView HPC debugger +[Perforce-TotalView].

+

However, the extensions are intended to be vendor and architecture neutral. They +are believed to apply to other heterogeneous hardware devices including GPUs, +DSPs, FPGAs, and other specialized hardware. These collectively include similar +characteristics and requirements as AMDGPU devices. Some of the extensions can +also apply to traditional CPU hardware that supports large vector registers. +Compilers can map source languages and extensions that describe large scale +parallel execution onto the lanes of the vector registers. This is common in +programming languages used in ML and HPC. The extensions also include improved +support for optimized code on any architecture. Some of the generalizations may +also benefit other issues that have been raised.

+

The extensions have evolved through collaboration with many individuals and +active prototyping within the GDB debugger and LLVM compiler. Input has also +been very much appreciated from the developers working on the Perforce TotalView +HPC Debugger and GCC compiler.

+

The AMDGPU has several features that require additional DWARF functionality in +order to support optimized code.

+

AMDGPU optimized code may spill vector registers to non-global address space +memory, and this spilling may be done only for lanes that are active on entry +to the subprogram. To support this, a location description that can be created +as a masked select is required. See DW_OP_LLVM_select_bit_piece.

+

Since the active lane mask may be held in a register, a way to get the value +of a register on entry to a subprogram is required. To support this an +operation that returns the caller value of a register as specified by the Call +Frame Information (CFI) is required. See DW_OP_LLVM_call_frame_entry_reg +and Call Frame Information.

+

Current DWARF uses an empty expression to indicate an undefined location +description. Since the masked select composite location description operation +takes more than one location description, it is necessary to have an explicit +way to specify an undefined location description. Otherwise it is not possible +to specify that a particular one of the input location descriptions is +undefined. See DW_OP_LLVM_undefined.

+

CFI describes restoring callee saved registers that are spilled. Currently CFI +only allows a location description that is a register, memory address, or +implicit location description. AMDGPU optimized code may spill scalar +registers into portions of vector registers. This requires extending CFI to +allow any location description. See +Call Frame Information.

+

The vector registers of the AMDGPU are represented as their full wavefront +size, meaning the wavefront size times the dword size. This reflects the +actual hardware and allows the compiler to generate DWARF for languages that +map a thread to the complete wavefront. It also allows more efficient DWARF to +be generated to describe the CFI as only a single expression is required for +the whole vector register, rather than a separate expression for each lane’s +dword of the vector register. It also allows the compiler to produce DWARF +that indexes the vector register if it spills scalar registers into portions +of a vector register.

+

Since DWARF stack value entries have a base type and AMDGPU registers are a +vector of dwords, the ability to specify that a base type is a vector is +required. See DW_AT_LLVM_vector_size.

+

If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner, +then the variable DWARF location expressions must compute the location for a +single lane of the wavefront. Therefore, a DWARF operation is required to denote +the current lane, much like DW_OP_push_object_address denotes the current +object. The DW_OP_*piece operations only allow literal indices. Therefore, a +way to use a computed offset of an arbitrary location description (such as a +vector register) is required. See DW_OP_LLVM_push_lane, +DW_OP_LLVM_offset, DW_OP_LLVM_offset_uconst, and +DW_OP_LLVM_bit_offset.

+

If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner +the compiler can use the AMDGPU execution mask register to control which lanes +are active. To describe the conceptual location of non-active lanes a DWARF +expression is needed that can compute a per lane PC. For efficiency, this is +done for the wavefront as a whole. This expression benefits by having a masked +select composite location description operation. This requires an attribute +for source location of each lane. The AMDGPU may update the execution mask for +whole wavefront operations and so needs an attribute that computes the current +active lane mask. See DW_OP_LLVM_select_bit_piece, DW_OP_LLVM_extend, +DW_AT_LLVM_lane_pc, and DW_AT_LLVM_active_lane.

+

AMDGPU needs to be able to describe addresses that are in different kinds of +memory. Optimized code may need to describe a variable that resides in pieces +that are in different kinds of storage which may include parts of registers, +memory that is in a mixture of memory kinds, implicit values, or be undefined. +DWARF has the concept of segment addresses. However, the segment cannot be +specified within a DWARF expression, which is only able to specify the offset +portion of a segment address. The segment index is only provided by the entity +that specifies the DWARF expression. Therefore, the segment index is a +property that can only be put on complete objects, such as a variable. That +makes it only suitable for describing an entity (such as variable or +subprogram code) that is in a single kind of memory. Therefore, AMDGPU uses +the DWARF concept of address spaces. For example, a variable may be allocated +in a register that is partially spilled to the call stack which is in the +private address space, and partially spilled to the local address space.

+

DWARF uses the concept of an address in many expression operations but does not +define how it relates to address spaces. For example, +DW_OP_push_object_address pushes the address of an object. Other contexts +implicitly push an address on the stack before evaluating an expression. For +example, the DW_AT_use_location attribute of the +DW_TAG_ptr_to_member_type. The expression that uses the address needs to +do so in a general way and not need to be dependent on the address space of +the address. For example, a pointer to member value may want to be applied to +an object that may reside in any address space.

+

The number of registers and the cost of memory operations is much higher for +AMDGPU than a typical CPU. The compiler attempts to optimize whole variables +and arrays into registers. Currently DWARF only allows +DW_OP_push_object_address and related operations to work with a global +memory location. To support AMDGPU optimized code it is required to generalize +DWARF to allow any location description to be used. This allows registers, or +composite location descriptions that may be a mixture of memory, registers, or +even implicit values.

+

DWARF Version 5 does not allow location descriptions to be entries on the +DWARF stack. They can only be the final result of the evaluation of a DWARF +expression. However, by allowing a location description to be a first-class +entry on the DWARF stack it becomes possible to compose expressions containing +both values and location descriptions naturally. It allows objects to be +located in any kind of memory address space, in registers, be implicit values, +be undefined, or a composite of any of these. By extending DWARF carefully, +all existing DWARF expressions can retain their current semantic meaning. +DWARF has implicit conversions that convert from a value that represents an +address in the default address space to a memory location description. This +can be extended to allow a default address space memory location description +to be implicitly converted back to its address value. This allows all DWARF +Version 5 expressions to retain their same meaning, while adding the ability +to explicitly create memory location descriptions in non-default address +spaces and generalizing the power of composite location descriptions to any +kind of location description. See DWARF Operation Expressions.

+

To allow composition of composite location descriptions, an explicit operation +that indicates the end of the definition of a composite location description +is required. This can be implied if the end of a DWARF expression is reached, +allowing current DWARF expressions to remain legal. See +DW_OP_LLVM_piece_end.

+

The DW_OP_plus and DW_OP_minus can be defined to operate on a memory +location description in the default target architecture specific address space +and a generic type value to produce an updated memory location description. This +allows them to continue to be used to offset an address. To generalize +offsetting to any location description, including location descriptions that +describe when bytes are in registers, are implicit, or a composite of these, the +DW_OP_LLVM_offset, DW_OP_LLVM_offset_uconst, and +DW_OP_LLVM_bit_offset offset operations are added. Unlike DW_OP_plus, +DW_OP_plus_uconst, and DW_OP_minus arithmetic operations, these do not +define that integer overflow causes wrap-around. The offset operations can +operate on location storage of any size. For example, implicit location storage +could be any number of bits in size. It is simpler to define offsets that exceed +the size of the location storage as being an evaluation error, than having to +force an implementation to support potentially infinite precision offsets to +allow it to correctly track a series of positive and negative offsets that may +transiently overflow or underflow, but end up in range. This is simple for the +arithmetic operations as they are defined in terms of two’s complement +arithmetic on a base type of a fixed size.

+

Having the offset operations allows DW_OP_push_object_address to push a +location description that may be in a register, or be an implicit value, and the +DWARF expression of DW_TAG_ptr_to_member_type can contain them to offset +within it. DW_OP_LLVM_bit_offset generalizes DWARF to work with bit fields +which is not possible in DWARF Version 5.

+

The DWARF DW_OP_xderef* operations allow a value to be converted into an +address of a specified address space which is then read. But it provides no +way to create a memory location description for an address in the non-default +address space. For example, AMDGPU variables can be allocated in the local +address space at a fixed address. It is required to have an operation to +create an address in a specific address space that can be used to define the +location description of the variable. Defining this operation to produce a +location description allows the size of addresses in an address space to be +larger than the generic type. See DW_OP_LLVM_form_aspace_address.

+

If the DW_OP_LLVM_form_aspace_address operation had to produce a value +that can be implicitly converted to a memory location description, then it +would be limited to the size of the generic type which matches the size of the +default address space. Its value would be undefined and likely not match any +value in the actual program. By making the result a location description, it +allows a consumer great freedom in how it implements it. The implicit +conversion back to a value can be limited only to the default address space to +maintain compatibility with DWARF Version 5. For other address spaces the +producer can use the new operations that explicitly specify the address space.

+

DW_OP_breg* treats the register as containing an address in the default +address space. It is required to be able to specify the address space of the +register value. See DW_OP_LLVM_aspace_bregx.

+

Similarly, DW_OP_implicit_pointer treats its implicit pointer value as +being in the default address space. It is required to be able to specify the +address space of the pointer value. See +DW_OP_LLVM_aspace_implicit_pointer.

+

Almost all uses of addresses in DWARF are limited to defining location +descriptions, or to be dereferenced to read memory. The exception is +DW_CFA_val_offset which uses the address to set the value of a register. +By defining the CFA DWARF expression as being a memory location description, +it can maintain what address space it is, and that can be used to convert the +offset address back to an address in that address space. See +Call Frame Information.

+

This approach allows all existing DWARF to have the identical semantics. It +allows the compiler to explicitly specify the address space it is using. For +example, a compiler could choose to access private memory in a swizzled manner +when mapping a source language to a wavefront in a SIMT manner, or to access +it in an unswizzled manner if mapping the same language with the wavefront +being the thread. It also allows the compiler to mix the address space it uses +to access private memory. For example, for SIMT it can still spill entire +vector registers in an unswizzled manner, while using a swizzled private +memory for SIMT variable access. This approach allows memory location +descriptions for different address spaces to be combined using the regular +DW_OP_*piece operations.

+

Location descriptions are an abstraction of storage; they give freedom to the +consumer on how to implement them. They allow the address space to encode lane +information so they can be used to read memory with only the memory +description and no extra arguments. The same set of operations can operate on +locations independent of their kind of storage. The DW_OP_deref* therefore +can be used on any storage kind. DW_OP_xderef* is unnecessary, except to +become a more compact way to convert a non-default address space address +followed by dereferencing it.

+

In DWARF Version 5 a location description is defined as a single location +description or a location list. A location list is defined as either +effectively an undefined location description or as one or more single +location descriptions to describe an object with multiple places. The +DW_OP_push_object_address and DW_OP_call* operations can put a +location description on the stack. Furthermore, debugger information entry +attributes such as DW_AT_data_member_location, DW_AT_use_location, and +DW_AT_vtable_elem_location are defined as pushing a location description +on the expression stack before evaluating the expression. However, DWARF +Version 5 only allows the stack to contain values and so only a single memory +address can be on the stack which makes these incapable of handling location +descriptions with multiple places, or places other than memory. Since these +extensions allow the stack to contain location descriptions, the operations are +generalized to support location descriptions that can have multiple places. +This is backwards compatible with DWARF Version 5 and allows objects with +multiple places to be supported. For example, the expression that describes +how to access the field of an object can be evaluated with a location +description that has multiple places and will result in a location description +with multiple places as expected. With this change, the separate DWARF Version +5 sections that described DWARF expressions and location lists have been +unified into a single section that describes DWARF expressions in general. +This unification seems to be a natural consequence and a necessity of allowing +location descriptions to be part of the evaluation stack.

+

For those familiar with the definition of location descriptions in DWARF Version +5, the definitions in these extensions are presented differently, but do +in fact define the same concept with the same fundamental semantics. However, +they do so in a way that allows the concept to extend to support address +spaces, bit addressing, the ability for composite location descriptions to be +composed of any kind of location description, and the ability to support +objects located at multiple places. Collectively these changes expand the set +of processors that can be supported and improve support for optimized code.

+

Several approaches were considered, and the one presented appears to be the +cleanest and offers the greatest improvement of DWARF’s ability to support +optimized code. Examining the GDB debugger and LLVM compiler, it appears only +to require modest changes as they both already have to support general use of +location descriptions. It is anticipated that this will also be the case for other +debuggers and compilers.

+

As an experiment, GDB was modified to evaluate DWARF Version 5 expressions +with location descriptions as stack entries and implicit conversions. All GDB +tests have passed, except one that turned out to be an invalid test by DWARF +Version 5 rules. The code in GDB actually became simpler as all evaluation was +on the stack and there was no longer a need to maintain a separate structure +for the location description result. This gives confidence of the backwards +compatibility.

+

Since the AMDGPU supports languages such as OpenCL [OpenCL], there is a need to define source language address +classes so they can be used in a consistent way by consumers. It would also be +desirable to add support for using them in defining language types rather than +the current target architecture specific address spaces. See +Segmented Addresses.

+

A DW_AT_LLVM_augmentation attribute is added to a compilation unit +debugger information entry to indicate that there is additional target +architecture specific information in the debugging information entries of that +compilation unit. This allows a consumer to know what extensions are present +in the debugger information entries as is possible with the augmentation +string of other sections. The format that should be used for the augmentation +string in the lookup by name table and CFI Common Information Entry is also +recommended to allow a consumer to parse the string when it contains +information from multiple vendors.

+

The AMDGPU supports programming languages that include online compilation +where the source text may be created at runtime. Therefore, a way to embed the +source text in the debug information is required. For example, the OpenCL +language runtime supports online compilation. See +Line Number Information.

+

Support to allow MD5 checksums to be optionally present in the line table is +added. This allows linking together compilation units where some have MD5 +checksums and some do not. In DWARF Version 5 the file timestamp and file size +can be optional, but if the MD5 checksum is present it must be valid for all +files. See Line Number Information.

+

Support is added for the HIP programming language [HIP] which is supported by the AMDGPU. See +Unit Entities.

+

The following sections provide the definitions for the additional operations, +as well as clarifying how existing expression operations, CFI operations, and +attributes behave with respect to generalized location descriptions that +support address spaces and location descriptions that support multiple places. +It has been defined such that it is backwards compatible with DWARF Version 5. +The definitions are intended to fully define well-formed DWARF in a consistent +style based on the DWARF Version 5 specification. Non-normative text is shown +in italics.

+

The names for the new operations, attributes, and constants include “LLVM” and are encoded with vendor specific codes so these extensions can +be implemented as an LLVM vendor extension to DWARF Version 5. If accepted these +names would not include the “LLVM” and would not use encodings in the +vendor range.

+

The extensions are described in +Changes Relative to DWARF Version 5 and are +organized to follow the section ordering of DWARF Version 5. It includes notes +to indicate the corresponding DWARF Version 5 sections to which they pertain. +Other notes describe additional changes that may be worth considering, and to +raise questions.

+
+
+

Changes Relative to DWARF Version 5

+
+

General Description

+
+

Attribute Types

+
+

Note

+

This augments DWARF Version 5 section 2.2 and Table 2.2.

+
+

The following table provides the additional attributes. See +Debugging Information Entry Attributes.

+ + ++++ + + + + + + + + + + + + + + + + + + + + + + +
Attribute names

Attribute

Usage

DW_AT_LLVM_active_lane

SIMD or SIMT active lanes

DW_AT_LLVM_augmentation

Compilation unit augmentation string

DW_AT_LLVM_lane_pc

SIMD or SIMT lane program location

DW_AT_LLVM_lanes

SIMD or SIMT thread lane count

DW_AT_LLVM_vector_size

Base type vector size

+
+
+

DWARF Expressions

+
+

Note

+

This section, and its nested sections, replaces DWARF Version 5 section 2.5 +and section 2.6. The new DWARF expression operation extensions are defined as +well as clarifying the extensions to already existing DWARF Version 5 +operations. It is based on the text of the existing DWARF Version 5 standard.

+
+

DWARF expressions describe how to compute a value or specify a location.

+

The evaluation of a DWARF expression can provide the location of an object, the +value of an array bound, the length of a dynamic string, the desired value +itself, and so on.

+

If the evaluation of a DWARF expression does not encounter an error, then it can +either result in a value (see DWARF Expression Value) or a +location description (see DWARF Location Description). When a +DWARF expression is evaluated, it may be specified whether a value or location +description is required as the result kind.

+

If a result kind is specified, and the result of the evaluation does not match +the specified result kind, then the implicit conversions described in +Memory Location Description Operations are performed if +valid. Otherwise, the DWARF expression is ill-formed.

+

If the evaluation of a DWARF expression encounters an evaluation error, then the +result is an evaluation error.

+
+

Note

+

Decided to define the concept of an evaluation error. An alternative is to +introduce an undefined value base type in a similar way to location +descriptions having an undefined location description. Then operations that +encounter an evaluation error can return the undefined location description or +value with an undefined base type.

+

All operations that act on values would return an undefined entity if given an +undefined value. The expression would then always evaluate to completion, and +can be tested to determine if it is an undefined entity.

+

However, this would add considerable additional complexity and does not match +the behavior of GDB, which throws an exception when these evaluation errors occur.

+
+

If a DWARF expression is ill-formed, then the result is undefined.

+

The following sections detail the rules for when a DWARF expression is +ill-formed or results in an evaluation error.

+

A DWARF expression can either be encoded as an operation expression (see +DWARF Operation Expressions), or as a location list expression +(see DWARF Location List Expressions).

+
+
DWARF Expression Evaluation Context
+

A DWARF expression is evaluated in a context that can include a number of +context elements. If multiple context elements are specified then they must be +self consistent or the result of the evaluation is undefined. The context +elements that can be specified are:

+

A current result kind

+
+

The kind of result required by the DWARF expression evaluation. If specified +it can be a location description or a value.

+
+

A current thread

+
+

The target architecture thread identifier of the source program thread of +execution for which a user presented expression is currently being evaluated.

+

It is required for operations that are related to target architecture threads.

+

For example, the DW_OP_form_tls_address operation and +DW_OP_LLVM_form_aspace_address operation when given an address space that +is thread specific.

+
+

A current lane

+
+

The target architecture lane identifier of the source program thread of +execution for which a user presented expression is currently being evaluated. +This applies to languages that are implemented using a SIMD or SIMT execution +model.

+

It is required for operations that are related to target architecture lanes.

+

For example, the DW_OP_LLVM_push_lane operation and +DW_OP_LLVM_form_aspace_address operation when given an address space that +is lane specific.

+

If specified, it must be consistent with any specified current thread and +current target architecture. It is consistent with a thread if it identifies a +lane of the thread. It is consistent with a target architecture if it is a +valid lane identifier of the target architecture. Otherwise the result is +undefined.

+
+

A current call frame

+
+

The target architecture call frame identifier. It identifies a call frame that +corresponds to an active invocation of a subprogram in the current thread. It +is identified by its address on the call stack. The address is referred to as +the Canonical Frame Address (CFA). The call frame information is used to +determine the CFA for the call frames of the current thread’s call stack (see +Call Frame Information).

+

It is required for operations that specify target architecture registers to +support virtual unwinding of the call stack.

+

For example, the DW_OP_*reg* operations.

+

If specified, it must be an active call frame in the current thread. If the +current lane is specified, then that lane must have been active on entry to +the call frame (see the DW_AT_LLVM_lane_pc attribute). Otherwise the +result is undefined.

+

If it is the currently executing call frame, then it is termed the top call +frame.

+
+

A current program location

+
+

The target architecture program location corresponding to the current call +frame of the current thread.

+

The program location of the top call frame is the target architecture program +counter for the current thread. The call frame information is used to obtain +the value of the return address register to determine the program location of +the other call frames (see Call Frame Information).

+

It is required for the evaluation of location list expressions to select +amongst multiple program location ranges. It is required for operations that +specify target architecture registers to support virtual unwinding of the call +stack (see Call Frame Information).

+

If specified:

+
    +
  • If the current lane is not specified:

    +
      +
    • If the current call frame is the top call frame, it must be the current +target architecture program location.

    • +
    • If the current call frame F is not the top call frame, it must be the +program location associated with the call site in the current caller frame +F that invoked the callee frame.

    • +
    +
  • +
  • If the current lane is specified and the architecture program location LPC +computed by the DW_AT_LLVM_lane_pc attribute for the current lane is not +the undefined location description (indicating the lane was not active on +entry to the call frame), it must be LPC.

  • +
  • Otherwise the result is undefined.

  • +
+
+

A current compilation unit

+
+

The compilation unit debug information entry that contains the DWARF expression +being evaluated.

+

It is required for operations that reference debug information associated with +the same compilation unit, including indicating if such references use the +32-bit or 64-bit DWARF format. It can also provide the default address space +address size if no current target architecture is specified.

+

For example, the DW_OP_constx and DW_OP_addrx operations.

+

Note that this compilation unit may not be the same as the compilation unit +determined from the loaded code object corresponding to the current program +location. For example, the evaluation of the expression E associated with a +DW_AT_location attribute of the debug information entry operand of the +DW_OP_call* operations is evaluated with the compilation unit that +contains E and not the one that contains the DW_OP_call* operation +expression.

+
+

A current target architecture

+
+

The target architecture.

+

It is required for operations that specify target architecture specific +entities.

+

For example, target architecture specific entities include DWARF register +identifiers, DWARF lane identifiers, DWARF address space identifiers, the +default address space, and the address space address sizes.

+

If specified:

+
    +
  • If the current thread is specified, then the current target architecture +must be the same as the target architecture of the current thread.

  • +
  • If the current compilation unit is specified, then the current target +architecture default address space address size must be the same as the +address_size field in the header of the current compilation unit and any +associated entry in the .debug_aranges section.

  • +
  • If the current program location is specified, then the current target +architecture must be the same as the target architecture of any line number +information entry (see Line Number Information) +corresponding to the current program location.

  • +
  • If the current program location is specified, then the current target +architecture default address space address size must be the same as the +address_size field in the header of any entry corresponding to the +current program location in the .debug_addr, .debug_line, +.debug_rnglists, .debug_rnglists.dwo, .debug_loclists, and +.debug_loclists.dwo sections.

  • +
  • Otherwise the result is undefined.

  • +
+
+

A current object

+
+

The location description of a program object.

+

It is required for the DW_OP_push_object_address operation.

+

For example, the DW_AT_data_location attribute on type debug +information entries specifies the program object corresponding to a +runtime descriptor as the current object when it evaluates its associated +expression.

+

The result is undefined if the location description is invalid (see +DWARF Location Description).

+
+

An initial stack

+
+

This is a list of values or location descriptions that will be pushed on the +operation expression evaluation stack in the order provided before evaluation +of an operation expression starts.

+

Some debugger information entries have attributes that evaluate their DWARF +expression value with initial stack entries. In all other cases the initial +stack is empty.

+

The result is undefined if any location descriptions are invalid (see +DWARF Location Description).

+
+

If the evaluation requires a context element that is not specified, then the +result of the evaluation is an error.

+

A DWARF expression for the location description may be able to be evaluated +without a thread, lane, call frame, program location, or architecture context. +For example, the location of a global variable may be able to be evaluated +without such context. If the expression evaluates with an error then it may +indicate the variable has been optimized and so requires more context.

+

The DWARF expression for call frame information (see +Call Frame Information) operations are restricted to those +that do not require the compilation unit context to be specified.

+

The DWARF is ill-formed if all the address_size fields in the headers of all +the entries in the .debug_info, .debug_addr, .debug_line, +.debug_rnglists, .debug_rnglists.dwo, .debug_loclists, and +.debug_loclists.dwo sections corresponding to any given program location do +not match.

+
+
+
DWARF Expression Value
+

A value has a type and a literal value. It can represent a literal value of any +supported base type of the target architecture. The base type specifies the +size, encoding, and endianity of the literal value.

+
+

Note

+

It may be desirable to add an implicit pointer base type encoding. It would be +used for the type of the value that is produced when the DW_OP_deref* +operation retrieves the full contents of an implicit pointer location storage +created by the DW_OP_implicit_pointer or +DW_OP_LLVM_aspace_implicit_pointer operations. The literal value would +record the debugging information entry and byte displacement specified by the +associated DW_OP_implicit_pointer or +DW_OP_LLVM_aspace_implicit_pointer operations.

+
+

There is a distinguished base type termed the generic type, which is an integral +type that has the size of an address in the target architecture default address +space, a target architecture defined endianity, and unspecified signedness.

+

The generic type is the same as the unspecified type used for stack operations +defined in DWARF Version 4 and before.

+

An integral type is a base type that has an encoding of DW_ATE_signed, +DW_ATE_signed_char, DW_ATE_unsigned, DW_ATE_unsigned_char, +DW_ATE_boolean, or any target architecture defined integral encoding in the +inclusive range DW_ATE_lo_user to DW_ATE_hi_user.

+
+

Note

+

It is unclear if DW_ATE_address is an integral type. GDB does not seem to +consider it as integral.

+
+
+
+
DWARF Location Description
+

Debugging information must provide consumers a way to find the location of +program variables, determine the bounds of dynamic arrays and strings, and +possibly to find the base address of a subprogram’s call frame or the return +address of a subprogram. Furthermore, to meet the needs of recent computer +architectures and optimization techniques, debugging information must be able to +describe the location of an object whose location changes over the object’s +lifetime, and may reside at multiple locations simultaneously during parts of an +object’s lifetime.

+

Information about the location of program objects is provided by location +descriptions.

+

Location descriptions can consist of one or more single location descriptions.

+

A single location description specifies the location storage that holds a +program object and a position within the location storage where the program +object starts. The position within the location storage is expressed as a bit +offset relative to the start of the location storage.

+

A location storage is a linear stream of bits that can hold values. Each +location storage has a size in bits and can be accessed using a zero-based bit +offset. The ordering of bits within a location storage uses the bit numbering +and direction conventions that are appropriate to the current language on the +target architecture.

+

There are five kinds of location storage:

+
+
memory location storage

Corresponds to the target architecture memory address spaces.

+
+
register location storage

Corresponds to the target architecture registers.

+
+
implicit location storage

Corresponds to fixed values that can only be read.

+
+
undefined location storage

Indicates no value is available and therefore cannot be read or written.

+
+
composite location storage

Allows a mixture of these where some bits come from one location storage and +some from another location storage, or from disjoint parts of the same +location storage.

+
+
+
+

Note

+

It may be better to add an implicit pointer location storage kind used by the +DW_OP_implicit_pointer and DW_OP_LLVM_aspace_implicit_pointer +operations. It would specify the debugger information entry and byte offset +provided by the operations.

+
+

Location descriptions are a language independent representation of addressing +rules. They are created using DWARF operation expressions of arbitrary +complexity. They can be the result of evaluating a debugger information entry +attribute that specifies an operation expression. In this usage they can +describe the location of an object as long as its lifetime is either static or +the same as the lexical block (see DWARF Version 5 section 3.5) that owns it, +and it does not move during its lifetime. They can be the result of evaluating a +debugger information entry attribute that specifies a location list expression. +In this usage they can describe the location of an object that has a limited +lifetime, changes its location during its lifetime, or has multiple locations +over part or all of its lifetime.

+

If a location description has more than one single location description, the +DWARF expression is ill-formed if the object value held in each single location +description’s position within the associated location storage is not the same +value, except for the parts of the value that are uninitialized.

+

A location description that has more than one single location description can +only be created by a location list expression that has overlapping program +location ranges, or certain expression operations that act on a location +description that has more than one single location description. There are no +operation expression operations that can directly create a location description +with more than one single location description.

+

A location description with more than one single location description can be +used to describe objects that reside in more than one piece of storage at the +same time. An object may have more than one location as a result of +optimization. For example, a value that is only read may be promoted from memory +to a register for some region of code, but later code may revert to reading the +value from memory as the register may be used for other purposes. For the code +region where the value is in a register, any change to the object value must be +made in both the register and the memory so both regions of code will read the +updated value.

+

A consumer of a location description with more than one single location +description can read the object’s value from any of the single location +descriptions (since they all refer to location storage that has the same value), +but must write any changed value to all the single location descriptions.

+

The evaluation of an expression may require context elements to create a +location description. If such a location description is accessed, the storage it +denotes is that associated with the context element values specified when the +location description was created, which may differ from the context at the time +it is accessed.

+

For example, creating a register location description requires the thread +context: the location storage is for the specified register of that thread. +Creating a memory location description for an address space may require a +thread and a lane context: the location storage is the memory associated with +that thread and lane.

+

If any of the context elements required to create a location description change, +the location description becomes invalid and accessing it is undefined.

+

Examples of context that can invalidate a location description are:

+
    +
  • The thread context is required and execution causes the thread to terminate.

  • +
  • The call frame context is required and further execution causes the call +frame to return to the calling frame.

  • +
  • The program location is required and further execution of the thread occurs. +That could change the location list entry or call frame information entry that +applies.

  • +
  • An operation uses call frame information:

    +
      +
    • Any of the frames used in the virtual call frame unwinding return.

    • +
    • The top call frame is used, the program location is used to select the call +frame information entry, and further execution of the thread occurs.

    • +
    +
  • +
+

A DWARF expression can be used to compute a location description for an object. +A subsequent DWARF expression evaluation can be given the object location +description as the object context or initial stack context to compute a +component of the object. The final result is undefined if the object location +description becomes invalid between the two expression evaluations.

+

A change of a thread’s program location may not make a location description +invalid, yet may still render it as no longer meaningful. Accessing such a +location description, or using it as the object context or initial stack context +of an expression evaluation, may produce an undefined result.

+

For example, a location description may specify a register that no longer holds +the intended program object after a program location change. One way to avoid +such problems is to recompute location descriptions associated with threads when +their program locations change.

+
+
+
DWARF Operation Expressions
+

An operation expression is comprised of a stream of operations, each consisting +of an opcode followed by zero or more operands. The number of operands is +implied by the opcode.

+

Operations represent a postfix operation on a simple stack machine. Each stack +entry can hold either a value or a location description. Operations can act on +entries on the stack, including adding entries and removing entries. If the kind +of a stack entry does not match the kind required by the operation and is not +implicitly convertible to the required kind (see +Memory Location Description Operations), then the DWARF +operation expression is ill-formed.

+

Evaluation of an operation expression starts with an empty stack on which the +entries from the initial stack provided by the context are pushed in the order +provided. Then the operations are evaluated, starting with the first operation +of the stream. Evaluation continues until either an operation has an evaluation +error, or until one past the last operation of the stream is reached.

+

The result of the evaluation is:

+
    +
  • If an operation has an evaluation error, or an operation evaluates an +expression that has an evaluation error, then the result is an evaluation +error.

  • +
  • If the current result kind specifies a location description, then:

    +
      +
    • If the stack is empty, the result is a location description with one +undefined location description.

      +

      This rule is for backwards compatibility with DWARF Version 5 which has no +explicit operation to create an undefined location description, and uses an +empty operation expression for this purpose.

      +
    • +
    • If the top stack entry is a location description, or can be converted +to one (see Memory Location Description Operations), +then the result is that, possibly converted, location description. Any other +entries on the stack are discarded.

    • +
    • Otherwise the DWARF expression is ill-formed.

      +
      +

      Note

      +

      Could define this case as returning an implicit location description as +if the DW_OP_implicit operation is performed.

      +
      +
    • +
    +
  • +
  • If the current result kind specifies a value, then:

    +
      +
    • If the top stack entry is a value, or can be converted to one (see +Memory Location Description Operations), then the result +is that, possibly converted, value. Any other entries on the stack are +discarded.

    • +
    • Otherwise the DWARF expression is ill-formed.

    • +
    +
  • +
  • If the current result kind is not specified, then:

    +
      +
    • If the stack is empty, the result is a location description with one +undefined location description.

      +

      This rule is for backwards compatibility with DWARF Version 5 which has no +explicit operation to create an undefined location description, and uses an +empty operation expression for this purpose.

      +
      +

      Note

      +

      This rule is consistent with the rule above for when a location +description is requested. However, GDB appears to report this as an error +and no GDB tests appear to cause an empty stack for this case.

      +
      +
    • +
    • Otherwise, the top stack entry is returned. Any other entries on the stack +are discarded.

    • +
    +
  • +
+

An operation expression is encoded as a byte block with some form of prefix that +specifies the byte count. It can be used:

+
    +
  • as the value of a debugging information entry attribute that is encoded using +class exprloc (see DWARF Version 5 section 7.5.5),

  • +
  • as the operand to certain operation expression operations,

  • +
  • as the operand to certain call frame information operations (see +Call Frame Information),

  • +
  • and in location list entries (see +DWARF Location List Expressions).

  • +
+
+
Stack Operations
+

The following operations manipulate the DWARF stack. Operations that index the +stack assume that the top of the stack (most recently added entry) has index 0. +They allow the stack entries to be either a value or location description.

+

If any stack entry accessed by a stack operation is an incomplete composite +location description (see +Composite Location Description Operations), then the DWARF +expression is ill-formed.

+
+

Note

+

These operations now support stack entries that are values and location +descriptions.

+
+
+

Note

+

If it is desired to also make them work with incomplete composite location +descriptions, then it would be necessary to define that the composite location storage +specified by the incomplete composite location description is also replicated +when a copy is pushed. This ensures that each copy of the incomplete composite +location description can update the composite location storage they specify +independently.

+
+
    +
  1. DW_OP_dup

    +

    DW_OP_dup duplicates the stack entry at the top of the stack.

    +
  2. +
  3. DW_OP_drop

    +

    DW_OP_drop pops the stack entry at the top of the stack and discards it.

    +
  4. +
  5. DW_OP_pick

    +

    DW_OP_pick has a single unsigned 1-byte operand that represents an index +I. A copy of the stack entry with index I is pushed onto the stack.

    +
  6. +
  7. DW_OP_over

    +

    DW_OP_over pushes a copy of the entry with index 1.

    +

    This is equivalent to a DW_OP_pick 1 operation.

    +
  8. +
  9. DW_OP_swap

    +

    DW_OP_swap swaps the top two stack entries. The entry at the top of the +stack becomes the second stack entry, and the second stack entry becomes the +top of the stack.

    +
  10. +
  11. DW_OP_rot

    +

    DW_OP_rot rotates the first three stack entries. The entry at the top of +the stack becomes the third stack entry, the second entry becomes the top of +the stack, and the third entry becomes the second entry.

    +
  12. +
+
+
+
Control Flow Operations
+

The following operations provide simple control of the flow of a DWARF operation +expression.

+
    +
  1. DW_OP_nop

    +

    DW_OP_nop is a place holder. It has no effect on the DWARF stack +entries.

    +
  2. +
  3. DW_OP_le, DW_OP_ge, DW_OP_eq, DW_OP_lt, DW_OP_gt, +DW_OP_ne

    +
    +

    Note

    +

    The same as in DWARF Version 5 section 2.5.1.5.

    +
    +
  4. +
  5. DW_OP_skip

    +

    DW_OP_skip is an unconditional branch. Its single operand is a 2-byte +signed integer constant. The 2-byte constant is the number of bytes of the +DWARF expression to skip forward or backward from the current operation, +beginning after the 2-byte constant.

    +

    If the updated position is at one past the end of the last operation, then +the operation expression evaluation is complete.

    +

    Otherwise, the DWARF expression is ill-formed if the updated operation +position is not in the range of the first to last operation inclusive, or +not at the start of an operation.

    +
  6. +
  7. DW_OP_bra

    +

    DW_OP_bra is a conditional branch. Its single operand is a 2-byte signed +integer constant. This operation pops the top of stack. If the value popped +is not the constant 0, the 2-byte constant operand is the number of bytes of +the DWARF operation expression to skip forward or backward from the current +operation, beginning after the 2-byte constant.

    +

    If the updated position is at one past the end of the last operation, then +the operation expression evaluation is complete.

    +

    Otherwise, the DWARF expression is ill-formed if the updated operation +position is not in the range of the first to last operation inclusive, or +not at the start of an operation.

    +
  8. +
  9. DW_OP_call2, DW_OP_call4, DW_OP_call_ref

    +

    DW_OP_call2, DW_OP_call4, and DW_OP_call_ref perform DWARF +procedure calls during evaluation of a DWARF expression.

    +

    DW_OP_call2 and DW_OP_call4 have one operand that is, respectively, +a 2-byte or 4-byte unsigned offset DR that represents the byte offset of a +debugging information entry D relative to the beginning of the current +compilation unit.

    +

    DW_OP_call_ref has one operand that is a 4-byte unsigned value in the +32-bit DWARF format, or an 8-byte unsigned value in the 64-bit DWARF format, +that represents the byte offset DR of a debugging information entry D +relative to the beginning of the .debug_info section that contains the +current compilation unit. D may not be in the current compilation unit.

    +

    Operand interpretation of DW_OP_call2, DW_OP_call4, and +DW_OP_call_ref is exactly like that for DW_FORM_ref2, +DW_FORM_ref4, and DW_FORM_ref_addr, respectively.

    +

    The call operation is evaluated by:

    +
      +
    • If D has a DW_AT_location attribute that is encoded as an exprloc +that specifies an operation expression E, then execution of the current +operation expression continues from the first operation of E. Execution +continues until one past the last operation of E is reached, at which +point execution continues with the operation following the call operation. +The operations of E are evaluated with the same current context, except the +current compilation unit is the one that contains D and the stack is the +same as that being used by the call operation. After the call operation +has been evaluated, the stack is therefore as it is left by the evaluation +of the operations of E. Since E is evaluated on the same stack as the call +operation, E can use, and/or remove entries already on the stack, and can +add new entries to the stack.

      +

      Values on the stack at the time of the call may be used as parameters by +the called expression and values left on the stack by the called expression +may be used as return values by prior agreement between the calling and +called expressions.

      +
    • +
    • If D has a DW_AT_location attribute that is encoded as a loclist or +loclistsptr, then the specified location list expression E is +evaluated. The evaluation of E uses the current context, except the result +kind is a location description, the compilation unit is the one that +contains D, and the initial stack is empty. The location description +result is pushed on the stack.

      +
      +

      Note

      +

      This rule avoids having to define how to execute a matched location list +entry operation expression on the same stack as the call when there are +multiple matches. But it allows the call to obtain the location +description for a variable or formal parameter which may use a location +list expression.

      +

      An alternative is to treat the case when D has a DW_AT_location +attribute that is encoded as a loclist or loclistsptr, and the +specified location list expression E’ matches a single location list +entry with operation expression E, the same as the exprloc case and +evaluate on the same stack.

      +

      But this is not attractive as if the attribute is for a variable that +happens to end with a non-singleton stack, it will not simply put a +location description on the stack. Presumably the intent of using +DW_OP_call* on a variable or formal parameter debugger information +entry is to push just one location description on the stack. That +location description may have more than one single location description.

      +

      The previous rule for exprloc also has the same problem as normally +a variable or formal parameter location expression may leave multiple +entries on the stack and only return the top entry.

      +

      GDB implements DW_OP_call* by always executing E on the same stack. +If the location list has multiple matching entries, it simply picks the +first one and ignores the rest. This seems fundamentally at odds with +the desire to support multiple places for variables.

      +

      So, it feels like DW_OP_call* should both support pushing a location +description on the stack for a variable or formal parameter, and also +support being able to execute an operation expression on the same stack. +Being able to specify a different operation expression for different +program locations seems a desirable feature to retain.

      +

      A solution to that is to have a distinct DW_AT_LLVM_proc attribute +for the DW_TAG_dwarf_procedure debugging information entry. Then the +DW_AT_location attribute expression is always executed separately +and pushes a location description (that may have multiple single +location descriptions), and the DW_AT_LLVM_proc attribute expression +is always executed on the same stack and can leave anything on the +stack.

      +

      The DW_AT_LLVM_proc attribute could have the new classes +exprproc, loclistproc, and loclistsptrproc to indicate that +the expression is executed on the same stack. exprproc is the same +encoding as exprloc. loclistproc and loclistsptrproc are the +same encoding as their non-proc counterparts, except the DWARF is +ill-formed if the location list does not match exactly one location list +entry and a default entry is required. These forms indicate explicitly +that the matched single operation expression must be executed on the +same stack. This is better than ad hoc special rules for loclistproc +and loclistsptrproc which are currently clearly defined to always +return a location description. The producer then explicitly indicates +the intent through the attribute classes.

      +

      Such a change would be a breaking change for how GDB implements +DW_OP_call*. However, are the breaking cases actually occurring in +practice? GDB could implement the current approach for DWARF Version 5, +and the new semantics for DWARF Version 6 which has been done for some +other features.

      +

      Another option is to limit the execution to be on the same stack only to +the evaluation of an expression E that is the value of a +DW_AT_location attribute of a DW_TAG_dwarf_procedure debugging +information entry. The DWARF would be ill-formed if E is a location list +expression that does not match exactly one location list entry. In all +other cases the evaluation of an expression E that is the value of a +DW_AT_location attribute would evaluate E with the current context, +except the result kind is a location description, the compilation unit +is the one that contains D, and the initial stack is empty. The location +description result is pushed on the stack.

      +
      +
    • +
    • If D has a DW_AT_const_value attribute with a value V, then it is as +if a DW_OP_implicit_value V operation was executed.

      +

      This allows a call operation to be used to compute the location +description for any variable or formal parameter regardless of whether the +producer has optimized it to a constant. This is consistent with the +DW_OP_implicit_pointer operation.

      +
      +

      Note

      +

      Alternatively, could deprecate using DW_AT_const_value for +DW_TAG_variable and DW_TAG_formal_parameter debugger information +entries that are constants and instead use DW_AT_location with an +operation expression that results in a location description with one +implicit location description. Then this rule would not be required.

      +
      +
    • +
    • Otherwise, there is no effect and no changes are made to the stack.

      +
      +

      Note

      +

      In DWARF Version 5, if D does not have a DW_AT_location then +DW_OP_call* is defined to have no effect. It is unclear that this is +the right definition as a producer should be able to rely on using +DW_OP_call* to get a location description for any non-DW_TAG_dwarf_procedure debugging information entries. Also, the +producer should not be creating DWARF with DW_OP_call* to a +DW_TAG_dwarf_procedure that does not have a DW_AT_location +attribute. So, should this case be defined as an ill-formed DWARF +expression?

      +
      +
    • +
    +

    The DW_TAG_dwarf_procedure debugging information entry can be used to +define DWARF procedures that can be called.

    +
  10. +
+
+
+
Value Operations
+

This section describes the operations that push values on the stack.

+

Each value stack entry has a type and a literal value and can represent a +literal value of any supported base type of the target architecture. The base +type specifies the size, encoding, and endianity of the literal value.

+

The base type of value stack entries can be the distinguished generic type.

+
+Literal Operations +

The following operations all push a literal value onto the DWARF stack.

+

Operations other than DW_OP_const_type push a value V with the generic type. +If V is larger than the generic type, then V is truncated to the generic type +size and the low-order bits used.

+
    +
  1. DW_OP_lit0, DW_OP_lit1, …, DW_OP_lit31

    +

    DW_OP_lit<N> operations encode an unsigned literal value N from 0 +through 31, inclusive. They push the value N with the generic type.

    +
  2. +
  3. DW_OP_const1u, DW_OP_const2u, DW_OP_const4u, DW_OP_const8u

    +

    DW_OP_const<N>u operations have a single operand that is a 1, 2, 4, or +8-byte unsigned integer constant U, respectively. They push the value U with +the generic type.

    +
  4. +
  5. DW_OP_const1s, DW_OP_const2s, DW_OP_const4s, DW_OP_const8s

    +

    DW_OP_const<N>s operations have a single operand that is a 1, 2, 4, or +8-byte signed integer constant S, respectively. They push the value S with +the generic type.

    +
  6. +
  7. DW_OP_constu

    +

    DW_OP_constu has a single unsigned LEB128 integer operand N. It pushes +the value N with the generic type.

    +
  8. +
  9. DW_OP_consts

    +

    DW_OP_consts has a single signed LEB128 integer operand N. It pushes the +value N with the generic type.

    +
  10. +
  11. DW_OP_constx

    +

    DW_OP_constx has a single unsigned LEB128 integer operand that +represents a zero-based index into the .debug_addr section relative to +the value of the DW_AT_addr_base attribute of the associated compilation +unit. The value N in the .debug_addr section has the size of the generic +type. It pushes the value N with the generic type.

    +

    The DW_OP_constx operation is provided for constants that require +link-time relocation but should not be interpreted by the consumer as a +relocatable address (for example, offsets to thread-local storage).

    +
  12. +
+
    +
  1. DW_OP_const_type

    +

    DW_OP_const_type has three operands. The first is an unsigned LEB128 +integer DR that represents the byte offset of a debugging information entry +D relative to the beginning of the current compilation unit, that provides +the type T of the constant value. The second is a 1-byte unsigned integral +constant S. The third is a block of bytes B, with a length equal to S.

    +

    TS is the bit size of the type T. The least significant TS bits of B are +interpreted as a value V of the type T. It pushes the value V with the type +T.

    +

    The DWARF is ill-formed if D is not a DW_TAG_base_type debugging +information entry in the current compilation unit, or if TS divided by 8 +(the byte size) and rounded up to a whole number is not equal to S.

    +

    While the size of the byte block B can be inferred from the type D +definition, it is encoded explicitly into the operation so that the +operation can be parsed easily without reference to the .debug_info +section.

    +
  2. +
  3. DW_OP_LLVM_push_lane New

    +

    DW_OP_LLVM_push_lane pushes the target architecture lane identifier of +the current lane as a value with the generic type.

    +

    For languages that are implemented using a SIMD or SIMT execution model, +this is the lane number that corresponds to the source language thread of +execution upon which the user is focused.

    +
  4. +
+
+
+Arithmetic and Logical Operations +
+

Note

+

This section is the same as DWARF Version 5 section 2.5.1.4.

+
+
+
+Type Conversion Operations +
+

Note

+

This section is the same as DWARF Version 5 section 2.5.1.6.

+
+
+
+Special Value Operations +

There are these special value operations currently defined:

+
    +
  1. DW_OP_regval_type

    +

    DW_OP_regval_type has two operands. The first is an unsigned LEB128 +integer that represents a register number R. The second is an unsigned +LEB128 integer DR that represents the byte offset of a debugging information +entry D relative to the beginning of the current compilation unit, that +provides the type T of the register value.

    +

    The operation is equivalent to performing DW_OP_regx R; DW_OP_deref_type +DR.

    +
    +

    Note

    +

    Should DWARF allow the type T to be a larger size than the size of the +register R? Restricting a larger bit size avoids any issue of conversion +as the, possibly truncated, bit contents of the register is simply +interpreted as a value of T. If a conversion is wanted it can be done +explicitly using a DW_OP_convert operation.

    +

    GDB has a per register hook that allows a target specific conversion on a +register by register basis. It defaults to truncation of bigger registers. +Removing use of the target hook does not cause any test failures in common +architectures. If the compiler for a target architecture did want some +form of conversion, including a larger result type, it could always +explicitly use the DW_OP_convert operation.

    +

    If T is a larger type than the register size, then the default GDB +register hook reads bytes from the next register (or reads out of bounds +for the last register!). Removing use of the target hook does not cause +any test failures in common architectures (except an illegal hand written +assembly test). If a target architecture requires this behavior, these +extensions allow a composite location description to be used to combine +multiple registers.

    +
    +
  2. +
  3. DW_OP_deref

    +

    S is the bit size of the generic type divided by 8 (the byte size) and +rounded up to a whole number. DR is the offset of a hypothetical debug +information entry D in the current compilation unit for a base type of the +generic type.

    +

    The operation is equivalent to performing DW_OP_deref_type S, DR.

    +
  4. +
  5. DW_OP_deref_size

    +

    DW_OP_deref_size has a single 1-byte unsigned integral constant that +represents a byte result size S.

    +

    TS is the smaller of the generic type bit size and S scaled by 8 (the byte +size). If TS is smaller than the generic type bit size then T is an unsigned +integral type of bit size TS, otherwise T is the generic type. DR is the +offset of a hypothetical debug information entry D in the current +compilation unit for a base type T.

    +
    +

    Note

    +

    Truncating the value when S is larger than the generic type matches what +GDB does. This allows the generic type size to not be an integral byte +size. It does allow S to be arbitrarily large. Should S be restricted to +the size of the generic type rounded up to a multiple of 8?

    +
    +

    The operation is equivalent to performing DW_OP_deref_type S, DR, except +if T is not the generic type, the value V pushed is zero-extended to the +generic type bit size and its type changed to the generic type.

    +
  6. +
  7. DW_OP_deref_type

    +

    DW_OP_deref_type has two operands. The first is a 1-byte unsigned +integral constant S. The second is an unsigned LEB128 integer DR that +represents the byte offset of a debugging information entry D relative to +the beginning of the current compilation unit, that provides the type T of +the result value.

    +

    TS is the bit size of the type T.

    +

    While the size of the pushed value V can be inferred from the type T, it is +encoded explicitly as the operand S so that the operation can be parsed +easily without reference to the .debug_info section.

    +
    +

    Note

    +

    It is unclear why the operand S is needed. Unlike DW_OP_const_type, +the size is not needed for parsing. Any evaluation needs to get the base +type T to push with the value to know its encoding and bit size.

    +
    +

    It pops one stack entry that must be a location description L.

    +

    A value V of TS bits is retrieved from the location storage LS specified by +one of the single location descriptions SL of L.

    +

    If L, or the location description of any composite location description +part that is a subcomponent of L, has more than one single location +description, then any one of them can be selected as they are required to +all have the same value. For any single location description SL, bits are +retrieved from the associated storage location starting at the bit offset +specified by SL. For a composite location description, the retrieved bits +are the concatenation of the N bits from each composite location part PL, +where N is limited to the size of PL.

    +

    V is pushed on the stack with the type T.

    +
    +

    Note

    +

    This definition makes it an evaluation error if L is a register location +description that has less than TS bits remaining in the register storage. +Particularly since these extensions extend location descriptions to have +a bit offset, it would be odd to define this as performing sign extension +based on the type, or be target architecture dependent, as the number of +remaining bits could be any number. This matches the GDB implementation +for DW_OP_deref_type.

    +

    These extensions define DW_OP_*breg* in terms of +DW_OP_regval_type. DW_OP_regval_type is defined in terms of +DW_OP_regx, which uses a 0 bit offset, and DW_OP_deref_type. +Therefore, it requires the register size to be greater or equal to the +address size of the address space. This matches the GDB implementation for +DW_OP_*breg*.

    +
    +

    The DWARF is ill-formed if D is not in the current compilation unit, D is +not a DW_TAG_base_type debugging information entry, or if TS divided by +8 (the byte size) and rounded up to a whole number is not equal to S.

    +
    +

    Note

    +

    This definition allows the base type to be a bit size since there seems no +reason to restrict it.

    +
    +

    It is an evaluation error if any bit of the value is retrieved from the +undefined location storage or the offset of any bit exceeds the size of the +location storage LS specified by any single location description SL of L.

    +

    See Implicit Location Description Operations for special rules +concerning implicit location descriptions created by the +DW_OP_implicit_pointer and DW_OP_LLVM_implicit_aspace_pointer +operations.

    +
  8. +
  9. DW_OP_xderef Deprecated

    +

    DW_OP_xderef pops two stack entries. The first must be an integral type +value that represents an address A. The second must be an integral type +value that represents a target architecture specific address space +identifier AS.

    +

    The operation is equivalent to performing DW_OP_swap; +DW_OP_LLVM_form_aspace_address; DW_OP_deref. The value V retrieved is left +on the stack with the generic type.

    +

    This operation is deprecated as the DW_OP_LLVM_form_aspace_address +operation can be used and provides greater expressiveness.

    +
  10. +
  11. DW_OP_xderef_size Deprecated

    +

    DW_OP_xderef_size has a single 1-byte unsigned integral constant that +represents a byte result size S.

    +

    It pops two stack entries. The first must be an integral type value that +represents an address A. The second must be an integral type value that +represents a target architecture specific address space identifier AS.

    +

    The operation is equivalent to performing DW_OP_swap; +DW_OP_LLVM_form_aspace_address; DW_OP_deref_size S. The zero-extended +value V retrieved is left on the stack with the generic type.

    +

    This operation is deprecated as the DW_OP_LLVM_form_aspace_address +operation can be used and provides greater expressiveness.

    +
  12. +
  13. DW_OP_xderef_type Deprecated

    +

    DW_OP_xderef_type has two operands. The first is a 1-byte unsigned +integral constant S. The second operand is an unsigned LEB128 integer DR +that represents the byte offset of a debugging information entry D relative +to the beginning of the current compilation unit, that provides the type T +of the result value.

    +

    It pops two stack entries. The first must be an integral type value that +represents an address A. The second must be an integral type value that +represents a target architecture specific address space identifier AS.

    +

    The operation is equivalent to performing DW_OP_swap; +DW_OP_LLVM_form_aspace_address; DW_OP_deref_type S, DR. The value V +retrieved is left on the stack with the type T.

    +

    This operation is deprecated as the DW_OP_LLVM_form_aspace_address +operation can be used and provides greater expressiveness.

    +
  14. +
  15. DW_OP_entry_value Deprecated

    +

    DW_OP_entry_value pushes the value of an expression that is evaluated in +the context of the calling frame.

    +

    It may be used to determine the value of arguments on entry to the current +call frame provided they are not clobbered.

    +

    It has two operands. The first is an unsigned LEB128 integer S. The second +is a block of bytes, with a length equal to S, interpreted as a DWARF +operation expression E.

    +

    E is evaluated with the current context, except the result kind is +unspecified, the call frame is the one that called the current frame, the +program location is the call site in the calling frame, the object is +unspecified, and the initial stack is empty. The calling frame information +is obtained by virtually unwinding the current call frame using the call +frame information (see Call Frame Information).

    +

    If the result of E is a location description L (see +Register Location Description Operations), and the last operation +executed by E is a DW_OP_reg* for register R with a target architecture +specific base type of T, then the contents of the register are retrieved as +if a DW_OP_deref_type DR operation was performed where DR is the offset +of a hypothetical debug information entry in the current compilation unit +for T. The resulting value V is pushed on the stack.

    +

    Using DW_OP_reg* provides a more compact form for the case where the +value was in a register on entry to the subprogram.

    +

    If the result of E is a value V, then V is pushed on the stack.

    +

    Otherwise, the DWARF expression is ill-formed.

    +

    The DW_OP_entry_value operation is deprecated as its main usage is +provided by other means. DWARF Version 5 added the +DW_TAG_call_site_parameter debugger information entry for call sites +that has DW_AT_call_value, DW_AT_call_data_location, and +DW_AT_call_data_value attributes that provide DWARF expressions to +compute actual parameter values at the time of the call, and requires the +producer to ensure the expressions are valid to evaluate even when virtually +unwound. The DW_OP_LLVM_call_frame_entry_reg operation provides access +to registers in the virtually unwound calling frame.

    +
    +

    Note

    +

    GDB only implements DW_OP_entry_value when E is exactly +DW_OP_reg* or DW_OP_breg*; DW_OP_deref*.

    +
    +
  16. +
+
+
+
+
Location Description Operations
+

This section describes the operations that push location descriptions on the +stack.

+
+General Location Description Operations +
    +
  1. DW_OP_LLVM_offset New

    +

    DW_OP_LLVM_offset pops two stack entries. The first must be an integral +type value that represents a byte displacement B. The second must be a +location description L.

    +

    It adds the value of B scaled by 8 (the byte size) to the bit offset of each +single location description SL of L, and pushes the updated L.

    +

    It is an evaluation error if the updated bit offset of any SL is less than 0 +or greater than or equal to the size of the location storage specified by +SL.

    +
  2. +
  3. DW_OP_LLVM_offset_uconst New

    +

    DW_OP_LLVM_offset_uconst has a single unsigned LEB128 integer operand +that represents a byte displacement B.

    +

    The operation is equivalent to performing DW_OP_constu B; +DW_OP_LLVM_offset.

    +

    This operation is supplied specifically to be able to encode more field +displacements in two bytes than can be done with DW_OP_lit*; +DW_OP_LLVM_offset.

    +
    +

    Note

    +

    Should this be named DW_OP_LLVM_offset_uconst to match +DW_OP_plus_uconst, or DW_OP_LLVM_offset_constu to match +DW_OP_constu?

    +
    +
  4. +
  5. DW_OP_LLVM_bit_offset New

    +

    DW_OP_LLVM_bit_offset pops two stack entries. The first must be an +integral type value that represents a bit displacement B. The second must be +a location description L.

    +

    It adds the value of B to the bit offset of each single location description +SL of L, and pushes the updated L.

    +

    It is an evaluation error if the updated bit offset of any SL is less than 0 +or greater than or equal to the size of the location storage specified by +SL.

    +
  6. +
  7. DW_OP_push_object_address

    +

    DW_OP_push_object_address pushes the location description L of the +current object.

    +

    This object may correspond to an independent variable that is part of a +user presented expression that is being evaluated. The object location +description may be determined from the variable’s own debugging information +entry or it may be a component of an array, structure, or class whose +address has been dynamically determined by an earlier step during user +expression evaluation.

    +

    This operation provides explicit functionality (especially for arrays +involving descriptions) that is analogous to the implicit push of the base +location description of a structure prior to evaluation of a +DW_AT_data_member_location to access a data member of a structure.

    +
    +

    Note

    +

    This operation could be removed and the object location description +specified as the initial stack as for DW_AT_data_member_location.

    +

    The only attribute that specifies a current object is +DW_AT_data_location so the non-normative text seems to overstate how +this is being used. Or are there other attributes that need to state they +pass an object?

    +
    +
  8. +
  9. DW_OP_LLVM_call_frame_entry_reg New

    +

    DW_OP_LLVM_call_frame_entry_reg has a single unsigned LEB128 integer +operand that represents a target architecture register number R.

    +

    It pushes a location description L that holds the value of register R on +entry to the current subprogram as defined by the call frame information +(see Call Frame Information).

    +

    If there is no call frame information defined, then the default rules for +the target architecture are used. If the register rule is undefined, then +the undefined location description is pushed. If the register rule is same +value, then a register location description for R is pushed.

    +
  10. +
+
+
+Undefined Location Description Operations +

The undefined location storage represents a piece or all of an object that is +present in the source but not in the object code (perhaps due to optimization). +Neither reading nor writing to the undefined location storage is meaningful.

+

An undefined location description specifies the undefined location storage. +There is no concept of the size of the undefined location storage, nor of a bit +offset for an undefined location description. The DW_OP_LLVM_*offset +operations leave an undefined location description unchanged. The +DW_OP_*piece operations can explicitly or implicitly specify an undefined +location description, allowing any size and offset to be specified, and results +in a part with all undefined bits.

+
    +
  1. DW_OP_LLVM_undefined New

    +

    DW_OP_LLVM_undefined pushes a location description L that comprises one +undefined location description SL.

    +
  2. +
+
+
+Memory Location Description Operations +

Each of the target architecture specific address spaces has a corresponding +memory location storage that denotes the linear addressable memory of that +address space. The size of each memory location storage corresponds to the range +of the addresses in the corresponding address space.

+

It is target architecture defined how address space location storage maps to +target architecture physical memory. For example, they may be independent +memory, or more than one location storage may alias the same physical memory +possibly at different offsets and with different interleaving. The mapping may +also be dictated by the source language address classes.

+

A memory location description specifies a memory location storage. The bit +offset corresponds to a bit position within a byte of the memory. Bits accessed +using a memory location description access the corresponding target +architecture memory starting at the bit position within the byte specified by +the bit offset.

+

A memory location description that has a bit offset that is a multiple of 8 (the +byte size) is defined to be a byte address memory location description. It has a +memory byte address A that is equal to the bit offset divided by 8.

+

A memory location description that does not have a bit offset that is a multiple +of 8 (the byte size) is defined to be a bit field memory location description. +It has a bit position B equal to the bit offset modulo 8, and a memory byte +address A equal to the bit offset minus B that is then divided by 8.

+

The address space AS of a memory location description is defined to be the +address space that corresponds to the memory location storage associated with +the memory location description.

+

A location description that is comprised of one byte address memory location +description SL is defined to be a memory byte address location description. It +has a byte address equal to A and an address space equal to AS of the +corresponding SL.

+

DW_ASPACE_none is defined as the target architecture default address space.

+

If a stack entry is required to be a location description, but it is a value V +with the generic type, then it is implicitly converted to a location description +L with one memory location description SL. SL specifies the memory location +storage that corresponds to the target architecture default address space with a +bit offset equal to V scaled by 8 (the byte size).

+
+

Note

+

If it is wanted to allow any integral type value to be implicitly converted to +a memory location description in the target architecture default address +space:

+
+

If a stack entry is required to be a location description, but is a value V +with an integral type, then it is implicitly converted to a location +description L with one memory location description SL. If the type size of +V is less than the generic type size, then the value V is zero extended to +the size of the generic type. The least significant generic type size bits +are treated as a twos-complement unsigned value to be used as an address A. +SL specifies memory location storage corresponding to the target +architecture default address space with a bit offset equal to A scaled by 8 +(the byte size).

+
+

The implicit conversion could also be defined as target architecture specific. +For example, GDB checks if V is an integral type. If it is not it gives an +error. Otherwise, GDB zero-extends V to 64 bits. If the GDB target defines a +hook function, then it is called. The target specific hook function can modify +the 64-bit value, possibly sign extending based on the original value type. +Finally, GDB treats the 64-bit value V as a memory location address.

+
+

If a stack entry is required to be a location description, but it is an implicit +pointer value IPV with the target architecture default address space, then it is +implicitly converted to a location description with one single location +description specified by IPV. See +Implicit Location Description Operations.

+
+

Note

+

Is this rule required for DWARF Version 5 backwards compatibility? If not, it +can be eliminated, and the producer can use +DW_OP_LLVM_form_aspace_address.

+
+

If a stack entry is required to be a value, but it is a location description L +with one memory location description SL in the target architecture default +address space with a bit offset B that is a multiple of 8, then it is implicitly +converted to a value equal to B divided by 8 (the byte size) with the generic +type.

+
    +
  1. DW_OP_addr

    +

    DW_OP_addr has a single byte constant value operand, which has the size +of the generic type, that represents an address A.

    +

    It pushes a location description L with one memory location description SL +on the stack. SL specifies the memory location storage corresponding to the +target architecture default address space with a bit offset equal to A +scaled by 8 (the byte size).

    +

    If the DWARF is part of a code object, then A may need to be relocated. For +example, in the ELF code object format, A must be adjusted by the difference +between the ELF segment virtual address and the virtual address at which the +segment is loaded.

    +
  2. +
  3. DW_OP_addrx

    +

    DW_OP_addrx has a single unsigned LEB128 integer operand that represents +a zero-based index into the .debug_addr section relative to the value of +the DW_AT_addr_base attribute of the associated compilation unit. The +address value A in the .debug_addr section has the size of the generic +type.

    +

    It pushes a location description L with one memory location description SL +on the stack. SL specifies the memory location storage corresponding to the +target architecture default address space with a bit offset equal to A +scaled by 8 (the byte size).

    +

    If the DWARF is part of a code object, then A may need to be relocated. For +example, in the ELF code object format, A must be adjusted by the difference +between the ELF segment virtual address and the virtual address at which the +segment is loaded.

    +
  4. +
  5. DW_OP_LLVM_form_aspace_address New

    +

    DW_OP_LLVM_form_aspace_address pops the top two stack entries. The first +must be an integral type value that represents a target architecture +specific address space identifier AS. The second must be an integral type +value that represents an address A.

    +

    The address size S is defined as the address bit size of the target +architecture specific address space that corresponds to AS.

    +

    A is adjusted to S bits by zero extending if necessary, and then treating the +least significant S bits as a twos-complement unsigned value A’.

    +

    It pushes a location description L with one memory location description SL +on the stack. SL specifies the memory location storage LS that corresponds +to AS with a bit offset equal to A’ scaled by 8 (the byte size).

    +

    If AS is an address space that is specific to context elements, then LS +corresponds to the location storage associated with the current context.

    +

    For example, if AS is for per thread storage then LS is the location +storage for the current thread. For languages that are implemented using a +SIMD or SIMT execution model, then if AS is for per lane storage then LS is +the location storage for the current lane of the current thread. Therefore, +if L is accessed by an operation, the location storage selected when the +location description was created is accessed, and not the location storage +associated with the current context of the access operation.

    +

    The DWARF expression is ill-formed if AS is not one of the values defined by +the target architecture specific DW_ASPACE_* values.

    +

    See Implicit Location Description Operations for special rules +concerning implicit pointer values produced by dereferencing implicit +location descriptions created by the DW_OP_implicit_pointer and +DW_OP_LLVM_aspace_implicit_pointer operations.

    +
  6. +
  7. DW_OP_form_tls_address

    +

    DW_OP_form_tls_address pops one stack entry that must be an integral +type value and treats it as a thread-local storage address TA.

    +

    It pushes a location description L with one memory location description SL +on the stack. SL is the target architecture specific memory location +description that corresponds to the thread-local storage address TA.

    +

    The meaning of the thread-local storage address TA is defined by the +run-time environment. If the run-time environment supports multiple +thread-local storage blocks for a single thread, then the block +corresponding to the executable or shared library containing this DWARF +expression is used.

    +

    Some implementations of C, C++, Fortran, and other languages support a +thread-local storage class. Variables with this storage class have distinct +values and addresses in distinct threads, much as automatic variables have +distinct values and addresses in each subprogram invocation. Typically, +there is a single block of storage containing all thread-local variables +declared in the main executable, and a separate block for the variables +declared in each shared library. Each thread-local variable can then be +accessed in its block using an identifier. This identifier is typically a +byte offset into the block and pushed onto the DWARF stack by one of the +DW_OP_const* operations prior to the DW_OP_form_tls_address +operation. Computing the address of the appropriate block can be complex +(in some cases, the compiler emits a function call to do it), and difficult +to describe using ordinary DWARF location descriptions. Instead of forcing +complex thread-local storage calculations into the DWARF expressions, the +DW_OP_form_tls_address allows the consumer to perform the computation +based on the target architecture specific run-time environment.

    +
  8. +
  9. DW_OP_call_frame_cfa

    +

    DW_OP_call_frame_cfa pushes the location description L of the Canonical +Frame Address (CFA) of the current subprogram, obtained from the call frame +information on the stack. See Call Frame Information.

    +

    Although the value of the DW_AT_frame_base attribute of the debugger +information entry corresponding to the current subprogram can be computed +using a location list expression, in some cases this would require an +extensive location list because the values of the registers used in +computing the CFA change during a subprogram execution. If the call frame +information is present, then it already encodes such changes, and it is +space efficient to reference that using the DW_OP_call_frame_cfa +operation.

    +
  10. +
  11. DW_OP_fbreg

    +

    DW_OP_fbreg has a single signed LEB128 integer operand that represents a +byte displacement B.

    +

    The location description L for the frame base of the current subprogram is +obtained from the DW_AT_frame_base attribute of the debugger information +entry corresponding to the current subprogram as described in +Debugging Information Entry Attributes.

    +

    The location description L is updated as if the DW_OP_LLVM_offset_uconst +B operation was applied. The updated L is pushed on the stack.

    +
  12. +
  13. DW_OP_breg0, DW_OP_breg1, …, DW_OP_breg31

    +

    The DW_OP_breg<N> operations encode the numbers of up to 32 registers, +numbered from 0 through 31, inclusive. The register number R corresponds to +the N in the operation name.

    +

    They have a single signed LEB128 integer operand that represents a byte +displacement B.

    +

    The address space identifier AS is defined as the one corresponding to the +target architecture specific default address space.

    +

    The address size S is defined as the address bit size of the target +architecture specific address space corresponding to AS.

    +

    The contents of the register specified by R are retrieved as if a +DW_OP_regval_type R, DR operation was performed where DR is the offset +of a hypothetical debug information entry in the current compilation unit +for an unsigned integral base type of size S bits. B is added and the least +significant S bits are treated as an unsigned value to be used as an address +A.

    +

    They push a location description L comprising one memory location +description SL on the stack. SL specifies the memory location storage that +corresponds to AS with a bit offset equal to A scaled by 8 (the byte size).

    +
  14. +
  15. DW_OP_bregx

    +

    DW_OP_bregx has two operands. The first is an unsigned LEB128 integer +that represents a register number R. The second is a signed LEB128 +integer that represents a byte displacement B.

    +

    The action is the same as for DW_OP_breg<N>, except that R is used as +the register number and B is used as the byte displacement.

    +
  16. +
  17. DW_OP_LLVM_aspace_bregx New

    +

    DW_OP_LLVM_aspace_bregx has two operands. The first is an unsigned +LEB128 integer that represents a register number R. The second is a signed +LEB128 integer that represents a byte displacement B. It pops one stack +entry that is required to be an integral type value that represents a target +architecture specific address space identifier AS.

    +

    The action is the same as for DW_OP_breg<N>, except that R is used as +the register number, B is used as the byte displacement, and AS is used as +the address space identifier.

    +

    The DWARF expression is ill-formed if AS is not one of the values defined by +the target architecture specific DW_ASPACE_* values.

    +
    +

    Note

    +

    Could also consider adding DW_OP_aspace_breg0, DW_OP_aspace_breg1, ..., +DW_OP_aspace_breg31 which would save encoding size.

    +
    +
  18. +
+
+
+Register Location Description Operations +

There is a register location storage that corresponds to each of the target +architecture registers. The size of each register location storage corresponds +to the size of the corresponding target architecture register.

+

A register location description specifies a register location storage. The bit +offset corresponds to a bit position within the register. Bits accessed using a +register location description access the corresponding target architecture +register starting at the specified bit offset.

+
    +
  1. DW_OP_reg0, DW_OP_reg1, …, DW_OP_reg31

    +

    DW_OP_reg<N> operations encode the numbers of up to 32 registers, +numbered from 0 through 31, inclusive. The target architecture register +number R corresponds to the N in the operation name.

    +

    The operation is equivalent to performing DW_OP_regx R.

    +
  2. +
  3. DW_OP_regx

    +

    DW_OP_regx has a single unsigned LEB128 integer operand that represents +a target architecture register number R.

    +

    If the current call frame is the top call frame, it pushes a location +description L that specifies one register location description SL on the +stack. SL specifies the register location storage that corresponds to R with +a bit offset of 0 for the current thread.

    +

    If the current call frame is not the top call frame, call frame information +(see Call Frame Information) is used to determine the +location description that holds the register for the current call frame and +current program location of the current thread. The resulting location +description L is pushed.

    +

    Note that if call frame information is used, the resulting location +description may be register, memory, or undefined.

    +

    An implementation may evaluate the call frame information immediately, or +may defer evaluation until L is accessed by an operation. If evaluation is +deferred, R and the current context can be recorded in L. When accessed, the +recorded context is used to evaluate the call frame information, not the +current context of the access operation.

    +
  4. +
+

These operations obtain a register location. To fetch the contents of a +register, it is necessary to use DW_OP_regval_type, use one of the +DW_OP_breg* register-based addressing operations, or use DW_OP_deref* +on a register location description.

+
+
+Implicit Location Description Operations +

Implicit location storage represents a piece or all of an object which has no +actual location in the program but whose contents are nonetheless known, either +as a constant or can be computed from other locations and values in the program.

+

An implicit location description specifies an implicit location storage. The bit +offset corresponds to a bit position within the implicit location storage. Bits +accessed using an implicit location description access the corresponding +implicit storage value starting at the bit offset.

+
    +
  1. DW_OP_implicit_value

    +

    DW_OP_implicit_value has two operands. The first is an unsigned LEB128 +integer that represents a byte size S. The second is a block of bytes with a +length equal to S treated as a literal value V.

    +

    An implicit location storage LS is created with the literal value V and a +size of S.

    +

    It pushes a location description L with one implicit location description SL +on the stack. SL specifies LS with a bit offset of 0.

    +
  2. +
  3. DW_OP_stack_value

    +

    DW_OP_stack_value pops one stack entry that must be a value V.

    +

    An implicit location storage LS is created with the literal value V using +the size, encoding, and endianity specified by V’s base type.

    +

    It pushes a location description L with one implicit location description SL +on the stack. SL specifies LS with a bit offset of 0.

    +

    The DW_OP_stack_value operation specifies that the object does not +exist in memory, but its value is nonetheless known. In this form, the +location description specifies the actual value of the object, rather than +specifying the memory or register storage that holds the value.

    +

    See Implicit Location Description Operations for special rules +concerning implicit pointer values produced by dereferencing implicit +location descriptions created by the DW_OP_implicit_pointer and +DW_OP_LLVM_aspace_implicit_pointer operations.

    +
    +

    Note

    +

    Since location descriptions are allowed on the stack, the +DW_OP_stack_value operation no longer terminates the DWARF operation +expression execution as in DWARF Version 5.

    +
    +
  4. +
  5. DW_OP_implicit_pointer

    +

    An optimizing compiler may eliminate a pointer, while still retaining the +value that the pointer addressed. DW_OP_implicit_pointer allows a +producer to describe this value.

    +

    DW_OP_implicit_pointer specifies an object is a pointer to the target +architecture default address space that cannot be represented as a real +pointer, even though the value it would point to can be described. In this +form, the location description specifies a debugging information entry that +represents the actual location description of the object to which the +pointer would point. Thus, a consumer of the debug information would be able +to access the dereferenced pointer, even when it cannot access the pointer +itself.

    +

    DW_OP_implicit_pointer has two operands. The first operand is a 4-byte +unsigned value in the 32-bit DWARF format, or an 8-byte unsigned value in +the 64-bit DWARF format, that represents the byte offset DR of a debugging +information entry D relative to the beginning of the .debug_info section +that contains the current compilation unit. The second operand is a signed +LEB128 integer that represents a byte displacement B.

    +

    Note that D may not be in the current compilation unit.

    +

    The first operand interpretation is exactly like that for +DW_FORM_ref_addr.

    +

    The address space identifier AS is defined as the one corresponding to the +target architecture specific default address space.

    +

    The address size S is defined as the address bit size of the target +architecture specific address space corresponding to AS.

    +

    An implicit location storage LS is created with the debugging information +entry D, address space AS, and size of S.

    +

    It pushes a location description L that comprises one implicit location +description SL on the stack. SL specifies LS with a bit offset of 0.

    +

    It is an evaluation error if a DW_OP_deref* operation pops a location +description L’, and retrieves S bits, such that any retrieved bits come from +an implicit location storage that is the same as LS, unless both the +following conditions are met:

    +
      +
    1. All retrieved bits come from an implicit location description that +refers to an implicit location storage that is the same as LS.

      +

      Note that all bits do not have to come from the same implicit location +description, as L’ may involve composite location descriptors.

      +
    2. +
    3. The bits come from consecutive ascending offsets within their respective +implicit location storage.

    4. +
    +

    These rules are equivalent to retrieving the complete contents of LS.

    +

    If both the above conditions are met, then the value V pushed by the +DW_OP_deref* operation is an implicit pointer value IPV with a target +architecture specific address space of AS, a debugging information entry of +D, and a base type of T. If AS is the target architecture default address +space, then T is the generic type. Otherwise, T is a target architecture +specific integral type with a bit size equal to S.

    +

    If IPV is either implicitly converted to a location description (only done +if AS is the target architecture default address space) or used by +DW_OP_LLVM_form_aspace_address (only done if the address space popped by +DW_OP_LLVM_form_aspace_address is AS), then the resulting location +description RL is:

    +
      +
    • If D has a DW_AT_location attribute, the DWARF expression E from the +DW_AT_location attribute is evaluated with the current context, except +that the result kind is a location description, the compilation unit is +the one that contains D, the object is unspecified, and the initial stack +is empty. RL is the expression result.

      +

      Note that E is evaluated with the context of the expression accessing +IPV, and not the context of the expression that contained the +DW_OP_implicit_pointer or DW_OP_LLVM_aspace_implicit_pointer +operation that created L.

      +
    • +
    • If D has a DW_AT_const_value attribute, then an implicit location +storage RLS is created from the DW_AT_const_value attribute’s value +with a size matching the size of the DW_AT_const_value attribute’s +value. RL comprises one implicit location description SRL. SRL specifies +RLS with a bit offset of 0.

      +
      +

      Note

      +

      If using DW_AT_const_value for variables and formal parameters is +deprecated and instead DW_AT_location is used with an implicit +location description, then this rule would not be required.

      +
      +
    • +
    • Otherwise, it is an evaluation error.

    • +
    +

    The bit offset of RL is updated as if the DW_OP_LLVM_offset_uconst B +operation was applied.

    +

    If a DW_OP_stack_value operation pops a value that is the same as IPV, +then it pushes a location description that is the same as L.

    +

    It is an evaluation error if LS or IPV is accessed in any other manner.

    +

    The restrictions on how an implicit pointer location description created +by DW_OP_implicit_pointer and DW_OP_LLVM_aspace_implicit_pointer +can be used are to simplify the DWARF consumer. Similarly, for an implicit +pointer value created by DW_OP_deref* and DW_OP_stack_value.

    +
  6. +
  7. DW_OP_LLVM_aspace_implicit_pointer New

    +

    DW_OP_LLVM_aspace_implicit_pointer has two operands that are the same as +for DW_OP_implicit_pointer.

    +

    It pops one stack entry that must be an integral type value that represents +a target architecture specific address space identifier AS.

    +

    The location description L that is pushed on the stack is the same as for +DW_OP_implicit_pointer, except that the address space identifier used is +AS.

    +

    The DWARF expression is ill-formed if AS is not one of the values defined by +the target architecture specific DW_ASPACE_* values.

    +
    +

    Note

    +

    This definition of DW_OP_LLVM_aspace_implicit_pointer may change when +full support for address classes is added as required for languages such +as OpenCL/SyCL.

    +
    +
  8. +
+

Typically a DW_OP_implicit_pointer or +DW_OP_LLVM_aspace_implicit_pointer operation is used in a DWARF expression +E1 of a DW_TAG_variable or DW_TAG_formal_parameter +debugging information entry D1’s DW_AT_location attribute. +The debugging information entry referenced by the DW_OP_implicit_pointer +or DW_OP_LLVM_aspace_implicit_pointer operations is typically itself a +DW_TAG_variable or DW_TAG_formal_parameter debugging information +entry D2 whose DW_AT_location attribute gives a second DWARF +expression E2.

+

D1 and E1 are describing the location of a pointer type +object. D2 and E2 are describing the location of the +object pointed to by that pointer object.

+

However, D2 may be any debugging information entry that contains a +DW_AT_location or DW_AT_const_value attribute (for example, +DW_TAG_dwarf_procedure). By using E2, a consumer can +reconstruct the value of the object when asked to dereference the pointer +described by E1 which contains the DW_OP_implicit_pointer or +DW_OP_LLVM_aspace_implicit_pointer operation.

+
+
+Composite Location Description Operations +

A composite location storage represents an object or value which may be +contained in part of another location storage or contained in parts of more +than one location storage.

+

Each part has a part location description L and a part bit size S. L can have +one or more single location descriptions SL. If there is more than one SL then +that indicates that the part is located in more than one place. The bits of each +place of the part comprise S contiguous bits from the location storage LS +specified by SL starting at the bit offset specified by SL. All the bits must +be within the size of LS or the DWARF expression is ill-formed.

+

A composite location storage can have zero or more parts. The parts are +contiguous such that the zero-based location storage bit index will range over +each part with no gaps between them. Therefore, the size of a composite location +storage is the sum of the size of its parts. The DWARF expression is ill-formed +if the size of the contiguous location storage is larger than the size of the +memory location storage corresponding to the largest target architecture +specific address space.

+

A composite location description specifies a composite location storage. The bit +offset corresponds to a bit position within the composite location storage.

+

There are operations that create a composite location storage.

+

There are other operations that allow a composite location storage to be +incrementally created. Each part is created by a separate operation. There may +be one or more operations to create the final composite location storage. A +series of such operations describes the parts of the composite location storage +that are in the order that the associated part operations are executed.

+

To support incremental creation, a composite location storage can be in an +incomplete state. When an incremental operation operates on an incomplete +composite location storage, it adds a new part, otherwise it creates a new +composite location storage. The DW_OP_LLVM_piece_end operation explicitly +makes an incomplete composite location storage complete.

+

A composite location description that specifies a composite location storage +that is incomplete is termed an incomplete composite location description. A +composite location description that specifies a composite location storage that +is complete is termed a complete composite location description.

+

If the top stack entry is a location description that has one incomplete +composite location description SL after the execution of an operation expression +has completed, SL is converted to a complete composite location description.

+

Note that this conversion does not happen after the completion of an operation +expression that is evaluated on the same stack by the DW_OP_call* +operations. Such executions are not a separate evaluation of an operation +expression, but rather the continued evaluation of the same operation expression +that contains the DW_OP_call* operation.

+

If a stack entry is required to be a location description L, but L has an +incomplete composite location description, then the DWARF expression is +ill-formed. The exception is for the operations involved in incrementally +creating a composite location description as described below.

+

Note that a DWARF operation expression may arbitrarily compose composite +location descriptions from any other location description, including those that +have multiple single location descriptions, and those that have composite +location descriptions.

+

The incremental composite location description operations are defined to be +compatible with the definitions in DWARF Version 5.

+
    +
  1. DW_OP_piece

    +

    DW_OP_piece has a single unsigned LEB128 integer that represents a byte +size S.

    +

    The action is based on the context:

    +
      +
    • If the stack is empty, then a location description L comprised of one +incomplete composite location description SL is pushed on the stack.

      +

      An incomplete composite location storage LS is created with a single part +P. P specifies a location description PL and has a bit size of S scaled by +8 (the byte size). PL is comprised of one undefined location description +PSL.

      +

      SL specifies LS with a bit offset of 0.

      +
    • +
    • Otherwise, if the top stack entry is a location description L comprised of +one incomplete composite location description SL, then the incomplete +composite location storage LS that SL specifies is updated to append a new +part P. P specifies a location description PL and has a bit size of S +scaled by 8 (the byte size). PL is comprised of one undefined location +description PSL. L is left on the stack.

    • +
    • Otherwise, if the top stack entry is a location description or can be +converted to one, then it is popped and treated as a part location +description PL. Then:

      +
        +
      • If the top stack entry (after popping PL) is a location description L +comprised of one incomplete composite location description SL, then the +incomplete composite location storage LS that SL specifies is updated to +append a new part P. P specifies the location description PL and has a +bit size of S scaled by 8 (the byte size). L is left on the stack.

      • +
      • Otherwise, a location description L comprised of one incomplete +composite location description SL is pushed on the stack.

        +

        An incomplete composite location storage LS is created with a single +part P. P specifies the location description PL and has a bit size of S +scaled by 8 (the byte size).

        +

        SL specifies LS with a bit offset of 0.

        +
      • +
      +
    • +
    • Otherwise, the DWARF expression is ill-formed.

    • +
    +

    Many compilers store a single variable in sets of registers or store a +variable partially in memory and partially in registers. DW_OP_piece +provides a way of describing where a part of a variable is located.

    +

    If a non-0 byte displacement is required, the DW_OP_LLVM_offset +operation can be used to update the location description before using it as +the part location description of a DW_OP_piece operation.

    +

    The evaluation rules for the DW_OP_piece operation allow it to be +compatible with the DWARF Version 5 definition.

    +
    +

    Note

    +

    Since these extensions allow location descriptions to be entries on the +stack, a simpler operation to create composite location descriptions could +be defined. For example, just one operation that specifies how many parts, +and pops pairs of stack entries for the part size and location +description. Not only would this be a simpler operation and avoid the +complexities of incomplete composite location descriptions, but it may +also have a smaller encoding in practice. However, the desire for +compatibility with DWARF Version 5 is likely a stronger consideration.

    +
    +
  2. +
  3. DW_OP_bit_piece

    +

    DW_OP_bit_piece has two operands. The first is an unsigned LEB128 +integer that represents the part bit size S. The second is an unsigned +LEB128 integer that represents a bit displacement B.

    +

    The action is the same as for DW_OP_piece, except that any part created +has the bit size S, and the location description PL of any created part is +updated as if the DW_OP_constu B; DW_OP_LLVM_bit_offset operations were +applied.

    +

    DW_OP_bit_piece is used instead of DW_OP_piece when the piece to +be assembled is not byte-sized or is not at the start of the part location +description.

    +

    If a computed bit displacement is required, the DW_OP_LLVM_bit_offset +operation can be used to update the location description before using it as +the part location description of a DW_OP_bit_piece operation.

    +
    +

    Note

    +

    The bit offset operand is not needed as DW_OP_LLVM_bit_offset can be +used on the part’s location description.

    +
    +
  4. +
  5. DW_OP_LLVM_piece_end New

    +

    If the top stack entry is not a location description L comprised of one +incomplete composite location description SL, then the DWARF expression is +ill-formed.

    +

    Otherwise, the incomplete composite location storage LS specified by SL is +updated to be a complete composite location description with the same parts.

    +
  6. +
  7. DW_OP_LLVM_extend New

    +

    DW_OP_LLVM_extend has two operands. The first is an unsigned LEB128 +integer that represents the element bit size S. The second is an unsigned +LEB128 integer that represents a count C.

    +

    It pops one stack entry that must be a location description and is treated +as the part location description PL.

    +

    A location description L comprised of one complete composite location +description SL is pushed on the stack.

    +

    A complete composite location storage LS is created with C identical parts +P. Each P specifies PL and has a bit size of S.

    +

    SL specifies LS with a bit offset of 0.

    +

    The DWARF expression is ill-formed if the element bit size or count are 0.

    +
  8. +
  9. DW_OP_LLVM_select_bit_piece New

    +

    DW_OP_LLVM_select_bit_piece has two operands. The first is an unsigned +LEB128 integer that represents the element bit size S. The second is an +unsigned LEB128 integer that represents a count C.

    +

    It pops three stack entries. The first must be an integral type value that +represents a bit mask value M. The second must be a location description +that represents the one-location description L1. The third must be a +location description that represents the zero-location description L0.

    +

    A complete composite location storage LS is created with C parts PN +ordered in ascending N from 0 to C-1 inclusive. Each PN specifies +location description PLN and has a bit size of S.

    +

    PLN is as if the DW_OP_LLVM_bit_offset N*S operation was +applied to PLXN.

    +

    PLXN is the same as L0 if the Nth least significant bit of +M is a zero, otherwise it is the same as L1.

    +

    A location description L comprised of one complete composite location +description SL is pushed on the stack. SL specifies LS with a bit offset of +0.

    +

    The DWARF expression is ill-formed if S or C are 0, or if the bit size of M +is less than C.

    +
  10. +
+
+
+
+
+
DWARF Location List Expressions
+

To meet the needs of recent computer architectures and optimization techniques, +debugging information must be able to describe the location of an object whose +location changes over the object’s lifetime, and may reside at multiple +locations during parts of an object’s lifetime. Location list expressions are +used in place of operation expressions whenever the object whose location is +being described has these requirements.

+

A location list expression consists of a series of location list entries. Each +location list entry is one of the following kinds:

+

Bounded location description

+
+

This kind of location list entry provides an operation expression that +evaluates to the location description of an object that is valid over a +lifetime bounded by a starting and ending address. The starting address is the +lowest address of the address range over which the location is valid. The +ending address is the address of the first location past the highest address +of the address range.

+

The location list entry matches when the current program location is within +the given range.

+

There are several kinds of bounded location description entries which differ +in the way that they specify the starting and ending addresses.

+
+

Default location description

+
+

This kind of location list entry provides an operation expression that +evaluates to the location description of an object that is valid when no +bounded location description entry applies.

+

The location list entry matches when the current program location is not +within the range of any bounded location description entry.

+
+

Base address

+
+

This kind of location list entry provides an address to be used as the base +address for beginning and ending address offsets given in certain kinds of +bounded location description entries. The applicable base address of a bounded +location description entry is the address specified by the closest preceding +base address entry in the same location list. If there is no preceding base +address entry, then the applicable base address defaults to the base address +of the compilation unit (see DWARF Version 5 section 3.1.1).

+

In the case of a compilation unit where all of the machine code is contained +in a single contiguous section, no base address entry is needed.

+
+

End-of-list

+
+

This kind of location list entry marks the end of the location list +expression.

+
+

The address ranges defined by the bounded location description entries of a +location list expression may overlap. When they do, they describe a situation in +which an object exists simultaneously in more than one place.

+

If all of the address ranges in a given location list expression do not +collectively cover the entire range over which the object in question is +defined, and there is no following default location description entry, it is +assumed that the object is not available for the portion of the range that is +not covered.

+

The result of the evaluation of a DWARF location list expression is:

+
    +
  • If the current program location is not specified, then it is an evaluation +error.

    +
    +

    Note

    +

    If the location list only has a single default entry, should that be +considered a match if there is no program location? If there are non-default +entries then it seems it has to be an evaluation error when there is no +program location as that indicates the location depends on the program +location which is not known.

    +
    +
  • +
  • If there are no matching location list entries, then the result is a location +description that comprises one undefined location description.

  • +
  • Otherwise, the operation expression E of each matching location list entry is +evaluated with the current context, except that the result kind is a location +description, the object is unspecified, and the initial stack is empty. The +location list entry result is the location description returned by the +evaluation of E.

    +

    The result is a location description that is comprised of the union of the +single location descriptions of the location description result of each +matching location list entry.

    +
  • +
+

A location list expression can only be used as the value of a debugger +information entry attribute that is encoded using class loclist or +loclistsptr (see DWARF Version 5 section 7.5.5). The value of the attribute +provides an index into a separate object file section called .debug_loclists +or .debug_loclists.dwo (for split DWARF object files) that contains the +location list entries.

+

A DW_OP_call* and DW_OP_implicit_pointer operation can be used to +specify a debugger information entry attribute that has a location list +expression. Several debugger information entry attributes allow DWARF +expressions that are evaluated with an initial stack that includes a location +description that may originate from the evaluation of a location list +expression.

+

This location list representation, the loclist and loclistsptr +class, and the related DW_AT_loclists_base attribute are new in DWARF +Version 5. Together they eliminate most, or all of the code object relocations +previously needed for location list expressions.

+
+

Note

+

The rest of this section is the same as DWARF Version 5 section 2.6.2.

+
+
+
+
+

Segmented Addresses

+
+

Note

+

This augments DWARF Version 5 section 2.12.

+
+

DWARF address classes are used for source languages that have the concept of +memory spaces. They are used in the DW_AT_address_class attribute for +pointer type, reference type, subprogram, and subprogram type debugger +information entries.

+

Each DWARF address class is conceptually a separate source language memory space +with its own lifetime and aliasing rules. DWARF address classes are used to +specify the source language memory spaces that pointer type and reference type +values refer to, and to specify the source language memory space in which variables +are allocated.

+

The set of currently defined source language DWARF address classes, together +with source language mappings, is given in +Address class.

+

Vendor defined source language address classes may be defined using codes in the +range DW_ADDR_LLVM_lo_user to DW_ADDR_LLVM_hi_user.

+ + +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
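Address class

| Address Class Name    | Meaning      | C/C++   | OpenCL   | CUDA/HIP |
|-----------------------|--------------|---------|----------|----------|
| DW_ADDR_none          | generic      | default | generic  | default  |
| DW_ADDR_LLVM_global   | global       |         | global   |          |
| DW_ADDR_LLVM_constant | constant     |         | constant | constant |
| DW_ADDR_LLVM_group    | thread-group |         | local    | shared   |
| DW_ADDR_LLVM_private  | thread       |         | private  |          |
| DW_ADDR_LLVM_lo_user  |              |         |          |          |
| DW_ADDR_LLVM_hi_user  |              |         |          |          |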
+

DWARF address spaces correspond to target architecture specific linear +addressable memory areas. They are used in DWARF expression location +descriptions to describe in which target architecture specific memory area data +resides.

+

Target architecture specific DWARF address spaces may correspond to hardware +supported facilities such as memory utilizing base address registers, scratchpad +memory, and memory with special interleaving. The size of addresses in these +address spaces may vary. Their access and allocation may be hardware managed +with each thread or group of threads having access to independent storage. For +these reasons they may have properties that do not allow them to be viewed as +part of the unified global virtual address space accessible by all threads.

+

It is target architecture specific whether multiple DWARF address spaces are +supported and how source language DWARF address classes map to target +architecture specific DWARF address spaces. A target architecture may map +multiple source language DWARF address classes to the same target architecture +specific DWARF address space. Optimization may determine that variable lifetime +and access pattern allow them to be allocated in faster scratchpad memory +represented by a different DWARF address space.

+

Although DWARF address space identifiers are target architecture specific, +DW_ASPACE_none is a common address space supported by all target +architectures.

+

DWARF address space identifiers are used by:

+
    +
  • The DWARF expression operations: DW_OP_LLVM_aspace_bregx, +DW_OP_LLVM_form_aspace_address, DW_OP_LLVM_aspace_implicit_pointer, +and DW_OP_xderef*.

  • +
  • The CFI instructions: DW_CFA_LLVM_def_aspace_cfa and +DW_CFA_LLVM_def_aspace_cfa_sf.

  • +
+
+

Note

+

With the definition of DWARF address classes and DWARF address spaces in these +extensions, DWARF Version 5 table 2.7 needs to be updated. It seems it is an +example of DWARF address spaces and not DWARF address classes.

+
+
+

Note

+

With the expanded support for DWARF address spaces in these extensions, it may +be worth examining if DWARF segments can be eliminated and DWARF address +spaces used instead.

+

That may involve extending DWARF address spaces to also be used to specify +code locations. In target architectures that use different memory areas for +code and data this would seem a natural use for DWARF address spaces. This +would allow DWARF expression location descriptions to be used to describe the +location of subprograms and entry points that are used in expressions +involving subprogram pointer type values.

+

Currently, DWARF expressions assume data and code reside in the same default +DWARF address space, and only the address ranges in DWARF location list +entries and in the .debug_aranges section for accelerated access for +addresses allow DWARF segments to be used to distinguish.

+
+
+

Note

+

Currently, DWARF defines address class values as being target architecture +specific. It is unclear how language specific memory spaces are intended to be +represented in DWARF using these.

+

For example, OpenCL defines memory spaces (called address spaces in OpenCL) +for global, local, constant, and private. These are part of +the type system and are modifiers to pointer types. In addition, OpenCL +defines generic pointers that can reference either the global, +local, or private memory spaces. To support the OpenCL language the +debugger would want to support casting pointers between the generic and +other memory spaces, querying what memory space a generic pointer value is +currently referencing, and possibly using pointer casting to form an address +for a specific memory space out of an integral value.

+

The method to use to dereference a pointer type or reference type value is +defined in DWARF expressions using DW_OP_xderef* which uses a target +architecture specific address space.

+

DWARF defines the DW_AT_address_class attribute on pointer type and +reference type debugger information entries. It specifies the method to use to +dereference them. Why is the value of this not the same as the address space +value used in DW_OP_xderef*? In both cases it is target architecture +specific and the architecture presumably will use the same set of methods to +dereference pointers in both cases.

+

Since DW_AT_address_class uses a target architecture specific value, it +cannot in general capture the source language memory space type modifier +concept. On some architectures all source language memory space modifiers may +actually use the same method for dereferencing pointers.

+

One possibility is for DWARF to add a DW_TAG_LLVM_address_class_type +debugger information entry type modifier that can be applied to a pointer type +and reference type. The DW_AT_address_class attribute could be re-defined +to not be target architecture specific and instead define generalized language +values (as presented above for DWARF address classes in the table +Address class) that will support OpenCL and other +languages using memory spaces. The DW_AT_address_class attribute could be +defined to not be applied to pointer types or reference types, but instead +only to the new DW_TAG_LLVM_address_class_type type modifier debugger +information entry.

+

If a pointer type or reference type is not modified by +DW_TAG_LLVM_address_class_type or if DW_TAG_LLVM_address_class_type +has no DW_AT_address_class attribute, then the pointer type or reference +type would be defined to use the DW_ADDR_none address class as currently. +Since modifiers can be chained, it would need to be defined if multiple +DW_TAG_LLVM_address_class_type modifiers were legal, and if so if the +outermost one is the one that takes precedence.

+

A target architecture implementation that supports multiple address spaces +would need to map DW_ADDR_none appropriately to support CUDA-like +languages that have no address classes in the type system but do support +variable allocation in address classes. Such variable allocation would result +in the variable’s location description needing an address space.

+

The approach presented in Address class is to define +the default DW_ADDR_none to be the generic address class and not the +global address class. This matches how CLANG and LLVM have added support for +CUDA-like languages on top of existing C++ language support. This allows all +addresses to be generic by default which matches CUDA-like languages.

+

An alternative approach is to define DW_ADDR_none as being the global +address class and then change DW_ADDR_LLVM_global to +DW_ADDR_LLVM_generic. This would match the reality that languages that do +not support multiple memory spaces only have one default global memory space. +Generally, in these languages if they expose that the target architecture +supports multiple address spaces, the default one is still the global memory +space. Then a language that does support multiple memory spaces has to +explicitly indicate which pointers have the added ability to reference more +than the global memory space. However, compilers generating DWARF for +CUDA-like languages would then have to define every CUDA-like language pointer +type or reference type using DW_TAG_LLVM_address_class_type with a +DW_AT_address_class attribute of DW_ADDR_LLVM_generic to match the +language semantics.

+

A new DW_AT_LLVM_address_space attribute could be defined that can be +applied to pointer type, reference type, subprogram, and subprogram type to +describe how objects having the given type are dereferenced or called (the +role that DW_AT_address_class currently provides). The values of +DW_AT_LLVM_address_space would be target architecture specific and the same as +used in DW_OP_xderef*.

+
+
+

Note

+

Some additional changes will be made to support languages such as OpenCL/SyCL +that allow address class pointer casting and queries.

+

This requires the compiler to provide the mapping from address space to +address class which may be runtime and not target architecture dependent. Some +implementations may have a one-to-one mapping from source language address +class to target architecture address space, and some may have a many-to-one +mapping which requires knowledge of the address class when determining if +pointer address class casts are allowed.

+

The changes will likely add an attribute that has an expression provided by +the compiler to map from address class to address space. The +DW_OP_implicit_pointer and DW_OP_LLVM_aspace_implicit_pointer +operations may be changed as the current IPV definition may not provide enough +information when used to cast between address classes. Other attributes and +operations may be needed. The legal casts between address classes may need to +be defined on a per language address class basis.

+
+
+
+

Debugging Information Entry Attributes

+
+

Note

+

This section provides changes to existing debugger information entry +attributes and defines attributes added by these extensions. These would be +incorporated into the appropriate DWARF Version 5 chapter 2 sections.

+
+
    +
  1. DW_AT_location

    +

    Any debugging information entry describing a data object (which includes +variables and parameters) or common blocks may have a DW_AT_location +attribute, whose value is a DWARF expression E.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an unspecified object, the +compilation unit that contains E, an empty initial stack, and other context +elements corresponding to the source language thread of execution upon which +the user is focused, if any. The result of the evaluation is the location +description of the base of the data object.

    +

    See Control Flow Operations for special evaluation rules +used by the DW_OP_call* operations.

    +
    +

    Note

    +

    Delete the description of how the DW_OP_call* operations evaluate a +DW_AT_location attribute as that is now described in the operations.

    +
    +
    +

    Note

    +

    See the discussion about the DW_AT_location attribute in the +DW_OP_call* operation. Having each attribute only have a single +purpose and single execution semantics seems desirable. It makes it easier +for the consumer, which no longer has to track the context. It makes it +easier for the producer as it can rely on a single semantics for each +attribute.

    +

    For that reason, limiting the DW_AT_location attribute to only +supporting evaluating the location description of an object, and using a +different attribute and encoding class for the evaluation of DWARF +expression procedures on the same operation expression stack seems +desirable.

    +
    +
  2. +
  3. DW_AT_const_value

    +
    +

    Note

    +

    Could deprecate using the DW_AT_const_value attribute for +DW_TAG_variable or DW_TAG_formal_parameter debugger information +entries that have been optimized to a constant. Instead, +DW_AT_location could be used with a DWARF expression that produces an +implicit location description now that any location description can be +used within a DWARF expression. This allows the DW_OP_call* operations +to be used to push the location description of any variable regardless of +how it is optimized.

    +
    +
  4. +
  5. DW_AT_frame_base

    +

    A DW_TAG_subprogram or DW_TAG_entry_point debugger information entry +may have a DW_AT_frame_base attribute, whose value is a DWARF expression +E.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an unspecified object, the +compilation unit that contains E, an empty initial stack, and other context +elements corresponding to the source language thread of execution upon which +the user is focused, if any.

    +

    The DWARF is ill-formed if E contains a DW_OP_fbreg operation, or the +resulting location description L is not comprised of one single location +description SL.

    +

    If SL is a register location description for register R, then L is replaced +with the result of evaluating a DW_OP_bregx R, 0 operation. This +computes the frame base memory location description in the target +architecture default address space.

    +

    This allows the more compact DW_OP_reg* to be used instead of +DW_OP_breg* 0.

    +
    +

    Note

    +

    This rule could be removed and require the producer to create the required +location description directly using DW_OP_call_frame_cfa, +DW_OP_breg*, or DW_OP_LLVM_aspace_bregx. This would also then +allow a target to implement the call frames within a large register.

    +
    +

    Otherwise, the DWARF is ill-formed if SL is not a memory location +description in any of the target architecture specific address spaces.

    +

    The resulting L is the frame base for the subprogram or entry point.

    +

    Typically, E will use the DW_OP_call_frame_cfa operation or be a +stack pointer register plus or minus some offset.

    +
  6. +
  7. DW_AT_data_member_location

    +

    For a DW_AT_data_member_location attribute there are two cases:

    +
      +
    1. If the attribute is an integer constant B, it provides the offset in +bytes from the beginning of the containing entity.

      +

      The result of the attribute is obtained by evaluating a +DW_OP_LLVM_offset B operation with an initial stack comprising the +location description of the beginning of the containing entity. The +result of the evaluation is the location description of the base of the +member entry.

      +

      If the beginning of the containing entity is not byte aligned, then the +beginning of the member entry has the same bit displacement within a +byte.

      +
    2. +
    3. Otherwise, the attribute must be a DWARF expression E which is evaluated +with a context that has a result kind of a location description, an +unspecified object, the compilation unit that contains E, an initial +stack comprising the location description of the beginning of the +containing entity, and other context elements corresponding to the +source language thread of execution upon which the user is focused, if +any. The result of the evaluation is the location description of the +base of the member entry.

    4. +
    +
    +

    Note

    +

    The beginning of the containing entity can now be any location +description, including those with more than one single location +description, and those with single location descriptions that are of any +kind and have any bit offset.

    +
    +
  8. +
  9. DW_AT_use_location

    +

    The DW_TAG_ptr_to_member_type debugging information entry has a +DW_AT_use_location attribute whose value is a DWARF expression E. It is +used to compute the location description of the member of the class to which +the pointer to member entry points.

    +

    The method used to find the location description of a given member of a +class, structure, or union is common to any instance of that class, +structure, or union and to any instance of the pointer to member type. The +method is thus associated with the pointer to member type, rather than with +each object that has a pointer to member type.

    +

    The DW_AT_use_location DWARF expression is used in conjunction with the +location description for a particular object of the given pointer to member +type and for a particular structure or class instance.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an unspecified object, the +compilation unit that contains E, an initial stack comprising two entries, +and other context elements corresponding to the source language thread of +execution upon which the user is focused, if any. The first stack entry is +the value of the pointer to member object itself. The second stack entry is +the location description of the base of the entire class, structure, or +union instance containing the member whose location is being calculated. The +result of the evaluation is the location description of the member of the +class to which the pointer to member entry points.

    +
  10. +
  11. DW_AT_data_location

    +

    The DW_AT_data_location attribute may be used with any type that +provides one or more levels of hidden indirection and/or run-time parameters +in its representation. Its value is a DWARF operation expression E which +computes the location description of the data for an object. When this +attribute is omitted, the location description of the data is the same as +the location description of the object.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an object that is the location +description of the data descriptor, the compilation unit that contains E, an +empty initial stack, and other context elements corresponding to the source +language thread of execution upon which the user is focused, if any. The +result of the evaluation is the location description of the base of the +member entry.

    +

    E will typically involve an operation expression that begins with a +DW_OP_push_object_address operation which loads the location +description of the object which can then serve as a description in +subsequent calculation.

    +
    +

    Note

    +

    Since DW_AT_data_member_location, DW_AT_use_location, and +DW_AT_vtable_elem_location allow both operation expressions and +location list expressions, why does DW_AT_data_location not allow +both? In all cases they apply to data objects so less likely that +optimization would cause different operation expressions for different +program location ranges. But if supporting for some then should be for +all.

    +

    It seems odd this attribute is not the same as +DW_AT_data_member_location in having an initial stack with the +location description of the object since the expression has to need it.

    +
    +
  12. +
  13. DW_AT_vtable_elem_location

    +

    An entry for a virtual function also has a DW_AT_vtable_elem_location +attribute whose value is a DWARF expression E.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an unspecified object, the +compilation unit that contains E, an initial stack comprising the location +description of the object of the enclosing type, and other context elements +corresponding to the source language thread of execution upon which the user +is focused, if any. The result of the evaluation is the location description +of the slot for the function within the virtual function table for the +enclosing class.

    +
  14. +
  15. DW_AT_static_link

    +

    If a DW_TAG_subprogram or DW_TAG_entry_point debugger information +entry is lexically nested, it may have a DW_AT_static_link attribute, +whose value is a DWARF expression E.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an unspecified object, the +compilation unit that contains E, an empty initial stack, and other context +elements corresponding to the source language thread of execution upon which +the user is focused, if any. The result of the evaluation is the location +description L of the canonical frame address (see +Call Frame Information) of the relevant call frame of +the subprogram instance that immediately lexically encloses the current call +frame’s subprogram or entry point.

    +

    The DWARF is ill-formed if L is not comprised of one memory location +description for one of the target architecture specific address spaces.

    +
  16. +
  17. DW_AT_return_addr

    +

    A DW_TAG_subprogram, DW_TAG_inlined_subroutine, or +DW_TAG_entry_point debugger information entry may have a +DW_AT_return_addr attribute, whose value is a DWARF expression E.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an unspecified object, the +compilation unit that contains E, an empty initial stack, and other context +elements corresponding to the source language thread of execution upon which +the user is focused, if any. The result of the evaluation is the location +description L of the place where the return address for the current call +frame’s subprogram or entry point is stored.

    +

    The DWARF is ill-formed if L is not comprised of one memory location +description for one of the target architecture specific address spaces.

    +
    +

    Note

    +

    It is unclear why DW_TAG_inlined_subroutine has a +DW_AT_return_addr attribute but not a DW_AT_frame_base or +DW_AT_static_link attribute. Seems it would either have all of them or +none. Since inlined subprograms do not have a call frame it seems they +would have none of these attributes.

    +
    +
  18. +
  19. DW_AT_call_value, DW_AT_call_data_location, and +DW_AT_call_data_value

    +

    A DW_TAG_call_site_parameter debugger information entry may have a +DW_AT_call_value attribute, whose value is a DWARF operation expression +E1.

    +

    The result of the DW_AT_call_value attribute is obtained by evaluating +E1 with a context that has a result kind of a value, an unspecified +object, the compilation unit that contains E1, an empty initial stack, and +other context elements corresponding to the source language thread of +execution upon which the user is focused, if any. The resulting value V1 is the value of the parameter at the time of the call made by the +call site.

    +

    For parameters passed by reference, where the code passes a pointer to a +location which contains the parameter, or for reference type parameters, the +DW_TAG_call_site_parameter debugger information entry may also have a +DW_AT_call_data_location attribute whose value is a DWARF operation +expression E2, and a DW_AT_call_data_value attribute whose +value is a DWARF operation expression E3.

    +

    The value of the DW_AT_call_data_location attribute is obtained by +evaluating E2 with a context that has a result kind of a location +description, an unspecified object, the compilation unit that contains E2, an +empty initial stack, and other context elements corresponding to the source +language thread of execution upon which the user is focused, if any. The +resulting location description L2 is the location where the +referenced parameter lives during the call made by the call site. If E2 would just be a DW_OP_push_object_address, then the +DW_AT_call_data_location attribute may be omitted.

    +

    The value of the DW_AT_call_data_value attribute is obtained by +evaluating E3 with a context that has a result kind of a value, an +unspecified object, the compilation unit that contains E3, an empty initial +stack, and other context elements corresponding to the source language +thread of execution upon which the user is focused, if any. The resulting +value V3 is the value in L2 at the time of the call made +by the call site.

    +

    The result of these attributes is undefined if the current call frame is +not for the subprogram containing the DW_TAG_call_site_parameter +debugger information entry or the current program location is not for the +call site containing the DW_TAG_call_site_parameter debugger information +entry in the current call frame.

    +

    The consumer may have to virtually unwind to the call site (see +Call Frame Information) in order to evaluate these +attributes. This will ensure the source language thread of execution upon +which the user is focused corresponds to the call site needed to evaluate +the expression.

    +

    If it is not possible to prevent the expressions of these attributes from +accessing registers or memory locations that might be clobbered by the +subprogram being called by the call site, then the associated attribute +should not be provided.

    +

    The reason for the restriction is that the parameter may need to be +accessed during the execution of the callee. The consumer may virtually +unwind from the called subprogram back to the caller and then evaluate the +attribute expressions. The call frame information (see +Call Frame Information) will not be able to restore +registers that have been clobbered, and clobbered memory will no longer have +the value at the time of the call.

    +
  20. +
  21. DW_AT_LLVM_lanes New

    +

    For languages that are implemented using a SIMD or SIMT execution model, a +DW_TAG_subprogram, DW_TAG_inlined_subroutine, or +DW_TAG_entry_point debugger information entry may have a +DW_AT_LLVM_lanes attribute whose value is an integer constant that is +the number of lanes per thread. This is the static number of lanes per +thread. It is not the dynamic number of lanes with which the thread was +initiated, for example, due to smaller or partial work-groups.

    +

    If not present, the default value of 1 is used.

    +

    The DWARF is ill-formed if the value is 0.

    +
  22. +
  23. DW_AT_LLVM_lane_pc New

    +

    For languages that are implemented using a SIMD or SIMT execution model, a +DW_TAG_subprogram, DW_TAG_inlined_subroutine, or +DW_TAG_entry_point debugging information entry may have a +DW_AT_LLVM_lane_pc attribute whose value is a DWARF expression E.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a location description, an unspecified object, the +compilation unit that contains E, an empty initial stack, and other context +elements corresponding to the source language thread of execution upon which +the user is focused, if any.

    +

    The resulting location description L is for a thread lane count sized vector +of generic type elements. The thread lane count is the value of the +DW_AT_LLVM_lanes attribute. Each element holds the conceptual program +location of the corresponding lane, where the least significant element +corresponds to the first target architecture specific lane identifier and so +forth. If the lane was not active when the current subprogram was called, +its element is an undefined location description.

    +

    DW_AT_LLVM_lane_pc allows the compiler to indicate conceptually where +each lane of a SIMT thread is positioned even when it is in divergent +control flow that is not active.

    +

    Typically, the result is a location description with one composite location +description with each part being a location description with either one +undefined location description or one memory location description.

    +

    If not present, the thread is not being used in a SIMT manner, and the +thread’s current program location is used.

    +
  24. +
  25. DW_AT_LLVM_active_lane New

    +

    For languages that are implemented using a SIMD or SIMT execution model, a +DW_TAG_subprogram, DW_TAG_inlined_subroutine, or +DW_TAG_entry_point debugger information entry may have a +DW_AT_LLVM_active_lane attribute whose value is a DWARF expression E.

    +

    The result of the attribute is obtained by evaluating E with a context that +has a result kind of a value, an unspecified object, the compilation unit +that contains E, an empty initial stack, and other context elements +corresponding to the source language thread of execution upon which the user +is focused, if any.

    +

    The DWARF is ill-formed if the resulting value V is not an integral value.

    +

    The resulting V is a bit mask of active lanes for the current program +location. The Nth least significant bit of the mask corresponds to +the Nth lane. If the bit is 1 the lane is active, otherwise it is +inactive.

    +

    Some targets may update the target architecture execution mask for regions +of code that must execute with different sets of lanes than the current +active lanes. For example, some code must execute with all lanes made +temporarily active. DW_AT_LLVM_active_lane allows the compiler to +provide the means to determine the source language active lanes.

    +

    If not present and DW_AT_LLVM_lanes is greater than 1, then the target +architecture execution mask is used.

    +
  26. +
  27. DW_AT_LLVM_vector_size New

    +

    A DW_TAG_base_type debugger information entry for a base type T may have +a DW_AT_LLVM_vector_size attribute whose value is an integer constant +that is the vector type size N.

    +

    The representation of a vector base type is as N contiguous elements, each +one having the representation of a base type T’ that is the same as T +without the DW_AT_LLVM_vector_size attribute.

    +

    If a DW_TAG_base_type debugger information entry does not have a +DW_AT_LLVM_vector_size attribute, then the base type is not a vector +type.

    +

    The DWARF is ill-formed if N is not greater than 0.

    +
    +

    Note

    +

    LLVM has mention of a non-upstreamed debugger information entry that is +intended to support vector types. However, that was not for a base type so +would not be suitable as the type of a stack value entry. But perhaps that +could be replaced by using this attribute.

    +
    +
  28. +
  29. DW_AT_LLVM_augmentation New

    +

    A DW_TAG_compile_unit debugger information entry for a compilation unit +may have a DW_AT_LLVM_augmentation attribute, whose value is an +augmentation string.

    +

    The augmentation string allows producers to indicate that there is +additional vendor or target specific information in the debugging +information entries. For example, this might be information about the +version of vendor specific extensions that are being used.

    +

    If not present, or if the string is empty, then the compilation unit has no +augmentation string.

    +

    The format for the augmentation string is:

    +
    +
    +
    [vendor:vX.Y[:options]]*
    +
    +
    +

    Where vendor is the producer, vX.Y specifies the major X and minor Y +version number of the extensions used, and options is an optional string +providing additional information about the extensions. The version number +must conform to semantic versioning [SEMVER]. +The options string must not contain the “]” character.

    +

    For example:

    +
    +
    [abc:v0.0][def:v1.2:feature-a=on,feature-b=3]
    +
    +
    +
    +
  30. +
+
+
+
+

Program Scope Entities

+
+

Unit Entities

+
+

Note

+

This augments DWARF Version 5 section 3.1.1 and Table 3.1.

+
+

Additional language codes defined for use with the DW_AT_language attribute +are defined in Language Names.

+ + ++++ + + + + + + + + + + +
Language Names

Language Name

Meaning

DW_LANG_LLVM_HIP

HIP Language.

+

The HIP language [HIP] can be supported by extending +the C++ language.

+
+
+
+

Other Debugger Information

+
+

Accelerated Access

+
+
Lookup By Name
+
+
Contents of the Name Index
+
+

Note

+

The following provides changes to DWARF Version 5 section 6.1.1.1.

+

The rule for debugger information entries included in the name index in the +optional .debug_names section is extended to also include named +DW_TAG_variable debugging information entries with a DW_AT_location +attribute that includes a DW_OP_LLVM_form_aspace_address operation.

+
+

The name index must contain an entry for each debugging information entry that +defines a named subprogram, label, variable, type, or namespace, subject to the +following rules:

+
    +
  • DW_TAG_variable debugging information entries with a DW_AT_location +attribute that includes a DW_OP_addr, DW_OP_LLVM_form_aspace_address, +or DW_OP_form_tls_address operation are included; otherwise, they are +excluded.

  • +
+
+
+
Data Representation of the Name Index
+
+Section Header +
+

Note

+

The following provides an addition to DWARF Version 5 section 6.1.1.4.1 item +14 augmentation_string.

+
+

A null-terminated UTF-8 vendor specific augmentation string, which provides +additional information about the contents of this index. If provided, the +recommended format for augmentation string is:

+
+
+
[vendor:vX.Y[:options]]*
+
+
+

Where vendor is the producer, vX.Y specifies the major X and minor Y +version number of the extensions used in the DWARF of the compilation unit, and +options is an optional string providing additional information about the +extensions. The version number must conform to semantic versioning [SEMVER]. The options string must not contain the “]” +character.

+

For example:

+
+
[abc:v0.0][def:v1.2:feature-a=on,feature-b=3]
+
+
+
+
+

Note

+

This is different to the definition in DWARF Version 5 but is consistent with +the other augmentation strings and allows multiple vendor extensions to be +supported.

+
+
+
+
+
+
+

Line Number Information

+
+
The Line Number Program Header
+
+
Standard Content Descriptions
+
+

Note

+

This augments DWARF Version 5 section 6.2.4.1.

+
+
    +
  1. DW_LNCT_LLVM_source

    +

    The component is a null-terminated UTF-8 source text string with “\n” line endings. This content code is paired with the same forms as +DW_LNCT_path. It can be used for file name entries.

    +

    The value is an empty null-terminated string if no source is available. If +the source is available but is an empty file then the value is a +null-terminated single “\n”.

    +

    When the source field is present, consumers can use the embedded source +instead of attempting to discover the source on disk using the file path +provided by the DW_LNCT_path field. When the source field is absent, +consumers can access the file to get the source text.

    +

    This is particularly useful for programming languages that support runtime +compilation and runtime generation of source text. In these cases, the +source text does not reside in any permanent file. For example, the OpenCL +language [OpenCL] supports online compilation.

    +
  2. +
  3. DW_LNCT_LLVM_is_MD5

    +

    DW_LNCT_LLVM_is_MD5 indicates if the DW_LNCT_MD5 content kind, if +present, is valid: when 0 it is not valid and when 1 it is valid. If +DW_LNCT_LLVM_is_MD5 content kind is not present, and DW_LNCT_MD5 +content kind is present, then the MD5 checksum is valid.

    +

    DW_LNCT_LLVM_is_MD5 is always paired with the DW_FORM_udata form.

    +

    This allows a compilation unit to have a mixture of files with and without +MD5 checksums. This can happen when multiple relocatable files are linked +together.

    +
  4. +
+
+
+
+
+

Call Frame Information

+
+

Note

+

This section provides changes to existing call frame information and defines +instructions added by these extensions. Additional support is added for +address spaces. Register unwind DWARF expressions are generalized to allow any +location description, including those with composite and implicit location +descriptions.

+

These changes would be incorporated into the DWARF Version 5 section 6.4.

+
+
+
Structure of Call Frame Information
+

The register rules are:

+
+
undefined

A register that has this rule has no recoverable value in the previous frame. +The previous value of this register is the undefined location description (see +Undefined Location Description Operations).

+

By convention, the register is not preserved by a callee.

+
+
same value

This register has not been modified from the previous caller frame.

+

If the current frame is the top frame, then the previous value of this +register is the location description L that specifies one register location +description SL. SL specifies the register location storage that corresponds to +the register with a bit offset of 0 for the current thread.

+

If the current frame is not the top frame, then the previous value of this +register is the location description obtained using the call frame information +for the callee frame and callee program location invoked by the current caller +frame for the same register.

+

By convention, the register is preserved by the callee, but the callee has +not modified it.

+
+
offset(N)

N is a signed byte offset. The previous value of this register is saved at the +location description computed as if the DWARF operation expression +DW_OP_LLVM_offset N is evaluated with the current context, except the +result kind is a location description, the compilation unit is unspecified, +the object is unspecified, and an initial stack comprising the location +description of the current CFA (see +DWARF Operation Expressions).

+
+
val_offset(N)

N is a signed byte offset. The previous value of this register is the memory +byte address of the location description computed as if the DWARF operation +expression DW_OP_LLVM_offset N is evaluated with the current context, +except the result kind is a location description, the compilation unit is +unspecified, the object is unspecified, and an initial stack comprising the +location description of the current CFA (see +DWARF Operation Expressions).

+

The DWARF is ill-formed if the CFA location description is not a memory byte +address location description, or if the register size does not match the size +of an address in the address space of the current CFA location description.

+

Since the CFA location description is required to be a memory byte address +location description, the value of val_offset(N) will also be a memory byte +address location description since it is offsetting the CFA location +description by N bytes. Furthermore, the value of val_offset(N) will be a +memory byte address in the same address space as the CFA location +description.

+
+

Note

+

Should DWARF allow the address size to be a different size to the size of +the register? Requiring them to be the same bit size avoids any issue of +conversion as the bit contents of the register are simply interpreted as a +value of the address.

+

GDB has a per register hook that allows a target specific conversion on a +register by register basis. It defaults to truncation of bigger registers, +and to actually reading bytes from the next register (or reads out of bounds +for the last register) for smaller registers. There are no GDB tests that +read a register out of bounds (except an illegal hand written assembly +test).

+
+
+
register(R)

This register has been stored in another register numbered R.

+

The previous value of this register is the location description obtained using +the call frame information for the current frame and current program location +for register R.

+

The DWARF is ill-formed if the size of this register does not match the size +of register R or if there is a cyclic dependency in the call frame +information.

+
+

Note

+

Should this also allow R to be larger than this register? If so is the value +stored in the low order bits and it is undefined what is stored in the +extra upper bits?

+
+
+
expression(E)

The previous value of this register is located at the location description +produced by evaluating the DWARF operation expression E (see +DWARF Operation Expressions).

+

E is evaluated with the current context, except the result kind is a location +description, the compilation unit is unspecified, the object is unspecified, +and an initial stack comprising the location description of the current CFA +(see DWARF Operation Expressions).

+
+
val_expression(E)

The previous value of this register is the value produced by evaluating the +DWARF operation expression E (see DWARF Operation Expressions).

+

E is evaluated with the current context, except the result kind is a value, +the compilation unit is unspecified, the object is unspecified, and an initial +stack comprising the location description of the current CFA (see +DWARF Operation Expressions).

+

The DWARF is ill-formed if the resulting value type size does not match the +register size.

+
+

Note

+

This has limited usefulness as the DWARF expression E can only produce +values up to the size of the generic type. This is due to not allowing any +operations that specify a type in a CFI operation expression. This makes it +unusable for registers that are larger than the generic type. However, +expression(E) can be used to create an implicit location description of +any size.

+
+
+
architectural

The rule is defined externally to this specification by the augmenter.

+
+
+

A Common Information Entry (CIE) holds information that is shared among many +Frame Description Entries (FDE). There is at least one CIE in every non-empty +.debug_frame section. A CIE contains the following fields, in order:

+
    +
  1. length (initial length)

    +

    A constant that gives the number of bytes of the CIE structure, not +including the length field itself. The size of the length field plus the +value of length must be an integral multiple of the address size specified +in the address_size field.

    +
  2. +
  3. CIE_id (4 or 8 bytes, see +32-Bit and 64-Bit DWARF Formats)

    +

    A constant that is used to distinguish CIEs from FDEs.

    +

    In the 32-bit DWARF format, the value of the CIE id in the CIE header is +0xffffffff; in the 64-bit DWARF format, the value is 0xffffffffffffffff.

    +
  4. +
  5. version (ubyte)

    +

    A version number. This number is specific to the call frame information and +is independent of the DWARF version number.

    +

    The value of the CIE version number is 4.

    +
    +

    Note

    +

    Would this be increased to 5 to reflect the changes in these extensions?

    +
    +
  6. +
  7. augmentation (sequence of UTF-8 characters)

    +

    A null-terminated UTF-8 string that identifies the augmentation to this CIE +or to the FDEs that use it. If a reader encounters an augmentation string +that is unexpected, then only the following fields can be read:

    +
      +
    • CIE: length, CIE_id, version, augmentation

    • +
    • FDE: length, CIE_pointer, initial_location, address_range

    • +
    +

    If there is no augmentation, this value is a zero byte.

    +

    The augmentation string allows users to indicate that there is additional +vendor and target architecture specific information in the CIE or FDE which +is needed to virtually unwind a stack frame. For example, this might be +information about dynamically allocated data which needs to be freed on exit +from the routine.

    +

    Because the .debug_frame section is useful independently of any +.debug_info section, the augmentation string always uses UTF-8 +encoding.

    +

    The recommended format for the augmentation string is:

    +
    +
    +
    [vendor:vX.Y[:options]]*
    +
    +
    +

    Where vendor is the producer, vX.Y specifies the major X and minor Y +version number of the extensions used, and options is an optional string +providing additional information about the extensions. The version number +must conform to semantic versioning [SEMVER]. +The options string must not contain the “]” character.

    +

    For example:

    +
    +
    [abc:v0.0][def:v1.2:feature-a=on,feature-b=3]
    +
    +
    +
    +
  8. +
  9. address_size (ubyte)

    +

    The size of a target address in this CIE and any FDEs that use it, in bytes. +If a compilation unit exists for this frame, its address size must match the +address size here.

    +
  10. +
  11. segment_selector_size (ubyte)

    +

    The size of a segment selector in this CIE and any FDEs that use it, in +bytes.

    +
  12. +
  13. code_alignment_factor (unsigned LEB128)

    +

    A constant that is factored out of all advance location instructions (see +Row Creation Instructions). The resulting value is +(operand * code_alignment_factor).

    +
  14. +
  15. data_alignment_factor (signed LEB128)

    +

    A constant that is factored out of certain offset instructions (see +CFA Definition Instructions and +Register Rule Instructions). The resulting value is +(operand * data_alignment_factor).

    +
  16. +
  17. return_address_register (unsigned LEB128)

    +

    An unsigned LEB128 constant that indicates which column in the rule table +represents the return address of the subprogram. Note that this column might +not correspond to an actual machine register.

    +

    The value of the return address register is used to determine the program +location of the caller frame. The program location of the top frame is the +target architecture program counter value of the current thread.

    +
  18. +
  19. initial_instructions (array of ubyte)

    +

    A sequence of rules that are interpreted to create the initial setting of +each column in the table.

    +

    The default rule for all columns before interpretation of the initial +instructions is the undefined rule. However, an ABI authoring body or a +compilation system authoring body may specify an alternate default value for +any or all columns.

    +
  20. +
  21. padding (array of ubyte)

    +

    Enough DW_CFA_nop instructions to make the size of this entry match the +length value above.

    +
  22. +
+

An FDE contains the following fields, in order:

+
    +
  1. length (initial length)

    +

    A constant that gives the number of bytes of the header and instruction +stream for this subprogram, not including the length field itself. The size +of the length field plus the value of length must be an integral multiple of +the address size.

    +
  2. +
  3. CIE_pointer (4 or 8 bytes, see +32-Bit and 64-Bit DWARF Formats)

    +

    A constant offset into the .debug_frame section that denotes the CIE +that is associated with this FDE.

    +
  4. +
  5. initial_location (segment selector and target address)

    +

    The address of the first location associated with this table entry. If the +segment_selector_size field of this FDE’s CIE is non-zero, the initial +location is preceded by a segment selector of the given length.

    +
  6. +
  7. address_range (target address)

    +

    The number of bytes of program instructions described by this entry.

    +
  8. +
  9. instructions (array of ubyte)

    +

    A sequence of table defining instructions that are described in +Call Frame Instructions.

    +
  10. +
  11. padding (array of ubyte)

    +

    Enough DW_CFA_nop instructions to make the size of this entry match the +length value above.

    +
  12. +
+
+
+
Call Frame Instructions
+

Some call frame instructions have operands that are encoded as DWARF operation +expressions E (see DWARF Operation Expressions). The DWARF +operations that can be used in E have the following restrictions:

+
    +
  • DW_OP_addrx, DW_OP_call2, DW_OP_call4, DW_OP_call_ref, +DW_OP_const_type, DW_OP_constx, DW_OP_convert, +DW_OP_deref_type, DW_OP_fbreg, DW_OP_implicit_pointer, +DW_OP_regval_type, DW_OP_reinterpret, and DW_OP_xderef_type +operations are not allowed because the call frame information must not depend +on other debug sections.

  • +
  • DW_OP_push_object_address is not allowed because there is no object +context to provide a value to push.

  • +
  • DW_OP_LLVM_push_lane is not allowed because the call frame instructions +describe the actions for the whole thread, not the lanes independently.

  • +
  • DW_OP_call_frame_cfa and DW_OP_entry_value are not allowed because +their use would be circular.

  • +
  • DW_OP_LLVM_call_frame_entry_reg is not allowed if evaluating E causes a +circular dependency between DW_OP_LLVM_call_frame_entry_reg operations.

    +

    For example, a circular dependency occurs if a register R1 has a DW_CFA_def_cfa_expression +instruction that evaluates a DW_OP_LLVM_call_frame_entry_reg operation +that specifies register R2, and register R2 has a +DW_CFA_def_cfa_expression instruction that evaluates a +DW_OP_LLVM_call_frame_entry_reg operation that specifies register R1.

    +
  • +
+

Call frame instructions to which these restrictions apply include +DW_CFA_def_cfa_expression, DW_CFA_expression, and +DW_CFA_val_expression.

+
+
Row Creation Instructions
+
+

Note

+

These instructions are the same as in DWARF Version 5 section 6.4.2.1.

+
+
+
+
CFA Definition Instructions
+
    +
  1. DW_CFA_def_cfa

    +

    The DW_CFA_def_cfa instruction takes two unsigned LEB128 operands +representing a register number R and a (non-factored) byte displacement B. +AS is set to the target architecture default address space identifier. The +required action is to define the current CFA rule to be the result of +evaluating the DWARF operation expression DW_OP_constu AS; +DW_OP_aspace_bregx R, B as a location description.

    +
  2. +
  3. DW_CFA_def_cfa_sf

    +

    The DW_CFA_def_cfa_sf instruction takes two operands: an unsigned LEB128 +value representing a register number R and a signed LEB128 factored byte +displacement B. AS is set to the target architecture default address space +identifier. The required action is to define the current CFA rule to be the +result of evaluating the DWARF operation expression DW_OP_constu AS; +DW_OP_aspace_bregx R, B*data_alignment_factor as a location description.

    +

    The action is the same as DW_CFA_def_cfa, except that the second +operand is signed and factored.

    +
  4. +
  5. DW_CFA_LLVM_def_aspace_cfa New

    +

    The DW_CFA_LLVM_def_aspace_cfa instruction takes three unsigned LEB128 +operands representing a register number R, a (non-factored) byte +displacement B, and a target architecture specific address space identifier +AS. The required action is to define the current CFA rule to be the result +of evaluating the DWARF operation expression DW_OP_constu AS; +DW_OP_aspace_bregx R, B as a location description.

    +

    If AS is not one of the values defined by the target architecture specific +DW_ASPACE_* values then the DWARF expression is ill-formed.

    +
  6. +
  7. DW_CFA_LLVM_def_aspace_cfa_sf New

    +

    The DW_CFA_LLVM_def_aspace_cfa_sf instruction takes three operands: an unsigned +LEB128 value representing a register number R, a signed LEB128 factored byte +displacement B, and an unsigned LEB128 value representing a target +architecture specific address space identifier AS. The required action is to +define the current CFA rule to be the result of evaluating the DWARF +operation expression DW_OP_constu AS; DW_OP_aspace_bregx R, +B*data_alignment_factor as a location description.

    +

    If AS is not one of the values defined by the target architecture specific +DW_ASPACE_* values, then the DWARF expression is ill-formed.

    +

    The action is the same as DW_CFA_LLVM_def_aspace_cfa, except that the +second operand is signed and factored.

    +
  8. +
  9. DW_CFA_def_cfa_register

    +

    The DW_CFA_def_cfa_register instruction takes a single unsigned LEB128 +operand representing a register number R. The required action is to define +the current CFA rule to be the result of evaluating the DWARF operation +expression DW_OP_constu AS; DW_OP_aspace_bregx R, B as a location +description. B and AS are the old CFA byte displacement and address space +respectively.

    +

    If the subprogram has no current CFA rule, or the rule was defined by a +DW_CFA_def_cfa_expression instruction, then the DWARF is ill-formed.

    +
  10. +
  11. DW_CFA_def_cfa_offset

    +

    The DW_CFA_def_cfa_offset instruction takes a single unsigned LEB128 +operand representing a (non-factored) byte displacement B. The required +action is to define the current CFA rule to be the result of evaluating the +DWARF operation expression DW_OP_constu AS; DW_OP_aspace_bregx R, B as a +location description. R and AS are the old CFA register number and address +space respectively.

    +

    If the subprogram has no current CFA rule, or the rule was defined by a +DW_CFA_def_cfa_expression instruction, then the DWARF is ill-formed.

    +
  12. +
  13. DW_CFA_def_cfa_offset_sf

    +

    The DW_CFA_def_cfa_offset_sf instruction takes a signed LEB128 operand +representing a factored byte displacement B. The required action is to +define the current CFA rule to be the result of evaluating the DWARF +operation expression DW_OP_constu AS; DW_OP_aspace_bregx R, +B*data_alignment_factor as a location description. R and AS are the old +CFA register number and address space respectively.

    +

    If the subprogram has no current CFA rule, or the rule was defined by a +DW_CFA_def_cfa_expression instruction, then the DWARF is ill-formed.

    +

    The action is the same as DW_CFA_def_cfa_offset, except that the +operand is signed and factored.

    +
  14. +
  15. DW_CFA_def_cfa_expression

    +

    The DW_CFA_def_cfa_expression instruction takes a single operand encoded +as a DW_FORM_exprloc value representing a DWARF operation expression E. +The required action is to define the current CFA rule to be the result of +evaluating E with the current context, except the result kind is a location +description, the compilation unit is unspecified, the object is unspecified, +and an empty initial stack.

    +

    See Call Frame Instructions regarding restrictions on +the DWARF expression operations that can be used in E.

    +

    The DWARF is ill-formed if the result of evaluating E is not a memory byte +address location description.

    +
  16. +
+
+
+
Register Rule Instructions
+
    +
  1. DW_CFA_undefined

    +

    The DW_CFA_undefined instruction takes a single unsigned LEB128 operand +that represents a register number R. The required action is to set the rule +for the register specified by R to undefined.

    +
  2. +
  3. DW_CFA_same_value

    +

    The DW_CFA_same_value instruction takes a single unsigned LEB128 operand +that represents a register number R. The required action is to set the rule +for the register specified by R to same value.

    +
  4. +
  5. DW_CFA_offset

    +

    The DW_CFA_offset instruction takes two operands: a register number R +(encoded with the opcode) and an unsigned LEB128 constant representing a +factored displacement B. The required action is to change the rule for the +register specified by R to be an offset(B*data_alignment_factor) rule.

    +
    +

    Note

    +

    Seems this should be named DW_CFA_offset_uf since the offset is +unsigned factored.

    +
    +
  6. +
  7. DW_CFA_offset_extended

    +

    The DW_CFA_offset_extended instruction takes two unsigned LEB128 +operands representing a register number R and a factored displacement B. +This instruction is identical to DW_CFA_offset, except for the encoding +and size of the register operand.

    +
    +

    Note

    +

    Seems this should be named DW_CFA_offset_extended_uf since the +displacement is unsigned factored.

    +
    +
  8. +
  9. DW_CFA_offset_extended_sf

    +

    The DW_CFA_offset_extended_sf instruction takes two operands: an +unsigned LEB128 value representing a register number R and a signed LEB128 +factored displacement B. This instruction is identical to +DW_CFA_offset_extended, except that B is signed.

    +
  10. +
  11. DW_CFA_val_offset

    +

    The DW_CFA_val_offset instruction takes two unsigned LEB128 operands +representing a register number R and a factored displacement B. The required +action is to change the rule for the register indicated by R to be a +val_offset(B*data_alignment_factor) rule.

    +
    +

    Note

    +

    Seems this should be named DW_CFA_val_offset_uf since the displacement +is unsigned factored.

    +
    +
    +

    Note

    +

    An alternative is to define DW_CFA_val_offset to implicitly use the +target architecture default address space, and add another operation that +specifies the address space.

    +
    +
  12. +
  13. DW_CFA_val_offset_sf

    +

    The DW_CFA_val_offset_sf instruction takes two operands: an unsigned +LEB128 value representing a register number R and a signed LEB128 factored +displacement B. This instruction is identical to DW_CFA_val_offset, +except that B is signed.

    +
  14. +
  15. DW_CFA_register

    +

    The DW_CFA_register instruction takes two unsigned LEB128 operands +representing register numbers R1 and R2 respectively. The required action is +to set the rule for the register specified by R1 to be a register(R2) rule.

    +
  16. +
  17. DW_CFA_expression

    +

    The DW_CFA_expression instruction takes two operands: an unsigned LEB128 +value representing a register number R, and a DW_FORM_block value +representing a DWARF operation expression E. The required action is to +change the rule for the register specified by R to be an expression(E) +rule.

    +

    That is, E computes the location description where the register value can +be retrieved.

    +

    See Call Frame Instructions regarding restrictions on +the DWARF expression operations that can be used in E.

    +
  18. +
  19. DW_CFA_val_expression

    +

    The DW_CFA_val_expression instruction takes two operands: an unsigned +LEB128 value representing a register number R, and a DW_FORM_block value +representing a DWARF operation expression E. The required action is to +change the rule for the register specified by R to be a val_expression(E) +rule.

    +

    That is, E computes the value of register R.

    +

    See Call Frame Instructions regarding restrictions on +the DWARF expression operations that can be used in E.

    +

    If the result of evaluating E is not a value with a base type size that +matches the register size, then the DWARF is ill-formed.

    +
  20. +
  21. DW_CFA_restore

    +

    The DW_CFA_restore instruction takes a single operand (encoded with the +opcode) that represents a register number R. The required action is to +change the rule for the register specified by R to the rule assigned it by +the initial_instructions in the CIE.

    +
  22. +
  23. DW_CFA_restore_extended

    +

    The DW_CFA_restore_extended instruction takes a single unsigned LEB128 +operand that represents a register number R. This instruction is identical +to DW_CFA_restore, except for the encoding and size of the register +operand.

    +
  24. +
+
+
+
Row State Instructions
+
+

Note

+

These instructions are the same as in DWARF Version 5 section 6.4.2.4.

+
+
+
+
Padding Instruction
+
+

Note

+

This instruction is the same as in DWARF Version 5 section 6.4.2.5.

+
+
+
+
+
Call Frame Instruction Usage
+
+

Note

+

The same as in DWARF Version 5 section 6.4.3.

+
+
+
+
Call Frame Calling Address
+
+

Note

+

The same as in DWARF Version 5 section 6.4.4.

+
+
+
+
+
+

Data Representation

+
+

32-Bit and 64-Bit DWARF Formats

+
+

Note

+

This augments DWARF Version 5 section 7.4.

+
+
    +
  1. Within the body of the .debug_info section, certain forms of attribute +value depend on the choice of DWARF format as follows. For the 32-bit DWARF +format, the value is a 4-byte unsigned integer; for the 64-bit DWARF format, +the value is an 8-byte unsigned integer.

    + + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    .debug_info section attribute form roles

    Form

    Role

    DW_FORM_line_strp

    offset in .debug_line_str

    DW_FORM_ref_addr

    offset in .debug_info

    DW_FORM_sec_offset

    offset in a section other than +.debug_info or .debug_str

    DW_FORM_strp

    offset in .debug_str

    DW_FORM_strp_sup

    offset in .debug_str section of +supplementary object file

    DW_OP_call_ref

    offset in .debug_info

    DW_OP_implicit_pointer

    offset in .debug_info

    DW_OP_LLVM_aspace_implicit_pointer

    offset in .debug_info

    +
  2. +
+
+
+

Format of Debugging Information

+
+
Attribute Encodings
+
+

Note

+

This augments DWARF Version 5 section 7.5.4 and Table 7.5.

+
+

The following table gives the encoding of the additional debugging information +entry attributes.

+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Attribute encodings

Attribute Name

Value

Classes

DW_AT_LLVM_active_lane

0x3e08

exprloc, loclist

DW_AT_LLVM_augmentation

0x3e09

string

DW_AT_LLVM_lanes

0x3e0a

constant

DW_AT_LLVM_lane_pc

0x3e0b

exprloc, loclist

DW_AT_LLVM_vector_size

0x3e0c

constant

+
+
+
+

DWARF Expressions

+
+

Note

+

Rename DWARF Version 5 section 7.7 to reflect the unification of location +descriptions into DWARF expressions.

+
+
+
Operation Expressions
+
+

Note

+

Rename DWARF Version 5 section 7.7.1 and delete section 7.7.2 to reflect the +unification of location descriptions into DWARF expressions.

+

This augments DWARF Version 5 section 7.7.1 and Table 7.9.

+
+

The following table gives the encoding of the additional DWARF expression +operations.

+ + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DWARF Operation Encodings

Operation

Code

Number +of +Operands

Notes

DW_OP_LLVM_form_aspace_address

0xe1

0

DW_OP_LLVM_push_lane

0xe2

0

DW_OP_LLVM_offset

0xe3

0

DW_OP_LLVM_offset_uconst

0xe4

1

ULEB128 byte displacement

DW_OP_LLVM_bit_offset

0xe5

0

DW_OP_LLVM_call_frame_entry_reg

0xe6

1

ULEB128 register number

DW_OP_LLVM_undefined

0xe7

0

DW_OP_LLVM_aspace_bregx

0xe8

2

ULEB128 register number, +ULEB128 byte displacement

DW_OP_LLVM_aspace_implicit_pointer

0xe9

2

4-byte or 8-byte offset of DIE, +SLEB128 byte displacement

DW_OP_LLVM_piece_end

0xea

0

DW_OP_LLVM_extend

0xeb

2

ULEB128 bit size, +ULEB128 count

DW_OP_LLVM_select_bit_piece

0xec

2

ULEB128 bit size, +ULEB128 count

+
+
+
Location List Expressions
+
+

Note

+

Rename DWARF Version 5 section 7.7.3 to reflect that location lists are a kind +of DWARF expression.

+
+
+
+
+

Source Languages

+
+

Note

+

This augments DWARF Version 5 section 7.12 and Table 7.17.

+
+

The following table gives the encoding of the additional DWARF languages.

+ + +++++ + + + + + + + + + + + + +
Language encodings

Language Name

Value

Default Lower Bound

DW_LANG_LLVM_HIP

0x8100

0

+
+
+

Address Class and Address Space Encodings

+
+

Note

+

This replaces DWARF Version 5 section 7.13.

+
+

The encodings of the constants used for the currently defined address classes +are given in Address class encodings.

+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Address class encodings

Address Class Name

Value

DW_ADDR_none

0x0000

DW_ADDR_LLVM_global

0x0001

DW_ADDR_LLVM_constant

0x0002

DW_ADDR_LLVM_group

0x0003

DW_ADDR_LLVM_private

0x0004

DW_ADDR_LLVM_lo_user

0x8000

DW_ADDR_LLVM_hi_user

0xffff

+
+
+

Line Number Information

+
+

Note

+

This augments DWARF Version 5 section 7.22 and Table 7.27.

+
+

The following table gives the encoding of the additional line number header +entry formats.

+ + ++++ + + + + + + + + + + + + + +
Line number header entry format encodings

Line number header entry format name

Value

DW_LNCT_LLVM_source

0x2001

DW_LNCT_LLVM_is_MD5

0x2002

+
+
+

Call Frame Information

+
+

Note

+

This augments DWARF Version 5 section 7.24 and Table 7.29.

+
+

The following table gives the encoding of the additional call frame information +instructions.

+ + ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + +
Call frame instruction encodings

Instruction

High 2 +Bits

Low 6 +Bits

Operand 1

Operand 2

Operand 3

DW_CFA_LLVM_def_aspace_cfa

0

0x30

ULEB128 register

ULEB128 offset

ULEB128 address space

DW_CFA_LLVM_def_aspace_cfa_sf

0

0x31

ULEB128 register

SLEB128 offset

ULEB128 address space

+
+
+
+

Attributes by Tag Value (Informative)

+
+

Note

+

This augments DWARF Version 5 Appendix A and Table A.1.

+
+

The following table provides the additional attributes that are applicable to +debugging information entries.

+ + ++++ + + + + + + + + + + + + + + + + + + + + + + +
Attributes by tag value

Tag Name

Applicable Attributes

DW_TAG_base_type

    +
  • DW_AT_LLVM_vector_size

  • +
+

DW_TAG_compile_unit

    +
  • DW_AT_LLVM_augmentation

  • +
+

DW_TAG_entry_point

    +
  • DW_AT_LLVM_active_lane

  • +
  • DW_AT_LLVM_lane_pc

  • +
  • DW_AT_LLVM_lanes

  • +
+

DW_TAG_inlined_subroutine

    +
  • DW_AT_LLVM_active_lane

  • +
  • DW_AT_LLVM_lane_pc

  • +
  • DW_AT_LLVM_lanes

  • +
+

DW_TAG_subprogram

    +
  • DW_AT_LLVM_active_lane

  • +
  • DW_AT_LLVM_lane_pc

  • +
  • DW_AT_LLVM_lanes

  • +
+
+
+
+
+

Examples

+

The AMD GPU specific usage of the features in these extensions, including +examples, is available at User Guide for AMDGPU Backend section +DWARF Debug Information.

+
+

Note

+

Change examples to use DW_OP_LLVM_offset instead of DW_OP_add when +acting on a location description.

+

Need to provide examples of new features.

+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUInstructionNotation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUInstructionNotation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUInstructionNotation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUInstructionNotation.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,296 @@ + + + + + + + + + AMDGPU Instructions Notation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

AMDGPU Instructions Notation

+ +
+

Introduction

+

This is an overview of notation used to describe syntax of AMDGPU assembler instructions.

+

This notation mimics the syntax of assembler instructions +except that instead of real operands and modifiers it provides references to their description.

+
+
+

Instructions

+
+

Notation

+

This is the notation used to describe AMDGPU instructions:

+
+
+
+
+
+

Opcode

+
+

Notation

+

TBD

+
+
+
+

Operands

+

An instruction may have zero or more operands. They are comma-separated in the description:

+
+
+

The order of operands is fixed. Operands cannot be omitted +except for special cases described below.

+
+

Notation

+

An operand is described using the following notation:

+
+

<kind><name><tag0><tag1>…

+
+

Where:

+
    +
  • kind is an optional prefix describing operand kind.

  • +
  • name is a link to a description of the operand.

  • +
  • tags are optional. They are used to indicate special operand properties.

  • +
+
+

Operand Kinds

+

Operand kind indicates which values are accepted by the operand.

+
    +
  • Operands which only accept vector registers are labelled with ‘v’ prefix.

  • +
  • Operands which only accept scalar values are labelled with ‘s’ prefix.

  • +
  • Operands which accept both vector registers and scalar values have no prefix.

  • +
+

Examples:

+
vdata          // operand only accepts vector registers
+sdst           // operand only accepts scalar registers
+src1           // operand accepts both scalar and vector registers
+
+
+
+
+

Operand Tags

+

Operand tags indicate special operand properties.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + +

Operand tag

Meaning

:opt

An optional operand.

:m

An operand which may be used with +VOP3 operand modifiers or +SDWA operand modifiers.

:dst

An input operand which may also serve as a destination +if glc modifier is specified.

:fx

This is an f32 or f16 operand depending on +m_op_sel_hi modifier.

:<type>

Operand type differs from type +implied by the opcode name. +This tag specifies actual operand type.

+
+

Examples:

+
src1:m             // src1 operand may be used with operand modifiers
+vdata:dst          // vdata operand may be used as both source and destination
+vdst:u32           // vdst operand has u32 type
+
+
+
+
+
+
+

Modifiers

+

An instruction may have zero or more optional modifiers. They are space-separated in the description:

+
+
+

The order of modifiers is fixed.

+
+

Notation

+

A modifier is described using the following notation:

+
+

<name>

+
+

Where name is a link to a description of the modifier.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUInstructionSyntax.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUInstructionSyntax.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUInstructionSyntax.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUInstructionSyntax.html 2021-09-19 16:16:30.000000000 +0000 @@ -0,0 +1,418 @@ + + + + + + + + + AMDGPU Instruction Syntax — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

AMDGPU Instruction Syntax

+ +
+

Instructions

+
+

Syntax

+

An instruction has the following syntax:

+
+

<opcode mnemonic>    <operand0>, <operand1>,...    <modifier0> <modifier1>...

+
+

Operands are normally comma-separated while +modifiers are space-separated.

+

The order of operands and modifiers is fixed. +Most modifiers are optional and may be omitted.

+
+
+

Opcode Mnemonic

+

Opcode mnemonic describes opcode semantics and may include one or more suffixes in this order:

+ +
+
+

Packing Suffix

+

Most instructions which operate on packed data have a _pk suffix. +Unless otherwise noted, +these instructions operate on and produce packed data composed of +two values. The type of values is indicated by +type suffixes.

+

For example, the following instruction sums up two pairs of f16 values +and produces a pair of f16 values:

+
v_pk_add_f16 v1, v2, v3     // Each operand has f16x2 type
+
+
+
+
+

Type and Size Suffixes

+

Instructions which operate with data have an implied type of data operands. +This data type is specified as a suffix of instruction mnemonic.

+

There are instructions which have 2 type suffixes: +the first is the data type of the destination operand, +the second is the data type of source data operand(s).

+

Note that data type specified by an instruction does not apply +to other kinds of operands such as addresses, offsets and so on.

+

The following table enumerates the most frequently used type suffixes.

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Type Suffixes

Packed instruction?

Data Type

_b512, _b256, _b128, _b64, _b32, _b16, _b8

No

Bits.

_u64, _u32, _u16, _u8

No

Unsigned integer.

_i64, _i32, _i16, _i8

No

Signed integer.

_f64, _f32, _f16

No

Floating-point.

_b16, _u16, _i16, _f16

Yes

Packed (b16x2, u16x2, etc).

+
+

Instructions which have no type suffixes are assumed to operate with typeless data. +The size of data is specified by size suffixes:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Size Suffix

Implied data type

Required register size in dwords

-

b32

1

x2

b64

2

x3

b96

3

x4

b128

4

x8

b256

8

x16

b512

16

x

b32

1

xy

b64

2

xyz

b96

3

xyzw

b128

4

d16_x

b16

1

d16_xy

b16x2

2 for GFX8.0, 1 for GFX8.1 and GFX9+

d16_xyz

b16x3

3 for GFX8.0, 2 for GFX8.1 and GFX9+

d16_xyzw

b16x4

4 for GFX8.0, 2 for GFX8.1 and GFX9+

+
+
+

Warning

+

There are exceptions to the rules described above. +Operands which have a type different from the type specified by the opcode are +tagged in the description.

+
+

Examples of instructions with different types of source and destination operands:

+
s_bcnt0_i32_b64
+v_cvt_f32_u32
+
+
+

Examples of instructions with one data type:

+
v_max3_f32
+v_max3_i16
+
+
+

Examples of instructions which operate with packed data:

+
v_pk_add_u16
+v_pk_add_i16
+v_pk_add_f16
+
+
+

Examples of typeless instructions which operate on b128 data:

+
buffer_store_dwordx4
+flat_load_dwordx4
+
+
+
+
+

Encoding Suffixes

+

Most VOP1, VOP2 and VOPC instructions have several variants: +they may also be encoded in VOP3, DPP and SDWA formats.

+

The assembler will automatically use optimal encoding based on instruction operands. +To force specific encoding, one can add a suffix to the opcode of the instruction:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Encoding

Encoding Suffix

VOP1, VOP2 and VOPC (32-bit) encoding

_e32

VOP3 (64-bit) encoding

_e64

DPP encoding

_dpp

SDWA encoding

_sdwa

+
+

These suffixes are used in this reference to indicate the assumed encoding. +When no suffix is specified, native instruction encoding is implied.

+
+
+
+

Operands

+
+

Syntax

+

Syntax of generic operands is described in this document.

+

For detailed information about operands follow operand links in GPU-specific documents:

+ +
+
+
+

Modifiers

+
+

Syntax

+

Syntax of modifiers is described in this document.

+

Information about modifiers supported for individual instructions may be found in GPU-specific documents:

+ +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUModifierSyntax.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUModifierSyntax.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUModifierSyntax.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUModifierSyntax.html 2021-09-19 16:16:31.000000000 +0000 @@ -0,0 +1,3128 @@ + + + + + + + + + Syntax of AMDGPU Instruction Modifiers — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of AMDGPU Instruction Modifiers

+ +
+

Conventions

+

The following notation is used throughout this document:

+
+
++++ + + + + + + + + + + + + + +

Notation

Description

{0..N}

Any integer value in the range from 0 to N (inclusive).

<x>

Syntax and meaning of x is explained elsewhere.

+
+
+
+

Modifiers

+
+

DS Modifiers

+
+

offset0

+

Specifies first 8-bit offset, in bytes. The default value is 0.

+

Used with DS instructions that expect two addresses.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset0:{0..0xFF}

Specifies an unsigned 8-bit offset as a positive +integer number +or an absolute expression.

+
+

Examples:

+
offset0:0xff
+offset0:2-x
+offset0:-x-y
+
+
+
+
+

offset1

+

Specifies second 8-bit offset, in bytes. The default value is 0.

+

Used with DS instructions that expect two addresses.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset1:{0..0xFF}

Specifies an unsigned 8-bit offset as a positive +integer number +or an absolute expression.

+
+

Examples:

+
offset1:0xff
+offset1:2-x
+offset1:-x-y
+
+
+
+
+

offset

+

Specifies a 16-bit offset, in bytes. The default value is 0.

+

Used with DS instructions that expect a single address.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset:{0..0xFFFF}

Specifies an unsigned 16-bit offset as a positive +integer number +or an absolute expression.

+
+

Examples:

+
offset:65535
+offset:0xffff
+offset:-x-y
+
+
+
+
+

swizzle pattern

+

This is a special modifier which may be used with ds_swizzle_b32 instruction only. +It specifies a swizzle pattern in numeric or symbolic form. The default value is 0.

+

See AMD documentation for more information.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

offset:{0..0xFFFF}

Specifies a 16-bit swizzle pattern.

offset:swizzle(QUAD_PERM,{0..3},{0..3},{0..3},{0..3})

Specifies a quad permute mode pattern

+

Each number is a lane id.

+

offset:swizzle(BITMASK_PERM, “<mask>”)

Specifies a bitmask permute mode pattern.

+

The pattern converts a 5-bit lane id to another +lane id with which the lane interacts.

+

mask is a 5 character sequence which +specifies how to transform the bits of the +lane id.

+

The following characters are allowed:

+
    +
  • “0” - set bit to 0.

  • +
  • “1” - set bit to 1.

  • +
  • “p” - preserve bit.

  • +
  • “i” - inverse bit.

  • +
+

offset:swizzle(BROADCAST,{2..32},{0..N})

Specifies a broadcast mode.

+

Broadcasts the value of any particular lane to +all lanes in its group.

+

The first numeric parameter is a group +size and must be equal to 2, 4, 8, 16 or 32.

+

The second numeric parameter is an index of the +lane being broadcast.

+

The index must be less than the group size.

+

offset:swizzle(SWAP,{1..16})

Specifies a swap mode.

+

Swaps the neighboring groups of +1, 2, 4, 8 or 16 lanes.

+

offset:swizzle(REVERSE,{2..32})

Specifies a reverse mode.

+

Reverses the lanes for groups of 2, 4, 8, 16 or 32 lanes.

+
+
+

Note: numeric values may be specified as either integer numbers or +absolute expressions.

+

Examples:

+
offset:255
+offset:0xffff
+offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
+offset:swizzle(BITMASK_PERM, "01pi0")
+offset:swizzle(BROADCAST, 2, 0)
+offset:swizzle(SWAP, 8)
+offset:swizzle(REVERSE, 30 + 2)
+
+
+
+
+

gds

+

Specifies whether to use GDS or LDS memory (LDS is the default).

+
+
++++ + + + + + + + + + + +

Syntax

Description

gds

Use GDS memory.

+
+
+
+
+

EXP Modifiers

+
+

done

+

Specifies if this is the last export from the shader to the target. By default, +exp instruction does not finish an export sequence.

+
+
++++ + + + + + + + + + + +

Syntax

Description

done

Indicates the last export operation.

+
+
+
+

compr

+

Indicates if the data are compressed (data are not compressed by default).

+
+
++++ + + + + + + + + + + +

Syntax

Description

compr

Data are compressed.

+
+
+
+

vm

+

Specifies valid mask flag state (off by default).

+
+
++++ + + + + + + + + + + +

Syntax

Description

vm

Set valid mask flag.

+
+
+
+
+

FLAT Modifiers

+
+

offset12

+

Specifies an immediate unsigned 12-bit offset, in bytes. The default value is 0.

+

Cannot be used with global/scratch opcodes. GFX9 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset:{0..4095}

Specifies a 12-bit unsigned offset as a positive +integer number +or an absolute expression.

+
+

Examples:

+
offset:4095
+offset:x-0xff
+
+
+
+
+

offset13s

+

Specifies an immediate signed 13-bit offset, in bytes. The default value is 0.

+

Can be used with global/scratch opcodes only. GFX9 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset:{-4096..4095}

Specifies a 13-bit signed offset as an +integer number +or an absolute expression.

+
+

Examples:

+
offset:-4000
+offset:0x10
+offset:-x
+
+
+
+
+

offset12s

+

Specifies an immediate signed 12-bit offset, in bytes. The default value is 0.

+

Can be used with global/scratch opcodes only.

+

GFX10 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset:{-2048..2047}

Specifies a 12-bit signed offset as an +integer number +or an absolute expression.

+
+

Examples:

+
offset:-2000
+offset:0x10
+offset:-x+y
+
+
+
+
+

offset11

+

Specifies an immediate unsigned 11-bit offset, in bytes. The default value is 0.

+

Cannot be used with global/scratch opcodes.

+

GFX10 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset:{0..2047}

Specifies an 11-bit unsigned offset as a positive +integer number +or an absolute expression.

+
+

Examples:

+
offset:2047
+offset:x+0xff
+
+
+
+
+

dlc

+

See a description here. GFX10 only.

+
+
+

glc

+

See a description here.

+
+
+

lds

+

See a description here. GFX10 only.

+
+
+

slc

+

See a description here.

+
+
+

tfe

+

See a description here.

+
+
+

nv

+

See a description here.

+
+
+
+

MIMG Modifiers

+
+

dmask

+

Specifies which channels (image components) are used by the operation. By default, no channels +are used.

+
+
++++ + + + + + + + + + + +

Syntax

Description

dmask:{0..15}

Specifies image channels as a positive +integer number +or an absolute expression.

+

Each bit corresponds to one of 4 image components (RGBA).

+

If the specified bit value is 0, the component is not used, +value 1 means that the component is used.

+
+
+

This modifier has some limitations depending on instruction kind:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

Instruction Kind

Valid dmask Values

32-bit atomic cmpswap

0x3

32-bit atomic instructions except for cmpswap

0x1

64-bit atomic cmpswap

0xF

64-bit atomic instructions except for cmpswap

0x3

gather4

0x1, 0x2, 0x4, 0x8

Other instructions

any value

+
+

Examples:

+
dmask:0xf
+dmask:0b1111
+dmask:x|y|z
+
+
+
+
+

unorm

+

Specifies whether the address is normalized or not (the address is normalized by default).

+
+
++++ + + + + + + + + + + +

Syntax

Description

unorm

Force the address to be unnormalized.

+
+
+
+

glc

+

See a description here.

+
+
+

slc

+

See a description here.

+
+
+

r128

+

Specifies texture resource size. The default size is 256 bits.

+

GFX7, GFX8 and GFX10 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

r128

Specifies a 128-bit texture resource size.

+
+
+

Warning

+

Using this modifier should decrease the rsrc operand size from 8 to 4 dwords, but the assembler does not currently support this feature.

+
+
+
+

tfe

+

See a description here.

+
+
+

lwe

+

Specifies LOD warning status (LOD warning is disabled by default).

+
+
++++ + + + + + + + + + + +

Syntax

Description

lwe

Enables LOD warning.

+
+
+
+

da

+

Specifies if an array index must be sent to TA. By default, array index is not sent.

+
+
++++ + + + + + + + + + + +

Syntax

Description

da

Send an array-index to TA.

+
+
+
+

d16

+

Specifies data size: 16 or 32 bits (32 bits by default). Not supported by GFX7.

+
+
++++ + + + + + + + + + + +

Syntax

Description

d16

Enables 16-bit data mode.

+

On loads, convert data in memory to 16-bit +format before storing it in VGPRs.

+

For stores, convert 16-bit data in VGPRs to +32 bits before going to memory.

+

Note that GFX8.0 does not support data packing. +Each 16-bit data element occupies 1 VGPR.

+

GFX8.1, GFX9 and GFX10 support data packing. +Each pair of 16-bit data elements +occupies 1 VGPR.

+
+
+
+
+

a16

+

Specifies size of image address components: 16 or 32 bits (32 bits by default). +GFX9 and GFX10 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

a16

Enables 16-bit image address components.

+
+
+
+

dim

+

Specifies surface dimension. This is a mandatory modifier. There is no default value.

+

GFX10 only.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

dim:1D

One-dimensional image.

dim:2D

Two-dimensional image.

dim:3D

Three-dimensional image.

dim:CUBE

Cubemap array.

dim:1D_ARRAY

One-dimensional image array.

dim:2D_ARRAY

Two-dimensional image array.

dim:2D_MSAA

Two-dimensional multi-sample anti-aliasing image.

dim:2D_MSAA_ARRAY

Two-dimensional multi-sample anti-aliasing image array.

+
+

The following table defines an alternative syntax which is supported +for compatibility with SP3 assembler:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

dim:SQ_RSRC_IMG_1D

One-dimensional image.

dim:SQ_RSRC_IMG_2D

Two-dimensional image.

dim:SQ_RSRC_IMG_3D

Three-dimensional image.

dim:SQ_RSRC_IMG_CUBE

Cubemap array.

dim:SQ_RSRC_IMG_1D_ARRAY

One-dimensional image array.

dim:SQ_RSRC_IMG_2D_ARRAY

Two-dimensional image array.

dim:SQ_RSRC_IMG_2D_MSAA

Two-dimensional multi-sample anti-aliasing image.

dim:SQ_RSRC_IMG_2D_MSAA_ARRAY

Two-dimensional multi-sample anti-aliasing image array.

+
+
+
+

dlc

+

See a description here. GFX10 only.

+
+
+
+

Miscellaneous Modifiers

+
+

dlc

+

Controls device level cache policy for memory operations. Used for synchronization. +When specified, forces operation to bypass device level cache making the operation device +level coherent. By default, instructions use device level cache.

+

GFX10 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

dlc

Bypass device level cache.

+
+
+
+

glc

+

This modifier has different meanings for loads, stores, and atomic operations. +The default value is off (0).

+

See AMD documentation for details.

+
+
++++ + + + + + + + + + + +

Syntax

Description

glc

Set glc bit to 1.

+
+
+
+

lds

+

Specifies where to store the result: VGPRs or LDS (VGPRs by default).

+
+
++++ + + + + + + + + + + +

Syntax

Description

lds

Store result in LDS.

+
+
+
+

nv

+

Specifies if instruction is operating on non-volatile memory. By default, memory is volatile.

+

GFX9 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

nv

Indicates that instruction operates on +non-volatile memory.

+
+
+
+

slc

+

Specifies cache policy. The default value is off (0).

+

See AMD documentation for details.

+
+
++++ + + + + + + + + + + +

Syntax

Description

slc

Set slc bit to 1.

+
+
+
+

tfe

+

Controls access to partially resident textures. The default value is off (0).

+

See AMD documentation for details.

+
+
++++ + + + + + + + + + + +

Syntax

Description

tfe

Set tfe bit to 1.

+
+
+
+
+

MUBUF/MTBUF Modifiers

+
+

idxen

+

Specifies whether address components include an index. By default, no components are used.

+

Can be used together with offen.

+

Cannot be used with addr64.

+
+
++++ + + + + + + + + + + +

Syntax

Description

idxen

Address components include an index.

+
+
+
+

offen

+

Specifies whether address components include an offset. By default, no components are used.

+

Can be used together with idxen.

+

Cannot be used with addr64.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offen

Address components include an offset.

+
+
+
+

addr64

+

Specifies whether a 64-bit address is used. By default, no address is used.

+

GFX7 only. Cannot be used with offen and +idxen modifiers.

+
+
++++ + + + + + + + + + + +

Syntax

Description

addr64

A 64-bit address is used.

+
+
+
+

offset12

+

Specifies an immediate unsigned 12-bit offset, in bytes. The default value is 0.

+
+
++++ + + + + + + + + + + +

Syntax

Description

offset:{0..0xFFF}

Specifies a 12-bit unsigned offset as a positive +integer number +or an absolute expression.

+
+

Examples:

+
offset:x+y
+offset:0x10
+
+
+
+
+

glc

+

See a description here.

+
+
+

slc

+

See a description here.

+
+
+

lds

+

See a description here.

+
+
+

dlc

+

See a description here. GFX10 only.

+
+
+

tfe

+

See a description here.

+
+
+

fmt

+

Specifies data and numeric formats used by the operation. +The default numeric format is BUF_NUM_FORMAT_UNORM. +The default data format is BUF_DATA_FORMAT_8.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

format:{0..127}

Use format specified as either an +integer number or an +absolute expression.

format:[<data format>]

Use the specified data format and +default numeric format.

format:[<numeric format>]

Use the specified numeric format and +default data format.

format:[<data format>, <numeric format>]

Use the specified data and numeric formats.

format:[<numeric format>, <data format>]

Use the specified data and numeric formats.

+
+

Supported data formats are defined in the following table:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Note

BUF_DATA_FORMAT_INVALID

BUF_DATA_FORMAT_8

Default value.

BUF_DATA_FORMAT_16

BUF_DATA_FORMAT_8_8

BUF_DATA_FORMAT_32

BUF_DATA_FORMAT_16_16

BUF_DATA_FORMAT_10_11_11

BUF_DATA_FORMAT_11_11_10

BUF_DATA_FORMAT_10_10_10_2

BUF_DATA_FORMAT_2_10_10_10

BUF_DATA_FORMAT_8_8_8_8

BUF_DATA_FORMAT_32_32

BUF_DATA_FORMAT_16_16_16_16

BUF_DATA_FORMAT_32_32_32

BUF_DATA_FORMAT_32_32_32_32

BUF_DATA_FORMAT_RESERVED_15

+
+

Supported numeric formats are defined below:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Note

BUF_NUM_FORMAT_UNORM

Default value.

BUF_NUM_FORMAT_SNORM

BUF_NUM_FORMAT_USCALED

BUF_NUM_FORMAT_SSCALED

BUF_NUM_FORMAT_UINT

BUF_NUM_FORMAT_SINT

BUF_NUM_FORMAT_SNORM_OGL

GFX7 only.

BUF_NUM_FORMAT_RESERVED_6

GFX8 and GFX9 only.

BUF_NUM_FORMAT_FLOAT

+
+

Examples:

+
format:0
+format:127
+format:[BUF_DATA_FORMAT_16]
+format:[BUF_DATA_FORMAT_16,BUF_NUM_FORMAT_SSCALED]
+format:[BUF_NUM_FORMAT_FLOAT]
+
+
+
+
+

ufmt

+

Specifies a unified format used by the operation. +The default format is BUF_FMT_8_UNORM. +GFX10 only.

+
+
++++ + + + + + + + + + + + + + +

Syntax

Description

format:{0..127}

Use unified format specified as either an +integer number or an +absolute expression. +Note that unified format numbers are not compatible with +format numbers used for pre-GFX10 ISA.

format:[<unified format>]

Use the specified unified format.

+
+

Unified format is a replacement for data +and numeric formats. For compatibility with older ISA, +syntax with data and numeric formats is still accepted +provided that the combination of formats can be mapped to a unified format.

+

Supported unified formats and equivalent combinations of data and numeric formats +are defined below:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Equivalent Data Format

Equivalent Numeric Format

BUF_FMT_INVALID

BUF_DATA_FORMAT_INVALID

BUF_NUM_FORMAT_UNORM

BUF_FMT_8_UNORM

BUF_DATA_FORMAT_8

BUF_NUM_FORMAT_UNORM

BUF_FMT_8_SNORM

BUF_DATA_FORMAT_8

BUF_NUM_FORMAT_SNORM

BUF_FMT_8_USCALED

BUF_DATA_FORMAT_8

BUF_NUM_FORMAT_USCALED

BUF_FMT_8_SSCALED

BUF_DATA_FORMAT_8

BUF_NUM_FORMAT_SSCALED

BUF_FMT_8_UINT

BUF_DATA_FORMAT_8

BUF_NUM_FORMAT_UINT

BUF_FMT_8_SINT

BUF_DATA_FORMAT_8

BUF_NUM_FORMAT_SINT

BUF_FMT_16_UNORM

BUF_DATA_FORMAT_16

BUF_NUM_FORMAT_UNORM

BUF_FMT_16_SNORM

BUF_DATA_FORMAT_16

BUF_NUM_FORMAT_SNORM

BUF_FMT_16_USCALED

BUF_DATA_FORMAT_16

BUF_NUM_FORMAT_USCALED

BUF_FMT_16_SSCALED

BUF_DATA_FORMAT_16

BUF_NUM_FORMAT_SSCALED

BUF_FMT_16_UINT

BUF_DATA_FORMAT_16

BUF_NUM_FORMAT_UINT

BUF_FMT_16_SINT

BUF_DATA_FORMAT_16

BUF_NUM_FORMAT_SINT

BUF_FMT_16_FLOAT

BUF_DATA_FORMAT_16

BUF_NUM_FORMAT_FLOAT

BUF_FMT_8_8_UNORM

BUF_DATA_FORMAT_8_8

BUF_NUM_FORMAT_UNORM

BUF_FMT_8_8_SNORM

BUF_DATA_FORMAT_8_8

BUF_NUM_FORMAT_SNORM

BUF_FMT_8_8_USCALED

BUF_DATA_FORMAT_8_8

BUF_NUM_FORMAT_USCALED

BUF_FMT_8_8_SSCALED

BUF_DATA_FORMAT_8_8

BUF_NUM_FORMAT_SSCALED

BUF_FMT_8_8_UINT

BUF_DATA_FORMAT_8_8

BUF_NUM_FORMAT_UINT

BUF_FMT_8_8_SINT

BUF_DATA_FORMAT_8_8

BUF_NUM_FORMAT_SINT

BUF_FMT_32_UINT

BUF_DATA_FORMAT_32

BUF_NUM_FORMAT_UINT

BUF_FMT_32_SINT

BUF_DATA_FORMAT_32

BUF_NUM_FORMAT_SINT

BUF_FMT_32_FLOAT

BUF_DATA_FORMAT_32

BUF_NUM_FORMAT_FLOAT

BUF_FMT_16_16_UNORM

BUF_DATA_FORMAT_16_16

BUF_NUM_FORMAT_UNORM

BUF_FMT_16_16_SNORM

BUF_DATA_FORMAT_16_16

BUF_NUM_FORMAT_SNORM

BUF_FMT_16_16_USCALED

BUF_DATA_FORMAT_16_16

BUF_NUM_FORMAT_USCALED

BUF_FMT_16_16_SSCALED

BUF_DATA_FORMAT_16_16

BUF_NUM_FORMAT_SSCALED

BUF_FMT_16_16_UINT

BUF_DATA_FORMAT_16_16

BUF_NUM_FORMAT_UINT

BUF_FMT_16_16_SINT

BUF_DATA_FORMAT_16_16

BUF_NUM_FORMAT_SINT

BUF_FMT_16_16_FLOAT

BUF_DATA_FORMAT_16_16

BUF_NUM_FORMAT_FLOAT

BUF_FMT_10_11_11_UNORM

BUF_DATA_FORMAT_10_11_11

BUF_NUM_FORMAT_UNORM

BUF_FMT_10_11_11_SNORM

BUF_DATA_FORMAT_10_11_11

BUF_NUM_FORMAT_SNORM

BUF_FMT_10_11_11_USCALED

BUF_DATA_FORMAT_10_11_11

BUF_NUM_FORMAT_USCALED

BUF_FMT_10_11_11_SSCALED

BUF_DATA_FORMAT_10_11_11

BUF_NUM_FORMAT_SSCALED

BUF_FMT_10_11_11_UINT

BUF_DATA_FORMAT_10_11_11

BUF_NUM_FORMAT_UINT

BUF_FMT_10_11_11_SINT

BUF_DATA_FORMAT_10_11_11

BUF_NUM_FORMAT_SINT

BUF_FMT_10_11_11_FLOAT

BUF_DATA_FORMAT_10_11_11

BUF_NUM_FORMAT_FLOAT

BUF_FMT_11_11_10_UNORM

BUF_DATA_FORMAT_11_11_10

BUF_NUM_FORMAT_UNORM

BUF_FMT_11_11_10_SNORM

BUF_DATA_FORMAT_11_11_10

BUF_NUM_FORMAT_SNORM

BUF_FMT_11_11_10_USCALED

BUF_DATA_FORMAT_11_11_10

BUF_NUM_FORMAT_USCALED

BUF_FMT_11_11_10_SSCALED

BUF_DATA_FORMAT_11_11_10

BUF_NUM_FORMAT_SSCALED

BUF_FMT_11_11_10_UINT

BUF_DATA_FORMAT_11_11_10

BUF_NUM_FORMAT_UINT

BUF_FMT_11_11_10_SINT

BUF_DATA_FORMAT_11_11_10

BUF_NUM_FORMAT_SINT

BUF_FMT_11_11_10_FLOAT

BUF_DATA_FORMAT_11_11_10

BUF_NUM_FORMAT_FLOAT

BUF_FMT_10_10_10_2_UNORM

BUF_DATA_FORMAT_10_10_10_2

BUF_NUM_FORMAT_UNORM

BUF_FMT_10_10_10_2_SNORM

BUF_DATA_FORMAT_10_10_10_2

BUF_NUM_FORMAT_SNORM

BUF_FMT_10_10_10_2_USCALED

BUF_DATA_FORMAT_10_10_10_2

BUF_NUM_FORMAT_USCALED

BUF_FMT_10_10_10_2_SSCALED

BUF_DATA_FORMAT_10_10_10_2

BUF_NUM_FORMAT_SSCALED

BUF_FMT_10_10_10_2_UINT

BUF_DATA_FORMAT_10_10_10_2

BUF_NUM_FORMAT_UINT

BUF_FMT_10_10_10_2_SINT

BUF_DATA_FORMAT_10_10_10_2

BUF_NUM_FORMAT_SINT

BUF_FMT_2_10_10_10_UNORM

BUF_DATA_FORMAT_2_10_10_10

BUF_NUM_FORMAT_UNORM

BUF_FMT_2_10_10_10_SNORM

BUF_DATA_FORMAT_2_10_10_10

BUF_NUM_FORMAT_SNORM

BUF_FMT_2_10_10_10_USCALED

BUF_DATA_FORMAT_2_10_10_10

BUF_NUM_FORMAT_USCALED

BUF_FMT_2_10_10_10_SSCALED

BUF_DATA_FORMAT_2_10_10_10

BUF_NUM_FORMAT_SSCALED

BUF_FMT_2_10_10_10_UINT

BUF_DATA_FORMAT_2_10_10_10

BUF_NUM_FORMAT_UINT

BUF_FMT_2_10_10_10_SINT

BUF_DATA_FORMAT_2_10_10_10

BUF_NUM_FORMAT_SINT

BUF_FMT_8_8_8_8_UNORM

BUF_DATA_FORMAT_8_8_8_8

BUF_NUM_FORMAT_UNORM

BUF_FMT_8_8_8_8_SNORM

BUF_DATA_FORMAT_8_8_8_8

BUF_NUM_FORMAT_SNORM

BUF_FMT_8_8_8_8_USCALED

BUF_DATA_FORMAT_8_8_8_8

BUF_NUM_FORMAT_USCALED

BUF_FMT_8_8_8_8_SSCALED

BUF_DATA_FORMAT_8_8_8_8

BUF_NUM_FORMAT_SSCALED

BUF_FMT_8_8_8_8_UINT

BUF_DATA_FORMAT_8_8_8_8

BUF_NUM_FORMAT_UINT

BUF_FMT_8_8_8_8_SINT

BUF_DATA_FORMAT_8_8_8_8

BUF_NUM_FORMAT_SINT

BUF_FMT_32_32_UINT

BUF_DATA_FORMAT_32_32

BUF_NUM_FORMAT_UINT

BUF_FMT_32_32_SINT

BUF_DATA_FORMAT_32_32

BUF_NUM_FORMAT_SINT

BUF_FMT_32_32_FLOAT

BUF_DATA_FORMAT_32_32

BUF_NUM_FORMAT_FLOAT

BUF_FMT_16_16_16_16_UNORM

BUF_DATA_FORMAT_16_16_16_16

BUF_NUM_FORMAT_UNORM

BUF_FMT_16_16_16_16_SNORM

BUF_DATA_FORMAT_16_16_16_16

BUF_NUM_FORMAT_SNORM

BUF_FMT_16_16_16_16_USCALED

BUF_DATA_FORMAT_16_16_16_16

BUF_NUM_FORMAT_USCALED

BUF_FMT_16_16_16_16_SSCALED

BUF_DATA_FORMAT_16_16_16_16

BUF_NUM_FORMAT_SSCALED

BUF_FMT_16_16_16_16_UINT

BUF_DATA_FORMAT_16_16_16_16

BUF_NUM_FORMAT_UINT

BUF_FMT_16_16_16_16_SINT

BUF_DATA_FORMAT_16_16_16_16

BUF_NUM_FORMAT_SINT

BUF_FMT_16_16_16_16_FLOAT

BUF_DATA_FORMAT_16_16_16_16

BUF_NUM_FORMAT_FLOAT

BUF_FMT_32_32_32_UINT

BUF_DATA_FORMAT_32_32_32

BUF_NUM_FORMAT_UINT

BUF_FMT_32_32_32_SINT

BUF_DATA_FORMAT_32_32_32

BUF_NUM_FORMAT_SINT

BUF_FMT_32_32_32_FLOAT

BUF_DATA_FORMAT_32_32_32

BUF_NUM_FORMAT_FLOAT

BUF_FMT_32_32_32_32_UINT

BUF_DATA_FORMAT_32_32_32_32

BUF_NUM_FORMAT_UINT

BUF_FMT_32_32_32_32_SINT

BUF_DATA_FORMAT_32_32_32_32

BUF_NUM_FORMAT_SINT

BUF_FMT_32_32_32_32_FLOAT

BUF_DATA_FORMAT_32_32_32_32

BUF_NUM_FORMAT_FLOAT

+
+

Examples:

+
format:0
+format:[BUF_FMT_32_UINT]
+
+
+
+
+
+

SMRD/SMEM Modifiers

+
+

glc

+

See a description here.

+
+
+

nv

+

See a description here. GFX9 only.

+
+
+

dlc

+

See a description here. GFX10 only.

+
+
+
+

VINTRP Modifiers

+
+

high

+

Specifies which half of the LDS word to use. Low half of LDS word is used by default. +GFX9 and GFX10 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

high

Use high half of LDS word.

+
+
+
+
+

DPP8 Modifiers

+

GFX10 only.

+
+

dpp8_sel

+

Selects which lanes to pull data from, within a group of 8 lanes. This is a mandatory modifier. +There is no default value.

+

GFX10 only.

+

The dpp8_sel modifier must specify exactly 8 values. +First value selects which lane to read from to supply data into lane 0. +Second value controls lane 1 and so on.

+

Each value may be specified as either +an integer number or +an absolute expression.

+
+
++++ + + + + + + + + + + +

Syntax

Description

dpp8:[{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7}]

Select lanes to read from.

+
+

Examples:

+
dpp8:[7,6,5,4,3,2,1,0]
+dpp8:[0,1,0,1,0,1,0,1]
+
+
+
+
+

fi

+

Controls interaction with inactive lanes for dpp8 instructions. The default value is zero.

+

Note: inactive lanes are those whose exec mask bit is zero.

+

GFX10 only.

+
+
++++ + + + + + + + + + + + + + +

Syntax

Description

fi:0

Fetch zero when accessing data from inactive lanes.

fi:1

Fetch pre-existing values from inactive lanes.

+
+

Note: numeric values may be specified as either integer numbers or +absolute expressions.

+
+
+
+

DPP Modifiers

+

GFX8, GFX9 and GFX10 only.

+
+

dpp_ctrl

+

Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value.

+

GFX8 and GFX9 only. Use dpp16_ctrl for GFX10.

+

Note: the lanes of a wavefront are organized in four rows and four banks.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

quad_perm:[{0..3},{0..3},{0..3},{0..3}]

Full permute of 4 threads.

row_mirror

Mirror threads within row.

row_half_mirror

Mirror threads within 1/2 row (8 threads).

row_bcast:15

Broadcast 15th thread of each row to next row.

row_bcast:31

Broadcast thread 31 to rows 2 and 3.

wave_shl:1

Wavefront left shift by 1 thread.

wave_rol:1

Wavefront left rotate by 1 thread.

wave_shr:1

Wavefront right shift by 1 thread.

wave_ror:1

Wavefront right rotate by 1 thread.

row_shl:{1..15}

Row shift left by 1-15 threads.

row_shr:{1..15}

Row shift right by 1-15 threads.

row_ror:{1..15}

Row rotate right by 1-15 threads.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
quad_perm:[0, 1, 2, 3]
+row_shl:3
+
+
+
+
+

dpp16_ctrl

+

Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value.

+

GFX10 only. Use dpp_ctrl for GFX8 and GFX9.

+

Note: the lanes of a wavefront are organized in four rows and four banks. +(There are only two rows in wave32 mode.)

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

quad_perm:[{0..3},{0..3},{0..3},{0..3}]

Full permute of 4 threads.

row_mirror

Mirror threads within row.

row_half_mirror

Mirror threads within 1/2 row (8 threads).

row_share:{0..15}

Share the value from the specified lane with other +lanes in the row.

row_xmask:{0..15}

Fetch from XOR(current lane id, specified lane id).

row_shl:{1..15}

Row shift left by 1-15 threads.

row_shr:{1..15}

Row shift right by 1-15 threads.

row_ror:{1..15}

Row rotate right by 1-15 threads.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
quad_perm:[0, 1, 2, 3]
+row_shl:3
+
+
+
+
+

dpp32_ctrl

+

Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value.

+

May be used only with GFX90A 32-bit instructions.

+

Note: the lanes of a wavefront are organized in four rows and four banks.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

quad_perm:[{0..3},{0..3},{0..3},{0..3}]

Full permute of 4 threads.

row_mirror

Mirror threads within row.

row_half_mirror

Mirror threads within 1/2 row (8 threads).

row_bcast:15

Broadcast 15th thread of each row to next row.

row_bcast:31

Broadcast thread 31 to rows 2 and 3.

wave_shl:1

Wavefront left shift by 1 thread.

wave_rol:1

Wavefront left rotate by 1 thread.

wave_shr:1

Wavefront right shift by 1 thread.

wave_ror:1

Wavefront right rotate by 1 thread.

row_shl:{1..15}

Row shift left by 1-15 threads.

row_shr:{1..15}

Row shift right by 1-15 threads.

row_ror:{1..15}

Row rotate right by 1-15 threads.

row_newbcast:{1..15}

Broadcast a thread within a row to the whole row.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
quad_perm:[0, 1, 2, 3]
+row_shl:3
+
+
+
+
+

dpp64_ctrl

+

Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value.

+

May be used only with GFX90A 64-bit instructions.

+

Note: the lanes of a wavefront are organized in four rows and four banks.

+
+
++++ + + + + + + + + + + +

Syntax

Description

row_newbcast:{1..15}

Broadcast a thread within a row to the whole row.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
row_newbcast:3
+
+
+
+
+

row_mask

+

Controls which rows are enabled for data sharing. By default, all rows are enabled.

+

Note: the lanes of a wavefront are organized in four rows and four banks. +(There are only two rows in wave32 mode.)

+
+
++++ + + + + + + + + + + +

Syntax

Description

row_mask:{0..15}

Specifies a row mask as a positive +integer number +or an absolute expression.

+

Each of 4 bits in the mask controls one row +(0 - disabled, 1 - enabled).

+

In wave32 mode the values should be limited to 0..7.

+
+
+

Examples:

+
row_mask:0xf
+row_mask:0b1010
+row_mask:x|y
+
+
+
+
+

bank_mask

+

Controls which banks are enabled for data sharing. By default, all banks are enabled.

+

Note: the lanes of a wavefront are organized in four rows and four banks. +(There are only two rows in wave32 mode.)

+
+
++++ + + + + + + + + + + +

Syntax

Description

bank_mask:{0..15}

Specifies a bank mask as a positive +integer number +or an absolute expression.

+

Each of 4 bits in the mask controls one bank +(0 - disabled, 1 - enabled).

+
+
+

Examples:

+
bank_mask:0x3
+bank_mask:0b0011
+bank_mask:x&y
+
+
+
+
+

bound_ctrl

+

Controls data sharing when accessing an invalid lane. By default, data sharing with +invalid lanes is disabled.

+
+
++++ + + + + + + + + + + +

Syntax

Description

bound_ctrl:1

Enables data sharing with invalid lanes.

+

Accessing data from an invalid lane will +return zero.

+
+
+
+
+

fi

+

Controls interaction with inactive lanes for dpp16 instructions. The default value is zero.

+

Note: inactive lanes are those whose exec mask bit is zero.

+

GFX10 only.

+
+
++++ + + + + + + + + + + + + + +

Syntax

Description

fi:0

Interaction with inactive lanes is controlled by +bound_ctrl.

fi:1

Fetch pre-existing values from inactive lanes.

+
+

Note: numeric values may be specified as either integer numbers or +absolute expressions.

+
+
+
+

SDWA Modifiers

+

GFX8, GFX9 and GFX10 only.

+
+

clamp

+

See a description here.

+
+
+

omod

+

See a description here.

+

GFX9 and GFX10 only.

+
+
+

dst_sel

+

Selects which bits in the destination are affected. By default, all bits are affected.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

dst_sel:DWORD

Use bits 31:0.

dst_sel:BYTE_0

Use bits 7:0.

dst_sel:BYTE_1

Use bits 15:8.

dst_sel:BYTE_2

Use bits 23:16.

dst_sel:BYTE_3

Use bits 31:24.

dst_sel:WORD_0

Use bits 15:0.

dst_sel:WORD_1

Use bits 31:16.

+
+
+
+

dst_unused

+

Controls what to do with the bits in the destination which are not selected +by dst_sel. +By default, unused bits are preserved.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

dst_unused:UNUSED_PAD

Pad with zeros.

dst_unused:UNUSED_SEXT

Sign-extend upper bits, zero lower bits.

dst_unused:UNUSED_PRESERVE

Preserve bits.

+
+
+
+

src0_sel

+

Controls which bits in the src0 are used. By default, all bits are used.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

src0_sel:DWORD

Use bits 31:0.

src0_sel:BYTE_0

Use bits 7:0.

src0_sel:BYTE_1

Use bits 15:8.

src0_sel:BYTE_2

Use bits 23:16.

src0_sel:BYTE_3

Use bits 31:24.

src0_sel:WORD_0

Use bits 15:0.

src0_sel:WORD_1

Use bits 31:16.

+
+
+
+

src1_sel

+

Controls which bits in the src1 are used. By default, all bits are used.

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

src1_sel:DWORD

Use bits 31:0.

src1_sel:BYTE_0

Use bits 7:0.

src1_sel:BYTE_1

Use bits 15:8.

src1_sel:BYTE_2

Use bits 23:16.

src1_sel:BYTE_3

Use bits 31:24.

src1_sel:WORD_0

Use bits 15:0.

src1_sel:WORD_1

Use bits 31:16.

+
+
+
+
+

SDWA Operand Modifiers

+

Operand modifiers are not used separately. They are applied to source operands.

+

GFX8, GFX9 and GFX10 only.

+
+

abs

+

See a description here.

+
+
+

neg

+

See a description here.

+
+
+

sext

+

Sign-extends value of a (sub-dword) operand to fill all 32 bits. +Has no effect for 32-bit operands.

+

Valid for integer operands only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

sext(<operand>)

Sign-extend operand value.

+
+

Examples:

+
sext(v4)
+sext(v255)
+
+
+
+
+
+

VOP3 Modifiers

+
+

op_sel

+

Selects the low [15:0] or high [31:16] operand bits for source and destination operands. +By default, low bits are used for all operands.

+

The number of values specified with the op_sel modifier must match the number of instruction +operands (both source and destination). First value controls src0, second value controls src1 +and so on, except that the last value controls destination. +The value 0 selects the low bits, while 1 selects the high bits.

+

Note: op_sel modifier affects 16-bit operands only. For 32-bit operands the value specified +by op_sel must be 0.

+

GFX9 and GFX10 only.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

op_sel:[{0..1},{0..1}]

Select operand bits for instructions with 1 source operand.

op_sel:[{0..1},{0..1},{0..1}]

Select operand bits for instructions with 2 source operands.

op_sel:[{0..1},{0..1},{0..1},{0..1}]

Select operand bits for instructions with 3 source operands.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
op_sel:[0,0]
+op_sel:[0,1]
+
+
+
+
+

dpp_op_sel

+

Special version of op_sel used for permlane opcodes to specify +dpp-like mode bits - fi and +bound_ctrl.

+

GFX10 only.

+
+
++++ + + + + + + + + + + +

Syntax

Description

op_sel:[{0..1},{0..1}]

First bit specifies fi, second +bit specifies bound_ctrl.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
op_sel:[0,0]
+
+
+
+
+

clamp

+

Clamp meaning depends on instruction.

+

For v_cmp instructions, clamp modifier indicates that the compare signals +if a floating point exception occurs. By default, signaling is disabled. +Not supported by GFX7.

+

For integer operations, clamp modifier indicates that the result must be clamped +to the largest and smallest representable value. By default, there is no clamping. +Integer clamping is not supported by GFX7.

+

For floating point operations, clamp modifier indicates that the result must be clamped +to the range [0.0, 1.0]. By default, there is no clamping.

+

Note: clamp modifier is applied after output modifiers (if any).

+
+
++++ + + + + + + + + + + +

Syntax

Description

clamp

Enables clamping (or signaling).

+
+
+
+

omod

+

Specifies if an output modifier must be applied to the result. +By default, no output modifiers are applied.

+

Note: output modifiers are applied before clamping (if any).

+

Output modifiers are valid for f32 and f64 floating point results only. +They must not be used with f16.

+

Note: v_cvt_f16_f32 is an exception. This instruction produces f16 result +but accepts output modifiers.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

mul:2

Multiply the result by 2.

mul:4

Multiply the result by 4.

div:2

Multiply the result by 0.5.

+
+

Note: numeric values may be specified as either integer numbers or +absolute expressions.

+

Examples:

+
mul:2
+mul:x      // x must be equal to 2 or 4
+
+
+
+
+
+

VOP3 Operand Modifiers

+

Operand modifiers are not used separately. They are applied to source operands.

+
+

abs

+

Computes the absolute value of its operand. Must be applied before neg +(if any). Valid for floating point operands only.

+
+
++++ + + + + + + + + + + + + + +

Syntax

Description

abs(<operand>)

Get the absolute value of a floating-point operand.

|<operand>|

The same as above (an SP3 syntax).

+
+

Note: avoid using SP3 syntax with operands specified as expressions because the trailing ‘|’ +may be misinterpreted. Such operands should be enclosed in additional parentheses as shown +in the examples below.

+

Examples:

+
abs(v36)
+|v36|
+abs(x|y)     // ok
+|(x|y)|      // additional parentheses are required
+
+
+

neg

+

Computes the negative value of its operand. Must be applied after abs +(if any). Valid for floating point operands only.

+
+
++++ + + + + + + + + + + + + + +

Syntax

Description

neg(<operand>)

Get the negative value of a floating-point operand. +The operand may include an optional +abs modifier.

-<operand>

The same as above (an SP3 syntax).

+
+

Note: SP3 syntax is supported with limitations because of a potential ambiguity. +Currently it is allowed in the following cases:

+
    +
  • Before a register.

  • +
  • Before an abs modifier.

  • +
  • Before an SP3 abs modifier.

  • +
+

In all other cases “-” is handled as a part of an expression that follows the sign.

+

Examples:

+
// Operands with negate modifiers
+neg(v[0])
+neg(1.0)
+neg(abs(v0))
+-v5
+-abs(v5)
+-|v5|
+
+// Operands without negate modifiers
+-1
+-x+y
+
+
+
+

VOP3P Modifiers

+

This section describes modifiers of regular VOP3P instructions.

+

v_mad_mix* and v_fma_mix* +instructions use these modifiers in a special manner.

+

GFX9 and GFX10 only.

+
+

op_sel

+

Selects the low [15:0] or high [31:16] operand bits as input to the operation +which results in the lower-half of the destination. +By default, low bits are used for all operands.

+

The number of values specified by the op_sel modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on.

+

The value 0 selects the low bits, while 1 selects the high bits.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

op_sel:[{0..1}]

Select operand bits for instructions with 1 source operand.

op_sel:[{0..1},{0..1}]

Select operand bits for instructions with 2 source operands.

op_sel:[{0..1},{0..1},{0..1}]

Select operand bits for instructions with 3 source operands.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
op_sel:[0,0]
+op_sel:[0,1,0]
+
+
+
+
+

op_sel_hi

+

Selects the low [15:0] or high [31:16] operand bits as input to the operation +which results in the upper-half of the destination. +By default, high bits are used for all operands.

+

The number of values specified by the op_sel_hi modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on.

+

The value 0 selects the low bits, while 1 selects the high bits.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

op_sel_hi:[{0..1}]

Select operand bits for instructions with 1 source operand.

op_sel_hi:[{0..1},{0..1}]

Select operand bits for instructions with 2 source operands.

op_sel_hi:[{0..1},{0..1},{0..1}]

Select operand bits for instructions with 3 source operands.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
op_sel_hi:[0,0]
+op_sel_hi:[0,0,1]
+
+
+
+
+

neg_lo

+

Specifies whether to change sign of operand values selected by +op_sel. These values are then used +as input to the operation which results in the lower-half of the destination.

+

The number of values specified by this modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on.

+

The value 0 indicates that the corresponding operand value is used unmodified, +the value 1 indicates that negative value of the operand must be used.

+

By default, operand values are used unmodified.

+

This modifier is valid for floating point operands only.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

neg_lo:[{0..1}]

Select affected operands for instructions with 1 source operand.

neg_lo:[{0..1},{0..1}]

Select affected operands for instructions with 2 source operands.

neg_lo:[{0..1},{0..1},{0..1}]

Select affected operands for instructions with 3 source operands.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
neg_lo:[0]
+neg_lo:[0,1]
+
+
+
+
+

neg_hi

+

Specifies whether to change sign of operand values selected by +op_sel_hi. These values are then used +as input to the operation which results in the upper-half of the destination.

+

The number of values specified by this modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on.

+

The value 0 indicates that the corresponding operand value is used unmodified, +the value 1 indicates that negative value of the operand must be used.

+

By default, operand values are used unmodified.

+

This modifier is valid for floating point operands only.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

neg_hi:[{0..1}]

Select affected operands for instructions with 1 source operand.

neg_hi:[{0..1},{0..1}]

Select affected operands for instructions with 2 source operands.

neg_hi:[{0..1},{0..1},{0..1}]

Select affected operands for instructions with 3 source operands.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
neg_hi:[1,0]
+neg_hi:[0,1,1]
+
+
+
+
+

clamp

+

See a description here.

+
+
+
+

VOP3P MAD_MIX/FMA_MIX Modifiers

+

v_mad_mix* and v_fma_mix* +instructions use op_sel and op_sel_hi modifiers +in a manner different from regular VOP3P instructions.

+

See a description below.

+

GFX9 and GFX10 only.

+
+

m_op_sel

+

This modifier has meaning only for 16-bit source operands as indicated by +m_op_sel_hi. +It selects either the low [15:0] or high [31:16] operand bits +as input to the operation.

+

The number of values specified by the op_sel modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on.

+

The value 0 indicates the low bits, the value 1 indicates the high 16 bits.

+

By default, low bits are used for all operands.

+
+
++++ + + + + + + + + + + +

Syntax

Description

op_sel:[{0..1},{0..1},{0..1}]

Select location of each 16-bit source operand.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
op_sel:[0,1,0]
+
+
+
+
+

m_op_sel_hi

+

Selects the size of source operands: either 32 bits or 16 bits. +By default, 32 bits are used for all source operands.

+

The number of values specified by the op_sel_hi modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on.

+

The value 0 indicates 32 bits, the value 1 indicates 16 bits.

+

The location of 16 bits in the operand may be specified by +m_op_sel.

+
+
++++ + + + + + + + + + + +

Syntax

Description

op_sel_hi:[{0..1},{0..1},{0..1}]

Select size of each source operand.

+
+

Note: numeric values may be specified as either +integer numbers or +absolute expressions.

+

Examples:

+
op_sel_hi:[1,1,1]
+
+
+
+
+

abs

+

See a description here.

+
+
+

neg

+

See a description here.

+
+
+

clamp

+

See a description here.

+
+
+
+

VOP3P MFMA Modifiers

+

These modifiers may only be used with GFX908 and GFX90A.

+
+

cbsz

+

Specifies a broadcast mode.

+
+
++++ + + + + + + + + + + +

Syntax

Description

cbsz:[{0..7}]

A broadcast mode.

+
+

Note: numeric value may be specified as either +an integer number or +an absolute expression.

+
+
+

abid

+

Specifies matrix A group select.

+
+
++++ + + + + + + + + + + +

Syntax

Description

abid:[{0..15}]

Matrix A group select id.

+
+

Note: numeric value may be specified as either +an integer number or +an absolute expression.

+
+
+

blgp

+

Specifies matrix B lane group pattern.

+
+
++++ + + + + + + + + + + +

Syntax

Description

blgp:[{0..7}]

Matrix B lane group pattern.

+
+

Note: numeric value may be specified as either +an integer number or +an absolute expression.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUOperandSyntax.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUOperandSyntax.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUOperandSyntax.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUOperandSyntax.html 2021-09-19 16:16:31.000000000 +0000 @@ -0,0 +1,1861 @@ + + + + + + + + + Syntax of AMDGPU Instruction Operands — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Syntax of AMDGPU Instruction Operands

+ +
+

Conventions

+

The following notation is used throughout this document:

+
+
++++ + + + + + + + + + + + + + +

Notation

Description

{0..N}

Any integer value in the range from 0 to N (inclusive).

<x>

Syntax and meaning of x are explained elsewhere.

+
+
+
+

Operands

+
+

v

+

Vector registers. There are 256 32-bit vector registers.

+

A sequence of vector registers may be used to operate on more than 32 bits of data.

+

Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8, 16 and 32 vector registers.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

v<N>

A single 32-bit vector register.

+

N must be a decimal +integer number.

+

v[<N>]

A single 32-bit vector register.

+

N may be specified as an +integer number +or an absolute expression.

+

v[<N>:<K>]

A sequence of (K-N+1) vector registers.

+

N and K may be specified as +integer numbers +or absolute expressions.

+

[v<N>, v<N+1>, … v<K>]

A sequence of (K-N+1) vector registers.

+

Register indices must be specified as decimal +integer numbers.

+
+
+

Note: N and K must satisfy the following conditions:

+
    +
  • N <= K.

  • +
  • 0 <= N <= 255.

  • +
  • 0 <= K <= 255.

  • +
  • K-N+1 must be equal to 1, 2, 3, 4, 5, 6, 7, 8, 16 or 32.

  • +
+

GFX90A has an additional alignment requirement: pairs of vector registers must be even-aligned +(first register must be even).

+

Examples:

+
v255
+v[0]
+v[0:1]
+v[1:1]
+v[0:3]
+v[2*2]
+v[1-1:2-1]
+[v252]
+[v252,v253,v254,v255]
+
+
+

GFX10 Image instructions may use special NSA (Non-Sequential Address) syntax for image addresses:

+
+
++++ + + + + + + + + + + +

Syntax

Description

[Vm, Vn, … Vk]

A sequence of 32-bit vector registers. +Each register may be specified using syntax +defined above.

+

In contrast with standard syntax, registers +in NSA sequence are not required to have +consecutive indices. Moreover, the same register +may appear in the list more than once.

+
+
+

Examples:

+
[v32,v1,v[2]]
+[v[32],v[1:1],[v2]]
+[v4,v4,v4,v4]
+
+
+
+
+

a

+

Accumulator registers. There are 256 32-bit accumulator registers.

+

A sequence of accumulator registers may be used to operate on more than 32 bits of data.

+

Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8, 16 and 32 accumulator registers.

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

An Alternative Syntax (SP3)

Description

a<N>

acc<N>

A single 32-bit accumulator register.

+

N must be a decimal +integer number.

+

a[<N>]

acc[<N>]

A single 32-bit accumulator register.

+

N may be specified as an +integer number +or an absolute expression.

+

a[<N>:<K>]

acc[<N>:<K>]

A sequence of (K-N+1) accumulator registers.

+

N and K may be specified as +integer numbers +or absolute expressions.

+

[a<N>, a<N+1>, … a<K>]

[acc<N>, acc<N+1>, … acc<K>]

A sequence of (K-N+1) accumulator registers.

+

Register indices must be specified as decimal +integer numbers.

+
+
+

Note: N and K must satisfy the following conditions:

+
    +
  • N <= K.

  • +
  • 0 <= N <= 255.

  • +
  • 0 <= K <= 255.

  • +
  • K-N+1 must be equal to 1, 2, 3, 4, 5, 6, 7, 8, 16 or 32.

  • +
+

GFX90A has an additional alignment requirement: pairs of accumulator registers must be even-aligned +(first register must be even).

+

Examples:

+
a255
+a[0]
+a[0:1]
+a[1:1]
+a[0:3]
+a[2*2]
+a[1-1:2-1]
+[a252]
+[a252,a253,a254,a255]
+
+acc0
+acc[1]
+[acc250]
+[acc2,acc3]
+
+
+
+
+

s

+

Scalar 32-bit registers. The number of available scalar registers depends on GPU:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

GPU

Number of scalar registers

GFX7

104

GFX8

102

GFX9

102

GFX10

106

+
+

A sequence of scalar registers may be used to operate with more than 32 bits of data. +Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8, 16 and 32 scalar registers.

+

Pairs of scalar registers must be even-aligned (first register must be even). +Sequences of 4 and more scalar registers must be quad-aligned.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

s<N>

A single 32-bit scalar register.

+

N must be a decimal +integer number.

+

s[<N>]

A single 32-bit scalar register.

+

N may be specified as an +integer number +or an absolute expression.

+

s[<N>:<K>]

A sequence of (K-N+1) scalar registers.

+

N and K may be specified as +integer numbers +or absolute expressions.

+

[s<N>, s<N+1>, … s<K>]

A sequence of (K-N+1) scalar registers.

+

Register indices must be specified as decimal +integer numbers.

+
+
+

Note: N and K must satisfy the following conditions:

+
    +
  • N must be properly aligned based on sequence size.

  • +
  • N <= K.

  • +
  • 0 <= N < SMAX, where SMAX is the number of available scalar registers.

  • +
  • 0 <= K < SMAX, where SMAX is the number of available scalar registers.

  • +
  • K-N+1 must be equal to 1, 2, 3, 4, 5, 6, 7, 8, 16 or 32.

  • +
+

Examples:

+
s0
+s[0]
+s[0:1]
+s[1:1]
+s[0:3]
+s[2*2]
+s[1-1:2-1]
+[s4]
+[s4,s5,s6,s7]
+
+
+

Examples of scalar registers with an invalid alignment:

+
s[1:2]
+s[2:5]
+
+
+
+
+

trap

+

A set of trap handler registers:

+ +
+
+

ttmp

+

Trap handler temporary scalar registers, 32-bits wide. +The number of available ttmp registers depends on GPU:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

GPU

Number of ttmp registers

GFX7

12

GFX8

12

GFX9

16

GFX10

16

+
+

A sequence of ttmp registers may be used to operate with more than 32 bits of data. +Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8 and 16 ttmp registers.

+

Pairs of ttmp registers must be even-aligned (first register must be even). +Sequences of 4 and more ttmp registers must be quad-aligned.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

ttmp<N>

A single 32-bit ttmp register.

+

N must be a decimal +integer number.

+

ttmp[<N>]

A single 32-bit ttmp register.

+

N may be specified as an +integer number +or an absolute expression.

+

ttmp[<N>:<K>]

A sequence of (K-N+1) ttmp registers.

+

N and K may be specified as +integer numbers +or absolute expressions.

+

[ttmp<N>, ttmp<N+1>, … ttmp<K>]

A sequence of (K-N+1) ttmp registers.

+

Register indices must be specified as decimal +integer numbers.

+
+
+

Note: N and K must satisfy the following conditions:

+
    +
  • N must be properly aligned based on sequence size.

  • +
  • N <= K.

  • +
  • 0 <= N < TMAX, where TMAX is the number of available ttmp registers.

  • +
  • 0 <= K < TMAX, where TMAX is the number of available ttmp registers.

  • +
  • K-N+1 must be equal to 1, 2, 3, 4, 5, 6, 7, 8 or 16.

  • +
+

Examples:

+
ttmp0
+ttmp[0]
+ttmp[0:1]
+ttmp[1:1]
+ttmp[0:3]
+ttmp[2*2]
+ttmp[1-1:2-1]
+[ttmp4]
+[ttmp4,ttmp5,ttmp6,ttmp7]
+
+
+

Examples of ttmp registers with an invalid alignment:

+
ttmp[1:2]
+ttmp[2:5]
+
+
+
+
+

tba

+

Trap base address, 64-bits wide. Holds the pointer to the current trap handler program.

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

Availability

tba

64-bit trap base address register.

GFX7, GFX8

[tba]

64-bit trap base address register (an SP3 syntax).

GFX7, GFX8

[tba_lo,tba_hi]

64-bit trap base address register (an SP3 syntax).

GFX7, GFX8

+
+

High and low 32 bits of trap base address may be accessed as separate registers:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

Availability

tba_lo

Low 32 bits of trap base address register.

GFX7, GFX8

tba_hi

High 32 bits of trap base address register.

GFX7, GFX8

[tba_lo]

Low 32 bits of trap base address register (an SP3 syntax).

GFX7, GFX8

[tba_hi]

High 32 bits of trap base address register (an SP3 syntax).

GFX7, GFX8

+
+

Note that tba, tba_lo and tba_hi are not accessible as assembler registers in GFX9 and GFX10, +but tba is readable/writable with the help of s_get_reg and s_set_reg instructions.

+
+
+

tma

+

Trap memory address, 64-bits wide.

+
+
+++++ + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

Availability

tma

64-bit trap memory address register.

GFX7, GFX8

[tma]

64-bit trap memory address register (an SP3 syntax).

GFX7, GFX8

[tma_lo,tma_hi]

64-bit trap memory address register (an SP3 syntax).

GFX7, GFX8

+
+

High and low 32 bits of trap memory address may be accessed as separate registers:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Description

Availability

tma_lo

Low 32 bits of trap memory address register.

GFX7, GFX8

tma_hi

High 32 bits of trap memory address register.

GFX7, GFX8

[tma_lo]

Low 32 bits of trap memory address register (an SP3 syntax).

GFX7, GFX8

[tma_hi]

High 32 bits of trap memory address register (an SP3 syntax).

GFX7, GFX8

+
+

Note that tma, tma_lo and tma_hi are not accessible as assembler registers in GFX9 and GFX10, +but tma is readable/writable with the help of s_get_reg and s_set_reg instructions.

+
+
+

flat_scratch

+

Flat scratch address, 64-bits wide. Holds the base address of scratch memory.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

flat_scratch

64-bit flat scratch address register.

[flat_scratch]

64-bit flat scratch address register (an SP3 syntax).

[flat_scratch_lo,flat_scratch_hi]

64-bit flat scratch address register (an SP3 syntax).

+
+

High and low 32 bits of flat scratch address may be accessed as separate registers:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

flat_scratch_lo

Low 32 bits of flat scratch address register.

flat_scratch_hi

High 32 bits of flat scratch address register.

[flat_scratch_lo]

Low 32 bits of flat scratch address register (an SP3 syntax).

[flat_scratch_hi]

High 32 bits of flat scratch address register (an SP3 syntax).

+
+

Note that flat_scratch, flat_scratch_lo and flat_scratch_hi are not accessible as assembler +registers in GFX10, but flat_scratch is readable/writable with the help of +s_get_reg and s_set_reg instructions.

+
+
+

xnack_mask

+

Xnack mask, 64-bits wide. Holds a 64-bit mask of which threads +received an XNACK due to a vector memory operation.

+
+

Warning

+

GFX7 does not support the xnack feature. For availability of this feature in other GPUs, refer to this table.

+
+

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

xnack_mask

64-bit xnack mask register.

[xnack_mask]

64-bit xnack mask register (an SP3 syntax).

[xnack_mask_lo,xnack_mask_hi]

64-bit xnack mask register (an SP3 syntax).

+
+

High and low 32 bits of xnack mask may be accessed as separate registers:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

xnack_mask_lo

Low 32 bits of xnack mask register.

xnack_mask_hi

High 32 bits of xnack mask register.

[xnack_mask_lo]

Low 32 bits of xnack mask register (an SP3 syntax).

[xnack_mask_hi]

High 32 bits of xnack mask register (an SP3 syntax).

+
+

Note that xnack_mask, xnack_mask_lo and xnack_mask_hi are not accessible as assembler +registers in GFX10, but xnack_mask is readable/writable with the help of +s_get_reg and s_set_reg instructions.

+
+
+

vcc

+

Vector condition code, 64-bits wide. A bit mask with one bit per thread; +it holds the result of a vector compare operation.

+

Note that GFX10 H/W does not use high 32 bits of vcc in wave32 mode.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

vcc

64-bit vector condition code register.

[vcc]

64-bit vector condition code register (an SP3 syntax).

[vcc_lo,vcc_hi]

64-bit vector condition code register (an SP3 syntax).

+
+

High and low 32 bits of vector condition code may be accessed as separate registers:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

vcc_lo

Low 32 bits of vector condition code register.

vcc_hi

High 32 bits of vector condition code register.

[vcc_lo]

Low 32 bits of vector condition code register (an SP3 syntax).

[vcc_hi]

High 32 bits of vector condition code register (an SP3 syntax).

+
+
+
+

m0

+

A 32-bit memory register. It has various uses, +including register indexing and bounds checking.

+
+
++++ + + + + + + + + + + + + + +

Syntax

Description

m0

A 32-bit memory register.

[m0]

A 32-bit memory register (an SP3 syntax).

+
+
+
+

exec

+

Execute mask, 64-bits wide. A bit mask with one bit per thread, +which is applied to vector instructions and controls which threads execute +and which ignore the instruction.

+

Note that GFX10 H/W does not use high 32 bits of exec in wave32 mode.

+
+
++++ + + + + + + + + + + + + + + + + +

Syntax

Description

exec

64-bit execute mask register.

[exec]

64-bit execute mask register (an SP3 syntax).

[exec_lo,exec_hi]

64-bit execute mask register (an SP3 syntax).

+
+

High and low 32 bits of execute mask may be accessed as separate registers:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Description

exec_lo

Low 32 bits of execute mask register.

exec_hi

High 32 bits of execute mask register.

[exec_lo]

Low 32 bits of execute mask register (an SP3 syntax).

[exec_hi]

High 32 bits of execute mask register (an SP3 syntax).

+
+
+
+

vccz

+

A single bit flag indicating that the vcc is all zeros.

+

Note: when GFX10 operates in wave32 mode, this register reflects state of vcc_lo.

+
+
+

execz

+

A single bit flag indicating that the exec is all zeros.

+

Note: when GFX10 operates in wave32 mode, this register reflects state of exec_lo.

+
+
+

scc

+

A single bit flag indicating the result of a scalar compare operation.

+
+
+

lds_direct

+

A special operand which supplies a 32-bit value +fetched from LDS memory using m0 as an address.

+
+
+

null

+

This is a special operand which may be used as a source or a destination.

+

When used as a destination, the result of the operation is discarded.

+

When used as a source, it supplies zero value.

+

GFX10 only.

+
+

Warning

+

Due to a H/W bug, this operand cannot be used with VALU instructions in first generation of GFX10.

+
+
+
+

inline constant

+

An inline constant is an integer or a floating-point value encoded as a part of an instruction. +Compare inline constants with literals.

+

Inline constants include:

+ +

If a number may be encoded as either +a literal or +a constant, +assembler selects the latter encoding as more efficient.

+
+

iconst

+

An integer number or +an absolute expression +encoded as an inline constant.

+

Only a small fraction of integer numbers may be encoded as inline constants. +They are enumerated in the table below. +Other integer numbers have to be encoded as literals.

+
+
++++ + + + + + + + + + + + + + +

Value

Note

{0..64}

Positive integer inline constants.

{-16..-1}

Negative integer inline constants.

+
+
+

Warning

+

GFX7 does not support inline constants for f16 operands.

+
+
+
+

fconst

+

A floating-point number +encoded as an inline constant.

+

Only a small fraction of floating-point numbers may be encoded as inline constants. +They are enumerated in the table below. +Other floating-point numbers have to be encoded as literals.

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Value

Note

Availability

0.0

The same as integer constant 0.

All GPUs

0.5

Floating-point constant 0.5

All GPUs

1.0

Floating-point constant 1.0

All GPUs

2.0

Floating-point constant 2.0

All GPUs

4.0

Floating-point constant 4.0

All GPUs

-0.5

Floating-point constant -0.5

All GPUs

-1.0

Floating-point constant -1.0

All GPUs

-2.0

Floating-point constant -2.0

All GPUs

-4.0

Floating-point constant -4.0

All GPUs

0.1592

1.0/(2.0*pi). Use only for 16-bit operands.

GFX8, GFX9, GFX10

0.15915494

1.0/(2.0*pi). Use only for 16- and 32-bit operands.

GFX8, GFX9, GFX10

0.15915494309189532

1.0/(2.0*pi).

GFX8, GFX9, GFX10

+
+
+

Warning

+

Floating-point inline constants cannot be used with 16-bit integer operands. Assembler will attempt to encode these values as literals.

+
+
+

Warning

+

GFX7 does not support inline constants for f16 operands.

+
+
+
+

ival

+

A symbolic operand encoded as an inline constant. +These operands provide read-only access to H/W registers.

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Syntax

Note

Availability

shared_base

Base address of shared memory region.

GFX9, GFX10

shared_limit

Address of the end of shared memory region.

GFX9, GFX10

private_base

Base address of private memory region.

GFX9, GFX10

private_limit

Address of the end of private memory region.

GFX9, GFX10

pops_exiting_wave_id

A dedicated counter for POPS.

GFX9, GFX10

+
+
+
+
+

literal

+

A literal is a 64-bit value encoded as a separate 32-bit dword in the instruction stream. +Compare literals with inline constants.

+

If a number may be encoded as either +a literal or +an inline constant, +assembler selects the latter encoding as more efficient.

+

Literals may be specified as integer numbers, +floating-point numbers, +absolute expressions or +relocatable expressions.

+

An instruction may use only one literal, but several operands may refer to the same literal.

+
+
+

uimm8

+

An 8-bit integer number +or an absolute expression. +The value must be in the range 0..0xFF.

+
+
+

uimm32

+

A 32-bit integer number +or an absolute expression. +The value must be in the range 0..0xFFFFFFFF.

+
+
+

uimm20

+

A 20-bit integer number +or an absolute expression.

+

The value must be in the range 0..0xFFFFF.

+
+
+

simm21

+

A 21-bit integer number +or an absolute expression.

+

The value must be in the range -0x100000..0x0FFFFF.

+
+
+

off

+

A special entity which indicates that the value of this operand is not used.

+
+
++++ + + + + + + + + + + +

Syntax

Description

off

Indicates an unused operand.

+
+
+
+
+

Numbers

+
+

Integer Numbers

+

Integer numbers are 64 bits wide. +They are converted to expected operand type +as described here.

+

Integer numbers may be specified in binary, octal, hexadecimal and decimal formats:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Format

Syntax

Example

Decimal

[-]?[1-9][0-9]*

-1234

Binary

[-]?0b[01]+

0b1010

Octal

[-]?0[0-7]+

010

Hexadecimal

[-]?0x[0-9a-fA-F]+

0xff

[-]?[0x]?[0-9][0-9a-fA-F]*[hH]

0ffh

+
+
+
+

Floating-Point Numbers

+

All floating-point numbers are handled as double (64 bits wide). +They are converted to +expected operand type +as described here.

+

Floating-point numbers may be specified in hexadecimal and decimal formats:

+
+
++++++ + + + + + + + + + + + + + + + + + + + +

Format

Syntax

Examples

Note

Decimal

[-]?[0-9]*[.][0-9]*([eE][+-]?[0-9]*)?

-1.234, 234e2

Must include either +a decimal separator +or an exponent.

Hexadecimal

[-]0x[0-9a-fA-F]*(.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+

-0x1afp-10, 0x.1afp10

+
+
+
+
+

Expressions

+

An expression is evaluated to a 64-bit integer. +Note that floating-point expressions are not supported.

+

There are two kinds of expressions:

+ +
+

Absolute Expressions

+

The value of an absolute expression does not change after program relocation. +Absolute expressions must not include unassigned and relocatable values +such as labels.

+

Absolute expressions are evaluated to 64-bit integer values and converted to +expected operand type +as described here.

+

Examples:

+
x = -1
+y = x + 10
+
+
+
+
+

Relocatable Expressions

+

The value of a relocatable expression depends on program relocation.

+

Note that the use of relocatable expressions is limited to branch targets +and 32-bit integer operands.

+

A relocatable expression is evaluated to a 64-bit integer value +which depends on operand kind and relocation type +of symbol(s) used in the expression. For example, if an instruction refers to a label, +this reference is evaluated to an offset from the address after the instruction +to the label address:

+
label:
+v_add_co_u32_e32 v0, vcc, label, v1  // 'label' operand is evaluated to -4
+
+
+

Note that values of relocatable expressions are usually unknown at assembly time; +they are resolved later by a linker and converted to +expected operand type +as described here.

+
+
+

Operands and Operations

+

Expressions are composed of 64-bit integer operands and operations. +Operands include integer numbers +and symbols.

+

Expressions may also use “.” which is a reference to the current PC (program counter).

+

Unary and binary +operations produce 64-bit integer results.

+
+
+

Syntax of Expressions

+

Syntax of expressions is shown below:

+
expr ::= expr binop expr | primaryexpr ;
+
+primaryexpr ::= '(' expr ')' | symbol | number | '.' | unop primaryexpr ;
+
+binop ::= '&&'
+        | '||'
+        | '|'
+        | '^'
+        | '&'
+        | '!'
+        | '=='
+        | '!='
+        | '<>'
+        | '<'
+        | '<='
+        | '>'
+        | '>='
+        | '<<'
+        | '>>'
+        | '+'
+        | '-'
+        | '*'
+        | '/'
+        | '%' ;
+
+unop ::= '~'
+       | '+'
+       | '-'
+       | '!' ;
+
+
+
+
+

Binary Operators

+

Binary operators are described in the following table. +They operate on and produce 64-bit integers. +Operators with higher priority are performed first.

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Operator

Priority

Meaning

*

5

Integer multiplication.

/

5

Integer division.

%

5

Integer signed remainder.

+

4

Integer addition.

-

4

Integer subtraction.

<<

3

Integer shift left.

>>

3

Logical shift right.

==

2

Equality comparison.

!=

2

Inequality comparison.

<>

2

Inequality comparison.

<

2

Signed less than comparison.

<=

2

Signed less than or equal comparison.

>

2

Signed greater than comparison.

>=

2

Signed greater than or equal comparison.

|

1

Bitwise or.

^

1

Bitwise xor.

&

1

Bitwise and.

&&

0

Logical and.

||

0

Logical or.

+
+
+
+

Unary Operators

+

Unary operators are described in the following table. +They operate on and produce 64-bit integers.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Operator

Meaning

!

Logical negation.

~

Bitwise negation.

+

Integer unary plus.

-

Integer unary minus.

+
+
+
+

Symbols

+

A symbol is a named 64-bit integer value, representing a relocatable +address or an absolute (non-relocatable) number.

+
+
Symbol names have the following syntax:

[a-zA-Z_.][a-zA-Z0-9_$.@]*

+
+
+

The table below provides several examples of syntax used for symbol definition.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Syntax

Meaning

.globl <S>

Declares a global symbol S without assigning it a value.

.set <S>, <E>

Assigns the value of an expression E to a symbol S.

<S> = <E>

Assigns the value of an expression E to a symbol S.

<S>:

Declares a label S and assigns it the current PC value.

+
+

A symbol may be used before it is declared or assigned; +unassigned symbols are assumed to be PC-relative.

+

Additional information about symbols may be found here.

+
+
+
+

Type and Size Conversion

+

This section describes what happens when a 64-bit +integer number, a +floating-point number or an +expression +is used for an operand which has a different type or size.

+
+

Conversion of Integer Values

+

Instruction operands may be specified as 64-bit integer numbers or +absolute expressions. These values are converted to +the expected operand type using the following steps:

+

1. Validation. Assembler checks if the input value may be truncated without loss to the required truncation width +(see the table below). There are two cases when this operation is enabled:

+
+
    +
  • The truncated bits are all 0.

  • +
  • The truncated bits are all 1 and the value after truncation has its MSB bit set.

  • +
+
+

In all other cases assembler triggers an error.

+

2. Conversion. The input value is converted to the expected type as described in the table below. +Depending on operand kind, this conversion is performed by either assembler or AMDGPU H/W (or both).

+
+
++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Expected type

Truncation Width

Conversion

Description

i16, u16, b16

16

num.u16

Truncate to 16 bits.

i32, u32, b32

32

num.u32

Truncate to 32 bits.

i64

32

{-1,num.i32}

Truncate to 32 bits and then sign-extend the result to 64 bits.

u64, b64

32

{0,num.u32}

Truncate to 32 bits and then zero-extend the result to 64 bits.

f16

16

num.u16

Use low 16 bits as an f16 value.

f32

32

num.u32

Use low 32 bits as an f32 value.

f64

32

{num.u32,0}

Use low 32 bits of the number as high 32 bits +of the result; low 32 bits of the result are zeroed.

+
+

Examples of enabled conversions:

+
// GFX9
+
+v_add_u16 v0, -1, 0                   // src0 = 0xFFFF
+v_add_f16 v0, -1, 0                   // src0 = 0xFFFF (NaN)
+                                      //
+v_add_u32 v0, -1, 0                   // src0 = 0xFFFFFFFF
+v_add_f32 v0, -1, 0                   // src0 = 0xFFFFFFFF (NaN)
+                                      //
+v_add_u16 v0, 0xff00, v0              // src0 = 0xff00
+v_add_u16 v0, 0xffffffffffffff00, v0  // src0 = 0xff00
+v_add_u16 v0, -256, v0                // src0 = 0xff00
+                                      //
+s_bfe_i64 s[0:1], 0xffefffff, s3      // src0 = 0xffffffffffefffff
+s_bfe_u64 s[0:1], 0xffefffff, s3      // src0 = 0x00000000ffefffff
+v_ceil_f64_e32 v[0:1], 0xffefffff     // src0 = 0xffefffff00000000 (-1.7976922776554302e308)
+                                      //
+x = 0xffefffff                        //
+s_bfe_i64 s[0:1], x, s3               // src0 = 0xffffffffffefffff
+s_bfe_u64 s[0:1], x, s3               // src0 = 0x00000000ffefffff
+v_ceil_f64_e32 v[0:1], x              // src0 = 0xffefffff00000000 (-1.7976922776554302e308)
+
+
+

Examples of disabled conversions:

+
// GFX9
+
+v_add_u16 v0, 0x1ff00, v0               // truncated bits are not all 0 or 1
+v_add_u16 v0, 0xffffffffffff00ff, v0    // truncated bits do not match MSB of the result
+
+
+
+
+

Conversion of Floating-Point Values

+

Instruction operands may be specified as 64-bit floating-point numbers. +These values are converted to the expected operand type using the following steps:

+

1. Validation. Assembler checks if the input f64 number can be converted +to the required floating-point type (see the table below) without overflow or underflow. +Loss of precision is allowed. If this conversion is not possible, assembler triggers an error.

+

2. Conversion. The input value is converted to the expected type as described in the table below. +Depending on operand kind, this is performed by either assembler or AMDGPU H/W (or both).

+
+
++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Expected type

Required FP Type

Conversion

Description

i16, u16, b16

f16

f16(num)

Convert to f16 and use bits of the result as an integer value. +The value has to be encoded as a literal or an error occurs. +Note that the value cannot be encoded as an inline constant.

i32, u32, b32

f32

f32(num)

Convert to f32 and use bits of the result as an integer value.

i64, u64, b64

-

-

Conversion disabled.

f16

f16

f16(num)

Convert to f16.

f32

f32

f32(num)

Convert to f32.

f64

f64

{num.u32.hi,0}

Use high 32 bits of the number as high 32 bits of the result; +zero-fill low 32 bits of the result.

+

Note that the result may differ from the original number.

+
+
+

Examples of enabled conversions:

+
// GFX9
+
+v_add_f16 v0, 1.0, 0        // src0 = 0x3C00 (1.0)
+v_add_u16 v0, 1.0, 0        // src0 = 0x3C00
+                            //
+v_add_f32 v0, 1.0, 0        // src0 = 0x3F800000 (1.0)
+v_add_u32 v0, 1.0, 0        // src0 = 0x3F800000
+
+                            // src0 before conversion:
+                            //   1.7976931348623157e308 = 0x7fefffffffffffff
+                            // src0 after conversion:
+                            //   1.7976922776554302e308 = 0x7fefffff00000000
+v_ceil_f64 v[0:1], 1.7976931348623157e308
+
+v_add_f16 v1, 65500.0, v2   // ok for f16.
+v_add_f32 v1, 65600.0, v2   // ok for f32, but would result in overflow for f16.
+
+
+

Examples of disabled conversions:

+
// GFX9
+
+v_add_f16 v1, 65600.0, v2    // overflow
+
+
+
+
+

Conversion of Relocatable Values

+

Relocatable expressions +may be used with 32-bit integer operands and jump targets.

+

When the value of a relocatable expression is resolved by a linker, it is +converted as needed and truncated to the operand size. The conversion depends +on relocation type and operand kind.

+

For example, when a 32-bit operand of an instruction refers to a relocatable expression expr, +this reference is evaluated to a 64-bit offset from the address after the +instruction to the address being referenced, counted in bytes. +Then the value is truncated to 32 bits and encoded as a literal:

+
expr = .
+v_add_co_u32_e32 v0, vcc, expr, v1  // 'expr' operand is evaluated to -4
+                                    // and then truncated to 0xFFFFFFFC
+
+
+

As another example, when a branch instruction refers to a label, +this reference is evaluated to an offset from the address after the +instruction to the label address, counted in dwords. +Then the value is truncated to 16 bits:

+
label:
+s_branch label  // 'label' operand is evaluated to -1 and truncated to 0xFFFF
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUUsage.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUUsage.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/AMDGPUUsage.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/AMDGPUUsage.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,19276 @@ + + + + + + + + + User Guide for AMDGPU Backend — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

User Guide for AMDGPU Backend

+
+ +
+
+
+
+

Introduction

+

The AMDGPU backend provides ISA code generation for AMD GPUs, starting with the +R600 family up until the current GCN families. It lives in the +llvm/lib/Target/AMDGPU directory.

+
+
+

LLVM

+
+

Target Triples

+

Use the Clang option -target <Architecture>-<Vendor>-<OS>-<Environment> +to specify the target triple:

+
+
+ ++++ + + + + + + + + + + + + + +
AMDGPU Architectures

Architecture

Description

r600

AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders.

amdgcn

AMD GPUs GCN GFX6 onwards for graphics and compute shaders.

+ + ++++ + + + + + + + + + + + + + +
AMDGPU Vendors

Vendor

Description

amd

Can be used for all AMD GPU usage.

mesa3d

Can be used if the OS is mesa3d.

+ + ++++ + + + + + + + + + + + + + + + + + + + +
AMDGPU Operating Systems

OS

Description

<empty>

Defaults to the unknown OS.

amdhsa

Compute kernels executed on HSA [HSA] compatible runtimes +such as:

+
    +
  • AMD’s ROCm™ runtime [AMD-ROCm] using the rocm-amdhsa +loader on Linux. See AMD ROCm Platform Release Notes +[AMD-ROCm-Release-Notes] for supported hardware and +software.

  • +
  • AMD’s PAL runtime using the pal-amdhsa loader on +Windows.

  • +
+

amdpal

Graphic shaders and compute kernels executed on AMD’s PAL +runtime using the pal-amdpal loader on Windows and Linux +Pro.

mesa3d

Graphic shaders and compute kernels executed on AMD’s Mesa +3D runtime using the mesa-mesa3d loader on Linux.

+ + ++++ + + + + + + + + + + +
AMDGPU Environments

Environment

Description

<empty>

Default.

+
+
+
+

Processors

+

Use the Clang options -mcpu=<target-id> or --offload-arch=<target-id> to +specify the AMDGPU processor together with optional target features. See +Target ID and Target Features for AMD GPU target +specific information.

+

Every processor supports every OS ABI (see AMDGPU Operating Systems) with the following exceptions:

+
    +
  • amdhsa is not supported in r600 architecture (see AMDGPU Architectures).

    + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AMDGPU Processors

    Processor

    Alternative +Processor

    Target +Triple +Architecture

    dGPU/ +APU

    Target +Features +Supported

    Target +Properties

    OS Support +(see +amdgpu-os +and +corresponding +runtime release +notes for +current +information and +level of +support)

    Example +Products

    Radeon HD 2000/3000 Series (R600) [AMD-RADEON-HD-2000-3000]

    r600

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    r630

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    rs880

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    rv670

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    Radeon HD 4000 Series (R700) [AMD-RADEON-HD-4000]

    rv710

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    rv730

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    rv770

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    Radeon HD 5000 Series (Evergreen) [AMD-RADEON-HD-5000]

    cedar

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    cypress

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    juniper

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    redwood

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    sumo

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    Radeon HD 6000 Series (Northern Islands) [AMD-RADEON-HD-6000]

    barts

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    caicos

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    cayman

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    turks

    r600

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +

    GCN GFX6 (Southern Islands (SI)) [AMD-GCN-GFX6]

    gfx600

      +
    • tahiti

    • +
    +

    amdgcn

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +
      +
    • pal-amdpal

    • +
    +

    gfx601

      +
    • pitcairn

    • +
    • verde

    • +
    +

    amdgcn

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +
      +
    • pal-amdpal

    • +
    +

    gfx602

      +
    • hainan

    • +
    • oland

    • +
    +

    amdgcn

    dGPU

      +
    • Does not +support +generic +address +space

    • +
    +
      +
    • pal-amdpal

    • +
    +

    GCN GFX7 (Sea Islands (CI)) [AMD-GCN-GFX7]

    gfx700

      +
    • kaveri

    • +
    +

    amdgcn

    APU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • A6-7000

    • +
    • A6 Pro-7050B

    • +
    • A8-7100

    • +
    • A8 Pro-7150B

    • +
    • A10-7300

    • +
    • A10 Pro-7350B

    • +
    • FX-7500

    • +
    • A8-7200P

    • +
    • A10-7400P

    • +
    • FX-7600P

    • +
    +

    gfx701

      +
    • hawaii

    • +
    +

    amdgcn

    dGPU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • FirePro W8100

    • +
    • FirePro W9100

    • +
    • FirePro S9150

    • +
    • FirePro S9170

    • +
    +

    gfx702

    amdgcn

    dGPU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon R9 290

    • +
    • Radeon R9 290x

    • +
    • Radeon R390

    • +
    • Radeon R390x

    • +
    +

    gfx703

      +
    • kabini

    • +
    • mullins

    • +
    +

    amdgcn

    APU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • E1-2100

    • +
    • E1-2200

    • +
    • E1-2500

    • +
    • E2-3000

    • +
    • E2-3800

    • +
    • A4-5000

    • +
    • A4-5100

    • +
    • A6-5200

    • +
    • A4 Pro-3340B

    • +
    +

    gfx704

      +
    • bonaire

    • +
    +

    amdgcn

    dGPU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon HD 7790

    • +
    • Radeon HD 8770

    • +
    • R7 260

    • +
    • R7 260X

    • +
    +

    gfx705

    amdgcn

    APU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +

    TBA

    +

    GCN GFX8 (Volcanic Islands (VI)) [AMD-GCN-GFX8]

    gfx801

      +
    • carrizo

    • +
    +

    amdgcn

    APU

      +
    • xnack

    • +
    +
      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • A6-8500P

    • +
    • Pro A6-8500B

    • +
    • A8-8600P

    • +
    • Pro A8-8600B

    • +
    • FX-8800P

    • +
    • Pro A12-8800B

    • +
    • A10-8700P

    • +
    • Pro A10-8700B

    • +
    • A10-8780P

    • +
    • A10-9600P

    • +
    • A10-9630P

    • +
    • A12-9700P

    • +
    • A12-9730P

    • +
    • FX-9800P

    • +
    • FX-9830P

    • +
    • E2-9010

    • +
    • A6-9210

    • +
    • A9-9410

    • +
    +

    gfx802

      +
    • iceland

    • +
    • tonga

    • +
    +

    amdgcn

    dGPU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon R9 285

    • +
    • Radeon R9 380

    • +
    • Radeon R9 385

    • +
    +

    gfx803

      +
    • fiji

    • +
    +

    amdgcn

    dGPU

      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon R9 Nano

    • +
    • Radeon R9 Fury

    • +
    • Radeon R9 FuryX

    • +
    • Radeon Pro Duo

    • +
    • FirePro S9300x2

    • +
    • Radeon Instinct MI8

    • +
    +

      +
    • polaris10

    • +
    +

    amdgcn

    dGPU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon RX 470

    • +
    • Radeon RX 480

    • +
    • Radeon Instinct MI6

    • +
    +

      +
    • polaris11

    • +
    +

    amdgcn

    dGPU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon RX 460

    • +
    +

    gfx805

      +
    • tongapro

    • +
    +

    amdgcn

    dGPU

      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • FirePro S7150

    • +
    • FirePro S7100

    • +
    • FirePro W7100

    • +
    • Mobile FirePro +M7170

    • +
    +

    gfx810

      +
    • stoney

    • +
    +

    amdgcn

    APU

      +
    • xnack

    • +
    +
      +
    • Offset +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +

    TBA

    +

    GCN GFX9 (Vega) [AMD-GCN-GFX900-GFX904-VEGA] [AMD-GCN-GFX906-VEGA7NM] [AMD-GCN-GFX908-CDNA1]

    gfx900

    amdgcn

    dGPU

      +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon Vega +Frontier Edition

    • +
    • Radeon RX Vega 56

    • +
    • Radeon RX Vega 64

    • +
    • Radeon RX Vega 64 +Liquid

    • +
    • Radeon Instinct MI25

    • +
    +

    gfx902

    amdgcn

    APU

      +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Ryzen 3 2200G

    • +
    • Ryzen 5 2400G

    • +
    +

    gfx904

    amdgcn

    dGPU

      +
    • xnack

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +

    TBA

    +

    gfx906

    amdgcn

    dGPU

      +
    • sramecc

    • +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon Instinct MI50

    • +
    • Radeon Instinct MI60

    • +
    • Radeon VII

    • +
    • Radeon Pro VII

    • +
    +

    gfx908

    amdgcn

    dGPU

      +
    • sramecc

    • +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    +
      +
    • AMD Instinct MI100 Accelerator

    • +
    +

    gfx909

    amdgcn

    APU

      +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • pal-amdpal

    • +
    +

    TBA

    +

    gfx90a

    amdgcn

    dGPU

      +
    • sramecc

    • +
    • tgsplit

    • +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    • Packed +work-item +IDs

    • +
    +
      +
    • rocm-amdhsa

    • +
    +

    TBA

    +

    gfx90c

    amdgcn

    APU

      +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • pal-amdpal

    • +
    +
      +
    • Ryzen 7 4700G

    • +
    • Ryzen 7 4700GE

    • +
    • Ryzen 5 4600G

    • +
    • Ryzen 5 4600GE

    • +
    • Ryzen 3 4300G

    • +
    • Ryzen 3 4300GE

    • +
    • Ryzen Pro 4000G

    • +
    • Ryzen 7 Pro 4700G

    • +
    • Ryzen 7 Pro 4750GE

    • +
    • Ryzen 5 Pro 4650G

    • +
    • Ryzen 5 Pro 4650GE

    • +
    • Ryzen 3 Pro 4350G

    • +
    • Ryzen 3 Pro 4350GE

    • +
    +

    GCN GFX10 (RDNA 1) [AMD-GCN-GFX10-RDNA1]

    gfx1010

    amdgcn

    dGPU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon RX 5700

    • +
    • Radeon RX 5700 XT

    • +
    • Radeon Pro 5600 XT

    • +
    • Radeon Pro 5600M

    • +
    +

    gfx1011

    amdgcn

    dGPU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon Pro V520

    • +
    +

    gfx1012

    amdgcn

    dGPU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon RX 5500

    • +
    • Radeon RX 5500 XT

    • +
    +

    gfx1013

    amdgcn

    APU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    • xnack

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +

    TBA

    +

    GCN GFX10 (RDNA 2) [AMD-GCN-GFX10-RDNA2]

    gfx1030

    amdgcn

    dGPU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon RX 6800

    • +
    • Radeon RX 6800 XT

    • +
    • Radeon RX 6900 XT

    • +
    +

    gfx1031

    amdgcn

    dGPU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +
      +
    • Radeon RX 6700 XT

    • +
    +

    gfx1032

    amdgcn

    dGPU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • rocm-amdhsa

    • +
    • pal-amdhsa

    • +
    • pal-amdpal

    • +
    +

    TBA

    +

    gfx1033

    amdgcn

    APU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • pal-amdpal

    • +
    +

    TBA

    +

    gfx1034

    amdgcn

    dGPU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • pal-amdpal

    • +
    +

    TBA

    +

    gfx1035

    amdgcn

    APU

      +
    • cumode

    • +
    • wavefrontsize64

    • +
    +
      +
    • Absolute +flat +scratch

    • +
    +
      +
    • pal-amdpal

    • +
    +

    TBA

    +
    +
  • +
+
+
+

Target Features

+

Target features control how code is generated to support certain +processor specific features. Not all target features are supported by +all processors. The runtime must ensure that the features supported by +the device used to execute the code match the features enabled when +generating the code. A mismatch of features may result in incorrect +execution, or a reduction in performance.

+

The target features supported by each processor are listed in +AMDGPU Processors.

+

Target features are controlled by exactly one of the following Clang +options:

+

-mcpu=<target-id> or --offload-arch=<target-id>

+
+

The -mcpu and --offload-arch options can specify the target feature as +optional components of the target ID. If omitted, the target feature has the +any value. See Target ID.

+
+

-m[no-]<target-feature>

+
+

Target features not specified by the target ID are specified using a +separate option. These target features can have an on or off +value. on is specified by omitting the no- prefix, and +off is specified by including the no- prefix. The default +if not specified is off.

+
+

For example:

+
+
-mcpu=gfx908:xnack+

Enable the xnack feature.

+
+
-mcpu=gfx908:xnack-

Disable the xnack feature.

+
+
-mcumode

Enable the cumode feature.

+
+
-mno-cumode

Disable the cumode feature.

+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU Target Features

Target Feature

Clang Option to Control

Description

Name

cumode

    +
  • -m[no-]cumode

  • +
+

Control the wavefront execution mode used +when generating code for kernels. When disabled +native WGP wavefront execution mode is used, +when enabled CU wavefront execution mode is used +(see Memory Model).

sramecc

    +
  • -mcpu

  • +
  • --offload-arch

  • +
+

If specified, generate code that can only be +loaded and executed in a process that has a +matching setting for SRAMECC.

+

If not specified for code object V2 to V3, generate +code that can be loaded and executed in a process +with SRAMECC enabled.

+

If not specified for code object V4, generate +code that can be loaded and executed in a process +with either setting of SRAMECC.

+

tgsplit

-m[no-]tgsplit

Enable/disable generating code that assumes +work-groups are launched in threadgroup split mode. +When enabled the waves of a work-group may be +launched in different CUs.

wavefrontsize64

    +
  • -m[no-]wavefrontsize64

  • +
+

Control the wavefront size used when +generating code for kernels. When disabled +native wavefront size 32 is used, when enabled +wavefront size 64 is used.

xnack

    +
  • -mcpu

  • +
  • --offload-arch

  • +
+

If specified, generate code that can only be +loaded and executed in a process that has a +matching setting for XNACK replay.

+

If not specified for code object V2 to V3, generate +code that can be loaded and executed in a process +with XNACK replay enabled.

+

If not specified for code object V4, generate +code that can be loaded and executed in a process +with either setting of XNACK replay.

+

XNACK replay can be used for demand paging and +page migration. If enabled in the device, then if +a page fault occurs the code may execute +incorrectly unless generated with XNACK replay +enabled, or generated for code object V4 without +specifying XNACK replay. Executing code that was +generated with XNACK replay enabled, or generated +for code object V4 without specifying XNACK replay, +on a device that does not have XNACK replay +enabled will execute correctly but may be less +performant than code generated for XNACK replay +disabled.

+
+
+
+
+
+

Target ID

+

AMDGPU supports target IDs. See Clang Offload Bundler for a general +description. The AMDGPU target specific information is:

+
+
processor

Is an AMDGPU processor or alternative processor name specified in +AMDGPU Processors. The non-canonical form target ID allows both +the primary processor and alternative processor names. The canonical form +target ID only allows the primary processor name.

+
+
target-feature

Is a target feature name specified in AMDGPU Target Features that +is supported by the processor. The target features supported by each processor +are specified in AMDGPU Processors. Those that can be specified in +a target ID are marked as being controlled by -mcpu and +--offload-arch. Each target feature must appear at most once in a target +ID. The non-canonical form target ID allows the target features to be +specified in any order. The canonical form target ID requires the target +features to be specified in alphabetic order.

+
+
+
+

Code Object V2 to V3 Target ID

+

The target ID syntax for code object V2 to V3 is the same as defined in Clang +Offload Bundler except +when used in the .amdgcn_target <target-triple> “-” <target-id> assembler +directive and the bundle entry ID. In those cases it has the following BNF +syntax:

+
<target-id> ::== <processor> ( "+" <target-feature> )*
+
+
+

Where a target feature is omitted if Off and present if On or Any.

+
+

Note

+

The code object V2 to V3 cannot represent Any and treats it the same as +On.

+
+
+
+
+

Embedding Bundled Code Objects

+

AMDGPU supports the HIP and OpenMP languages that perform code object embedding +as described in Clang Offload Bundler.

+
+

Note

+

The target ID syntax used for code object V2 to V3 for a bundle entry ID +differs from that used elsewhere. See Code Object V2 to V3 Target ID.

+
+
+
+

Address Spaces

+

The AMDGPU architecture supports a number of memory address spaces. The address +space names use the OpenCL standard names, with some additions.

+

The AMDGPU address spaces correspond to target architecture specific LLVM +address space numbers used in LLVM IR.

+

The AMDGPU address spaces are described in +AMDGPU Address Spaces. Only 64-bit process address spaces are +supported for the amdgcn target.

+
+
+ ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU Address Spaces

64-Bit Process Address Space

Address Space Name

LLVM IR Address +Space Number

HSA Segment +Name

Hardware +Name

Address +Size

NULL Value

Generic

0

flat

flat

64

0x0000000000000000

Global

1

global

global

64

0x0000000000000000

Region

2

N/A

GDS

32

not implemented for AMDHSA

Local

3

group

LDS

32

0xFFFFFFFF

Constant

4

constant

same as global

64

0x0000000000000000

Private

5

private

scratch

32

0xFFFFFFFF

Constant 32-bit

6

TODO

0x00000000

Buffer Fat Pointer (experimental)

7

TODO

+
+
+
Generic

The generic address space is supported unless the Target Properties column +of AMDGPU Processors specifies Does not support generic address +space.

+

The generic address space uses the hardware flat address support for two fixed +ranges of virtual addresses (the private and local apertures), that are +outside the range of addressable global memory, to map from a flat address to +a private or local address. This uses FLAT instructions that can take a flat +address and access global, private (scratch), and group (LDS) memory depending +on whether the address is within one of the aperture ranges.

+

Flat access to scratch requires hardware aperture setup and setup in the +kernel prologue (see Flat Scratch). Flat +access to LDS requires hardware aperture setup and M0 (GFX7-GFX8) register +setup (see M0).

+

To convert between a private or group address space address (termed a segment +address) and a flat address the base address of the corresponding aperture +can be used. For GFX7-GFX8 these are available in the +HSA AQL Queue the address of which can be obtained with +Queue Ptr SGPR (see Initial Kernel Execution State). For +GFX9-GFX10 the aperture base addresses are directly available as inline +constant registers SRC_SHARED_BASE/LIMIT and SRC_PRIVATE_BASE/LIMIT. +In 64-bit address mode the aperture sizes are 2^32 bytes and the base is +aligned to 2^32 which makes it easier to convert from flat to segment or +segment to flat.

+

A global address space address has the same value when used as a flat address +so no conversion is needed.

+
+
Global and Constant

The global and constant address spaces both use global virtual addresses, +which are the same virtual address space used by the CPU. However, some +virtual addresses may only be accessible to the CPU, some only accessible +by the GPU, and some by both.

+

Using the constant address space indicates that the data will not change +during the execution of the kernel. This allows scalar read instructions to +be used. As the constant address space could only be modified on the host +side, a generic pointer loaded from the constant address space is safe to be +assumed as a global pointer since only the device global memory is visible +and managed on the host side. The vector and scalar L1 caches are invalidated +of volatile data before each kernel dispatch execution to allow constant +memory to change values between kernel dispatches.

+
+
Region

The region address space uses the hardware Global Data Store (GDS). All +wavefronts executing on the same device will access the same memory for any +given region address. However, the same region address accessed by wavefronts +executing on different devices will access different memory. It is higher +performance than global memory. It is allocated by the runtime. The data +store (DS) instructions can be used to access it.

+
+
Local

The local address space uses the hardware Local Data Store (LDS) which is +automatically allocated when the hardware creates the wavefronts of a +work-group, and freed when all the wavefronts of a work-group have +terminated. All wavefronts belonging to the same work-group will access the +same memory for any given local address. However, the same local address +accessed by wavefronts belonging to different work-groups will access +different memory. It is higher performance than global memory. The data store +(DS) instructions can be used to access it.

+
+
Private

The private address space uses the hardware scratch memory support which +automatically allocates memory when it creates a wavefront and frees it when +a wavefront terminates. The memory accessed by a lane of a wavefront for any +given private address will be different to the memory accessed by another lane +of the same or different wavefront for the same private address.

+

If a kernel dispatch uses scratch, then the hardware allocates memory from a +pool of backing memory allocated by the runtime for each wavefront. The lanes +of the wavefront access this using dword (4 byte) interleaving. The mapping +used from private address to backing memory address is:

+
+

wavefront-scratch-base + +((private-address / 4) * wavefront-size * 4) + +(wavefront-lane-id * 4) + (private-address % 4)

+
+

If each lane of a wavefront accesses the same private address, the +interleaving results in adjacent dwords being accessed and hence requires +fewer cache lines to be fetched.

+

There are different ways that the wavefront scratch base address is +determined by a wavefront (see +Initial Kernel Execution State).

+

Scratch memory can be accessed in an interleaved manner using buffer +instructions with the scratch buffer descriptor and per wavefront scratch +offset, by the scratch instructions, or by flat instructions. Multi-dword +access is not supported except by flat and scratch instructions in +GFX9-GFX10.

+
+
Constant 32-bit

TODO

+
+
Buffer Fat Pointer

The buffer fat pointer is an experimental address space that is currently +unsupported in the backend. It exposes a non-integral pointer that is in +the future intended to support the modelling of 128-bit buffer descriptors +plus a 32-bit offset into the buffer (in total encapsulating a 160-bit +pointer), allowing normal LLVM load/store/atomic operations to be used to +model the buffer descriptors used heavily in graphics workloads targeting +the backend.

+
+
+
+
+

Memory Scopes

+

This section provides LLVM memory synchronization scopes supported by the AMDGPU +backend memory model when the target triple OS is amdhsa (see +Memory Model and Target Triples).

+

The memory model supported is based on the HSA memory model [HSA] which is +based in turn on HRF-indirect with scope inclusion [HRF]. The happens-before +relation is transitive over the synchronizes-with relation independent of scope +and synchronizes-with allows the memory scope instances to be inclusive (see +table AMDHSA LLVM Sync Scopes).

+

This is different to the OpenCL [OpenCL] memory model which does not have scope +inclusion and requires the memory scopes to exactly match. However, this +is conservatively correct for OpenCL.

+
+
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA LLVM Sync Scopes

LLVM Sync Scope

Description

none

The default: system.

+

Synchronizes with, and participates in modification +and seq_cst total orderings with, other operations +(except image operations) for all address spaces +(except private, or generic that accesses private) +provided the other operation’s sync scope is:

+
    +
  • system.

  • +
  • agent and executed by a thread on the same +agent.

  • +
  • workgroup and executed by a thread in the +same work-group.

  • +
  • wavefront and executed by a thread in the +same wavefront.

  • +
+

agent

Synchronizes with, and participates in modification +and seq_cst total orderings with, other operations +(except image operations) for all address spaces +(except private, or generic that accesses private) +provided the other operation’s sync scope is:

+
    +
  • system or agent and executed by a thread +on the same agent.

  • +
  • workgroup and executed by a thread in the +same work-group.

  • +
  • wavefront and executed by a thread in the +same wavefront.

  • +
+

workgroup

Synchronizes with, and participates in modification +and seq_cst total orderings with, other operations +(except image operations) for all address spaces +(except private, or generic that accesses private) +provided the other operation’s sync scope is:

+
    +
  • system, agent or workgroup and +executed by a thread in the same work-group.

  • +
  • wavefront and executed by a thread in the +same wavefront.

  • +
+

wavefront

Synchronizes with, and participates in modification +and seq_cst total orderings with, other operations +(except image operations) for all address spaces +(except private, or generic that accesses private) +provided the other operation’s sync scope is:

+
    +
  • system, agent, workgroup or +wavefront and executed by a thread in the +same wavefront.

  • +
+

singlethread

Only synchronizes with, and participates in +modification and seq_cst total orderings with, +other operations (except image operations) running +in the same thread for all address spaces (for +example, in signal handlers).

one-as

Same as system but only synchronizes with other +operations within the same address space.

agent-one-as

Same as agent but only synchronizes with other +operations within the same address space.

workgroup-one-as

Same as workgroup but only synchronizes with +other operations within the same address space.

wavefront-one-as

Same as wavefront but only synchronizes with +other operations within the same address space.

singlethread-one-as

Same as singlethread but only synchronizes with +other operations within the same address space.

+
+
+
+

LLVM IR Intrinsics

+

The AMDGPU backend implements the following LLVM IR intrinsics.

+

This section is WIP.

+
+
+

LLVM IR Attributes

+

The AMDGPU backend supports the following LLVM IR attributes.

+
+
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU LLVM IR Attributes

LLVM Attribute

Description

“amdgpu-flat-work-group-size”=“min,max”

Specify the minimum and maximum flat work group sizes that +will be specified when the kernel is dispatched. Generated +by the amdgpu_flat_work_group_size CLANG attribute [CLANG-ATTR].

“amdgpu-implicitarg-num-bytes”=“n”

Number of kernel argument bytes to add to the kernel +argument block size for the implicit arguments. This +varies by OS and language (for OpenCL see +OpenCL kernel implicit arguments appended for AMDHSA OS).

“amdgpu-num-sgpr”=“n”

Specifies the number of SGPRs to use. Generated by +the amdgpu_num_sgpr CLANG attribute [CLANG-ATTR].

“amdgpu-num-vgpr”=“n”

Specifies the number of VGPRs to use. Generated by the +amdgpu_num_vgpr CLANG attribute [CLANG-ATTR].

“amdgpu-waves-per-eu”=“m,n”

Specify the minimum and maximum number of waves per +execution unit. Generated by the amdgpu_waves_per_eu +CLANG attribute [CLANG-ATTR].

“amdgpu-ieee” true/false.

Specify whether the function expects the IEEE field of the +mode register to be set on entry. Overrides the default for +the calling convention.

“amdgpu-dx10-clamp” true/false.

Specify whether the function expects the DX10_CLAMP field of +the mode register to be set on entry. Overrides the default +for the calling convention.

“amdgpu-no-workitem-id-x”

Indicates the function does not depend on the value of the +llvm.amdgcn.workitem.id.x intrinsic. If a function is marked with this +attribute, or reached through a call site marked with this attribute, +the value returned by the intrinsic is undefined. The backend can +generally infer this during code generation, so typically there is no +benefit to frontends marking functions with this.

“amdgpu-no-workitem-id-y”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.workitem.id.y intrinsic.

“amdgpu-no-workitem-id-z”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.workitem.id.z intrinsic.

“amdgpu-no-workgroup-id-x”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.workgroup.id.x intrinsic.

“amdgpu-no-workgroup-id-y”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.workgroup.id.y intrinsic.

“amdgpu-no-workgroup-id-z”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.workgroup.id.z intrinsic.

“amdgpu-no-dispatch-ptr”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.dispatch.ptr intrinsic.

“amdgpu-no-implicitarg-ptr”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.implicitarg.ptr intrinsic.

“amdgpu-no-dispatch-id”

The same as amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.dispatch.id intrinsic.

“amdgpu-no-queue-ptr”

Similar to amdgpu-no-workitem-id-x, except for the +llvm.amdgcn.queue.ptr intrinsic. Note that unlike the other ABI hint +attributes, the queue pointer may be required in situations where the +intrinsic call does not directly appear in the program. Some subtargets +require the queue pointer to handle some addrspacecasts, as well +as the llvm.amdgcn.is.shared, llvm.amdgcn.is.private, llvm.trap, and +llvm.debug intrinsics.

+
+
+
+
+

ELF Code Object

+

The AMDGPU backend generates a standard ELF [ELF] relocatable code object that +can be linked by lld to produce a standard ELF shared code object which can +be loaded and executed on an AMDGPU target.

+ +
+

Sections

+

An AMDGPU target ELF code object has the standard ELF sections which include:

+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU ELF Sections

Name

Type

Attributes

.bss

SHT_NOBITS

SHF_ALLOC + SHF_WRITE

.data

SHT_PROGBITS

SHF_ALLOC + SHF_WRITE

.debug_*

SHT_PROGBITS

none

.dynamic

SHT_DYNAMIC

SHF_ALLOC

.dynstr

SHT_PROGBITS

SHF_ALLOC

.dynsym

SHT_PROGBITS

SHF_ALLOC

.got

SHT_PROGBITS

SHF_ALLOC + SHF_WRITE

.hash

SHT_HASH

SHF_ALLOC

.note

SHT_NOTE

none

.relaname

SHT_RELA

none

.rela.dyn

SHT_RELA

none

.rodata

SHT_PROGBITS

SHF_ALLOC

.shstrtab

SHT_STRTAB

none

.strtab

SHT_STRTAB

none

.symtab

SHT_SYMTAB

none

.text

SHT_PROGBITS

SHF_ALLOC + SHF_EXECINSTR

+
+

These sections have their standard meanings (see [ELF]) and are only generated +if needed.

+
+
.debug*

The standard DWARF sections. See DWARF Debug Information for +information on the DWARF produced by the AMDGPU backend.

+
+
.dynamic, .dynstr, .dynsym, .hash

The standard sections used by a dynamic loader.

+
+
.note

See Note Records for the note records supported by the AMDGPU +backend.

+
+
.relaname, .rela.dyn

For relocatable code objects, name is the name of the section to which the +relocation records apply. For example, .rela.text is the section name for +relocation records associated with the .text section.

+

For linked shared code objects, .rela.dyn contains all the relocation +records from each of the relocatable code object’s .relaname sections.

+

See Relocation Records for the relocation records supported by +the AMDGPU backend.

+
+
.text

The executable machine code for the kernels and functions they call. Generated +as position independent code. See Code Conventions for +information on conventions used in the ISA generation.

+
+
+
+
+

Note Records

+

The AMDGPU backend code object contains ELF note records in the .note +section. The set of generated notes and their semantics depend on the code +object version; see Code Object V2 Note Records and +Code Object V3 to V4 Note Records.

+

As required by ELFCLASS32 and ELFCLASS64, minimal zero-byte padding +must be generated after the name field to ensure the desc field is 4 +byte aligned. In addition, minimal zero-byte padding must be generated to +ensure the desc field size is a multiple of 4 bytes. The sh_addralign +field of the .note section must be at least 4 to indicate at least 4 byte +alignment.

+
+

Code Object V2 Note Records

+
+

Warning

+

Code object V2 is not the default code object version emitted by +this version of LLVM.

+
+

The AMDGPU backend code object uses the following ELF note record in the +.note section when compiling for code object V2.

+

The note record vendor field is “AMD”.

+

Additional note records may be present, but any which are not documented here +are deprecated and should not be used.

+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU Code Object V2 ELF Note Records

Name

Type

Description

“AMD”

NT_AMD_HSA_CODE_OBJECT_VERSION

Code object version.

“AMD”

NT_AMD_HSA_HSAIL

HSAIL properties generated by the HSAIL +Finalizer and not the LLVM compiler.

“AMD”

NT_AMD_HSA_ISA_VERSION

Target ISA version.

“AMD”

NT_AMD_HSA_METADATA

Metadata null terminated string in +YAML [YAML] textual format.

“AMD”

NT_AMD_HSA_ISA_NAME

Target ISA name.

+
+
+
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU Code Object V2 ELF Note Record Enumeration Values

Name

Value

NT_AMD_HSA_CODE_OBJECT_VERSION

1

NT_AMD_HSA_HSAIL

2

NT_AMD_HSA_ISA_VERSION

3

reserved

4-9

NT_AMD_HSA_METADATA

10

NT_AMD_HSA_ISA_NAME

11

+
+
+
NT_AMD_HSA_CODE_OBJECT_VERSION

Specifies the code object version number. The description field has the +following layout:

+
struct amdgpu_hsa_note_code_object_version_s {
+  uint32_t major_version;
+  uint32_t minor_version;
+};
+
+
+

The major_version has a value less than or equal to 2.

+
+
NT_AMD_HSA_HSAIL

Specifies the HSAIL properties used by the HSAIL Finalizer. The description +field has the following layout:

+
struct amdgpu_hsa_note_hsail_s {
+  uint32_t hsail_major_version;
+  uint32_t hsail_minor_version;
+  uint8_t profile;
+  uint8_t machine_model;
+  uint8_t default_float_round;
+};
+
+
+
+
NT_AMD_HSA_ISA_VERSION

Specifies the target ISA version. The description field has the following layout:

+
struct amdgpu_hsa_note_isa_s {
+  uint16_t vendor_name_size;
+  uint16_t architecture_name_size;
+  uint32_t major;
+  uint32_t minor;
+  uint32_t stepping;
+  char vendor_and_architecture_name[1];
+};
+
+
+

vendor_name_size and architecture_name_size are the length of the +vendor and architecture names respectively, including the NUL character.

+

vendor_and_architecture_name contains the NUL terminated string for the +vendor, immediately followed by the NUL terminated string for the +architecture.

+

This note record is used by the HSA runtime loader.

+

Code object V2 only supports a limited number of processors and has fixed +settings for target features. See +AMDGPU Code Object V2 Supported Processors and Fixed Target Feature Settings for a list of +processors and the corresponding target ID. In the table the note record ISA +name is a concatenation of the vendor name, architecture name, major, minor, +and stepping separated by a “:”.

+

The target ID column shows the processor name and fixed target features used +by the LLVM compiler. The LLVM compiler does not generate a +NT_AMD_HSA_HSAIL note record.

+

A code object generated by the Finalizer also uses code object V2 and always +generates a NT_AMD_HSA_HSAIL note record. The processor name and +sramecc target feature are as shown in +AMDGPU Code Object V2 Supported Processors and Fixed Target Feature Settings but the xnack +target feature is specified by the EF_AMDGPU_FEATURE_XNACK_V2 e_flags +bit.

+
+
NT_AMD_HSA_ISA_NAME

Specifies the target ISA name as a non-NUL terminated string.

+

This note record is not used by the HSA runtime loader.

+

See the NT_AMD_HSA_ISA_VERSION note record description of the code object +V2’s limited support of processors and fixed settings for target features.

+

See AMDGPU Code Object V2 Supported Processors and Fixed Target Feature Settings for a mapping +from the string to the corresponding target ID. If the xnack target +feature is supported and enabled, the string produced by the LLVM compiler +may have a +xnack appended. The Finalizer did not do the appending and +instead used the EF_AMDGPU_FEATURE_XNACK_V2 e_flags bit.

+
+
NT_AMD_HSA_METADATA

Specifies extensible metadata associated with the code objects executed on HSA +[HSA] compatible runtimes (see AMDGPU Operating Systems). It is required when the +target triple OS is amdhsa (see Target Triples). See +Code Object V2 Metadata for the syntax of the code object +metadata string.

+ + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU Code Object V2 Supported Processors and Fixed Target Feature Settings

Note Record ISA Name

Target ID

AMD:AMDGPU:6:0:0

gfx600

AMD:AMDGPU:6:0:1

gfx601

AMD:AMDGPU:6:0:2

gfx602

AMD:AMDGPU:7:0:0

gfx700

AMD:AMDGPU:7:0:1

gfx701

AMD:AMDGPU:7:0:2

gfx702

AMD:AMDGPU:7:0:3

gfx703

AMD:AMDGPU:7:0:4

gfx704

AMD:AMDGPU:7:0:5

gfx705

AMD:AMDGPU:8:0:0

gfx802

AMD:AMDGPU:8:0:1

gfx801:xnack+

AMD:AMDGPU:8:0:2

gfx802

AMD:AMDGPU:8:0:3

gfx803

AMD:AMDGPU:8:0:4

gfx803

AMD:AMDGPU:8:0:5

gfx805

AMD:AMDGPU:8:1:0

gfx810:xnack+

AMD:AMDGPU:9:0:0

gfx900:xnack-

AMD:AMDGPU:9:0:1

gfx900:xnack+

AMD:AMDGPU:9:0:2

gfx902:xnack-

AMD:AMDGPU:9:0:3

gfx902:xnack+

AMD:AMDGPU:9:0:4

gfx904:xnack-

AMD:AMDGPU:9:0:5

gfx904:xnack+

AMD:AMDGPU:9:0:6

gfx906:sramecc-:xnack-

AMD:AMDGPU:9:0:7

gfx906:sramecc-:xnack+

AMD:AMDGPU:9:0:12

gfx90c:xnack-

+
+
+
+
+

Code Object V3 to V4 Note Records

+

The AMDGPU backend code object uses the following ELF note record in the +.note section when compiling for code object V3 to V4.

+

The note record vendor field is “AMDGPU”.

+

Additional note records may be present, but any which are not documented here +are deprecated and should not be used.

+
+
+ +++++ + + + + + + + + + + + + +
AMDGPU Code Object V3 to V4 ELF Note Records

Name

Type

Description

“AMDGPU”

NT_AMDGPU_METADATA

Metadata in Message Pack [MsgPack] +binary format.

+
+
+
+ ++++ + + + + + + + + + + + + + +
AMDGPU Code Object V3 to V4 ELF Note Record Enumeration Values

Name

Value

reserved

0-31

NT_AMDGPU_METADATA

32

+
+
+
NT_AMDGPU_METADATA

Specifies extensible metadata associated with an AMDGPU code object. It is +encoded as a map in the Message Pack [MsgPack] binary data format. See +Code Object V3 Metadata and +Code Object V4 Metadata for the map keys defined for the +amdhsa OS.

+
+
+
+
+
+

Symbols

+

Symbols include the following:

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU ELF Symbols

Name

Type

Section

Description

link-name

STT_OBJECT

    +
  • .data

  • +
  • .rodata

  • +
  • .bss

  • +
+

Global variable

link-name.kd

STT_OBJECT

    +
  • .rodata

  • +
+

Kernel descriptor

link-name

STT_FUNC

    +
  • .text

  • +
+

Kernel entry point

link-name

STT_OBJECT

    +
  • SHN_AMDGPU_LDS

  • +
+

Global variable in LDS

+
+
+
Global variable

Global variables both used and defined by the compilation unit.

+

If the symbol is defined in the compilation unit then it is allocated in the +appropriate section according to whether it has initialized data or is read-only.

+

If the symbol is external then its section is SHN_UNDEF and the loader +will resolve relocations using the definition provided by another code object +or explicitly defined by the runtime.

+

If the symbol resides in local/group memory (LDS) then its section is the +special processor specific section name SHN_AMDGPU_LDS, and the +st_value field describes alignment requirements as it does for common +symbols.

+
+
Kernel descriptor

Every HSA kernel has an associated kernel descriptor. It is the address of the +kernel descriptor that is used in the AQL dispatch packet used to invoke the +kernel, not the kernel entry point. The layout of the HSA kernel descriptor is +defined in Kernel Descriptor.

+
+
Kernel entry point

Every HSA kernel also has a symbol for its machine code entry point.

+
+
+
+
+

Relocation Records

+

The AMDGPU backend generates Elf64_Rela relocation records. Supported +relocatable fields are:

+
+
word32

This specifies a 32-bit field occupying 4 bytes with arbitrary byte +alignment. These values use the same byte order as other word values in the +AMDGPU architecture.

+
+
word64

This specifies a 64-bit field occupying 8 bytes with arbitrary byte +alignment. These values use the same byte order as other word values in the +AMDGPU architecture.

+
+
+

The following notations are used for specifying relocation calculations:

+
+
A

Represents the addend used to compute the value of the relocatable field.

+
+
G

Represents the offset into the global offset table at which the relocation +entry’s symbol will reside during execution.

+
+
GOT

Represents the address of the global offset table.

+
+
P

Represents the place (section offset for et_rel or address for et_dyn) +of the storage unit being relocated (computed using r_offset).

+
+
S

Represents the value of the symbol whose index resides in the relocation +entry. Relocations not using this must specify a symbol index of +STN_UNDEF.

+
+
B

Represents the base address of a loaded executable or shared object which is +the difference between the ELF address and the actual load address. +Relocations using this are only valid in executable or shared objects.

+
+
+

The following relocation types are supported:

+
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU ELF Relocation Records

Relocation Type

Kind

Value

Field

Calculation

R_AMDGPU_NONE

0

none

none

R_AMDGPU_ABS32_LO

Static, +Dynamic

1

word32

(S + A) & 0xFFFFFFFF

R_AMDGPU_ABS32_HI

Static, +Dynamic

2

word32

(S + A) >> 32

R_AMDGPU_ABS64

Static, +Dynamic

3

word64

S + A

R_AMDGPU_REL32

Static

4

word32

S + A - P

R_AMDGPU_REL64

Static

5

word64

S + A - P

R_AMDGPU_ABS32

Static, +Dynamic

6

word32

S + A

R_AMDGPU_GOTPCREL

Static

7

word32

G + GOT + A - P

R_AMDGPU_GOTPCREL32_LO

Static

8

word32

(G + GOT + A - P) & 0xFFFFFFFF

R_AMDGPU_GOTPCREL32_HI

Static

9

word32

(G + GOT + A - P) >> 32

R_AMDGPU_REL32_LO

Static

10

word32

(S + A - P) & 0xFFFFFFFF

R_AMDGPU_REL32_HI

Static

11

word32

(S + A - P) >> 32

reserved

12

R_AMDGPU_RELATIVE64

Dynamic

13

word64

B + A

R_AMDGPU_REL16

Static

14

word16

((S + A - P) - 4) / 4

+
+

R_AMDGPU_ABS32_LO and R_AMDGPU_ABS32_HI are only supported by +the mesa3d OS, which does not support R_AMDGPU_ABS64.

+

There is no current OS loader support for 32-bit programs and so +R_AMDGPU_ABS32 is not used.

+
+
+

Loaded Code Object Path Uniform Resource Identifier (URI)

+

The AMD GPU code object loader represents the path of the ELF shared object from +which the code object was loaded as a textual Uniform Resource Identifier (URI). +Note that the code object is the in memory loaded relocated form of the ELF +shared object. Multiple code objects may be loaded at different memory +addresses in the same process from the same ELF shared object.

+

The loaded code object path URI syntax is defined by the following BNF syntax:

+
code_object_uri ::== file_uri | memory_uri
+file_uri        ::== "file://" file_path [ range_specifier ]
+memory_uri      ::== "memory://" process_id range_specifier
+range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
+file_path       ::== URI_ENCODED_OS_FILE_PATH
+process_id      ::== DECIMAL_NUMBER
+number          ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
+
+
+
+
number

Is a C integral literal where hexadecimal values are prefixed by “0x” or “0X”, +and octal values by “0”.

+
+
file_path

Is the file’s path specified as a URI encoded UTF-8 string. In URI encoding, +every character that is not in the regular expression [a-zA-Z0-9/_.~-] is +encoded as two uppercase hexadecimal digits preceded by “%”. Directories in +the path are separated by “/”.

+
+
offset

Is a 0-based byte offset to the start of the code object. For a file URI, it +is from the start of the file specified by the file_path, and if omitted +defaults to 0. For a memory URI, it is the memory address and is required.

+
+
size

Is the number of bytes in the code object. For a file URI, if omitted it +defaults to the size of the file. It is required for a memory URI.

+
+
process_id

Is the identity of the process owning the memory. For Linux it is the C +unsigned integral decimal literal for the process ID (PID).

+
+
+

For example:

+
file:///dir1/dir2/file1
+file:///dir3/dir4/file2#offset=0x2000&size=3000
+memory://1234#offset=0x20000&size=3000
+
+
+
+
+
+

DWARF Debug Information

+
+

Warning

+

This section describes provisional support for AMDGPU DWARF [DWARF] that +is not currently fully implemented and is subject to change.

+
+

AMDGPU generates DWARF [DWARF] debugging information ELF sections (see +ELF Code Object) which contain information that maps the code +object executable code and data to the source language constructs. It can be +used by tools such as debuggers and profilers. It uses features defined in +DWARF Extensions For Heterogeneous Debugging that are made available in +DWARF Version 4 and DWARF Version 5 as an LLVM vendor extension.

+

This section defines the AMDGPU target architecture specific DWARF mappings.

+
+

Register Identifier

+

This section defines the AMDGPU target architecture register numbers used in +DWARF operation expressions (see DWARF Version 5 section 2.5 and +DWARF Operation Expressions) and Call Frame Information +instructions (see DWARF Version 5 section 6.4 and +Call Frame Information).

+

A single code object can contain code for kernels that have different wavefront +sizes. The vector registers and some scalar registers are based on the wavefront +size. AMDGPU defines distinct DWARF registers for each wavefront size. This +simplifies the consumer of the DWARF so that each register has a fixed size, +rather than being dynamic according to the wavefront size mode. Similarly, +distinct DWARF registers are defined for those registers that vary in size +according to the process address size. This allows a consumer to treat a +specific AMDGPU processor as a single architecture regardless of how it is +configured at run time. The compiler explicitly specifies the DWARF registers +that match the mode in which the code it is generating will be executed.

+

DWARF registers are encoded as numbers, which are mapped to architecture +registers. The mapping for AMDGPU is defined in +AMDGPU DWARF Register Mapping. All AMDGPU targets use the same +mapping.

+ + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU DWARF Register Mapping

DWARF Register

AMDGPU Register

Bit Size

Description

0

PC_32

32

Program Counter (PC) when +executing in a 32-bit process +address space. Used in the CFI to +describe the PC of the calling +frame.

1

EXEC_MASK_32

32

Execution Mask Register when +executing in wavefront 32 mode.

2-15

Reserved

Reserved for highly accessed +registers using DWARF shortcut.

16

PC_64

64

Program Counter (PC) when +executing in a 64-bit process +address space. Used in the CFI to +describe the PC of the calling +frame.

17

EXEC_MASK_64

64

Execution Mask Register when +executing in wavefront 64 mode.

18-31

Reserved

Reserved for highly accessed +registers using DWARF shortcut.

32-95

SGPR0-SGPR63

32

Scalar General Purpose +Registers.

96-127

Reserved

Reserved for frequently accessed +registers using DWARF 1-byte ULEB.

128

STATUS

32

Status Register.

129-511

Reserved

Reserved for future Scalar +Architectural Registers.

512

VCC_32

32

Vector Condition Code Register +when executing in wavefront 32 +mode.

513-767

Reserved

Reserved for future Vector +Architectural Registers when +executing in wavefront 32 mode.

768

VCC_64

64

Vector Condition Code Register +when executing in wavefront 64 +mode.

769-1023

Reserved

Reserved for future Vector +Architectural Registers when +executing in wavefront 64 mode.

1024-1087

Reserved

Reserved for padding.

1088-1129

SGPR64-SGPR105

32

Scalar General Purpose Registers.

1130-1535

Reserved

Reserved for future Scalar +General Purpose Registers.

1536-1791

VGPR0-VGPR255

32*32

Vector General Purpose Registers +when executing in wavefront 32 +mode.

1792-2047

Reserved

Reserved for future Vector +General Purpose Registers when +executing in wavefront 32 mode.

2048-2303

AGPR0-AGPR255

32*32

Vector Accumulation Registers +when executing in wavefront 32 +mode.

2304-2559

Reserved

Reserved for future Vector +Accumulation Registers when +executing in wavefront 32 mode.

2560-2815

VGPR0-VGPR255

64*32

Vector General Purpose Registers +when executing in wavefront 64 +mode.

2816-3071

Reserved

Reserved for future Vector +General Purpose Registers when +executing in wavefront 64 mode.

3072-3327

AGPR0-AGPR255

64*32

Vector Accumulation Registers +when executing in wavefront 64 +mode.

3328-3583

Reserved

Reserved for future Vector +Accumulation Registers when +executing in wavefront 64 mode.

+

The vector registers are represented as the full size for the wavefront. They +are organized as consecutive dwords (32-bits), one per lane, with the dword at +the least significant bit position corresponding to lane 0 and so forth. DWARF +location expressions involving the DW_OP_LLVM_offset and +DW_OP_LLVM_push_lane operations are used to select the part of the vector +register corresponding to the lane that is executing the current thread of +execution in languages that are implemented using a SIMD or SIMT execution +model.

+

If the wavefront size is 32 lanes then the wavefront 32 mode register +definitions are used. If the wavefront size is 64 lanes then the wavefront 64 +mode register definitions are used. Some AMDGPU targets support executing in +both wavefront 32 and wavefront 64 mode. The register definitions corresponding +to the wavefront mode of the generated code will be used.

+

If code is generated to execute in a 32-bit process address space, then the +32-bit process address space register definitions are used. If code is generated +to execute in a 64-bit process address space, then the 64-bit process address +space register definitions are used. The amdgcn target only supports the +64-bit process address space.

+
+
+

Address Class Identifier

+

The DWARF address class represents the source language memory space. See DWARF +Version 5 section 2.12 which is updated by the DWARF Extensions For +Heterogeneous Debugging section Segmented Addresses.

+

The DWARF address class mapping used for AMDGPU is defined in +AMDGPU DWARF Address Class Mapping.

+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU DWARF Address Class Mapping

DWARF

AMDGPU

Address Class Name

Value

Address Space

DW_ADDR_none

0x0000

Generic (Flat)

DW_ADDR_LLVM_global

0x0001

Global

DW_ADDR_LLVM_constant

0x0002

Global

DW_ADDR_LLVM_group

0x0003

Local (group/LDS)

DW_ADDR_LLVM_private

0x0004

Private (Scratch)

DW_ADDR_AMDGPU_region

0x8000

Region (GDS)

+

The DWARF address class values defined in the DWARF Extensions For +Heterogeneous Debugging section Segmented Addresses are used.

+

In addition, DW_ADDR_AMDGPU_region is encoded as a vendor extension. This is +available for use for the AMD extension for access to the hardware GDS memory +which is scratchpad memory allocated per device.

+

For AMDGPU if no DW_AT_address_class attribute is present, then the default +address class of DW_ADDR_none is used.

+

See Address Space Identifier for information on the AMDGPU +mapping of DWARF address classes to DWARF address spaces, including address size +and NULL value.

+
+
+

Address Space Identifier

+

DWARF address spaces correspond to target architecture specific linear +addressable memory areas. See DWARF Version 5 section 2.12 and DWARF Extensions +For Heterogeneous Debugging section Segmented Addresses.

+

The DWARF address space mapping used for AMDGPU is defined in +AMDGPU DWARF Address Space Mapping.

+ + ++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU DWARF Address Space Mapping

DWARF

AMDGPU

Notes

Address Space Name

Value

Address

Bit Size

Address Space

64-bit +process +address +space

32-bit +process +address +space

DW_ASPACE_none

0x00

64

32

Global

default address space

DW_ASPACE_AMDGPU_generic

0x01

64

32

Generic (Flat)

DW_ASPACE_AMDGPU_region

0x02

32

32

Region (GDS)

DW_ASPACE_AMDGPU_local

0x03

32

32

Local (group/LDS)

Reserved

0x04

DW_ASPACE_AMDGPU_private_lane

0x05

32

32

Private (Scratch)

focused lane

DW_ASPACE_AMDGPU_private_wave

0x06

32

32

Private (Scratch)

unswizzled wavefront

+

See Address Spaces for information on the AMDGPU address spaces +including address size and NULL value.

+

The DW_ASPACE_none address space is the default target architecture address +space used in DWARF operations that do not specify an address space. It +therefore has to map to the global address space so that the DW_OP_addr* and +related operations can refer to addresses in the program code.

+

The DW_ASPACE_AMDGPU_generic address space allows location expressions to +specify the flat address space. If the address corresponds to an address in the +local address space, then it corresponds to the wavefront that is executing the +focused thread of execution. If the address corresponds to an address in the +private address space, then it corresponds to the lane that is executing the +focused thread of execution for languages that are implemented using a SIMD or +SIMT execution model.

+
+

Note

+

CUDA-like languages such as HIP that do not have address spaces in the +language type system, but do allow variables to be allocated in different +address spaces, need to explicitly specify the DW_ASPACE_AMDGPU_generic +address space in the DWARF expression operations as the default address space +is the global address space.

+
+

The DW_ASPACE_AMDGPU_local address space allows location expressions to +specify the local address space corresponding to the wavefront that is executing +the focused thread of execution.

+

The DW_ASPACE_AMDGPU_private_lane address space allows location expressions +to specify the private address space corresponding to the lane that is executing +the focused thread of execution for languages that are implemented using a SIMD +or SIMT execution model.

+

The DW_ASPACE_AMDGPU_private_wave address space allows location expressions +to specify the unswizzled private address space corresponding to the wavefront +that is executing the focused thread of execution. The wavefront view of private +memory is the per wavefront unswizzled backing memory layout defined in +Address Spaces, such that address 0 corresponds to the first +location for the backing memory of the wavefront (namely the address is not +offset by wavefront-scratch-base). The following formula can be used to +convert from a DW_ASPACE_AMDGPU_private_lane address to a +DW_ASPACE_AMDGPU_private_wave address:

+
private-address-wavefront =
+  ((private-address-lane / 4) * wavefront-size * 4) +
+  (wavefront-lane-id * 4) + (private-address-lane % 4)
+
+
+

If the DW_ASPACE_AMDGPU_private_lane address is dword aligned, and the start +of the dwords for each lane starting with lane 0 is required, then this +simplifies to:

+
private-address-wavefront =
+  private-address-lane * wavefront-size
+
+
+

A compiler can use the DW_ASPACE_AMDGPU_private_wave address space to read a +complete spilled vector register back into a complete vector register in the +CFI. The frame pointer can be a private lane address which is dword aligned, +which can be shifted to multiply by the wavefront size, and then used to form a +private wavefront address that gives a location for a contiguous set of dwords, +one per lane, where the vector register dwords are spilled. The compiler knows +the wavefront size since it generates the code. Note that the type of the +address may have to be converted as the size of a +DW_ASPACE_AMDGPU_private_lane address may be smaller than the size of a +DW_ASPACE_AMDGPU_private_wave address.

+
+
+

Lane identifier

+

DWARF lane identifiers specify a target architecture lane position for hardware +that executes in a SIMD or SIMT manner, and on which a source language maps its +threads of execution onto those lanes. The DWARF lane identifier is pushed by +the DW_OP_LLVM_push_lane DWARF expression operation. See DWARF Version 5 +section 2.5 which is updated by DWARF Extensions For Heterogeneous Debugging +section DWARF Operation Expressions.

+

For AMDGPU, the lane identifier corresponds to the hardware lane ID of a +wavefront. It is numbered from 0 to the wavefront size minus 1.

+
+
+

Operation Expressions

+

DWARF expressions are used to compute program values and the locations of +program objects. See DWARF Version 5 section 2.5 and +DWARF Operation Expressions.

+

DWARF location descriptions describe how to access storage which includes memory +and registers. When accessing storage on AMDGPU, bytes are ordered with least +significant bytes first, and bits are ordered within bytes with least +significant bits first.

+

For AMDGPU CFI expressions, DW_OP_LLVM_select_bit_piece is used to describe +unwinding vector registers that are spilled under the execution mask to memory: +the zero-single location description is the vector register, and the one-single +location description is the spilled memory location description. The +DW_OP_LLVM_form_aspace_address is used to specify the address space of the +memory location description.

+

In AMDGPU expressions, DW_OP_LLVM_select_bit_piece is used by the +DW_AT_LLVM_lane_pc attribute expression where divergent control flow is +controlled by the execution mask. An undefined location description together +with DW_OP_LLVM_extend is used to indicate the lane was not active on entry +to the subprogram. See DW_AT_LLVM_lane_pc for an example.

+
+
+

Debugger Information Entry Attributes

+

This section describes how certain debugger information entry attributes are +used by AMDGPU. See the sections in DWARF Version 5 section 2 which are updated +by DWARF Extensions For Heterogeneous Debugging section +Debugging Information Entry Attributes.

+
+

DW_AT_LLVM_lane_pc

+

For AMDGPU, the DW_AT_LLVM_lane_pc attribute is used to specify the program +location of the separate lanes of a SIMT thread.

+

If the lane is an active lane then this will be the same as the current program +location.

+

If the lane is inactive, but was active on entry to the subprogram, then this is +the program location in the subprogram at which execution of the lane is +conceptually positioned.

+

If the lane was not active on entry to the subprogram, then this will be the +undefined location. A client debugger can check if the lane is part of a valid +work-group by checking that the lane is in the range of the associated +work-group within the grid, accounting for partial work-groups. If it is not, +then the debugger can omit any information for the lane. Otherwise, the debugger +may repeatedly unwind the stack and inspect the DW_AT_LLVM_lane_pc of the +calling subprogram until it finds a non-undefined location. Conceptually the +lane only has the call frames for which it has a non-undefined +DW_AT_LLVM_lane_pc.

+

The following example illustrates how the AMDGPU backend can generate a DWARF +location list expression for the nested IF/THEN/ELSE structures of the +following subprogram pseudo code for a target with 64 lanes per wavefront.

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
SUBPROGRAM X
+BEGIN
+  a;
+  IF (c1) THEN
+    b;
+    IF (c2) THEN
+      c;
+    ELSE
+      d;
+    ENDIF
+    e;
+  ELSE
+    f;
+  ENDIF
+  g;
+END
+
+
+

The AMDGPU backend may generate the following pseudo LLVM MIR to manipulate the +execution mask (EXEC) to linearize the control flow. The condition is +evaluated to make a mask of the lanes for which the condition evaluates to true. +First the THEN region is executed by setting the EXEC mask to the +logical AND of the current EXEC mask with the condition mask. Then the +ELSE region is executed by setting the EXEC mask to the logical AND of +the negated EXEC mask with the EXEC mask saved at the start of the region. After the IF/THEN/ELSE +region the EXEC mask is restored to the value it had at the beginning of the +region. This is shown below. Other approaches are possible, but the basic +concept is the same.

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
$lex_start:
+  a;
+  %1 = EXEC
+  %2 = c1
+$lex_1_start:
+  EXEC = %1 & %2
+$if_1_then:
+    b;
+    %3 = EXEC
+    %4 = c2
+$lex_1_1_start:
+    EXEC = %3 & %4
+$lex_1_1_then:
+      c;
+    EXEC = ~EXEC & %3
+$lex_1_1_else:
+      d;
+    EXEC = %3
+$lex_1_1_end:
+    e;
+  EXEC = ~EXEC & %1
+$lex_1_else:
+    f;
+  EXEC = %1
+$lex_1_end:
+  g;
+$lex_end:
+
+
+

To create the DWARF location list expression that defines the location +description of a vector of lane program locations, the LLVM MIR DBG_VALUE +pseudo instruction can be used to annotate the linearized control flow. This can +be done by defining an artificial variable for the lane PC. The DWARF location +list expression created for it is used as the value of the +DW_AT_LLVM_lane_pc attribute on the subprogram’s debugger information entry.

+

A DWARF procedure is defined for each well nested structured control flow region +which provides the conceptual lane program location for a lane if it is not +active (namely it is divergent). The DWARF operation expression for each region +conceptually inherits the value of the immediately enclosing region and modifies +it according to the semantics of the region.

+

For an IF/THEN/ELSE region the divergent program location is at the start of +the region for the THEN region since it is executed first. For the ELSE +region the divergent program location is at the end of the IF/THEN/ELSE +region since the THEN region has completed.

+

The lane PC artificial variable is assigned at each region transition. It uses +the immediately enclosing region’s DWARF procedure to compute the program +location for each lane assuming they are divergent, and then modifies the result +by inserting the current program location for each lane that the EXEC mask +indicates is active.

+

By having separate DWARF procedures for each region, they can be reused to +define the value for any nested region. This reduces the total size of the DWARF +operation expressions.

+

The following provides an example using pseudo LLVM MIR.

+
  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
$lex_start:
+  DEFINE_DWARF %__uint_64 = DW_TAG_base_type[
+    DW_AT_name = "__uint64";
+    DW_AT_byte_size = 8;
+    DW_AT_encoding = DW_ATE_unsigned;
+  ];
+  DEFINE_DWARF %__active_lane_pc = DW_TAG_dwarf_procedure[
+    DW_AT_name = "__active_lane_pc";
+    DW_AT_location = [
+      DW_OP_regx PC;
+      DW_OP_LLVM_extend 64, 64;
+      DW_OP_regval_type EXEC, %__uint_64;
+      DW_OP_LLVM_select_bit_piece 64, 64;
+    ];
+  ];
+  DEFINE_DWARF %__divergent_lane_pc = DW_TAG_dwarf_procedure[
+    DW_AT_name = "__divergent_lane_pc";
+    DW_AT_location = [
+      DW_OP_LLVM_undefined;
+      DW_OP_LLVM_extend 64, 64;
+    ];
+  ];
+  DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[
+    DW_OP_call_ref %__divergent_lane_pc;
+    DW_OP_call_ref %__active_lane_pc;
+  ];
+  a;
+  %1 = EXEC;
+  DBG_VALUE %1, $noreg, %__lex_1_save_exec;
+  %2 = c1;
+$lex_1_start:
+  EXEC = %1 & %2;
+$lex_1_then:
+    DEFINE_DWARF %__divergent_lane_pc_1_then = DW_TAG_dwarf_procedure[
+      DW_AT_name = "__divergent_lane_pc_1_then";
+      DW_AT_location = DIExpression[
+        DW_OP_call_ref %__divergent_lane_pc;
+        DW_OP_addrx &lex_1_start;
+        DW_OP_stack_value;
+        DW_OP_LLVM_extend 64, 64;
+        DW_OP_call_ref %__lex_1_save_exec;
+        DW_OP_deref_type 64, %__uint_64;
+        DW_OP_LLVM_select_bit_piece 64, 64;
+      ];
+    ];
+    DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[
+      DW_OP_call_ref %__divergent_lane_pc_1_then;
+      DW_OP_call_ref %__active_lane_pc;
+    ];
+    b;
+    %3 = EXEC;
+    DBG_VALUE %3, $noreg, %__lex_1_1_save_exec;
+    %4 = c2;
+$lex_1_1_start:
+    EXEC = %3 & %4;
+$lex_1_1_then:
+      DEFINE_DWARF %__divergent_lane_pc_1_1_then = DW_TAG_dwarf_procedure[
+        DW_AT_name = "__divergent_lane_pc_1_1_then";
+        DW_AT_location = DIExpression[
+          DW_OP_call_ref %__divergent_lane_pc_1_then;
+          DW_OP_addrx &lex_1_1_start;
+          DW_OP_stack_value;
+          DW_OP_LLVM_extend 64, 64;
+          DW_OP_call_ref %__lex_1_1_save_exec;
+          DW_OP_deref_type 64, %__uint_64;
+          DW_OP_LLVM_select_bit_piece 64, 64;
+        ];
+      ];
+      DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[
+        DW_OP_call_ref %__divergent_lane_pc_1_1_then;
+        DW_OP_call_ref %__active_lane_pc;
+      ];
+      c;
+    EXEC = ~EXEC & %3;
+$lex_1_1_else:
+      DEFINE_DWARF %__divergent_lane_pc_1_1_else = DW_TAG_dwarf_procedure[
+        DW_AT_name = "__divergent_lane_pc_1_1_else";
+        DW_AT_location = DIExpression[
+          DW_OP_call_ref %__divergent_lane_pc_1_then;
+          DW_OP_addrx &lex_1_1_end;
+          DW_OP_stack_value;
+          DW_OP_LLVM_extend 64, 64;
+          DW_OP_call_ref %__lex_1_1_save_exec;
+          DW_OP_deref_type 64, %__uint_64;
+          DW_OP_LLVM_select_bit_piece 64, 64;
+        ];
+      ];
+      DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[
+        DW_OP_call_ref %__divergent_lane_pc_1_1_else;
+        DW_OP_call_ref %__active_lane_pc;
+      ];
+      d;
+    EXEC = %3;
+$lex_1_1_end:
+    DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[
+      DW_OP_call_ref %__divergent_lane_pc_1_then;
+      DW_OP_call_ref %__active_lane_pc;
+    ];
+    e;
+  EXEC = ~EXEC & %1;
+$lex_1_else:
+    DEFINE_DWARF %__divergent_lane_pc_1_else = DW_TAG_dwarf_procedure[
+      DW_AT_name = "__divergent_lane_pc_1_else";
+      DW_AT_location = DIExpression[
+        DW_OP_call_ref %__divergent_lane_pc;
+        DW_OP_addrx &lex_1_end;
+        DW_OP_stack_value;
+        DW_OP_LLVM_extend 64, 64;
+        DW_OP_call_ref %__lex_1_save_exec;
+        DW_OP_deref_type 64, %__uint_64;
+        DW_OP_LLVM_select_bit_piece 64, 64;
+      ];
+    ];
+    DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[
+      DW_OP_call_ref %__divergent_lane_pc_1_else;
+      DW_OP_call_ref %__active_lane_pc;
+    ];
+    f;
+  EXEC = %1;
+$lex_1_end:
+  DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[
+    DW_OP_call_ref %__divergent_lane_pc;
+    DW_OP_call_ref %__active_lane_pc;
+  ];
+  g;
+$lex_end:
+
+
+

The DWARF procedure %__active_lane_pc is used to update the lane pc elements +that are active, with the current program location.

+

Artificial variables %__lex_1_save_exec and %__lex_1_1_save_exec are created for +the execution masks saved on entry to a region. Using the DBG_VALUE pseudo +instruction, location list entries will be created that describe where the +artificial variables are allocated at any given program location. The compiler +may allocate them to registers or spill them to memory.

+

The DWARF procedures for each region use the values of the saved execution mask +artificial variables to only update the lanes that are active on entry to the +region. All other lanes retain the value of the enclosing region where they were +last active. If they were not active on entry to the subprogram, then they will have +the undefined location description.

+

Other structured control flow regions can be handled similarly. For example, +loops would set the divergent program location for the region at the end of the +loop. Any lanes active will be in the loop, and any lanes not active must have +exited the loop.

+

An IF/THEN/ELSEIF/ELSEIF/... region can be treated as a nest of +IF/THEN/ELSE regions.

+

The DWARF procedures can use the active lane artificial variable described in +DW_AT_LLVM_active_lane rather than the actual +EXEC mask in order to support whole or quad wavefront mode.

+
+
+

DW_AT_LLVM_active_lane

+

The DW_AT_LLVM_active_lane attribute on a subprogram debugger information +entry is used to specify the lanes that are conceptually active for a SIMT +thread.

+

The execution mask may be modified to implement whole or quad wavefront mode +operations. For example, all lanes may need to temporarily be made active to +execute a whole wavefront operation. Such regions would save the EXEC mask, +update it to enable the necessary lanes, perform the operations, and then +restore the EXEC mask from the saved value. While executing the whole +wavefront region, the conceptual execution mask is the saved value, not the +EXEC value.

+

This is handled by defining an artificial variable for the active lane mask. The +active lane mask artificial variable would be the actual EXEC mask for +normal regions, and the saved execution mask for regions where the mask is +temporarily updated. The location list expression created for this artificial +variable is used to define the value of the DW_AT_LLVM_active_lane +attribute.

+
+
+

DW_AT_LLVM_augmentation

+

For AMDGPU, the DW_AT_LLVM_augmentation attribute of a compilation unit +debugger information entry has the following value for the augmentation string:

+
[amdgpu:v0.0]
+
+
+

The “vX.Y” specifies the major X and minor Y version number of the AMDGPU +extensions used in the DWARF of the compilation unit. The version number +conforms to [SEMVER].

+
+
+
+

Call Frame Information

+

DWARF Call Frame Information (CFI) describes how a consumer can virtually +unwind call frames in a running process or core dump. See DWARF Version 5 +section 6.4 and Call Frame Information.

+

For AMDGPU, the Common Information Entry (CIE) fields have the following values:

+
    +
  1. augmentation string contains the following null-terminated UTF-8 string:

    +
    [amd:v0.0]
    +
    +
    +

    The vX.Y specifies the major X and minor Y version number of the AMDGPU +extensions used in this CIE or in the FDEs that use it. The version number +conforms to [SEMVER].

    +
  2. +
  3. address_size for the Global address space is defined in +Address Space Identifier.

  4. +
  5. segment_selector_size is 0 as AMDGPU does not use a segment selector.

  6. +
  7. code_alignment_factor is 4 bytes.

    +
  8. +
  9. data_alignment_factor is 4 bytes.

    +
  10. +
  11. return_address_register is PC_32 for 32-bit processes and PC_64 +for 64-bit processes defined in Register Identifier.

  12. +
  13. initial_instructions Since a subprogram X with fewer registers can be +called from subprogram Y that has more allocated, X will not change any of +the extra registers as it cannot access them. Therefore, the default rule +for all columns is same value.

  14. +
+

For AMDGPU the register number follows the numbering defined in +Register Identifier.

+

For AMDGPU the instructions are variable size. A consumer can subtract 1 from +the return address to get the address of a byte within the call site +instructions. See DWARF Version 5 section 6.4.4.

+
+
+

Accelerated Access

+

See DWARF Version 5 section 6.1.

+
+

Lookup By Name Section Header

+

See DWARF Version 5 section 6.1.1.4.1 and Lookup By Name.

+

For AMDGPU the lookup by name section header table:

+

augmentation_string_size (uword)

+
+

Set to the length of the augmentation_string value which is always a +multiple of 4.

+
+

augmentation_string (sequence of UTF-8 characters)

+
+

Contains the following UTF-8 string null padded to a multiple of 4 bytes:

+
[amdgpu:v0.0]
+
+
+

The “vX.Y” specifies the major X and minor Y version number of the AMDGPU +extensions used in the DWARF of this index. The version number conforms to +[SEMVER].

+
+

Note

+

This is different to the DWARF Version 5 definition that requires the first +4 characters to be the vendor ID. But this is consistent with the other +augmentation strings and does allow multiple vendor contributions. However, +backwards compatibility may be more desirable.

+
+
+
+
+

Lookup By Address Section Header

+

See DWARF Version 5 section 6.1.2.

+

For AMDGPU the lookup by address section header table:

+

address_size (ubyte)

+
+

Match the address size for the Global address space defined in +Address Space Identifier.

+
+

segment_selector_size (ubyte)

+
+

AMDGPU does not use a segment selector so this is 0. The entries in the +.debug_aranges do not have a segment selector.

+
+
+
+
+

Line Number Information

+

See DWARF Version 5 section 6.2 and Line Number Information.

+

AMDGPU does not use the isa state machine register and always sets it to 0. +The instruction set must be obtained from the ELF file header e_flags field +in the EF_AMDGPU_MACH bit position (see ELF Header). See DWARF Version 5 section 6.2.2.

+

For AMDGPU the line number program header fields have the following values (see +DWARF Version 5 section 6.2.4):

+
+
address_size (ubyte)

Matches the address size for the Global address space defined in +Address Space Identifier.

+
+
segment_selector_size (ubyte)

AMDGPU does not use a segment selector so this is 0.

+
+
minimum_instruction_length (ubyte)

For GFX9-GFX10 this is 4.

+
+
maximum_operations_per_instruction (ubyte)

For GFX9-GFX10 this is 1.

+
+
+

Source text for online-compiled programs (for example, those compiled by the +OpenCL language runtime) may be embedded into the DWARF Version 5 line table. +See DWARF Version 5 section 6.2.4.1 which is updated by DWARF Extensions For +Heterogeneous Debugging section DW_LNCT_LLVM_source.

+

The Clang option used to control source embedding in AMDGPU is defined in +AMDGPU Clang Debug Options.

+
+
+ ++++ + + + + + + + + + + +
AMDGPU Clang Debug Options

Debug Flag

Description

-g[no-]embed-source

Enable/disable embedding source text in DWARF +debug sections. Useful for environments where +source cannot be written to disk, such as +when performing online compilation.

+
+

For example:

+
+
-gembed-source

Enable the embedded source.

+
+
-gno-embed-source

Disable the embedded source.

+
+
+
+
+

32-Bit and 64-Bit DWARF Formats

+

See DWARF Version 5 section 7.4 and +32-Bit and 64-Bit DWARF Formats.

+

For AMDGPU:

+
    +
  • For the amdgcn target architecture only the 64-bit process address space +is supported.

  • +
  • The producer can generate either 32-bit or 64-bit DWARF format. LLVM generates +the 32-bit DWARF format.

  • +
+
+
+

Unit Headers

+

For AMDGPU the following values apply for each of the unit headers described in +DWARF Version 5 sections 7.5.1.1, 7.5.1.2, and 7.5.1.3:

+
+
address_size (ubyte)

Matches the address size for the Global address space defined in +Address Space Identifier.

+
+
+
+
+
+

Code Conventions

+

This section provides code conventions used for each supported target triple OS +(see Target Triples).

+
+

AMDHSA

+

This section provides code conventions used when the target triple OS is +amdhsa (see Target Triples).

+
+

Code Object Metadata

+

The code object metadata specifies extensible metadata associated with the code +objects executed on HSA [HSA] compatible runtimes (see AMDGPU Operating Systems). The +encoding and semantics of this metadata depends on the code object version; see +Code Object V2 Metadata, +Code Object V3 Metadata, and +Code Object V4 Metadata.

+

Code object metadata is specified in a note record (see +Note Records) and is required when the target triple OS is +amdhsa (see Target Triples). It must contain the minimum +information necessary to support the HSA compatible runtime kernel queries. For +example, the segment sizes needed in a dispatch packet. In addition, a +high-level language runtime may require other information to be included. For +example, the AMD OpenCL runtime records kernel argument information.

+
+
Code Object V2 Metadata
+
+

Warning

+

Code object V2 is not the default code object version emitted by this version +of LLVM.

+
+

Code object V2 metadata is specified by the NT_AMD_HSA_METADATA note record +(see Code Object V2 Note Records).

+

The metadata is specified as a YAML formatted string (see [YAML] and +YAML I/O).

+

The metadata is represented as a single YAML document comprised of the mapping +defined in table AMDHSA Code Object V2 Metadata Map and +referenced tables.

+

For boolean values, the string values of false and true are used for +false and true respectively.

+

Additional information can be added to the mappings. To avoid conflicts, any +non-AMD key names should be prefixed by “vendor-name.”.

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V2 Metadata Map

String Key

Value Type

Required?

Description

“Version”

sequence of +2 integers

Required

    +
  • The first integer is the major +version. Currently 1.

  • +
  • The second integer is the minor +version. Currently 0.

  • +
+

“Printf”

sequence of +strings

Each string is encoded information +about a printf function call. The +encoded information is organized as +fields separated by colon (‘:’):

+

ID:N:S[0]:S[1]:...:S[N-1]:FormatString

+

where:

+
+
ID

A 32-bit integer as a unique id for +each printf function call

+
+
N

A 32-bit integer equal to the number +of arguments of printf function call +minus 1

+
+
S[i] (where i = 0, 1, … , N-1)

32-bit integers for the size in bytes +of the i-th FormatString argument of +the printf function call

+
+
FormatString

The format string passed to the +printf function call.

+
+
+

“Kernels”

sequence of +mapping

Required

Sequence of the mappings for each +kernel in the code object. See +AMDHSA Code Object V2 Kernel Metadata Map +for the definition of the mapping.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V2 Kernel Metadata Map

String Key

Value Type

Required?

Description

“Name”

string

Required

Source name of the kernel.

“SymbolName”

string

Required

Name of the kernel +descriptor ELF symbol.

“Language”

string

Source language of the kernel. +Values include:

+
    +
  • “OpenCL C”

  • +
  • “OpenCL C++”

  • +
  • “HCC”

  • +
  • “OpenMP”

  • +
+

“LanguageVersion”

sequence of +2 integers

    +
  • The first integer is the major +version.

  • +
  • The second integer is the +minor version.

  • +
+

“Attrs”

mapping

Mapping of kernel attributes. +See +AMDHSA Code Object V2 Kernel Attribute Metadata Map +for the mapping definition.

“Args”

sequence of +mapping

Sequence of mappings of the +kernel arguments. See +AMDHSA Code Object V2 Kernel Argument Metadata Map +for the definition of the mapping.

“CodeProps”

mapping

Mapping of properties related to +the kernel code. See +AMDHSA Code Object V2 Kernel Code Properties Metadata Map +for the mapping definition.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V2 Kernel Attribute Metadata Map

String Key

Value Type

Required?

Description

“ReqdWorkGroupSize”

sequence of +3 integers

If not 0, 0, 0 then all values +must be >=1 and the dispatch +work-group size X, Y, Z must +correspond to the specified +values. Defaults to 0, 0, 0.

+

Corresponds to the OpenCL +reqd_work_group_size +attribute.

+

“WorkGroupSizeHint”

sequence of +3 integers

The dispatch work-group size +X, Y, Z is likely to be the +specified values.

+

Corresponds to the OpenCL +work_group_size_hint +attribute.

+

“VecTypeHint”

string

The name of a scalar or vector +type.

+

Corresponds to the OpenCL +vec_type_hint attribute.

+

“RuntimeHandle”

string

The external symbol name +associated with a kernel. +OpenCL runtime allocates a +global buffer for the symbol +and saves the kernel’s address +to it, which is used for +device side enqueueing. Only +available for device side +enqueued kernels.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V2 Kernel Argument Metadata Map

String Key

Value Type

Required?

Description

“Name”

string

Kernel argument name.

“TypeName”

string

Kernel argument type name.

“Size”

integer

Required

Kernel argument size in bytes.

“Align”

integer

Required

Kernel argument alignment in +bytes. Must be a power of two.

“ValueKind”

string

Required

Kernel argument kind that +specifies how to set up the +corresponding argument. +Values include:

+
+
“ByValue”

The argument is copied +directly into the kernarg.

+
+
“GlobalBuffer”

A global address space pointer +to the buffer data is passed +in the kernarg.

+
+
“DynamicSharedPointer”

A group address space pointer +to dynamically allocated LDS +is passed in the kernarg.

+
+
“Sampler”

A global address space +pointer to a S# is passed in +the kernarg.

+
+
“Image”

A global address space +pointer to a T# is passed in +the kernarg.

+
+
“Pipe”

A global address space pointer +to an OpenCL pipe is passed in +the kernarg.

+
+
“Queue”

A global address space pointer +to an OpenCL device enqueue +queue is passed in the +kernarg.

+
+
“HiddenGlobalOffsetX”

The OpenCL grid dispatch +global offset for the X +dimension is passed in the +kernarg.

+
+
“HiddenGlobalOffsetY”

The OpenCL grid dispatch +global offset for the Y +dimension is passed in the +kernarg.

+
+
“HiddenGlobalOffsetZ”

The OpenCL grid dispatch +global offset for the Z +dimension is passed in the +kernarg.

+
+
“HiddenNone”

An argument that is not used +by the kernel. Space needs to +be left for it, but it does +not need to be set up.

+
+
“HiddenPrintfBuffer”

A global address space pointer +to the runtime printf buffer +is passed in kernarg.

+
+
“HiddenHostcallBuffer”

A global address space pointer +to the runtime hostcall buffer +is passed in kernarg.

+
+
“HiddenDefaultQueue”

A global address space pointer +to the OpenCL device enqueue +queue that should be used by +the kernel by default is +passed in the kernarg.

+
+
“HiddenCompletionAction”

A global address space pointer +to help link enqueued kernels into +the ancestor tree for determining +when the parent kernel has finished.

+
+
“HiddenMultiGridSyncArg”

A global address space pointer for +multi-grid synchronization is +passed in the kernarg.

+
+
+

“ValueType”

string

Unused and deprecated. This should no longer +be emitted, but is accepted for compatibility.

“PointeeAlign”

integer

Alignment in bytes of pointee +type for pointer type kernel +argument. Must be a power +of 2. Only present if +“ValueKind” is +“DynamicSharedPointer”.

“AddrSpaceQual”

string

Kernel argument address space +qualifier. Only present if +“ValueKind” is “GlobalBuffer” or +“DynamicSharedPointer”. Values +are:

+
    +
  • “Private”

  • +
  • “Global”

  • +
  • “Constant”

  • +
  • “Local”

  • +
  • “Generic”

  • +
  • “Region”

  • +
+

“AccQual”

string

Kernel argument access +qualifier. Only present if +“ValueKind” is “Image” or +“Pipe”. Values +are:

+
    +
  • “ReadOnly”

  • +
  • “WriteOnly”

  • +
  • “ReadWrite”

  • +
+

“ActualAccQual”

string

The actual memory accesses +performed by the kernel on the +kernel argument. Only present if +“ValueKind” is “GlobalBuffer”, +“Image”, or “Pipe”. This may be +more restrictive than indicated +by “AccQual” to reflect what the +kernel actually does. If not +present then the runtime must +assume what is implied by +“AccQual” and “IsConst”. Values +are:

+
    +
  • “ReadOnly”

  • +
  • “WriteOnly”

  • +
  • “ReadWrite”

  • +
+

“IsConst”

boolean

Indicates if the kernel argument +is const qualified. Only present +if “ValueKind” is +“GlobalBuffer”.

“IsRestrict”

boolean

Indicates if the kernel argument +is restrict qualified. Only +present if “ValueKind” is +“GlobalBuffer”.

“IsVolatile”

boolean

Indicates if the kernel argument +is volatile qualified. Only +present if “ValueKind” is +“GlobalBuffer”.

“IsPipe”

boolean

Indicates if the kernel argument +is pipe qualified. Only present +if “ValueKind” is “Pipe”.

+
+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V2 Kernel Code Properties Metadata Map

String Key

Value Type

Required?

Description

“KernargSegmentSize”

integer

Required

The size in bytes of +the kernarg segment +that holds the values +of the arguments to +the kernel.

“GroupSegmentFixedSize”

integer

Required

The amount of group +segment memory +required by a +work-group in +bytes. This does not +include any +dynamically allocated +group segment memory +that may be added +when the kernel is +dispatched.

“PrivateSegmentFixedSize”

integer

Required

The amount of fixed +private address space +memory required for a +work-item in +bytes. If the kernel +uses a dynamic call +stack then additional +space must be added +to this value for the +call stack.

“KernargSegmentAlign”

integer

Required

The maximum byte +alignment of +arguments in the +kernarg segment. Must +be a power of 2.

“WavefrontSize”

integer

Required

Wavefront size. Must +be a power of 2.

“NumSGPRs”

integer

Required

Number of scalar +registers used by a +wavefront for +GFX6-GFX10. This +includes the special +SGPRs for VCC, Flat +Scratch (GFX7-GFX10) +and XNACK (for +GFX8-GFX10). It does +not include the 16 +SGPR added if a trap +handler is +enabled. It is not +rounded up to the +allocation +granularity.

“NumVGPRs”

integer

Required

Number of vector +registers used by +each work-item for +GFX6-GFX10.

“MaxFlatWorkGroupSize”

integer

Required

Maximum flat +work-group size +supported by the +kernel in work-items. +Must be >=1 and +consistent with +ReqdWorkGroupSize if +not 0, 0, 0.

“NumSpilledSGPRs”

integer

Number of stores from +a scalar register to +a register allocator +created spill +location.

“NumSpilledVGPRs”

integer

Number of stores from +a vector register to +a register allocator +created spill +location.

+
+
+
+
Code Object V3 Metadata
+

Code object V3 to V4 metadata is specified by the NT_AMDGPU_METADATA note +record (see Code Object V3 to V4 Note Records).

+

The metadata is represented as Message Pack formatted binary data (see +[MsgPack]). The top level is a Message Pack map that includes the +keys defined in table +AMDHSA Code Object V3 Metadata Map and referenced +tables.

+

Additional information can be added to the maps. To avoid conflicts, +any key names should be prefixed by “vendor-name.” where +vendor-name can be the name of the vendor and specific vendor +tool that generates the information. The prefix is abbreviated to +simply “.” when it appears within a map that has been added by the +same vendor-name.

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V3 Metadata Map

String Key

Value Type

Required?

Description

“amdhsa.version”

sequence of +2 integers

Required

    +
  • The first integer is the major +version. Currently 1.

  • +
  • The second integer is the minor +version. Currently 0.

  • +
+

“amdhsa.printf”

sequence of +strings

Each string is encoded information +about a printf function call. The +encoded information is organized as +fields separated by colon (‘:’):

+

ID:N:S[0]:S[1]:...:S[N-1]:FormatString

+

where:

+
+
ID

A 32-bit integer as a unique id for +each printf function call

+
+
N

A 32-bit integer equal to the number +of arguments of printf function call +minus 1

+
+
S[i] (where i = 0, 1, … , N-1)

32-bit integers for the size in bytes +of the i-th FormatString argument of +the printf function call

+
+
FormatString

The format string passed to the +printf function call.

+
+
+

“amdhsa.kernels”

sequence of +map

Required

Sequence of the maps for each +kernel in the code object. See +AMDHSA Code Object V3 Kernel Metadata Map +for the definition of the keys included +in that map.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V3 Kernel Metadata Map

String Key

Value Type

Required?

Description

“.name”

string

Required

Source name of the kernel.

“.symbol”

string

Required

Name of the kernel +descriptor ELF symbol.

“.language”

string

Source language of the kernel. +Values include:

+
    +
  • “OpenCL C”

  • +
  • “OpenCL C++”

  • +
  • “HCC”

  • +
  • “HIP”

  • +
  • “OpenMP”

  • +
  • “Assembler”

  • +
+

“.language_version”

sequence of +2 integers

    +
  • The first integer is the major +version.

  • +
  • The second integer is the +minor version.

  • +
+

“.args”

sequence of +map

Sequence of maps of the +kernel arguments. See +AMDHSA Code Object V3 Kernel Argument Metadata Map +for the definition of the keys +included in that map.

“.reqd_workgroup_size”

sequence of +3 integers

If not 0, 0, 0 then all values +must be >=1 and the dispatch +work-group size X, Y, Z must +correspond to the specified +values. Defaults to 0, 0, 0.

+

Corresponds to the OpenCL +reqd_work_group_size +attribute.

+

“.workgroup_size_hint”

sequence of +3 integers

The dispatch work-group size +X, Y, Z is likely to be the +specified values.

+

Corresponds to the OpenCL +work_group_size_hint +attribute.

+

“.vec_type_hint”

string

The name of a scalar or vector +type.

+

Corresponds to the OpenCL +vec_type_hint attribute.

+

“.device_enqueue_symbol”

string

The external symbol name +associated with a kernel. +OpenCL runtime allocates a +global buffer for the symbol +and saves the kernel’s address +to it, which is used for +device side enqueueing. Only +available for device side +enqueued kernels.

“.kernarg_segment_size”

integer

Required

The size in bytes of +the kernarg segment +that holds the values +of the arguments to +the kernel.

“.group_segment_fixed_size”

integer

Required

The amount of group +segment memory +required by a +work-group in +bytes. This does not +include any +dynamically allocated +group segment memory +that may be added +when the kernel is +dispatched.

“.private_segment_fixed_size”

integer

Required

The amount of fixed +private address space +memory required for a +work-item in +bytes. If the kernel +uses a dynamic call +stack then additional +space must be added +to this value for the +call stack.

“.kernarg_segment_align”

integer

Required

The maximum byte +alignment of +arguments in the +kernarg segment. Must +be a power of 2.

“.wavefront_size”

integer

Required

Wavefront size. Must +be a power of 2.

“.sgpr_count”

integer

Required

Number of scalar +registers required by a +wavefront for +GFX6-GFX9. A register +is required if it is +used explicitly, or +if a higher numbered +register is used +explicitly. This +includes the special +SGPRs for VCC, Flat +Scratch (GFX7-GFX9) +and XNACK (for +GFX8-GFX9). It does +not include the 16 +SGPR added if a trap +handler is +enabled. It is not +rounded up to the +allocation +granularity.

“.vgpr_count”

integer

Required

Number of vector +registers required by +each work-item for +GFX6-GFX9. A register +is required if it is +used explicitly, or +if a higher numbered +register is used +explicitly.

“.max_flat_workgroup_size”

integer

Required

Maximum flat +work-group size +supported by the +kernel in work-items. +Must be >=1 and +consistent with +.reqd_workgroup_size if +not 0, 0, 0.

“.sgpr_spill_count”

integer

Number of stores from +a scalar register to +a register allocator +created spill +location.

“.vgpr_spill_count”

integer

Number of stores from +a vector register to +a register allocator +created spill +location.

“.kind”

string

The kind of the kernel +with the following +values:

+
+
“normal”

Regular kernels.

+
+
“init”

These kernels must be +invoked after loading +the containing code +object and must +complete before any +normal and fini +kernels in the same +code object are +invoked.

+
+
“fini”

These kernels must be +invoked before +unloading the +containing code object +and after all init and +normal kernels in the +same code object have +been invoked and +completed.

+
+
+

If omitted, “normal” is +assumed.

+
+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V3 Kernel Argument Metadata Map

String Key

Value Type

Required?

Description

“.name”

string

Kernel argument name.

“.type_name”

string

Kernel argument type name.

“.size”

integer

Required

Kernel argument size in bytes.

“.offset”

integer

Required

Kernel argument offset in +bytes. The offset must be a +multiple of the alignment +required by the argument.

“.value_kind”

string

Required

Kernel argument kind that +specifies how to set up the +corresponding argument. +Values include:

+
+
“by_value”

The argument is copied +directly into the kernarg.

+
+
“global_buffer”

A global address space pointer +to the buffer data is passed +in the kernarg.

+
+
“dynamic_shared_pointer”

A group address space pointer +to dynamically allocated LDS +is passed in the kernarg.

+
+
“sampler”

A global address space +pointer to a S# is passed in +the kernarg.

+
+
“image”

A global address space +pointer to a T# is passed in +the kernarg.

+
+
“pipe”

A global address space pointer +to an OpenCL pipe is passed in +the kernarg.

+
+
“queue”

A global address space pointer +to an OpenCL device enqueue +queue is passed in the +kernarg.

+
+
“hidden_global_offset_x”

The OpenCL grid dispatch +global offset for the X +dimension is passed in the +kernarg.

+
+
“hidden_global_offset_y”

The OpenCL grid dispatch +global offset for the Y +dimension is passed in the +kernarg.

+
+
“hidden_global_offset_z”

The OpenCL grid dispatch +global offset for the Z +dimension is passed in the +kernarg.

+
+
“hidden_none”

An argument that is not used +by the kernel. Space needs to +be left for it, but it does +not need to be set up.

+
+
“hidden_printf_buffer”

A global address space pointer +to the runtime printf buffer +is passed in kernarg.

+
+
“hidden_hostcall_buffer”

A global address space pointer +to the runtime hostcall buffer +is passed in kernarg.

+
+
“hidden_default_queue”

A global address space pointer +to the OpenCL device enqueue +queue that should be used by +the kernel by default is +passed in the kernarg.

+
+
“hidden_completion_action”

A global address space pointer +to help link enqueued kernels into +the ancestor tree for determining +when the parent kernel has finished.

+
+
“hidden_multigrid_sync_arg”

A global address space pointer for +multi-grid synchronization is +passed in the kernarg.

+
+
+

“.value_type”

string

Unused and deprecated. This should no longer +be emitted, but is accepted for compatibility.

“.pointee_align”

integer

Alignment in bytes of pointee +type for pointer type kernel +argument. Must be a power +of 2. Only present if +“.value_kind” is +“dynamic_shared_pointer”.

“.address_space”

string

Kernel argument address space +qualifier. Only present if +“.value_kind” is “global_buffer” or +“dynamic_shared_pointer”. Values +are:

+
    +
  • “private”

  • +
  • “global”

  • +
  • “constant”

  • +
  • “local”

  • +
  • “generic”

  • +
  • “region”

  • +
+

“.access”

string

Kernel argument access +qualifier. Only present if +“.value_kind” is “image” or +“pipe”. Values +are:

+
    +
  • “read_only”

  • +
  • “write_only”

  • +
  • “read_write”

  • +
+

“.actual_access”

string

The actual memory accesses +performed by the kernel on the +kernel argument. Only present if +“.value_kind” is “global_buffer”, +“image”, or “pipe”. This may be +more restrictive than indicated +by “.access” to reflect what the +kernel actually does. If not +present then the runtime must +assume what is implied by +“.access” and “.is_const”. Values +are:

+
    +
  • “read_only”

  • +
  • “write_only”

  • +
  • “read_write”

  • +
+

“.is_const”

boolean

Indicates if the kernel argument +is const qualified. Only present +if “.value_kind” is +“global_buffer”.

“.is_restrict”

boolean

Indicates if the kernel argument +is restrict qualified. Only +present if “.value_kind” is +“global_buffer”.

“.is_volatile”

boolean

Indicates if the kernel argument +is volatile qualified. Only +present if “.value_kind” is +“global_buffer”.

“.is_pipe”

boolean

Indicates if the kernel argument +is pipe qualified. Only present +if “.value_kind” is “pipe”.

+
+
+
+
+
Code Object V4 Metadata
+
+

Warning

+

Code object V4 is not the default code object version emitted by this version +of LLVM.

+
+

Code object V4 metadata is the same as +Code Object V3 Metadata with the changes and additions +defined in table AMDHSA Code Object V4 Metadata Map Changes from Code Object V3 Metadata.

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + +
AMDHSA Code Object V4 Metadata Map Changes from Code Object V3 Metadata

String Key

Value Type

Required?

Description

“amdhsa.version”

sequence of +2 integers

Required

    +
  • The first integer is the major +version. Currently 1.

  • +
  • The second integer is the minor +version. Currently 1.

  • +
+

“amdhsa.target”

string

Required

The target name of the code using the syntax:

+
<target-triple> [ "-" <target-id> ]
+
+
+

A canonical target ID must be +used. See Target Triples +and Target ID.

+
+
+
+
+
+

Kernel Dispatch

+

The HSA architected queuing language (AQL) defines a user space memory interface +that can be used to control the dispatch of kernels, in an agent independent +way. An agent can have zero or more AQL queues created for it using an HSA +compatible runtime (see AMDGPU Operating Systems), in which AQL packets (all of which +are 64 bytes) can be placed. See the HSA Platform System Architecture +Specification [HSA] for the AQL queue mechanics and packet layouts.

+

The packet processor of a kernel agent is responsible for detecting and +dispatching HSA kernels from the AQL queues associated with it. For AMD GPUs the +packet processor is implemented by the hardware command processor (CP), +asynchronous dispatch controller (ADC) and shader processor input controller +(SPI).

+

An HSA compatible runtime can be used to allocate an AQL queue object. It uses +the kernel mode driver to initialize and register the AQL queue with CP.

+

To dispatch a kernel the following actions are performed. This can occur in the +CPU host program, or from an HSA kernel executing on a GPU.

+
    +
  1. A pointer to an AQL queue for the kernel agent on which the kernel is to be +executed is obtained.

  2. +
  3. A pointer to the kernel descriptor (see +Kernel Descriptor) of the kernel to execute is obtained. +It must be for a kernel that is contained in a code object that was +loaded by an HSA compatible runtime on the kernel agent with which the AQL +queue is associated.

  4. +
  5. Space is allocated for the kernel arguments using the HSA compatible runtime +allocator for a memory region with the kernarg property for the kernel agent +that will execute the kernel. It must be at least 16-byte aligned.

  6. +
  7. Kernel argument values are assigned to the kernel argument memory +allocation. The layout is defined in the HSA Programmer’s Language +Reference [HSA]. For AMDGPU the kernel execution directly accesses the +kernel argument memory in the same way constant memory is accessed. (Note +that the HSA specification allows an implementation to copy the kernel +argument contents to another location that is accessed by the kernel.)

  8. +
  9. An AQL kernel dispatch packet is created on the AQL queue. The HSA compatible +runtime API uses 64-bit atomic operations to reserve space in the AQL queue +for the packet. The packet must be set up, and the final write must use an +atomic store release to set the packet kind to ensure the packet contents are +visible to the kernel agent. AQL defines a doorbell signal mechanism to +notify the kernel agent that the AQL queue has been updated. These rules, and +the layout of the AQL queue and kernel dispatch packet, are defined in the HSA +System Architecture Specification [HSA].

  10. +
  11. A kernel dispatch packet includes information about the actual dispatch, +such as grid and work-group size, together with information from the code +object about the kernel, such as segment sizes. The HSA compatible runtime +queries on the kernel symbol can be used to obtain the code object values +which are recorded in the Code Object Metadata.

  12. +
  13. CP executes micro-code and is responsible for detecting and setting up the +GPU to execute the wavefronts of a kernel dispatch.

  14. +
  15. CP ensures that when a wavefront starts executing the kernel machine +code, the scalar general purpose registers (SGPR) and vector general purpose +registers (VGPR) are set up as required by the machine code. The required +setup is defined in the Kernel Descriptor. The initial +register state is defined in +Initial Kernel Execution State.

  16. +
  17. The prolog of the kernel machine code (see +Kernel Prolog) sets up the machine state as necessary +before continuing executing the machine code that corresponds to the kernel.

  18. +
  19. When the kernel dispatch has completed execution, CP signals the completion +signal specified in the kernel dispatch packet if not 0.

  20. +
+
+
+

Memory Spaces

+

The memory space properties are:

+
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Memory Spaces

Memory Space Name

HSA Segment +Name

Hardware +Name

Address +Size

NULL Value

Private

private

scratch

32

0x00000000

Local

group

LDS

32

0xFFFFFFFF

Global

global

global

64

0x0000000000000000

Constant

constant

same as +global

64

0x0000000000000000

Generic

flat

flat

64

0x0000000000000000

Region

N/A

GDS

32

not implemented +for AMDHSA

+
+

The global and constant memory spaces both use global virtual addresses, which +are the same virtual address space used by the CPU. However, some virtual +addresses may only be accessible to the CPU, some only accessible by the GPU, +and some by both.

+

Using the constant memory space indicates that the data will not change during +the execution of the kernel. This allows scalar read instructions to be +used. The vector and scalar L1 caches are invalidated of volatile data before +each kernel dispatch execution to allow constant memory to change values between +kernel dispatches.

+

The local memory space uses the hardware Local Data Store (LDS) which is +automatically allocated when the hardware creates work-groups of wavefronts, and +freed when all the wavefronts of a work-group have terminated. The data store +(DS) instructions can be used to access it.

+

The private memory space uses the hardware scratch memory support. If the kernel +uses scratch, then the hardware allocates memory that is accessed using +wavefront lane dword (4 byte) interleaving. The mapping used from private +address to physical address is:

+
+

wavefront-scratch-base + +(private-address * wavefront-size * 4) + +(wavefront-lane-id * 4)

+
+

There are different ways that the wavefront scratch base address is determined +by a wavefront (see Initial Kernel Execution State). This +memory can be accessed in an interleaved manner using buffer instruction with +the scratch buffer descriptor and per wavefront scratch offset, by the scratch +instructions, or by flat instructions. If each lane of a wavefront accesses the +same private address, the interleaving results in adjacent dwords being accessed +and hence requires fewer cache lines to be fetched. Multi-dword access is not +supported except by flat and scratch instructions in GFX9-GFX10.

+

The generic address space uses the hardware flat address support available in +GFX7-GFX10. This uses two fixed ranges of virtual addresses (the private and +local apertures), that are outside the range of addressable global memory, to +map from a flat address to a private or local address.

+

FLAT instructions can take a flat address and access global, private (scratch) +and group (LDS) memory depending on whether the address is within one of the +aperture ranges. Flat access to scratch requires hardware aperture setup and +setup in the kernel prologue (see +Flat Scratch). Flat access to LDS requires +hardware aperture setup and M0 (GFX7-GFX8) register setup (see +M0).

+

To convert between a segment address and a flat address the base address of the +apertures address can be used. For GFX7-GFX8 these are available in the +HSA AQL Queue the address of which can be obtained with +Queue Ptr SGPR (see Initial Kernel Execution State). For +GFX9-GFX10 the aperture base addresses are directly available as inline constant +registers SRC_SHARED_BASE/LIMIT and SRC_PRIVATE_BASE/LIMIT. In 64 bit +address mode the aperture sizes are 2^32 bytes and the base is aligned to 2^32 +which makes it easier to convert from flat to segment or segment to flat.

+
+
+

Image and Samplers

+

Image and sampler handles created by an HSA compatible runtime (see +AMDGPU Operating Systems) are 64-bit addresses of a hardware 32-byte V# and 48-byte S# +object respectively. In order to support the HSA query_sampler operations +two extra dwords are used to store the HSA BRIG enumeration values for the +queries that are not trivially deducible from the S# representation.

+
+
+

HSA Signals

+

HSA signal handles created by an HSA compatible runtime (see AMDGPU Operating Systems) +are 64-bit addresses of a structure allocated in memory accessible from both the +CPU and GPU. The structure is defined by the runtime and subject to change +between releases. For example, see [AMD-ROCm-github].

+
+
+

HSA AQL Queue

+

The HSA AQL queue structure is defined by an HSA compatible runtime (see +AMDGPU Operating Systems) and subject to change between releases. For example, see +[AMD-ROCm-github]. For some processors it contains fields needed to implement +certain language features such as the flat address aperture bases. It also +contains fields used by CP such as managing the allocation of scratch memory.

+
+
+

Kernel Descriptor

+

A kernel descriptor consists of the information needed by CP to initiate the +execution of a kernel, including the entry point address of the machine code +that implements the kernel.

+
+
Code Object V3 Kernel Descriptor
+

CP microcode requires the Kernel descriptor to be allocated on 64-byte +alignment.

+

The fields used by CP for code objects before V3 also match those specified in +Code Object V3 Kernel Descriptor.

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Code Object V3 Kernel Descriptor

Bits

Size

Field Name

Description

31:0

4 bytes

GROUP_SEGMENT_FIXED_SIZE

The amount of fixed local +address space memory +required for a work-group +in bytes. This does not +include any dynamically +allocated local address +space memory that may be +added when the kernel is +dispatched.

63:32

4 bytes

PRIVATE_SEGMENT_FIXED_SIZE

The amount of fixed +private address space +memory required for a +work-item in bytes. +Additional space may need to +be added to this value if +the call stack has +non-inlined function calls.

95:64

4 bytes

KERNARG_SIZE

The size of the kernarg +memory pointed to by the +AQL dispatch packet. The +kernarg memory is used to +pass arguments to the +kernel.

+
    +
  • If the kernarg pointer in +the dispatch packet is NULL +then there are no kernel +arguments.

  • +
  • If the kernarg pointer in +the dispatch packet is +not NULL and this value +is 0 then the kernarg +memory size is +unspecified.

  • +
  • If the kernarg pointer in +the dispatch packet is +not NULL and this value +is not 0 then the value +specifies the kernarg +memory size in bytes. It +is recommended to provide +a value as it may be used +by CP to optimize making +the kernarg memory +visible to the kernel +code.

  • +
+

127:96

4 bytes

Reserved, must be 0.

191:128

8 bytes

KERNEL_CODE_ENTRY_BYTE_OFFSET

Byte offset (possibly +negative) from base +address of kernel +descriptor to kernel’s +entry point instruction +which must be 256 byte +aligned.

351:192

20 +bytes

Reserved, must be 0.

383:352

4 bytes

COMPUTE_PGM_RSRC3

+
GFX6-GFX9

Reserved, must be 0.

+
+
GFX90A

Compute Shader (CS) +program settings used by +CP to set up +COMPUTE_PGM_RSRC3 +configuration +register. See +compute_pgm_rsrc3 for GFX90A.

+
+
GFX10

Compute Shader (CS) +program settings used by +CP to set up +COMPUTE_PGM_RSRC3 +configuration +register. See +compute_pgm_rsrc3 for GFX10.

+
+
+

415:384

4 bytes

COMPUTE_PGM_RSRC1

Compute Shader (CS) +program settings used by +CP to set up +COMPUTE_PGM_RSRC1 +configuration +register. See +compute_pgm_rsrc1 for GFX6-GFX10.

447:416

4 bytes

COMPUTE_PGM_RSRC2

Compute Shader (CS) +program settings used by +CP to set up +COMPUTE_PGM_RSRC2 +configuration +register. See +compute_pgm_rsrc2 for GFX6-GFX10.

454:448

7 bits

See separate bits below.

Enable the setup of the +SGPR user data registers +(see +Initial Kernel Execution State).

+

The total number of SGPR +user data registers +requested must not exceed +16 and match value in +compute_pgm_rsrc2.user_sgpr.user_sgpr_count. +Any requests beyond 16 +will be ignored.

+

>448

1 bit

ENABLE_SGPR_PRIVATE_SEGMENT +_BUFFER

If the Target Properties +column of +AMDGPU Processors +specifies Architected flat +scratch then not supported +and must be 0.

>449

1 bit

ENABLE_SGPR_DISPATCH_PTR

>450

1 bit

ENABLE_SGPR_QUEUE_PTR

>451

1 bit

ENABLE_SGPR_KERNARG_SEGMENT_PTR

>452

1 bit

ENABLE_SGPR_DISPATCH_ID

>453

1 bit

ENABLE_SGPR_FLAT_SCRATCH_INIT

If the Target Properties +column of +AMDGPU Processors +specifies Architected flat +scratch then not supported +and must be 0.

>454

1 bit

ENABLE_SGPR_PRIVATE_SEGMENT +_SIZE

457:455

3 bits

Reserved, must be 0.

458

1 bit

ENABLE_WAVEFRONT_SIZE32

+
GFX6-GFX9

Reserved, must be 0.

+
+
GFX10
    +
  • If 0 execute in +wavefront size 64 mode.

  • +
  • If 1 execute in +native wavefront size +32 mode.

  • +
+
+
+

463:459

5 bits

Reserved, must be 0.

464

1 bit

RESERVED_464

Deprecated, must be 0.

467:465

3 bits

Reserved, must be 0.

468

1 bit

RESERVED_468

Deprecated, must be 0.

471:469

3 bits

Reserved, must be 0.

511:472

5 bytes

Reserved, must be 0.

512

Total size 64 bytes.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
compute_pgm_rsrc1 for GFX6-GFX10

Bits

Size

Field Name

Description

5:0

6 bits

GRANULATED_WORKITEM_VGPR_COUNT

Number of vector register +blocks used by each work-item; +granularity is device +specific:

+
+
GFX6-GFX9
    +
  • vgprs_used 0..256

  • +
  • max(0, ceil(vgprs_used / 4) - 1)

  • +
+
+
GFX90A
    +
  • vgprs_used 0..512

  • +
  • +
    vgprs_used = align(arch_vgprs, 4) + acc_vgprs

    • +
    +
    +
    +
  • +
  • max(0, ceil(vgprs_used / 8) - 1)

  • +
+
+
GFX10 (wavefront size 64)
    +
  • max_vgpr 1..256

  • +
  • max(0, ceil(vgprs_used / 4) - 1)

  • +
+
+
GFX10 (wavefront size 32)
    +
  • max_vgpr 1..256

  • +
  • max(0, ceil(vgprs_used / 8) - 1)

  • +
+
+
+

Where vgprs_used is defined +as the highest VGPR number +explicitly referenced plus +one.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.VGPRS.

+

The +Assembler +calculates this +automatically for the +selected processor from +values provided to the +.amdhsa_kernel directive +by the +.amdhsa_next_free_vgpr +nested directive (see +AMDHSA Kernel Assembler Directives).

+

9:6

4 bits

GRANULATED_WAVEFRONT_SGPR_COUNT

Number of scalar register +blocks used by a wavefront; +granularity is device +specific:

+
+
GFX6-GFX8
    +
  • sgprs_used 0..112

  • +
  • max(0, ceil(sgprs_used / 8) - 1)

  • +
+
+
GFX9
    +
  • sgprs_used 0..112

  • +
  • 2 * max(0, ceil(sgprs_used / 16) - 1)

  • +
+
+
GFX10

Reserved, must be 0. +(128 SGPRs always +allocated.)

+
+
+

Where sgprs_used is +defined as the highest +SGPR number explicitly +referenced plus one, plus +a target specific number +of additional special +SGPRs for VCC, +FLAT_SCRATCH (GFX7+) and +XNACK_MASK (GFX8+), and +any additional +target specific +limitations. It does not +include the 16 SGPRs added +if a trap handler is +enabled.

+

The target specific +limitations and special +SGPR layout are defined in +the hardware +documentation, which can +be found in the +Processors +table.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.SGPRS.

+

The +Assembler +calculates this +automatically for the +selected processor from +values provided to the +.amdhsa_kernel directive +by the +.amdhsa_next_free_sgpr +and .amdhsa_reserve_* +nested directives (see +AMDHSA Kernel Assembler Directives).

+

11:10

2 bits

PRIORITY

Must be 0.

+

Start executing wavefront +at the specified priority.

+

CP is responsible for +filling in +COMPUTE_PGM_RSRC1.PRIORITY.

+

13:12

2 bits

FLOAT_ROUND_MODE_32

Wavefront starts execution +with specified rounding +mode for single (32 +bit) floating point +precision floating point +operations.

+

Floating point rounding +mode values are defined in +Floating Point Rounding Mode Enumeration Values.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.FLOAT_MODE.

+

15:14

2 bits

FLOAT_ROUND_MODE_16_64

Wavefront starts execution +with specified rounding +mode for half/double (16 +and 64-bit) floating point +precision floating point +operations.

+

Floating point rounding +mode values are defined in +Floating Point Rounding Mode Enumeration Values.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.FLOAT_MODE.

+

17:16

2 bits

FLOAT_DENORM_MODE_32

Wavefront starts execution +with specified denorm mode +for single (32 +bit) floating point +precision floating point +operations.

+

Floating point denorm mode +values are defined in +Floating Point Denorm Mode Enumeration Values.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.FLOAT_MODE.

+

19:18

2 bits

FLOAT_DENORM_MODE_16_64

Wavefront starts execution +with specified denorm mode +for half/double (16 +and 64-bit) floating point +precision floating point +operations.

+

Floating point denorm mode +values are defined in +Floating Point Denorm Mode Enumeration Values.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.FLOAT_MODE.

+

20

1 bit

PRIV

Must be 0.

+

Start executing wavefront +in privilege trap handler +mode.

+

CP is responsible for +filling in +COMPUTE_PGM_RSRC1.PRIV.

+

21

1 bit

ENABLE_DX10_CLAMP

Wavefront starts execution +with DX10 clamp mode +enabled. Used by the vector +ALU to force DX10 style +treatment of NaN’s (when +set, clamp NaN to zero, +otherwise pass NaN +through).

+

Used by CP to set up +COMPUTE_PGM_RSRC1.DX10_CLAMP.

+

22

1 bit

DEBUG_MODE

Must be 0.

+

Start executing wavefront +in single step mode.

+

CP is responsible for +filling in +COMPUTE_PGM_RSRC1.DEBUG_MODE.

+

23

1 bit

ENABLE_IEEE_MODE

Wavefront starts execution +with IEEE mode +enabled. Floating point +opcodes that support +exception flag gathering +will quiet and propagate +signaling-NaN inputs per +IEEE 754-2008. Min_dx10 and +max_dx10 become IEEE +754-2008 compliant due to +signaling-NaN propagation +and quieting.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.IEEE_MODE.

+

24

1 bit

BULKY

Must be 0.

+

Only one work-group allowed +to execute on a compute +unit.

+

CP is responsible for +filling in +COMPUTE_PGM_RSRC1.BULKY.

+

25

1 bit

CDBG_USER

Must be 0.

+

Flag that can be used to +control debugging code.

+

CP is responsible for +filling in +COMPUTE_PGM_RSRC1.CDBG_USER.

+

26

1 bit

FP16_OVFL

+
GFX6-GFX8

Reserved, must be 0.

+
+
GFX9-GFX10

Wavefront starts execution +with specified fp16 overflow +mode.

+
    +
  • If 0, fp16 overflow generates ++/-INF values.

  • +
  • If 1, fp16 overflow that is the +result of an +/-INF input value +or divide by 0 produces a +/-INF, +otherwise clamps computed +overflow to +/-MAX_FP16 as +appropriate.

  • +
+

Used by CP to set up +COMPUTE_PGM_RSRC1.FP16_OVFL.

+
+
+

28:27

2 bits

Reserved, must be 0.

29

1 bit

WGP_MODE

+
GFX6-GFX9

Reserved, must be 0.

+
+
GFX10
    +
  • If 0 execute work-groups in +CU wavefront execution mode.

  • +
  • If 1 execute work-groups +in WGP wavefront execution mode.

  • +
+

See Memory Model.

+

Used by CP to set up +COMPUTE_PGM_RSRC1.WGP_MODE.

+
+
+

30

1 bit

MEM_ORDERED

+
GFX6-GFX9

Reserved, must be 0.

+
+
GFX10

Controls the behavior of the +s_waitcnt’s vmcnt and vscnt +counters.

+
    +
  • If 0 vmcnt reports completion +of load and atomic with return +out of order with sample +instructions, and the vscnt +reports the completion of +store and atomic without +return in order.

  • +
  • If 1 vmcnt reports completion +of load, atomic with return +and sample instructions in +order, and the vscnt reports +the completion of store and +atomic without return in order.

  • +
+

Used by CP to set up +COMPUTE_PGM_RSRC1.MEM_ORDERED.

+
+
+

31

1 bit

FWD_PROGRESS

+
GFX6-GFX9

Reserved, must be 0.

+
+
GFX10
    +
  • If 0 execute SIMD wavefronts +using oldest first policy.

  • +
  • If 1 execute SIMD wavefronts to +ensure wavefronts will make some +forward progress.

  • +
+

Used by CP to set up +COMPUTE_PGM_RSRC1.FWD_PROGRESS.

+
+
+

32

Total size 4 bytes

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
compute_pgm_rsrc2 for GFX6-GFX10

Bits

Size

Field Name

Description

0

1 bit

ENABLE_PRIVATE_SEGMENT

+

Used by CP to set up +COMPUTE_PGM_RSRC2.SCRATCH_EN.

+

5:1

5 bits

USER_SGPR_COUNT

The total number of SGPR +user data registers +requested. This number must +match the number of user +data registers enabled.

+

Used by CP to set up +COMPUTE_PGM_RSRC2.USER_SGPR.

+

6

1 bit

ENABLE_TRAP_HANDLER

Must be 0.

+

This bit represents +COMPUTE_PGM_RSRC2.TRAP_PRESENT, +which is set by the CP if +the runtime has installed a +trap handler.

+

7

1 bit

ENABLE_SGPR_WORKGROUP_ID_X

Enable the setup of the +system SGPR register for +the work-group id in the X +dimension (see +Initial Kernel Execution State).

+

Used by CP to set up +COMPUTE_PGM_RSRC2.TGID_X_EN.

+

8

1 bit

ENABLE_SGPR_WORKGROUP_ID_Y

Enable the setup of the +system SGPR register for +the work-group id in the Y +dimension (see +Initial Kernel Execution State).

+

Used by CP to set up +COMPUTE_PGM_RSRC2.TGID_Y_EN.

+

9

1 bit

ENABLE_SGPR_WORKGROUP_ID_Z

Enable the setup of the +system SGPR register for +the work-group id in the Z +dimension (see +Initial Kernel Execution State).

+

Used by CP to set up +COMPUTE_PGM_RSRC2.TGID_Z_EN.

+

10

1 bit

ENABLE_SGPR_WORKGROUP_INFO

Enable the setup of the +system SGPR register for +work-group information (see +Initial Kernel Execution State).

+

Used by CP to set up +COMPUTE_PGM_RSRC2.TGID_SIZE_EN.

+

12:11

2 bits

ENABLE_VGPR_WORKITEM_ID

Enable the setup of the +VGPR system registers used +for the work-item ID. +System VGPR Work-Item ID Enumeration Values +defines the values.

+

Used by CP to set up +COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT.

+

13

1 bit

ENABLE_EXCEPTION_ADDRESS_WATCH

Must be 0.

+

Wavefront starts execution +with address watch +exceptions enabled which +are generated when L1 has +witnessed a thread access +an address of +interest.

+

CP is responsible for +filling in the address +watch bit in +COMPUTE_PGM_RSRC2.EXCP_EN_MSB +according to what the +runtime requests.

+

14

1 bit

ENABLE_EXCEPTION_MEMORY

Must be 0.

+

Wavefront starts execution +with memory violation +exceptions +enabled which are generated +when a memory violation has +occurred for this wavefront from +L1 or LDS +(write-to-read-only-memory, +mis-aligned atomic, LDS +address out of range, +illegal address, etc.).

+

CP sets the memory +violation bit in +COMPUTE_PGM_RSRC2.EXCP_EN_MSB +according to what the +runtime requests.

+

23:15

9 bits

GRANULATED_LDS_SIZE

Must be 0.

+

CP uses the rounded value +from the dispatch packet, +not this value, as the +dispatch may contain +dynamically allocated group +segment memory. CP writes +directly to +COMPUTE_PGM_RSRC2.LDS_SIZE.

+

Amount of group segment +(LDS) to allocate for each +work-group. Granularity is +device specific:

+
+
GFX6

roundup(lds-size / (64 * 4))

+
+
GFX7-GFX10

roundup(lds-size / (128 * 4))

+
+
+

24

1 bit

ENABLE_EXCEPTION_IEEE_754_FP +_INVALID_OPERATION

Wavefront starts execution +with specified exceptions +enabled.

+

Used by CP to set up +COMPUTE_PGM_RSRC2.EXCP_EN +(set from bits 0..6).

+

IEEE 754 FP Invalid +Operation

+

25

1 bit

ENABLE_EXCEPTION_FP_DENORMAL +_SOURCE

FP Denormal one or more +input operands is a +denormal number

26

1 bit

ENABLE_EXCEPTION_IEEE_754_FP +_DIVISION_BY_ZERO

IEEE 754 FP Division by +Zero

27

1 bit

ENABLE_EXCEPTION_IEEE_754_FP +_OVERFLOW

IEEE 754 FP Overflow

28

1 bit

ENABLE_EXCEPTION_IEEE_754_FP +_UNDERFLOW

IEEE 754 FP Underflow

29

1 bit

ENABLE_EXCEPTION_IEEE_754_FP +_INEXACT

IEEE 754 FP Inexact

30

1 bit

ENABLE_EXCEPTION_INT_DIVIDE_BY +_ZERO

Integer Division by Zero +(rcp_iflag_f32 instruction +only)

31

1 bit

Reserved, must be 0.

32

Total size 4 bytes.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
compute_pgm_rsrc3 for GFX90A

Bits

Size

Field Name

Description

5:0

6 bits

ACCUM_OFFSET

Offset of a first AccVGPR in the unified register file. Granularity 4. +Value 0-63. 0 - accum-offset = 4, 1 - accum-offset = 8, …, +63 - accum-offset = 256.

15:6

10 +bits

Reserved, must be 0.

16

1 bit

TG_SPLIT

    +
  • If 0 the waves of a work-group are +launched in the same CU.

  • +
  • If 1 the waves of a work-group can be +launched in different CUs. The waves +cannot use S_BARRIER or LDS.

  • +
+

31:17

15 +bits

Reserved, must be 0.

32

Total size 4 bytes.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + +
compute_pgm_rsrc3 for GFX10

Bits

Size

Field Name

Description

3:0

4 bits

SHARED_VGPR_COUNT

Number of shared VGPRs for wavefront size 64. Granularity 8. Value 0-120. +compute_pgm_rsrc1.vgprs + shared_vgpr_cnt cannot exceed 64.

31:4

28 +bits

Reserved, must be 0.

32

Total size 4 bytes.

+
+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +
Floating Point Rounding Mode Enumeration Values

Enumeration Name

Value

Description

FLOAT_ROUND_MODE_NEAR_EVEN

0

Round Ties To Even

FLOAT_ROUND_MODE_PLUS_INFINITY

1

Round Toward +infinity

FLOAT_ROUND_MODE_MINUS_INFINITY

2

Round Toward -infinity

FLOAT_ROUND_MODE_ZERO

3

Round Toward 0

+
+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +
Floating Point Denorm Mode Enumeration Values

Enumeration Name

Value

Description

FLOAT_DENORM_MODE_FLUSH_SRC_DST

0

Flush Source and Destination +Denorms

FLOAT_DENORM_MODE_FLUSH_DST

1

Flush Output Denorms

FLOAT_DENORM_MODE_FLUSH_SRC

2

Flush Source Denorms

FLOAT_DENORM_MODE_FLUSH_NONE

3

No Flush

+
+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +
System VGPR Work-Item ID Enumeration Values

Enumeration Name

Value

Description

SYSTEM_VGPR_WORKITEM_ID_X

0

Set work-item X dimension +ID.

SYSTEM_VGPR_WORKITEM_ID_X_Y

1

Set work-item X and Y +dimensions ID.

SYSTEM_VGPR_WORKITEM_ID_X_Y_Z

2

Set work-item X, Y and Z +dimensions ID.

SYSTEM_VGPR_WORKITEM_ID_UNDEFINED

3

Undefined.

+
+
+
+
+

Initial Kernel Execution State

+

This section defines the register state that will be set up by the packet +processor prior to the start of execution of every wavefront. This is limited by +the constraints of the hardware controllers of CP/ADC/SPI.

+

The order of the SGPR registers is defined, but the compiler can specify which +ones are actually set up in the kernel descriptor using the enable_sgpr_* bit +fields (see Kernel Descriptor). The register numbers used +for enabled registers are dense starting at SGPR0: the first enabled register is +SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have +an SGPR number.

+

The initial SGPRs comprise up to 16 User SGPRs that are set by CP and apply to +all wavefronts of the grid. It is possible to specify more than 16 User SGPRs +using the enable_sgpr_* bit fields, in which case only the first 16 are +actually initialized. These are then immediately followed by the System SGPRs +that are set up by ADC/SPI and can have different values for each wavefront of +the grid dispatch.

+

SGPR register initial state is defined in +SGPR Register Set Up Order.

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SGPR Register Set Up Order

SGPR Order

Name +(kernel descriptor enable +field)

Number +of +SGPRs

Description

First

Private Segment Buffer +(enable_sgpr_private +_segment_buffer)

4

See +Private Segment Buffer.

then

Dispatch Ptr +(enable_sgpr_dispatch_ptr)

2

64-bit address of AQL dispatch +packet for kernel dispatch +actually executing.

then

Queue Ptr +(enable_sgpr_queue_ptr)

2

64-bit address of amd_queue_t +object for AQL queue on which +the dispatch packet was +queued.

then

Kernarg Segment Ptr +(enable_sgpr_kernarg +_segment_ptr)

2

64-bit address of Kernarg +segment. This is directly +copied from the +kernarg_address in the kernel +dispatch packet.

+

Having CP load it once avoids +loading it at the beginning of +every wavefront.

+

then

Dispatch Id +(enable_sgpr_dispatch_id)

2

64-bit Dispatch ID of the +dispatch packet being +executed.

then

Flat Scratch Init +(enable_sgpr_flat_scratch +_init)

2

See +Flat Scratch.

then

Private Segment Size +(enable_sgpr_private +_segment_size)

1

The 32-bit byte size of a +single work-item’s memory +allocation. This is the +value from the kernel +dispatch packet Private +Segment Byte Size rounded up +by CP to a multiple of +DWORD.

+

Having CP load it once avoids +loading it at the beginning of +every wavefront.

+

This is not used for +GFX7-GFX8 since it is the same +value as the second SGPR of +Flat Scratch Init. However, it +may be needed for GFX9-GFX10 which +changes the meaning of the +Flat Scratch Init value.

+

then

Work-Group Id X +(enable_sgpr_workgroup_id +_X)

1

32-bit work-group id in X +dimension of grid for +wavefront.

then

Work-Group Id Y +(enable_sgpr_workgroup_id +_Y)

1

32-bit work-group id in Y +dimension of grid for +wavefront.

then

Work-Group Id Z +(enable_sgpr_workgroup_id +_Z)

1

32-bit work-group id in Z +dimension of grid for +wavefront.

then

Work-Group Info +(enable_sgpr_workgroup +_info)

1

{first_wavefront, 14’b0000, +ordered_append_term[10:0], +threadgroup_size_in_wavefronts[5:0]}

then

Scratch Wavefront Offset +(enable_sgpr_private +_segment_wavefront_offset)

1

See +Flat Scratch +and +Private Segment Buffer.

+
+

The order of the VGPR registers is defined, but the compiler can specify which +ones are actually set up in the kernel descriptor using the enable_vgpr* bit +fields (see Kernel Descriptor). The register numbers used +for enabled registers are dense starting at VGPR0: the first enabled register is +VGPR0, the next enabled register is VGPR1 etc.; disabled registers do not have a +VGPR number.

+

There are different methods used for the VGPR initial state:

+
    +
  • Unless the Target Properties column of AMDGPU Processors +specifies otherwise, a separate VGPR register is used per work-item ID. The +VGPR register initial state for this method is defined in +VGPR Register Set Up Order for Unpacked Work-Item ID Method.

  • +
  • If the Target Properties column of AMDGPU Processors +specifies Packed work-item IDs, the initial value of VGPR0 register is used +for all work-item IDs. The register layout for this method is defined in +Register Layout for Packed Work-Item ID Method.

    + + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + +
    VGPR Register Set Up Order for Unpacked Work-Item ID Method

    VGPR Order

    Name +(kernel descriptor enable +field)

    Number +of +VGPRs

    Description

    First

    Work-Item Id X +(Always initialized)

    1

    32-bit work-item id in X +dimension of work-group for +wavefront lane.

    then

    Work-Item Id Y +(enable_vgpr_workitem_id +> 0)

    1

    32-bit work-item id in Y +dimension of work-group for +wavefront lane.

    then

    Work-Item Id Z +(enable_vgpr_workitem_id +> 1)

    1

    32-bit work-item id in Z +dimension of work-group for +wavefront lane.

    +
  • +
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Register Layout for Packed Work-Item ID Method

Bits

Size

Field Name

Description

0:9

10 bits

Work-Item Id X

Work-item id in X +dimension of work-group for +wavefront lane.

+

Always initialized.

+

10:19

10 bits

Work-Item Id Y

Work-item id in Y +dimension of work-group for +wavefront lane.

+

Initialized if enable_vgpr_workitem_id > +0, otherwise set to 0.

+

20:29

10 bits

Work-Item Id Z

Work-item id in Z +dimension of work-group for +wavefront lane.

+

Initialized if enable_vgpr_workitem_id > +1, otherwise set to 0.

+

30:31

2 bits

Reserved, set to 0.

+
+

The setting of registers is done by GPU CP/ADC/SPI hardware as follows:

+
    +
  1. SGPRs before the Work-Group Ids are set by CP using the 16 User Data +registers.

  2. +
  3. Work-group Id registers X, Y, Z are set by ADC which supports any +combination including none.

  4. +
  5. Scratch Wavefront Offset is set by SPI on a per-wavefront basis which is why +its value cannot be included with the flat scratch init value which is per +queue (see Flat Scratch).

  6. +
  7. The VGPRs are set by SPI which only supports specifying either (X), (X, Y) +or (X, Y, Z).

  8. +
  9. Flat Scratch register pair initialization is described in +Flat Scratch.

  10. +
+

The global segment can be accessed either using buffer instructions (GFX6 which +has V# 64-bit address support), flat instructions (GFX7-GFX10), or global +instructions (GFX9-GFX10).

+

If buffer operations are used, then the compiler can generate a V# with the +following properties:

+
    +
  • base address of 0

  • +
  • no swizzle

  • +
  • ATC: 1 if IOMMU present (such as APU)

  • +
  • ptr64: 1

  • +
  • MTYPE set to support memory coherence that matches the runtime (such as CC for +APU and NC for dGPU).

  • +
+
+
+

Kernel Prolog

+

The compiler performs initialization in the kernel prologue depending on the +target and information about things like stack usage in the kernel and called +functions. Some of this initialization requires the compiler to request certain +User and System SGPRs be present in the +Initial Kernel Execution State via the +Kernel Descriptor.

+
+
CFI
+
    +
  1. The CFI return address is undefined.

  2. +
  3. The CFI CFA is defined using an expression which evaluates to a location +description that comprises one memory location description for the +DW_ASPACE_AMDGPU_private_lane address space address 0.

  4. +
+
+
+
M0
+
+
GFX6-GFX8

The M0 register must be initialized with a value at least the total LDS size +if the kernel may access LDS via DS or flat operations. Total LDS size is +available in the dispatch packet. For M0, it is also possible to use the maximum +possible value of LDS for the given target (0x7FFF for GFX6 and 0xFFFF for +GFX7-GFX8).

+
+
GFX9-GFX10

The M0 register is not used for range checking LDS accesses and so does not +need to be initialized in the prolog.

+
+
+
+
+
Stack Pointer
+

If the kernel has function calls it must set up the ABI stack pointer described +in Non-Kernel Functions by setting +SGPR32 to the unswizzled scratch offset of the address past the last local +allocation.

+
+
+
Frame Pointer
+

If the kernel needs a frame pointer for the reasons defined in +SIFrameLowering then SGPR33 is used and is always set to 0 in the +kernel prolog. If a frame pointer is not required then all uses of the frame +pointer are replaced with immediate 0 offsets.

+
+
+
Flat Scratch
+

There are different methods used for initializing flat scratch:

+
    +
  • If the Target Properties column of AMDGPU Processors +specifies Does not support generic address space:

    +

    Flat scratch is not supported and there is no flat scratch register pair.

    +
  • +
  • If the Target Properties column of AMDGPU Processors +specifies Offset flat scratch:

    +

    If the kernel or any function it calls may use flat operations to access +scratch memory, the prolog code must set up the FLAT_SCRATCH register pair +(FLAT_SCRATCH_LO/FLAT_SCRATCH_HI). Initialization uses Flat Scratch Init and +Scratch Wavefront Offset SGPR registers (see +Initial Kernel Execution State):

    +
      +
    1. The low word of Flat Scratch Init is the 32-bit byte offset from +SH_HIDDEN_PRIVATE_BASE_VIMID to the base of scratch backing memory +being managed by SPI for the queue executing the kernel dispatch. This is +the same value used in the Scratch Segment Buffer V# base address.

      +

      CP obtains this from the runtime. (The Scratch Segment Buffer base address +is SH_HIDDEN_PRIVATE_BASE_VIMID plus this offset.)

      +

      The prolog must add the value of Scratch Wavefront Offset to get the +wavefront’s byte scratch backing memory offset from +SH_HIDDEN_PRIVATE_BASE_VIMID.

      +

      The Scratch Wavefront Offset must also be used as an offset with Private +segment address when using the Scratch Segment Buffer.

      +

      Since FLAT_SCRATCH_HI is in units of 256 bytes, the offset must be right +shifted by 8 before moving into FLAT_SCRATCH_HI.

      +

      FLAT_SCRATCH_HI corresponds to SGPRn-4 on GFX7, and SGPRn-6 on GFX8 (where +SGPRn is the highest numbered SGPR allocated to the wavefront). +FLAT_SCRATCH_HI is multiplied by 256 (as it is in units of 256 bytes) and +added to SH_HIDDEN_PRIVATE_BASE_VIMID to calculate the per wavefront +FLAT SCRATCH BASE in flat memory instructions that access the scratch +aperture.

      +
    2. +
    3. The second word of Flat Scratch Init is the 32-bit byte size of a single +work-item’s scratch memory usage.

      +

      CP obtains this from the runtime, and it is always a multiple of DWORD. CP +checks that the value in the kernel dispatch packet Private Segment Byte +Size is not larger and requests the runtime to increase the queue’s scratch +size if necessary.

      +

      CP directly loads from the kernel dispatch packet Private Segment Byte Size +field and rounds up to a multiple of DWORD. Having CP load it once avoids +loading it at the beginning of every wavefront.

      +

      The kernel prolog code must move it to FLAT_SCRATCH_LO which is SGPRn-3 on +GFX7 and SGPRn-5 on GFX8. FLAT_SCRATCH_LO is used as the FLAT SCRATCH SIZE +in flat memory instructions.

      +
    4. +
    +
  • +
  • If the Target Properties column of AMDGPU Processors +specifies Absolute flat scratch:

    +

    If the kernel or any function it calls may use flat operations to access +scratch memory, the prolog code must set up the FLAT_SCRATCH register pair +(FLAT_SCRATCH_LO/FLAT_SCRATCH_HI which are in SGPRn-4/SGPRn-3). Initialization +uses Flat Scratch Init and Scratch Wavefront Offset SGPR registers (see +Initial Kernel Execution State):

    +

    The Flat Scratch Init is the 64-bit address of the base of scratch backing +memory being managed by SPI for the queue executing the kernel dispatch.

    +

    CP obtains this from the runtime.

    +

    The kernel prolog must add the value of the wave’s Scratch Wavefront Offset +and move the result as a 64-bit value to the FLAT_SCRATCH SGPR register pair +which is SGPRn-6 and SGPRn-5. It is used as the FLAT SCRATCH BASE in flat +memory instructions.

    +

    The Scratch Wavefront Offset must also be used as an offset with Private +segment address when using the Scratch Segment Buffer (see +Private Segment Buffer).

    +
  • +
  • If the Target Properties column of AMDGPU Processors +specifies Architected flat scratch:

    +

    If ENABLE_PRIVATE_SEGMENT is enabled in +compute_pgm_rsrc2 for GFX6-GFX10 then the FLAT_SCRATCH +register pair will be initialized to the 64-bit address of the base of scratch +backing memory being managed by SPI for the queue executing the kernel +dispatch plus the value of the wave’s Scratch Wavefront Offset for use as the +flat scratch base in flat memory instructions.

    +
  • +
+
+
+
Private Segment Buffer
+

If the Target Properties column of AMDGPU Processors specifies +Architected flat scratch then a Private Segment Buffer is not supported. +Instead the flat SCRATCH instructions are used.

+

Otherwise, Private Segment Buffer SGPR register is used to initialize 4 SGPRs +that are used as a V# to access scratch. CP uses the value provided by the +runtime. It is used, together with Scratch Wavefront Offset as an offset, to +access the private memory space using a segment address. See +Initial Kernel Execution State.

+

The scratch V# is a four-aligned SGPR and always selected for the kernel as +follows:

+
+
    +
  • If it is known during instruction selection that there is stack usage, +SGPR0-3 is reserved for use as the scratch V#. Stack usage is assumed if +optimizations are disabled (-O0), if stack objects already exist (for +locals, etc.), or if there are any function calls.

  • +
  • Otherwise, four high numbered SGPRs beginning at a four-aligned SGPR index +are reserved for the tentative scratch V#. These will be used if it is +determined that spilling is needed.

    +
      +
    • If no use is made of the tentative scratch V#, then it is unreserved, +and the register count is determined ignoring it.

    • +
    • If use is made of the tentative scratch V#, then its register numbers +are shifted to the first four-aligned SGPR index after the highest one +allocated by the register allocator, and all uses are updated. The +register count includes them in the shifted location.

    • +
    • In either case, if the processor has the SGPR allocation bug, the +tentative allocation is not shifted or unreserved in order to ensure +the register count is higher to work around the bug.

    • +
    +
    +

    Note

    +

    This approach of using a tentative scratch V# and shifting the register +numbers if used avoids having to perform register allocation a second +time if the tentative V# is eliminated. This is more efficient and +avoids the problem that the second register allocation may perform +spilling which will fail as there is no longer a scratch V#.

    +
    +
  • +
+
+

When the kernel prolog code is being emitted it is known whether the scratch V# +described above is actually used. If it is, the prolog code must set it up by +copying the Private Segment Buffer to the scratch V# registers and then adding +the Private Segment Wavefront Offset to the queue base address in the V#. The +result is a V# with a base address pointing to the beginning of the wavefront +scratch backing memory.

+

The Private Segment Buffer is always requested, but the Private Segment +Wavefront Offset is only requested if it is used (see +Initial Kernel Execution State).

+
+
+
+

Memory Model

+

This section describes the mapping of the LLVM memory model onto AMDGPU machine +code (see Memory Model for Concurrent Operations).

+

The AMDGPU backend supports the memory synchronization scopes specified in +Memory Scopes.

+

The code sequences used to implement the memory model specify the order of +instructions that a single thread must execute. The s_waitcnt and cache +management instructions such as buffer_wbinvl1_vol are defined with respect +to other memory instructions executed by the same thread. This allows them to be +moved earlier or later which can allow them to be combined with other instances +of the same instruction, or hoisted/sunk out of loops to improve performance. +Only the instructions related to the memory model are given; additional +s_waitcnt instructions are required to ensure registers are defined before +being used. These may be able to be combined with the memory model s_waitcnt +instructions as described above.

+

The AMDGPU backend supports the following memory models:

+
+
+
HSA Memory Model [HSA]

The HSA memory model uses a single happens-before relation for all address +spaces (see Address Spaces).

+
+
OpenCL Memory Model [OpenCL]

The OpenCL memory model has separate happens-before relations for the +global and local address spaces. Only a fence specifying both global and +local address space, and seq_cst instructions join the relationships. Since +the LLVM memfence instruction does not allow an address space to be +specified the OpenCL fence has to conservatively assume both local and +global address space was specified. However, optimizations can often be +done to eliminate the additional s_waitcnt instructions when there are +no intervening memory instructions which access the corresponding address +space. The code sequences in the table indicate what can be omitted for the +OpenCL memory model. The target triple environment is used to determine if the +source language is OpenCL (see OpenCL).

+
+
+
+

ds/flat_load/store/atomic instructions to local memory are termed LDS +operations.

+

buffer/global/flat_load/store/atomic instructions to global memory are +termed vector memory operations.

+

Private address space uses buffer_load/store using the scratch V# +(GFX6-GFX8), or scratch_load/store (GFX9-GFX10). Since only a single thread +is accessing the memory, atomic memory orderings are not meaningful, and all +accesses are treated as non-atomic.

+

Constant address space uses buffer/global_load instructions (or equivalent +scalar memory instructions). Since the constant address space contents do not +change during the execution of a kernel dispatch it is not legal to perform +stores, and atomic memory orderings are not meaningful, and all accesses are +treated as non-atomic.

+

A memory synchronization scope wider than work-group is not meaningful for the +group (LDS) address space and is treated as work-group.

+

The memory model does not support the region address space which is treated as +non-atomic.

+

Acquire memory ordering is not meaningful on store atomic instructions and is +treated as non-atomic.

+

Release memory ordering is not meaningful on load atomic instructions and is +treated as non-atomic.

+

Acquire-release memory ordering is not meaningful on load or store atomic +instructions and is treated as acquire and release respectively.

+

The memory order also adds the single thread optimization constraints defined in +table +AMDHSA Memory Model Single Thread Optimization Constraints.

+
+
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Memory Model Single Thread Optimization Constraints

LLVM Memory

Optimization Constraints

Ordering

unordered

none

monotonic

none

acquire

    +
  • If a load atomic/atomicrmw then no following load/load +atomic/store/store atomic/atomicrmw/fence instruction can be +moved before the acquire.

  • +
  • If a fence then same as load atomic, plus no preceding +associated fence-paired-atomic can be moved after the fence.

  • +
+

release

    +
  • If a store atomic/atomicrmw then no preceding load/load +atomic/store/store atomic/atomicrmw/fence instruction can be +moved after the release.

  • +
  • If a fence then same as store atomic, plus no following +associated fence-paired-atomic can be moved before the +fence.

  • +
+

acq_rel

Same constraints as both acquire and release.

seq_cst

    +
  • If a load atomic then same constraints as acquire, plus no +preceding sequentially consistent load atomic/store +atomic/atomicrmw/fence instruction can be moved after the +seq_cst.

  • +
  • If a store atomic then the same constraints as release, plus +no following sequentially consistent load atomic/store +atomic/atomicrmw/fence instruction can be moved before the +seq_cst.

  • +
  • If an atomicrmw/fence then same constraints as acq_rel.

  • +
+
+
+

The code sequences used to implement the memory model are defined in the +following sections:

+ +
+
Memory Model GFX6-GFX9
+

For GFX6-GFX9:

+
    +
  • Each agent has multiple shader arrays (SA).

  • +
  • Each SA has multiple compute units (CU).

  • +
  • Each CU has multiple SIMDs that execute wavefronts.

  • +
  • The wavefronts for a single work-group are executed in the same CU but may be +executed by different SIMDs.

  • +
  • Each CU has a single LDS memory shared by the wavefronts of the work-groups +executing on it.

  • +
  • All LDS operations of a CU are performed as wavefront wide operations in a +global order and involve no caching. Completion is reported to a wavefront in +execution order.

  • +
  • The LDS memory has multiple request queues shared by the SIMDs of a +CU. Therefore, the LDS operations performed by different wavefronts of a +work-group can be reordered relative to each other, which can result in +reordering the visibility of vector memory operations with respect to LDS +operations of other wavefronts in the same work-group. A s_waitcnt +lgkmcnt(0) is required to ensure synchronization between LDS operations and +vector memory operations between wavefronts of a work-group, but not between +operations performed by the same wavefront.

  • +
  • The vector memory operations are performed as wavefront wide operations and +completion is reported to a wavefront in execution order. The exception is +that for GFX7-GFX9 flat_load/store/atomic instructions can report out of +vector memory order if they access LDS memory, and out of LDS operation order +if they access global memory.

  • +
  • The vector memory operations access a single vector L1 cache shared by all +SIMDs of a CU. Therefore, no special action is required for coherence between the +lanes of a single wavefront, or for coherence between wavefronts in the same +work-group. A buffer_wbinvl1_vol is required for coherence between +wavefronts executing in different work-groups as they may be executing on +different CUs.

  • +
  • The scalar memory operations access a scalar L1 cache shared by all wavefronts +on a group of CUs. The scalar and vector L1 caches are not coherent. However, +scalar operations are used in a restricted way so do not impact the memory +model. See Memory Spaces.

  • +
  • The vector and scalar memory operations use an L2 cache shared by all CUs on +the same agent.

  • +
  • The L2 cache has independent channels to service disjoint ranges of virtual +addresses.

  • +
  • Each CU has a separate request queue per channel. Therefore, the vector and +scalar memory operations performed by wavefronts executing in different +work-groups (which may be executing on different CUs) of an agent can be +reordered relative to each other. A s_waitcnt vmcnt(0) is required to +ensure synchronization between vector memory operations of different CUs. It +ensures a previous vector memory operation has completed before executing a +subsequent vector memory or LDS operation and so can be used to meet the +requirements of acquire and release.

  • +
  • The L2 cache can be kept coherent with other agents on some targets, or ranges +of virtual addresses can be set up to bypass it to ensure system coherence.

  • +
+

Scalar memory operations are only used to access memory that is proven to not +change during the execution of the kernel dispatch. This includes constant +address space and global address space for program scope const variables. +Therefore, the kernel machine code does not have to maintain the scalar cache to +ensure it is coherent with the vector caches. The scalar and vector caches are +invalidated between kernel dispatches by CP since constant address space data +may change between kernel dispatch executions. See +Memory Spaces.

+

The one exception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a s_dcache_wb is inserted before the s_endpgm and before a function +return since the locations may be used for vector memory instructions by a +future wavefront that uses the same scratch area, or a function call that +creates a frame at the same address, respectively. There is no need for a +s_dcache_inv as all scalar writes are write-before-read in the same thread.

+

For kernarg backing memory:

+
    +
  • CP invalidates the L1 cache at the start of each kernel dispatch.

  • +
  • On dGPU the kernarg backing memory is allocated in host memory accessed as +MTYPE UC (uncached) to avoid needing to invalidate the L2 cache. This also +causes it to be treated as non-volatile and so is not invalidated by +*_vol.

  • +
  • On APU the kernarg backing memory is accessed as MTYPE CC (cache coherent) +and so the L2 cache will be coherent with the CPU and other agents.

  • +
+

Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC_NV (non-coherent non-volatile). Since the private address space is +only accessed by a single thread, and is always write-before-read, there is +never a need to invalidate these entries from the L1 cache. Hence all cache +invalidates are done as *_vol to only invalidate the volatile cache lines.

+

The code sequences used to implement the memory model for GFX6-GFX9 are defined +in table AMDHSA Memory Model Code Sequences GFX6-GFX9.

+
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Memory Model Code Sequences GFX6-GFX9

LLVM Instr

LLVM Memory +Ordering

LLVM Memory +Sync Scope

AMDGPU +Address +Space

AMDGPU Machine Code +GFX6-GFX9

Non-Atomic

load

none

none

    +
  • global

  • +
  • generic

  • +
  • private

  • +
  • constant

  • +
+
    +
  • !volatile & !nontemporal

    +
      +
    1. buffer/global/flat_load

    2. +
    +
  • +
  • !volatile & nontemporal

    +
      +
    1. buffer/global/flat_load +glc=1 slc=1

    2. +
    +
  • +
  • volatile

    +
      +
    1. buffer/global/flat_load +glc=1

    2. +
    3. s_waitcnt vmcnt(0)

    4. +
    +
    +
      +
    • Must happen before +any following volatile +global/generic +load/store.

    • +
    • Ensures that +volatile +operations to +different +addresses will not +be reordered by +hardware.

    • +
    +
    +
  • +
+

load

none

none

    +
  • local

  • +
+
    +
  1. ds_load

  2. +
+

store

none

none

    +
  • global

  • +
  • generic

  • +
  • private

  • +
  • constant

  • +
+
    +
  • !volatile & !nontemporal

    +
      +
    1. buffer/global/flat_store

    2. +
    +
  • +
  • !volatile & nontemporal

    +
      +
    1. buffer/global/flat_store +glc=1 slc=1

    2. +
    +
  • +
  • volatile

    +
      +
    1. buffer/global/flat_store

    2. +
    3. s_waitcnt vmcnt(0)

    4. +
    +
    +
      +
    • Must happen before +any following volatile +global/generic +load/store.

    • +
    • Ensures that +volatile +operations to +different +addresses will not +be reordered by +hardware.

    • +
    +
    +
  • +
+

store

none

none

    +
  • local

  • +
+
    +
  1. ds_store

  2. +
+

Unordered Atomic

load atomic

unordered

any

any

Same as non-atomic.

store atomic

unordered

any

any

Same as non-atomic.

atomicrmw

unordered

any

any

Same as monotonic atomic.

Monotonic Atomic

load atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_load

  2. +
+

load atomic

monotonic

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load +glc=1

  2. +
+

store atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_store

  2. +
+

atomicrmw

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_atomic

  2. +
+

Acquire Atomic

load atomic

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_load

  2. +
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_load

  2. +
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • local

  • +
  • generic

  • +
+
    +
  1. ds/flat_load

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than a local load +atomic value being +acquired.

  • +
+
+

load atomic

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_load +glc=1

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the load +has completed +before invalidating +the cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale global data.

  • +
+
+

load atomic

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_load glc=1

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the flat_load +has completed +before invalidating +the cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_atomic

  2. +
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_atomic

  2. +
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • local

  • +
  • generic

  • +
+
    +
  1. ds/flat_atomic

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than a local +atomicrmw value +being acquired.

  • +
+
+

atomicrmw

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

fence

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

acquire

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit.

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate. If +fence had an +address space then +set to address +space of OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Must happen after +any preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than the +value read by the +fence-paired-atomic.

  • +
+
+

fence

acquire

    +
  • agent

  • +
  • system

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Must happen before +the following +buffer_wbinvl1_vol.

  • +
  • Ensures that the +fence-paired-atomic +has completed +before invalidating +the +cache. Therefore +any following +locations read must +be no older than +the value read by +the +fence-paired-atomic.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before any +following global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

Release Atomic

store atomic

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_store

  2. +
+

store atomic

release

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +memory operations +to local have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

release

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_store

  2. +
+

store atomic

release

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +memory operations +to memory have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_store

  2. +
+

atomicrmw

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_atomic

  2. +
+

atomicrmw

release

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to local have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

release

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_atomic

  2. +
+

atomicrmw

release

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global and local +have completed +before performing +the atomicrmw that +is being released.

  • +
+
+
    +
  1. buffer/global/flat_atomic

  2. +
+

fence

release

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

release

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit.

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate. If +fence had an +address space then +set to address +space of OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Must happen after +any preceding +local/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Must happen before +any following store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Ensures that all +memory operations +to local have +completed before +performing the +following +fence-paired-atomic.

  • +
+
+

fence

release

    +
  • agent

  • +
  • system

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate. If +fence had an +address space then +set to address +space of OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +any following store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Ensures that all +memory operations +have +completed before +performing the +following +fence-paired-atomic.

  • +
+
+

Acquire-Release Atomic

atomicrmw

acq_rel

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_atomic

  2. +
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to local have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global_atomic

  2. +
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_atomic

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than the local +atomicrmw value being +acquired.

  • +
+
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to local have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than a local +atomicrmw value being +acquired.

  • +
+
+

atomicrmw

acq_rel

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acq_rel

    +
  • agent

  • +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

fence

acq_rel

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

acq_rel

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit.

  • +
  • However, +since LLVM +currently has no +address space on +the fence need to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Must happen after +any preceding +local/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that all +memory operations +to local have +completed before +performing any +following global +memory operations.

  • +
  • Ensures that the +preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +acquire-fence-paired-atomic) +has completed +before following +global memory +operations. This +satisfies the +requirements of +acquire.

  • +
  • Ensures that all +previous memory +operations have +completed before a +following +local/generic store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +release-fence-paired-atomic). +This satisfies the +requirements of +release.

  • +
+
+

fence

acq_rel

    +
  • agent

  • +
  • system

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +buffer_wbinvl1_vol.

  • +
  • Ensures that the +preceding +global/local/generic +load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +acquire-fence-paired-atomic) +has completed +before invalidating +the cache. This +satisfies the +requirements of +acquire.

  • +
  • Ensures that all +previous memory +operations have +completed before a +following +global/local/generic +store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +release-fence-paired-atomic). +This satisfies the +requirements of +release.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data. This +satisfies the +requirements of +acquire.

  • +
+
+

Sequential Consistent Atomic

load atomic

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +load atomic acquire, +except must generate +all instructions even +for OpenCL.

load atomic

seq_cst

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0)

  2. +
+
+
    +
  • Must +happen after +preceding +local/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +lgkmcnt(0) and so do +not need to be +considered.)

  • +
  • Ensures any +preceding +sequentially +consistent local +memory instructions +have completed +before executing +this sequentially +consistent +instruction. This +prevents reordering +a seq_cst store +followed by a +seq_cst load. (Note +that seq_cst is +stronger than +acquire/release as +the reordering of +load acquire +followed by a store +release is +prevented by the +s_waitcnt of +the release, but +there is nothing +preventing a store +release followed by +load acquire from +completing out of +order. The s_waitcnt +could be placed after +seq_store or before +the seq_load. We +choose the load to +make the s_waitcnt be +as late as possible +so that the store +may have already +completed.)

  • +
+
+
    +
  1. Following +instructions same as +corresponding load +atomic acquire, +except must generate +all instructions even +for OpenCL.

  2. +
+

load atomic

seq_cst

    +
  • workgroup

  • +
+
    +
  • local

  • +
+

Same as corresponding +load atomic acquire, +except must generate +all instructions even +for OpenCL.

load atomic

seq_cst

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • Could be split into +separate s_waitcnt +vmcnt(0) +and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +preceding +local/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +lgkmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vmcnt(0) +must happen after +preceding +global/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vmcnt(0) and so do +not need to be +considered.)

  • +
  • Ensures any +preceding +sequentially +consistent global +memory instructions +have completed +before executing +this sequentially +consistent +instruction. This +prevents reordering +a seq_cst store +followed by a +seq_cst load. (Note +that seq_cst is +stronger than +acquire/release as +the reordering of +load acquire +followed by a store +release is +prevented by the +s_waitcnt of +the release, but +there is nothing +preventing a store +release followed by +load acquire from +completing out of +order. The s_waitcnt +could be placed after +seq_store or before +the seq_load. We +choose the load to +make the s_waitcnt be +as late as possible +so that the store +may have already +completed.)

  • +
+
+
    +
  1. Following +instructions same as +corresponding load +atomic acquire, +except must generate +all instructions even +for OpenCL.

  2. +
+

store atomic

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +store atomic release, +except must generate +all instructions even +for OpenCL.

atomicrmw

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +atomicrmw acq_rel, +except must generate +all instructions even +for OpenCL.

fence

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+

none

Same as corresponding +fence acq_rel, +except must generate +all instructions even +for OpenCL.

+
+
+
+
Memory Model GFX90A
+

For GFX90A:

+
    +
  • Each agent has multiple shader arrays (SA).

  • +
  • Each SA has multiple compute units (CU).

  • +
  • Each CU has multiple SIMDs that execute wavefronts.

  • +
  • The wavefronts for a single work-group are executed in the same CU but may be +executed by different SIMDs. The exception is when in tgsplit execution mode +when the wavefronts may be executed by different SIMDs in different CUs.

  • +
  • Each CU has a single LDS memory shared by the wavefronts of the work-groups +executing on it. The exception is when in tgsplit execution mode when no LDS +is allocated as wavefronts of the same work-group can be in different CUs.

  • +
  • All LDS operations of a CU are performed as wavefront wide operations in a +global order and involve no caching. Completion is reported to a wavefront in +execution order.

  • +
  • The LDS memory has multiple request queues shared by the SIMDs of a +CU. Therefore, the LDS operations performed by different wavefronts of a +work-group can be reordered relative to each other, which can result in +reordering the visibility of vector memory operations with respect to LDS +operations of other wavefronts in the same work-group. A s_waitcnt +lgkmcnt(0) is required to ensure synchronization between LDS operations and +vector memory operations between wavefronts of a work-group, but not between +operations performed by the same wavefront.

  • +
  • The vector memory operations are performed as wavefront wide operations and +completion is reported to a wavefront in execution order. The exception is +that flat_load/store/atomic instructions can report out of vector memory +order if they access LDS memory, and out of LDS operation order if they access +global memory.

  • +
  • The vector memory operations access a single vector L1 cache shared by all +SIMDs of a CU. Therefore:

    +
      +
    • No special action is required for coherence between the lanes of a single +wavefront.

    • +
    • No special action is required for coherence between wavefronts in the same +work-group since they execute on the same CU. The exception is when in +tgsplit execution mode as wavefronts of the same work-group can be in +different CUs and so a buffer_wbinvl1_vol is required as described in +the following item.

    • +
    • A buffer_wbinvl1_vol is required for coherence between wavefronts +executing in different work-groups as they may be executing on different +CUs.

    • +
    +
  • +
  • The scalar memory operations access a scalar L1 cache shared by all wavefronts +on a group of CUs. The scalar and vector L1 caches are not coherent. However, +scalar operations are used in a restricted way so do not impact the memory +model. See Memory Spaces.

  • +
  • The vector and scalar memory operations use an L2 cache shared by all CUs on +the same agent.

    +
      +
    • The L2 cache has independent channels to service disjoint ranges of virtual +addresses.

    • +
    • Each CU has a separate request queue per channel. Therefore, the vector and +scalar memory operations performed by wavefronts executing in different +work-groups (which may be executing on different CUs), or the same +work-group if executing in tgsplit mode, of an agent can be reordered +relative to each other. A s_waitcnt vmcnt(0) is required to ensure +synchronization between vector memory operations of different CUs. It +ensures a previous vector memory operation has completed before executing a +subsequent vector memory or LDS operation and so can be used to meet the +requirements of acquire and release.

    • +
    • The L2 cache of one agent can be kept coherent with other agents by: +using the MTYPE RW (read-write) or MTYPE CC (cache-coherent) with the PTE +C-bit for memory local to the L2; and using the MTYPE NC (non-coherent) with +the PTE C-bit set or MTYPE UC (uncached) for memory not local to the L2.

      +
        +
      • Any local memory cache lines will be automatically invalidated by writes +from CUs associated with other L2 caches, or writes from the CPU, due to +the cache probe caused by coherent requests. Coherent requests are caused +by GPU accesses to pages with the PTE C-bit set, by CPU accesses over +XGMI, and by PCIe requests that are configured to be coherent requests.

      • +
      • XGMI accesses from the CPU to local memory may be cached on the CPU. +Subsequent access from the GPU will automatically invalidate or writeback +the CPU cache due to the L2 probe filter and the PTE C-bit being set.

      • +
      • Since all work-groups on the same agent share the same L2, no L2 +invalidation or writeback is required for coherence.

      • +
      • To ensure coherence of local and remote memory writes of work-groups in +different agents a buffer_wbl2 is required. It will writeback dirty L2 +cache lines of MTYPE RW (used for local coarse grain memory) and MTYPE NC +(used for remote coarse grain memory). Note that MTYPE CC (used for local +fine grain memory) causes write through to DRAM, and MTYPE UC (used for +remote fine grain memory) bypasses the L2, so both will never result in +dirty L2 cache lines.

      • +
      • To ensure coherence of local and remote memory reads of work-groups in +different agents a buffer_invl2 is required. It will invalidate L2 +cache lines with MTYPE NC (used for remote coarse grain memory). Note that +MTYPE CC (used for local fine grain memory) and MTYPE RW (used for local +coarse grain memory) cause local reads to be invalidated by remote writes +with the PTE C-bit so these cache lines are not invalidated. Note that +MTYPE UC (used for remote fine grain memory) bypasses the L2, so will +never result in L2 cache lines that need to be invalidated.

      • +
      +
    • +
    • PCIe access from the GPU to the CPU memory is kept coherent by using the +MTYPE UC (uncached) which bypasses the L2.

    • +
    +
  • +
+

Scalar memory operations are only used to access memory that is proven to not +change during the execution of the kernel dispatch. This includes constant +address space and global address space for program scope const variables. +Therefore, the kernel machine code does not have to maintain the scalar cache to +ensure it is coherent with the vector caches. The scalar and vector caches are +invalidated between kernel dispatches by CP since constant address space data +may change between kernel dispatch executions. See +Memory Spaces.

+

The one exception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a s_dcache_wb is inserted before the s_endpgm and before a function +return since the locations may be used for vector memory instructions by a +future wavefront that uses the same scratch area, or a function call that +creates a frame at the same address, respectively. There is no need for a +s_dcache_inv as all scalar writes are write-before-read in the same thread.

+

For kernarg backing memory:

+
    +
  • CP invalidates the L1 cache at the start of each kernel dispatch.

  • +
  • On dGPU over XGMI or PCIe the kernarg backing memory is allocated in host +memory accessed as MTYPE UC (uncached) to avoid needing to invalidate the L2 +cache. This also causes it to be treated as non-volatile and so is not +invalidated by *_vol.

  • +
  • On APU the kernarg backing memory is accessed as MTYPE CC (cache coherent) and +so the L2 cache will be coherent with the CPU and other agents.

  • +
+

Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC_NV (non-coherent non-volatile). Since the private address space is +only accessed by a single thread, and is always write-before-read, there is +never a need to invalidate these entries from the L1 cache. Hence all cache +invalidates are done as *_vol to only invalidate the volatile cache lines.

+

The code sequences used to implement the memory model for GFX90A are defined +in table AMDHSA Memory Model Code Sequences GFX90A.

+
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Memory Model Code Sequences GFX90A

LLVM Instr

LLVM Memory +Ordering

LLVM Memory +Sync Scope

AMDGPU +Address +Space

AMDGPU Machine Code +GFX90A

Non-Atomic

load

none

none

    +
  • global

  • +
  • generic

  • +
  • private

  • +
  • constant

  • +
+
    +
  • !volatile & !nontemporal

    +
      +
    1. buffer/global/flat_load

    2. +
    +
  • +
  • !volatile & nontemporal

    +
      +
    1. buffer/global/flat_load +glc=1 slc=1

    2. +
    +
  • +
  • volatile

    +
      +
    1. buffer/global/flat_load +glc=1

    2. +
    3. s_waitcnt vmcnt(0)

    4. +
    +
    +
      +
    • Must happen before +any following volatile +global/generic +load/store.

    • +
    • Ensures that +volatile +operations to +different +addresses will not +be reordered by +hardware.

    • +
    +
    +
  • +
+

load

none

none

    +
  • local

  • +
+
    +
  1. ds_load

  2. +
+

store

none

none

    +
  • global

  • +
  • generic

  • +
  • private

  • +
  • constant

  • +
+
    +
  • !volatile & !nontemporal

    +
      +
    1. buffer/global/flat_store

    2. +
    +
  • +
  • !volatile & nontemporal

    +
      +
    1. buffer/global/flat_store +glc=1 slc=1

    2. +
    +
  • +
  • volatile

    +
      +
    1. buffer/global/flat_store

    2. +
    3. s_waitcnt vmcnt(0)

    4. +
    +
    +
      +
    • Must happen before +any following volatile +global/generic +load/store.

    • +
    • Ensures that +volatile +operations to +different +addresses will not +be reordered by +hardware.

    • +
    +
    +
  • +
+

store

none

none

    +
  • local

  • +
+
    +
  1. ds_store

  2. +
+

Unordered Atomic

load atomic

unordered

any

any

Same as non-atomic.

store atomic

unordered

any

any

Same as non-atomic.

atomicrmw

unordered

any

any

Same as monotonic atomic.

Monotonic Atomic

load atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load

  2. +
+

load atomic

monotonic

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load +glc=1

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit glc=1.

  • +
+
+

load atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_load

  2. +
+

load atomic

monotonic

    +
  • agent

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load +glc=1

  2. +
+

load atomic

monotonic

    +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load +glc=1

  2. +
+

store atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

monotonic

    +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_store

  2. +
+

atomicrmw

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

monotonic

    +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_atomic

  2. +
+

Acquire Atomic

load atomic

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_load

  2. +
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_load glc=1

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit glc=1.

  • +
+
+
    +
  1. s_waitcnt vmcnt(0)

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Must happen before the +following buffer_wbinvl1_vol.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_load

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than the local load +atomic value being +acquired.

  • +
+
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_load glc=1

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit glc=1.

  • +
+
+
    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL, omit lgkmcnt(0).

  • +
  • Must happen before +the following +buffer_wbinvl1_vol and any +following global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than a local load +atomic value being +acquired.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

load atomic

acquire

    +
  • agent

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_load +glc=1

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the load +has completed +before invalidating +the cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale global data.

  • +
+
+

load atomic

acquire

    +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global/flat_load +glc=1

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures the load +has completed +before invalidating +the caches.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

load atomic

acquire

    +
  • agent

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_load glc=1

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the flat_load +has completed +before invalidating +the cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

load atomic

acquire

    +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_load glc=1

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures the flat_load +has completed +before invalidating +the caches.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

atomicrmw

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_atomic

  2. +
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Must happen before the +following buffer_wbinvl1_vol.

  • +
  • Ensures the atomicrmw +has completed +before invalidating +the cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_atomic

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than the local +atomicrmw value +being acquired.

  • +
+
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt lgkm/vmcnt(0)

  4. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL, omit lgkmcnt(0).

  • +
  • Must happen before +the following +buffer_wbinvl1_vol and +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than a local +atomicrmw value +being acquired.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acquire

    +
  • agent

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acquire

    +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

atomicrmw

acquire

    +
  • agent

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acquire

    +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

fence

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

acquire

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, the waits must +conservatively +always be generated. If +the fence had an +address space, it would be +set to the address +space of the OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load +atomic/ +atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Must happen before +the following +buffer_wbinvl1_vol and +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than the +value read by the +fence-paired-atomic.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

fence

acquire

    +
  • agent

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, the wait must +conservatively +always be generated +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Must happen before +the following +buffer_wbinvl1_vol.

  • +
  • Ensures that the +fence-paired-atomic +has completed +before invalidating +the +cache. Therefore +any following +locations read must +be no older than +the value read by +the +fence-paired-atomic.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before any +following global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

fence

acquire

    +
  • system

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, the wait must +conservatively +always be generated +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Must happen before +the following buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures that the +fence-paired-atomic +has completed +before invalidating +the +caches. Therefore +any following +locations read must +be no older than +the value read by +the +fence-paired-atomic.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before any +following global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

Release Atomic

store atomic

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_store

  2. +
+

store atomic

release

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL, omit lgkmcnt(0).

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/store/ +load atomic/store atomic/ +atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

release

    +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_store

  2. +
+

store atomic

release

    +
  • agent

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +memory operations +to memory have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

release

    +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer_wbl2

  2. +
+
+
    +
  • Must happen before +following s_waitcnt.

  • +
  • Performs L2 writeback to +ensure previous +global/generic +store/atomicrmw are +visible at system scope.

  • +
+
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after any +preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after any +preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +memory operations +to memory and the L2 +writeback have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_store

  2. +
+

atomicrmw

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_atomic

  2. +
+

atomicrmw

release

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/store/ +load atomic/store atomic/ +atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

release

    +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_atomic

  2. +
+

atomicrmw

release

    +
  • agent

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global and local +have completed +before performing +the atomicrmw that +is being released.

  • +
+
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

release

    +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer_wbl2

  2. +
+
+
    +
  • Must happen before +following s_waitcnt.

  • +
  • Performs L2 writeback to +ensure previous +global/generic +store/atomicrmw are +visible at system scope.

  • +
+
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to memory and the L2 +writeback have +completed before +performing the +atomicrmw that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_atomic

  2. +
+

fence

release

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

release

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, the waits must +conservatively +always be generated. If +the fence had an +address space, it would be +set to the address +space of the OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/ +load atomic/store atomic/ +atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Must happen before +any following store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Ensures that all +memory operations +have +completed before +performing the +following +fence-paired-atomic.

  • +
+
+

fence

release

    +
  • agent

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, the waits must +conservatively +always be generated. If +the fence had an +address space, it would be +set to the address +space of the OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +any following store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Ensures that all +memory operations +have +completed before +performing the +following +fence-paired-atomic.

  • +
+
+

fence

release

    +
  • system

  • +
+

none

    +
  1. buffer_wbl2

  2. +
+
+
    +
  • If OpenCL and +address space is +local, omit.

  • +
  • Must happen before +following s_waitcnt.

  • +
  • Performs L2 writeback to +ensure previous +global/generic +store/atomicrmw are +visible at system scope.

  • +
+
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, the waits must +conservatively +always be generated. If +the fence had an +address space, it would be +set to the address +space of the OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +any following store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Ensures that all +memory operations +have +completed before +performing the +following +fence-paired-atomic.

  • +
+
+

Acquire-Release Atomic

atomicrmw

acq_rel

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

acq_rel

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_atomic

  2. +
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/store/ +load atomic/store atomic/ +atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Must happen before +the following +buffer_wbinvl1_vol.

  • +
  • Ensures any +following global +data read is no +older than the +atomicrmw value +being acquired.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+
    +
  1. ds_atomic

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than the local +atomicrmw value +being acquired.

  • +
+
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • generic

  • +
+
    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/store/ +load atomic/store atomic/ +atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  4. +
+
+
    +
  • If not TgSplit execution +mode, omit vmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +the following +buffer_wbinvl1_vol and +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than a local +atomicrmw value +being acquired.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acq_rel

    +
  • agent

  • +
+
    +
  • global

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acq_rel

    +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. buffer_wbl2

  2. +
+
+
    +
  • Must happen before +following s_waitcnt.

  • +
  • Performs L2 writeback to +ensure previous +global/generic +store/atomicrmw are +visible at system scope.

  • +
+
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global and L2 writeback +have completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

atomicrmw

acq_rel

    +
  • agent

  • +
+
    +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +cache.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acq_rel

    +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. buffer_wbl2

  2. +
+
+
    +
  • Must happen before +following s_waitcnt.

  • +
  • Performs L2 writeback to +ensure previous +global/generic +store/atomicrmw are +visible at system scope.

  • +
+
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global and L2 writeback +have completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

fence

acq_rel

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

acq_rel

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0).

  • +
  • However, +since LLVM +currently has no +address space on +the fence, it is necessary to +conservatively +always generate +(see comment for +previous fence).

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/ +load atomic/store atomic/ +atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing any +following global +memory operations.

  • +
  • Ensures that the +preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +acquire-fence-paired-atomic) +has completed +before following +global memory +operations. This +satisfies the +requirements of +acquire.

  • +
  • Ensures that all +previous memory +operations have +completed before a +following +local/generic store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +release-fence-paired-atomic). +This satisfies the +requirements of +release.

  • +
  • Must happen before +the following +buffer_wbinvl1_vol.

  • +
  • Ensures that the +acquire-fence-paired-atomic +has completed +before invalidating +the +cache. Therefore +any following +locations read must +be no older than +the value read by +the +acquire-fence-paired-atomic.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • If not TgSplit execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

fence

acq_rel

    +
  • agent

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, it is necessary to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +buffer_wbinvl1_vol.

  • +
  • Ensures that the +preceding +global/local/generic +load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +acquire-fence-paired-atomic) +has completed +before invalidating +the cache. This +satisfies the +requirements of +acquire.

  • +
  • Ensures that all +previous memory +operations have +completed before a +following +global/local/generic +store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +release-fence-paired-atomic). +This satisfies the +requirements of +release.

  • +
+
+
    +
  1. buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data. This +satisfies the +requirements of +acquire.

  • +
+
+

fence

acq_rel

    +
  • system

  • +
+

none

    +
  1. buffer_wbl2

  2. +
+
+
    +
  • If OpenCL and +address space is +local, omit.

  • +
  • Must happen before +following s_waitcnt.

  • +
  • Performs L2 writeback to +ensure previous +global/generic +store/atomicrmw are +visible at system scope.

  • +
+
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence, it is necessary to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and +s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following buffer_invl2 and +buffer_wbinvl1_vol.

  • +
  • Ensures that the +preceding +global/local/generic +load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +acquire-fence-paired-atomic) +has completed +before invalidating +the cache. This +satisfies the +requirements of +acquire.

  • +
  • Ensures that all +previous memory +operations have +completed before a +following +global/local/generic +store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +release-fence-paired-atomic). +This satisfies the +requirements of +release.

  • +
+
+
    +
  1. buffer_invl2; +buffer_wbinvl1_vol

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale L1 global data, +nor see stale L2 MTYPE +NC global data. +MTYPE RW and CC memory will +never be stale in L2 due to +the memory probes.

  • +
+
+

Sequential Consistent Atomic

load atomic

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +load atomic acquire, +except must generate +all instructions even +for OpenCL.

load atomic

seq_cst

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkm/vmcnt(0)

  2. +
+
+
    +
  • Use lgkmcnt(0) if not +TgSplit execution mode +and vmcnt(0) if TgSplit +execution mode.

  • +
  • s_waitcnt lgkmcnt(0) must +happen after +preceding +local/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +lgkmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vmcnt(0) +must happen after +preceding +global/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vmcnt(0) and so do +not need to be +considered.)

  • +
  • Ensures any +preceding +sequentially +consistent global/local +memory instructions +have completed +before executing +this sequentially +consistent +instruction. This +prevents reordering +a seq_cst store +followed by a +seq_cst load. (Note +that seq_cst is +stronger than +acquire/release as +the reordering of +load acquire +followed by a store +release is +prevented by the +s_waitcnt of +the release, but +there is nothing +preventing a store +release followed by +load acquire from +completing out of +order. The s_waitcnt +could be placed after +the seq_cst store or before +the seq_cst load. We +choose the load to +make the s_waitcnt be +as late as possible +so that the store +may have already +completed.)

  • +
+
+
    +
  1. Following +instructions same as +corresponding load +atomic acquire, +except must generate +all instructions even +for OpenCL.

  2. +
+

load atomic

seq_cst

    +
  • workgroup

  • +
+
    +
  • local

  • +
+

If TgSplit execution mode, +local address space cannot +be used.

+

Same as corresponding +load atomic acquire, +except must generate +all instructions even +for OpenCL.

+

load atomic

seq_cst

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If TgSplit execution mode, +omit lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) +and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +preceding +local/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +lgkmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vmcnt(0) +must happen after +preceding +global/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vmcnt(0) and so do +not need to be +considered.)

  • +
  • Ensures any +preceding +sequentially +consistent global +memory instructions +have completed +before executing +this sequentially +consistent +instruction. This +prevents reordering +a seq_cst store +followed by a +seq_cst load. (Note +that seq_cst is +stronger than +acquire/release as +the reordering of +load acquire +followed by a store +release is +prevented by the +s_waitcnt of +the release, but +there is nothing +preventing a store +release followed by +load acquire from +completing out of +order. The s_waitcnt +could be placed after +the seq_cst store or before +the seq_cst load. We +choose the load to +make the s_waitcnt be +as late as possible +so that the store +may have already +completed.)

  • +
+
+
    +
  1. Following +instructions same as +corresponding load +atomic acquire, +except must generate +all instructions even +for OpenCL.

  2. +
+

store atomic

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +store atomic release, +except must generate +all instructions even +for OpenCL.

atomicrmw

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +atomicrmw acq_rel, +except must generate +all instructions even +for OpenCL.

fence

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+

none

Same as corresponding +fence acq_rel, +except must generate +all instructions even +for OpenCL.

+
+
+
+
Memory Model GFX10
+

For GFX10:

+
    +
  • Each agent has multiple shader arrays (SA).

  • +
  • Each SA has multiple work-group processors (WGP).

  • +
  • Each WGP has multiple compute units (CU).

  • +
  • Each CU has multiple SIMDs that execute wavefronts.

  • +
  • The wavefronts for a single work-group are executed in the same +WGP. In CU wavefront execution mode the wavefronts may be executed by +different SIMDs in the same CU. In WGP wavefront execution mode the +wavefronts may be executed by different SIMDs in different CUs in the same +WGP.

  • +
  • Each WGP has a single LDS memory shared by the wavefronts of the work-groups +executing on it.

  • +
  • All LDS operations of a WGP are performed as wavefront wide operations in a +global order and involve no caching. Completion is reported to a wavefront in +execution order.

  • +
  • The LDS memory has multiple request queues shared by the SIMDs of a +WGP. Therefore, the LDS operations performed by different wavefronts of a +work-group can be reordered relative to each other, which can result in +reordering the visibility of vector memory operations with respect to LDS +operations of other wavefronts in the same work-group. A s_waitcnt +lgkmcnt(0) is required to ensure synchronization between LDS operations and +vector memory operations between wavefronts of a work-group, but not between +operations performed by the same wavefront.

  • +
  • The vector memory operations are performed as wavefront wide operations. +Completion of load/store/sample operations is reported to a wavefront in +execution order of other load/store/sample operations performed by that +wavefront.

  • +
  • The vector memory operations access a vector L0 cache. There is a single L0 +cache per CU. Each SIMD of a CU accesses the same L0 cache. Therefore, no +special action is required for coherence between the lanes of a single +wavefront. However, a buffer_gl0_inv is required for coherence between +wavefronts executing in the same work-group as they may be executing on SIMDs +of different CUs that access different L0s. A buffer_gl0_inv is also +required for coherence between wavefronts executing in different work-groups +as they may be executing on different WGPs.

  • +
  • The scalar memory operations access a scalar L0 cache shared by all wavefronts +on a WGP. The scalar and vector L0 caches are not coherent. However, scalar +operations are used in a restricted way so do not impact the memory model. See +Memory Spaces.

  • +
  • The vector and scalar memory L0 caches use an L1 cache shared by all WGPs on +the same SA. Therefore, no special action is required for coherence between +the wavefronts of a single work-group. However, a buffer_gl1_inv is +required for coherence between wavefronts executing in different work-groups +as they may be executing on different SAs that access different L1s.

  • +
  • The L1 caches have independent quadrants to service disjoint ranges of virtual +addresses.

  • +
  • Each L0 cache has a separate request queue per L1 quadrant. Therefore, the +vector and scalar memory operations performed by different wavefronts, whether +executing in the same or different work-groups (which may be executing on +different CUs accessing different L0s), can be reordered relative to each +other. A s_waitcnt vmcnt(0) & vscnt(0) is required to ensure +synchronization between vector memory operations of different wavefronts. It +ensures a previous vector memory operation has completed before executing a +subsequent vector memory or LDS operation and so can be used to meet the +requirements of acquire, release and sequential consistency.

  • +
  • The L1 caches use an L2 cache shared by all SAs on the same agent.

  • +
  • The L2 cache has independent channels to service disjoint ranges of virtual +addresses.

  • +
  • Each L1 quadrant of a single SA accesses a different L2 channel. Each L1 +quadrant has a separate request queue per L2 channel. Therefore, the vector +and scalar memory operations performed by wavefronts executing in different +work-groups (which may be executing on different SAs) of an agent can be +reordered relative to each other. A s_waitcnt vmcnt(0) & vscnt(0) is +required to ensure synchronization between vector memory operations of +different SAs. It ensures a previous vector memory operation has completed +before executing a subsequent vector memory operation and so can be used to meet the +requirements of acquire, release and sequential consistency.

  • +
  • The L2 cache can be kept coherent with other agents on some targets, or ranges +of virtual addresses can be set up to bypass it to ensure system coherence.

  • +
+

Scalar memory operations are only used to access memory that is proven to not +change during the execution of the kernel dispatch. This includes constant +address space and global address space for program scope const variables. +Therefore, the kernel machine code does not have to maintain the scalar cache to +ensure it is coherent with the vector caches. The scalar and vector caches are +invalidated between kernel dispatches by CP since constant address space data +may change between kernel dispatch executions. See +Memory Spaces.

+

The one exception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a s_dcache_wb is inserted before the s_endpgm and before a function +return since the locations may be used for vector memory instructions by a +future wavefront that uses the same scratch area, or a function call that +creates a frame at the same address, respectively. There is no need for a +s_dcache_inv as all scalar writes are write-before-read in the same thread.

+

For kernarg backing memory:

+
    +
  • CP invalidates the L0 and L1 caches at the start of each kernel dispatch.

  • +
  • On dGPU the kernarg backing memory is accessed as MTYPE UC (uncached) to avoid +needing to invalidate the L2 cache.

  • +
  • On APU the kernarg backing memory is accessed as MTYPE CC (cache coherent) and +so the L2 cache will be coherent with the CPU and other agents.

  • +
+

Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC (non-coherent). Since the private address space is only accessed +by a single thread, and is always write-before-read, there is never a need to +invalidate these entries from the L0 or L1 caches.

+

Wavefronts are executed in native mode with in-order reporting of loads and +sample instructions. In this mode vmcnt reports completion of load, atomic with +return and sample instructions in order, and the vscnt reports the completion of +store and atomic without return in order. See MEM_ORDERED field in +compute_pgm_rsrc1 for GFX6-GFX10.

+

Wavefronts can be executed in WGP or CU wavefront execution mode:

+
    +
  • In WGP wavefront execution mode the wavefronts of a work-group are executed +on the SIMDs of both CUs of the WGP. Therefore, explicit management of the per +CU L0 caches is required for work-group synchronization. Also accesses to L1 +at work-group scope need to be explicitly ordered as the accesses from +different CUs are not ordered.

  • +
  • In CU wavefront execution mode the wavefronts of a work-group are executed on +the SIMDs of a single CU of the WGP. Therefore, all global memory accesses by +the work-group access the same L0 which in turn ensures L1 accesses are +ordered and so do not require explicit management of the caches for +work-group synchronization.

  • +
+

See WGP_MODE field in +compute_pgm_rsrc1 for GFX6-GFX10 and +Target Features.

+

The code sequences used to implement the memory model for GFX10 are defined in +table AMDHSA Memory Model Code Sequences GFX10.

+
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Memory Model Code Sequences GFX10

LLVM Instr

LLVM Memory +Ordering

LLVM Memory +Sync Scope

AMDGPU +Address +Space

AMDGPU Machine Code +GFX10

Non-Atomic

load

none

none

    +
  • global

  • +
  • generic

  • +
  • private

  • +
  • constant

  • +
+
    +
  • !volatile & !nontemporal

    +
      +
    1. buffer/global/flat_load

    2. +
    +
  • +
  • !volatile & nontemporal

    +
      +
    1. buffer/global/flat_load +slc=1

    2. +
    +
  • +
  • volatile

    +
      +
    1. buffer/global/flat_load +glc=1 dlc=1

    2. +
    3. s_waitcnt vmcnt(0)

    4. +
    +
    +
      +
    • Must happen before +any following volatile +global/generic +load/store.

    • +
    • Ensures that +volatile +operations to +different +addresses will not +be reordered by +hardware.

    • +
    +
    +
  • +
+

load

none

none

    +
  • local

  • +
+
    +
  1. ds_load

  2. +
+

store

none

none

    +
  • global

  • +
  • generic

  • +
  • private

  • +
  • constant

  • +
+
    +
  • !volatile & !nontemporal

    +
      +
    1. buffer/global/flat_store

    2. +
    +
  • +
  • !volatile & nontemporal

    +
    +
      +
    1. buffer/global/flat_store +slc=1

    2. +
    +
    +
  • +
  • volatile

    +
    +
      +
    1. buffer/global/flat_store

    2. +
    3. s_waitcnt vscnt(0)

    4. +
    +
      +
    • Must happen before +any following volatile +global/generic +load/store.

    • +
    • Ensures that +volatile +operations to +different +addresses will not +be reordered by +hardware.

    • +
    +
    +
  • +
+

store

none

none

    +
  • local

  • +
+
    +
  1. ds_store

  2. +
+

Unordered Atomic

load atomic

unordered

any

any

Same as non-atomic.

store atomic

unordered

any

any

Same as non-atomic.

atomicrmw

unordered

any

any

Same as monotonic atomic.

Monotonic Atomic

load atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load

  2. +
+

load atomic

monotonic

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load +glc=1

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit glc=1.

  • +
+
+

load atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_load

  2. +
+

load atomic

monotonic

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_load +glc=1 dlc=1

  2. +
+

store atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_store

  2. +
+

atomicrmw

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

monotonic

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_atomic

  2. +
+

Acquire Atomic

load atomic

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_load

  2. +
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_load glc=1

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit glc=1.

  • +
+
+
    +
  1. s_waitcnt vmcnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Must happen before +the following buffer_gl0_inv +and before any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_load

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +the following buffer_gl0_inv +and before any following +global/generic load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than the local load +atomic value being +acquired.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • If OpenCL, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

load atomic

acquire

    +
  • workgroup

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_load glc=1

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit glc=1.

  • +
+
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +the following +buffer_gl0_inv and any +following global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures any +following global +data read is no +older than a local load +atomic value being +acquired.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

load atomic

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_load +glc=1 dlc=1

  2. +
  3. s_waitcnt vmcnt(0)

  4. +
+
+
    +
  • Must happen before +following +buffer_gl*_inv.

  • +
  • Ensures the load +has completed +before invalidating +the caches.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following +loads will not see +stale global data.

  • +
+
+

load atomic

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_load glc=1 dlc=1

  2. +
  3. s_waitcnt vmcnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen before +following +buffer_gl*_inv.

  • +
  • Ensures the flat_load +has completed +before invalidating +the caches.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_atomic

  2. +
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vm/vscnt(0)

  4. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Use vmcnt(0) if atomic with +return and vscnt(0) if +atomic with no-return.

  • +
  • Must happen before +the following buffer_gl0_inv +and before any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. ds_atomic

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +the following +buffer_gl0_inv.

  • +
  • Ensures any +following global +data read is no +older than the local +atomicrmw value +being acquired.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If OpenCL omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acquire

    +
  • workgroup

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt lgkmcnt(0) & +vm/vscnt(0)

  4. +
+
+
    +
  • If CU wavefront execution +mode, omit vm/vscnt(0).

  • +
  • If OpenCL, omit lgkmcnt(0).

  • +
  • Use vmcnt(0) if atomic with +return and vscnt(0) if +atomic with no-return.

  • +
  • Must happen before +the following +buffer_gl0_inv.

  • +
  • Ensures any +following global +data read is no +older than a local +atomicrmw value +being acquired.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vm/vscnt(0)

  4. +
+
+
    +
  • Use vmcnt(0) if atomic with +return and vscnt(0) if +atomic with no-return.

  • +
  • Must happen before +following +buffer_gl*_inv.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acquire

    +
  • agent

  • +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vm/vscnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Use vmcnt(0) if atomic with +return and vscnt(0) if +atomic with no-return.

  • +
  • Must happen before +following +buffer_gl*_inv.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

fence

acquire

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

acquire

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0) and vscnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate. If +fence had an +address space then +set to address +space of OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load +atomic/ +atomicrmw-with-return-value +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +atomicrmw-no-return-value +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Must happen before +the following +buffer_gl0_inv.

  • +
  • Ensures that the +fence-paired atomic +has completed +before invalidating +the +cache. Therefore +any following +locations read must +be no older than +the value read by +the +fence-paired-atomic.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

fence

acquire

    +
  • agent

  • +
  • system

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0) and vscnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load +atomic/ +atomicrmw-with-return-value +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +atomicrmw-no-return-value +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Must happen before +the following +buffer_gl*_inv.

  • +
  • Ensures that the +fence-paired atomic +has completed +before invalidating +the +caches. Therefore +any following +locations read must +be no older than +the value read by +the +fence-paired-atomic.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before any +following global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

Release Atomic

store atomic

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_store

  2. +
+

store atomic

release

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store +atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_store

  2. +
+

store atomic

release

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. s_waitcnt vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • If OpenCL, omit.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and s_waitcnt +vscnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +global memory +operations have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. ds_store

  2. +
+

store atomic

release

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt vscnt(0) +and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +store.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +store that is being +released.

  • +
+
+
    +
  1. buffer/global/flat_store

  2. +
+

atomicrmw

release

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_atomic

  2. +
+

atomicrmw

release

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL, omit lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store +atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global/flat_atomic

  2. +
+

atomicrmw

release

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. s_waitcnt vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • If OpenCL, omit.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and s_waitcnt +vscnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +global memory +operations have +completed before +performing the +atomicrmw that is being +released.

  • +
+
+
    +
  1. ds_atomic

  2. +
+

atomicrmw

release

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global and local +have completed +before performing +the atomicrmw that +is being released.

  • +
+
+
    +
  1. buffer/global/flat_atomic

  2. +
+

fence

release

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

release

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0) and vscnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate. If +fence had an +address space then +set to address +space of OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store atomic/ +atomicrmw.

  • +
  • Must happen before +any following store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Ensures that all +memory operations +have +completed before +performing the +following +fence-paired-atomic.

  • +
+
+

fence

release

    +
  • agent

  • +
  • system

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0) and vscnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate. If +fence had an +address space then +set to address +space of OpenCL +fence flag, or to +generic if both +local and global +flags are +specified.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +any following store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +fence-paired-atomic).

  • +
  • Ensures that all +memory operations +have +completed before +performing the +following +fence-paired-atomic.

  • +
+
+

Acquire-Release Atomic

atomicrmw

acq_rel

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+
    +
  1. buffer/global/ds/flat_atomic

  2. +
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • global

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0), and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store +atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vm/vscnt(0)

  4. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Use vmcnt(0) if atomic with +return and vscnt(0) if +atomic with no-return.

  • +
  • Must happen before +the following +buffer_gl0_inv.

  • +
  • Ensures any +following global +data read is no +older than the +atomicrmw value +being acquired.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. s_waitcnt vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • If OpenCL, omit.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and s_waitcnt +vscnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +global memory +operations have +completed before +performing the +atomicrmw that is being +released.

  • +
+
+
    +
  1. ds_atomic

  2. +
  3. s_waitcnt lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit.

  • +
  • Must happen before +the following +buffer_gl0_inv.

  • +
  • Ensures any +following global +data read is no +older than the local +atomicrmw value being +acquired.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • If OpenCL omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acq_rel

    +
  • workgroup

  • +
+
    +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL, omit lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store +atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  4. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL, omit lgkmcnt(0).

  • +
  • Must happen before +the following +buffer_gl0_inv.

  • +
  • Ensures any +following global +data read is no +older than the +atomicrmw value being +acquired.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

atomicrmw

acq_rel

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +to global have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. buffer/global_atomic

  2. +
  3. s_waitcnt vm/vscnt(0)

  4. +
+
+
    +
  • Use vmcnt(0) if atomic with +return and vscnt(0) if +atomic with no-return.

  • +
  • Must happen before +following +buffer_gl*_inv.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

atomicrmw

acq_rel

    +
  • agent

  • +
  • system

  • +
+
    +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0), and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing the +atomicrmw that is +being released.

  • +
+
+
    +
  1. flat_atomic

  2. +
  3. s_waitcnt vm/vscnt(0) & +lgkmcnt(0)

  4. +
+
+
    +
  • If OpenCL, omit +lgkmcnt(0).

  • +
  • Use vmcnt(0) if atomic with +return and vscnt(0) if +atomic with no-return.

  • +
  • Must happen before +following +buffer_gl*_inv.

  • +
  • Ensures the +atomicrmw has +completed before +invalidating the +caches.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data.

  • +
+
+

fence

acq_rel

    +
  • singlethread

  • +
  • wavefront

  • +
+

none

none

fence

acq_rel

    +
  • workgroup

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0) and vscnt(0).

  • +
  • However, +since LLVM +currently has no +address space on +the fence need to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store atomic/ +atomicrmw.

  • +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that all +memory operations +have +completed before +performing any +following global +memory operations.

  • +
  • Ensures that the +preceding +local/generic load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +acquire-fence-paired-atomic) +has completed +before following +global memory +operations. This +satisfies the +requirements of +acquire.

  • +
  • Ensures that all +previous memory +operations have +completed before a +following +local/generic store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +release-fence-paired-atomic). +This satisfies the +requirements of +release.

  • +
  • Must happen before +the following +buffer_gl0_inv.

  • +
  • Ensures that the +acquire-fence-paired +atomic has completed +before invalidating +the +cache. Therefore +any following +locations read must +be no older than +the value read by +the +acquire-fence-paired-atomic.

  • +
+
+
    +
  1. buffer_gl0_inv

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Ensures that +following +loads will not see +stale data.

  • +
+
+

fence

acq_rel

    +
  • agent

  • +
  • system

  • +
+

none

    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If OpenCL and +address space is +not generic, omit +lgkmcnt(0).

  • +
  • If OpenCL and +address space is +local, omit +vmcnt(0) and vscnt(0).

  • +
  • However, since LLVM +currently has no +address space on +the fence need to +conservatively +always generate +(see comment for +previous fence).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +any preceding +global/generic +load/load +atomic/ +atomicrmw-with-return-value.

  • +
  • s_waitcnt vscnt(0) +must happen after +any preceding +global/generic +store/store atomic/ +atomicrmw-no-return-value.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +any preceding +local/generic +load/store/load +atomic/store +atomic/atomicrmw.

  • +
  • Must happen before +the following +buffer_gl*_inv.

  • +
  • Ensures that the +preceding +global/local/generic +load +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +acquire-fence-paired-atomic) +has completed +before invalidating +the caches. This +satisfies the +requirements of +acquire.

  • +
  • Ensures that all +previous memory +operations have +completed before a +following +global/local/generic +store +atomic/atomicrmw +with an equal or +wider sync scope +and memory ordering +stronger than +unordered (this is +termed the +release-fence-paired-atomic). +This satisfies the +requirements of +release.

  • +
+
+
    +
  1. buffer_gl0_inv; +buffer_gl1_inv

  2. +
+
+
    +
  • Must happen before +any following +global/generic +load/load +atomic/store/store +atomic/atomicrmw.

  • +
  • Ensures that +following loads +will not see stale +global data. This +satisfies the +requirements of +acquire.

  • +
+
+

Sequential Consistent Atomic

load atomic

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +load atomic acquire, +except must generate +all instructions even +for OpenCL.

load atomic

seq_cst

    +
  • workgroup

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit vmcnt(0) and +vscnt(0).

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0), and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt lgkmcnt(0) must +happen after +preceding +local/generic load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +lgkmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vmcnt(0) +must happen after +preceding +global/generic load +atomic/ +atomicrmw-with-return-value +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vscnt(0) +must happen after +preceding +global/generic store +atomic/ +atomicrmw-no-return-value +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vscnt(0) and so do +not need to be +considered.)

  • +
  • Ensures any +preceding +sequentially +consistent global/local +memory instructions +have completed +before executing +this sequentially +consistent +instruction. This +prevents reordering +a seq_cst store +followed by a +seq_cst load. (Note +that seq_cst is +stronger than +acquire/release as +the reordering of +load acquire +followed by a store +release is +prevented by the +s_waitcnt of +the release, but +there is nothing +preventing a store +release followed by +load acquire from +completing out of +order. The s_waitcnt +could be placed after +seq_store or before +the seq_load. We +choose the load to +make the s_waitcnt be +as late as possible +so that the store +may have already +completed.)

  • +
+
+
    +
  1. Following +instructions same as +corresponding load +atomic acquire, +except must generate +all instructions even +for OpenCL.

  2. +
+

load atomic

seq_cst

    +
  • workgroup

  • +
+
    +
  • local

  • +
+
    +
  1. s_waitcnt vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • If CU wavefront execution +mode, omit.

  • +
  • Could be split into +separate s_waitcnt +vmcnt(0) and s_waitcnt +vscnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt vmcnt(0) +must happen after +preceding +global/generic load +atomic/ +atomicrmw-with-return-value +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vscnt(0) +must happen after +preceding +global/generic store +atomic/ +atomicrmw-no-return-value +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vscnt(0) and so do +not need to be +considered.)

  • +
  • Ensures any +preceding +sequentially +consistent global +memory instructions +have completed +before executing +this sequentially +consistent +instruction. This +prevents reordering +a seq_cst store +followed by a +seq_cst load. (Note +that seq_cst is +stronger than +acquire/release as +the reordering of +load acquire +followed by a store +release is +prevented by the +s_waitcnt of +the release, but +there is nothing +preventing a store +release followed by +load acquire from +completing out of +order. The s_waitcnt +could be placed after +seq_store or before +the seq_load. We +choose the load to +make the s_waitcnt be +as late as possible +so that the store +may have already +completed.)

  • +
+
+
    +
  1. Following +instructions same as +corresponding load +atomic acquire, +except must generate +all instructions even +for OpenCL.

  2. +
+

load atomic

seq_cst

    +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • generic

  • +
+
    +
  1. s_waitcnt lgkmcnt(0) & +vmcnt(0) & vscnt(0)

  2. +
+
+
    +
  • Could be split into +separate s_waitcnt +vmcnt(0), s_waitcnt +vscnt(0) and s_waitcnt +lgkmcnt(0) to allow +them to be +independently moved +according to the +following rules.

  • +
  • s_waitcnt lgkmcnt(0) +must happen after +preceding +local load +atomic/store +atomic/atomicrmw +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +lgkmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vmcnt(0) +must happen after +preceding +global/generic load +atomic/ +atomicrmw-with-return-value +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vmcnt(0) and so do +not need to be +considered.)

  • +
  • s_waitcnt vscnt(0) +Must happen after +preceding +global/generic store +atomic/ +atomicrmw-no-return-value +with memory +ordering of seq_cst +and with equal or +wider sync scope. +(Note that seq_cst +fences have their +own s_waitcnt +vscnt(0) and so do +not need to be +considered.)

  • +
  • Ensures any +preceding +sequentially +consistent global +memory instructions +have completed +before executing +this sequentially +consistent +instruction. This +prevents reordering +a seq_cst store +followed by a +seq_cst load. (Note +that seq_cst is +stronger than +acquire/release as +the reordering of +load acquire +followed by a store +release is +prevented by the +s_waitcnt of +the release, but +there is nothing +preventing a store +release followed by +load acquire from +completing out of +order. The s_waitcnt +could be placed after +seq_store or before +the seq_load. We +choose the load to +make the s_waitcnt be +as late as possible +so that the store +may have already +completed.)

  • +
+
+
    +
  1. Following +instructions same as +corresponding load +atomic acquire, +except must generate +all instructions even +for OpenCL.

  2. +
+

store atomic

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +store atomic release, +except must generate +all instructions even +for OpenCL.

atomicrmw

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+
    +
  • global

  • +
  • local

  • +
  • generic

  • +
+

Same as corresponding +atomicrmw acq_rel, +except must generate +all instructions even +for OpenCL.

fence

seq_cst

    +
  • singlethread

  • +
  • wavefront

  • +
  • workgroup

  • +
  • agent

  • +
  • system

  • +
+

none

Same as corresponding +fence acq_rel, +except must generate +all instructions even +for OpenCL.

+
+
+
+
+

Trap Handler ABI

+

For code objects generated by the AMDGPU backend for HSA [HSA] compatible +runtimes (see AMDGPU Operating Systems), the runtime installs a trap handler that +supports the s_trap instruction. For usage see:

+
    +
  • AMDGPU Trap Handler for AMDHSA OS Code Object V2

  • +
  • AMDGPU Trap Handler for AMDHSA OS Code Object V3

  • +
  • AMDGPU Trap Handler for AMDHSA OS Code Object V4

    + + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    AMDGPU Trap Handler for AMDHSA OS Code Object V2

    Usage

    Code Sequence

    Trap Handler +Inputs

    Description

    reserved

    s_trap 0x00

    Reserved by hardware.

    debugtrap(arg)

    s_trap 0x01

    +
    SGPR0-1:

    queue_ptr

    +
    +
    VGPR0:

    arg

    +
    +
    +

    Reserved for Finalizer HSA debugtrap +intrinsic (not implemented).

    llvm.trap

    s_trap 0x02

    +
    SGPR0-1:

    queue_ptr

    +
    +
    +

    Causes wave to be halted with the PC at +the trap instruction. The associated +queue is signalled to put it into the +error state. When the queue is put in +the error state, the waves executing +dispatches on the queue will be +terminated.

    llvm.debugtrap

    s_trap 0x03

    none

      +
    • If debugger not enabled then behaves +as a no-operation. The trap handler +is entered and immediately returns to +continue execution of the wavefront.

    • +
    • If the debugger is enabled, causes +the debug trap to be reported by the +debugger and the wavefront is put in +the halt state with the PC at the +instruction. The debugger must +increment the PC and resume the wave.

    • +
    +

    reserved

    s_trap 0x04

    Reserved.

    reserved

    s_trap 0x05

    Reserved.

    reserved

    s_trap 0x06

    Reserved.

    reserved

    s_trap 0x07

    Reserved.

    reserved

    s_trap 0x08

    Reserved.

    reserved

    s_trap 0xfe

    Reserved.

    reserved

    s_trap 0xff

    Reserved.

    +
  • +
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU Trap Handler for AMDHSA OS Code Object V3

Usage

Code Sequence

Trap Handler +Inputs

Description

reserved

s_trap 0x00

Reserved by hardware.

debugger breakpoint

s_trap 0x01

none

Reserved for debugger to use for +breakpoints. Causes wave to be halted +with the PC at the trap instruction. +The debugger is responsible to resume +the wave, including the instruction +that the breakpoint overwrote.

llvm.trap

s_trap 0x02

+
SGPR0-1:

queue_ptr

+
+
+

Causes wave to be halted with the PC at +the trap instruction. The associated +queue is signalled to put it into the +error state. When the queue is put in +the error state, the waves executing +dispatches on the queue will be +terminated.

llvm.debugtrap

s_trap 0x03

none

    +
  • If debugger not enabled then behaves +as a no-operation. The trap handler +is entered and immediately returns to +continue execution of the wavefront.

  • +
  • If the debugger is enabled, causes +the debug trap to be reported by the +debugger and the wavefront is put in +the halt state with the PC at the +instruction. The debugger must +increment the PC and resume the wave.

  • +
+

reserved

s_trap 0x04

Reserved.

reserved

s_trap 0x05

Reserved.

reserved

s_trap 0x06

Reserved.

reserved

s_trap 0x07

Reserved.

reserved

s_trap 0x08

Reserved.

reserved

s_trap 0xfe

Reserved.

reserved

s_trap 0xff

Reserved.

+
+
+
+ +++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDGPU Trap Handler for AMDHSA OS Code Object V4

Usage

Code Sequence

GFX6-GFX8 Inputs

GFX9-GFX10 Inputs

Description

reserved

s_trap 0x00

Reserved by hardware.

debugger breakpoint

s_trap 0x01

none

none

Reserved for debugger to use for +breakpoints. Causes wave to be halted +with the PC at the trap instruction. +The debugger is responsible to resume +the wave, including the instruction +that the breakpoint overwrote.

llvm.trap

s_trap 0x02

+
SGPR0-1:

queue_ptr

+
+
+

none

Causes wave to be halted with the PC at +the trap instruction. The associated +queue is signalled to put it into the +error state. When the queue is put in +the error state, the waves executing +dispatches on the queue will be +terminated.

llvm.debugtrap

s_trap 0x03

none

none

    +
  • If debugger not enabled then behaves +as a no-operation. The trap handler +is entered and immediately returns to +continue execution of the wavefront.

  • +
  • If the debugger is enabled, causes +the debug trap to be reported by the +debugger and the wavefront is put in +the halt state with the PC at the +instruction. The debugger must +increment the PC and resume the wave.

  • +
+

reserved

s_trap 0x04

Reserved.

reserved

s_trap 0x05

Reserved.

reserved

s_trap 0x06

Reserved.

reserved

s_trap 0x07

Reserved.

reserved

s_trap 0x08

Reserved.

reserved

s_trap 0xfe

Reserved.

reserved

s_trap 0xff

Reserved.

+
+
+
+

Call Convention

+
+

Note

+

This section is currently incomplete and has inaccuracies. It is WIP that will +be updated as information is determined.

+
+

See Address Space Identifier for information on swizzled +addresses. Unswizzled addresses are normal linear addresses.

+
+
Kernel Functions
+

This section describes the call convention ABI for the outer kernel function.

+

See Initial Kernel Execution State for the kernel call +convention.

+

The following is not part of the AMDGPU kernel calling convention but describes +how the AMDGPU implements function calls:

+
    +
  1. Clang decides the kernarg layout to match the HSA Programmer’s Language +Reference [HSA].

    +
      +
    • All structs are passed directly.

    • +
    • Lambda values are passed TBA.

    • +
    +
  2. +
+
    +
  1. The kernel performs certain setup in its prolog, as described in +Kernel Prolog.

  2. +
+
+
+
Non-Kernel Functions
+

This section describes the call convention ABI for functions other than the +outer kernel function.

+

If a kernel has function calls then scratch is always allocated and used for +the call stack which grows from low address to high address using the swizzled +scratch address space.

+

On entry to a function:

+
    +
  1. SGPR0-3 contain a V# with the following properties (see +Private Segment Buffer):

    +
      +
    • Base address pointing to the beginning of the wavefront scratch backing +memory.

    • +
    • Swizzled with dword element size and stride of wavefront size elements.

    • +
    +
  2. +
  3. The FLAT_SCRATCH register pair is set up. See +Flat Scratch.

  4. +
  5. GFX6-GFX8: M0 register set to the size of LDS in bytes. See +M0.

  6. +
  7. The EXEC register is set to the lanes active on entry to the function.

  8. +
  9. MODE register: TBD

  10. +
  11. VGPR0-31 and SGPR4-29 are used to pass function input arguments as described +below.

  12. +
  13. SGPR30-31 return address (RA). The code address that the function must +return to when it completes. The value is undefined if the function does not +return.

  14. +
  15. SGPR32 is used for the stack pointer (SP). It is an unswizzled scratch +offset relative to the beginning of the wavefront scratch backing memory.

    +

    The unswizzled SP can be used with buffer instructions as an unswizzled SGPR +offset with the scratch V# in SGPR0-3 to access the stack in a swizzled +manner.

    +

    The unswizzled SP value can be converted into the swizzled SP value by:

    +
    +
    +
    swizzled SP = unswizzled SP / wavefront size
    +
    +
    +

    This may be used to obtain the private address space address of stack +objects and to convert this address to a flat address by adding the flat +scratch aperture base address.

    +

    The swizzled SP value is always 4 byte aligned for the r600 +architecture and 16 byte aligned for the amdgcn architecture.

    +
    +

    Note

    +

    The amdgcn value is selected to avoid dynamic stack alignment for the +OpenCL language which has the largest base type defined as 16 bytes.

    +
    +

    On entry, the swizzled SP value is the address of the first function +argument passed on the stack. Other stack passed arguments are positive +offsets from the entry swizzled SP value.

    +

    The function may use positive offsets beyond the last stack passed argument +for stack allocated local variables and register spill slots. If necessary, +the function may align these to greater alignment than 16 bytes. After these +the function may dynamically allocate space for such things as runtime sized +alloca local allocations.

    +

    If the function calls another function, it will place any stack allocated +arguments after the last local allocation and adjust SGPR32 to the address +after the last local allocation.

    +
  16. +
  17. All other registers are unspecified.

  18. +
  19. Any necessary s_waitcnt has been performed to ensure memory is available +to the function.

  20. +
+

On exit from a function:

+
    +
  1. VGPR0-31 and SGPR4-29 are used to pass function result arguments as +described below. Any registers used are considered clobbered registers.

  2. +
  3. The following registers are preserved and have the same value as on entry:

    +
      +
    • FLAT_SCRATCH

    • +
    • EXEC

    • +
    • GFX6-GFX8: M0

    • +
    • All SGPR registers except the clobbered registers of SGPR4-31.

    • +
    • VGPR40-47

    • +
    • VGPR56-63

    • +
    • VGPR72-79

    • +
    • VGPR88-95

    • +
    • VGPR104-111

    • +
    • VGPR120-127

    • +
    • VGPR136-143

    • +
    • VGPR152-159

    • +
    • VGPR168-175

    • +
    • VGPR184-191

    • +
    • VGPR200-207

    • +
    • VGPR216-223

    • +
    • VGPR232-239

    • +
    • VGPR248-255

      +
      +
      +

      Note

      +

      Except the argument registers, the VGPRs clobbered and the preserved +registers are intermixed at regular intervals in order to keep a +similar ratio independent of the number of allocated VGPRs.

      +
      +
      +
    • +
    • GFX90A: All AGPR registers except the clobbered registers AGPR0-31.

    • +
    • Lanes of all VGPRs that are inactive at the call site.

      +

      For the AMDGPU backend, an inter-procedural register allocation (IPRA) +optimization may mark some of the clobbered SGPR and VGPR registers as +preserved if it can be determined that the called function does not change +their value.

      +
    • +
    +
  4. +
+
    +
  1. The PC is set to the RA provided on entry.

  2. +
  3. MODE register: TBD.

  4. +
  5. All other registers are clobbered.

  6. +
  7. Any necessary s_waitcnt has been performed to ensure memory accessed by +the function is available to the caller.

  8. +
+

The function input arguments are made up of the formal arguments explicitly +declared by the source language function plus the implicit input arguments used +by the implementation.

+

The source language input arguments are:

+
    +
  1. Any source language implicit this or self argument comes first as a +pointer type.

  2. +
  3. Followed by the function formal arguments in left to right source order.

  4. +
+

The source language result arguments are:

+
    +
  1. The function result argument.

  2. +
+

The source language input or result struct type arguments that are less than or +equal to 16 bytes, are decomposed recursively into their base type fields, and +each field is passed as if a separate argument. For input arguments, if the +called function requires the struct to be in memory, for example because its +address is taken, then the function body is responsible for allocating a stack +location and copying the field arguments into it. Clang terms this direct +struct.

+

The source language input struct type arguments that are greater than 16 bytes, +are passed by reference. The caller is responsible for allocating a stack +location to make a copy of the struct value and pass the address as the input +argument. The called function is responsible to perform the dereference when +accessing the input argument. Clang terms this by-value struct.

+

A source language result struct type argument that is greater than 16 bytes, is +returned by reference. The caller is responsible for allocating a stack location +to hold the result value and passes the address as the last input argument +(before the implicit input arguments). In this case there are no result +arguments. The called function is responsible to perform the dereference when +storing the result value. Clang terms this structured return (sret).

+

TODO: correct the ``sret`` definition.

+

Lambda argument types are treated as struct types with an implementation defined +set of fields.

+

For the AMDGPU backend, all source language arguments (including the decomposed +struct type arguments) are passed in VGPRs unless marked inreg in which case +they are passed in SGPRs.

+

The AMDGPU backend walks the function call graph from the leaves to determine +which implicit input arguments are used, propagating to each caller of the +function. The used implicit arguments are appended to the function arguments +after the source language arguments in the following order:

+
    +
  1. Work-Item ID (1 VGPR)

    +

    The X, Y and Z work-item IDs are packed into a single VGPR with the following +layout. Only fields actually used by the function are set. The other bits +are undefined.

    +

    The values come from the initial kernel execution state. See +Initial Kernel Execution State.

    + + +++++ + + + + + + + + + + + + + + + + + + + + + + + + +
    Work-item implicit argument layout

    Bits

    Size

    Field Name

    9:0

    10 bits

    X Work-Item ID

    19:10

    10 bits

    Y Work-Item ID

    29:20

    10 bits

    Z Work-Item ID

    31:30

    2 bits

    Unused

    +
  2. +
  3. Dispatch Ptr (2 SGPRs)

    +

    The value comes from the initial kernel execution state. See +SGPR Register Set Up Order.

    +
  4. +
  5. Queue Ptr (2 SGPRs)

    +

    The value comes from the initial kernel execution state. See +SGPR Register Set Up Order.

    +
  6. +
  7. Kernarg Segment Ptr (2 SGPRs)

    +

    The value comes from the initial kernel execution state. See +SGPR Register Set Up Order.

    +
  8. +
  9. Dispatch id (2 SGPRs)

    +

    The value comes from the initial kernel execution state. See +SGPR Register Set Up Order.

    +
  10. +
  11. Work-Group ID X (1 SGPR)

    +

    The value comes from the initial kernel execution state. See +SGPR Register Set Up Order.

    +
  12. +
  13. Work-Group ID Y (1 SGPR)

    +

    The value comes from the initial kernel execution state. See +SGPR Register Set Up Order.

    +
  14. +
  15. Work-Group ID Z (1 SGPR)

    +

    The value comes from the initial kernel execution state. See +SGPR Register Set Up Order.

    +
  16. +
  17. Implicit Argument Ptr (2 SGPRs)

    +

    The value is computed by adding an offset to Kernarg Segment Ptr to get the +global address space pointer to the first kernarg implicit argument.

    +
  18. +
+

The input and result arguments are assigned in order in the following manner:

+
+

Note

+

There are likely some errors and omissions in the following description that +need correction.

+
+
    +
  • VGPR arguments are assigned to consecutive VGPRs starting at VGPR0 up to +VGPR31.

    +

    If there are more arguments than will fit in these registers, the remaining +arguments are allocated on the stack in order on naturally aligned +addresses.

    +
  • +
  • SGPR arguments are assigned to consecutive SGPRs starting at SGPR0 up to +SGPR29.

    +

    If there are more arguments than will fit in these registers, the remaining +arguments are allocated on the stack in order on naturally aligned +addresses.

    +
  • +
+

Note that decomposed struct type arguments may have some fields passed in +registers and some in memory.

+

The following is not part of the AMDGPU function calling convention but +describes how the AMDGPU implements function calls:

+
    +
  1. SGPR33 is used as a frame pointer (FP) if necessary. Like the SP it is an +unswizzled scratch address. It is only needed if runtime sized alloca +are used, or for the reasons defined in SIFrameLowering.

  2. +
  3. Runtime stack alignment is supported. SGPR34 is used as a base pointer (BP) +to access the incoming stack arguments in the function. The BP is needed +only when the function requires the runtime stack alignment.

  4. +
  5. Allocating SGPR arguments on the stack is not supported.

  6. +
  7. No CFI is currently generated. See +Call Frame Information.

    +
    +

    Note

    +

    CFI will be generated that defines the CFA as the unswizzled address +relative to the wave scratch base in the unswizzled private address space +of the lowest address stack allocated local variable.

    +

    DW_AT_frame_base will be defined as the swizzled address in the +swizzled private address space by dividing the CFA by the wavefront size +(since CFA is always at least dword aligned which matches the scratch +swizzle element size).

    +

    If no dynamic stack alignment was performed, the stack allocated arguments +are accessed as negative offsets relative to DW_AT_frame_base, and the +local variables and register spill slots are accessed as positive offsets +relative to DW_AT_frame_base.

    +
    +
  8. +
  9. Function argument passing is implemented by copying the input physical +registers to virtual registers on entry. The register allocator can spill if +necessary. These are copied back to physical registers at call sites. The +net effect is that each function call can have these values in entirely +distinct locations. The IPRA can help avoid shuffling argument registers.

  10. +
  11. Call sites are implemented by setting up the arguments at positive offsets +from SP. Then SP is incremented to account for the known frame size before +the call and decremented after the call.

    +
    +

    Note

    +

    The CFI will reflect the changed calculation needed to compute the CFA +from SP.

    +
    +
  12. +
  13. 4 byte spill slots are used in the stack frame. One slot is allocated for an +emergency spill slot. Buffer instructions are used for stack accesses and +not the flat_scratch instruction.

    +
  14. +
+
+
+
+
+

AMDPAL

+

This section provides code conventions used when the target triple OS is +amdpal (see Target Triples).

+
+

Code Object Metadata

+
+

Note

+

The metadata is currently in development and is subject to major +changes. Only the current version is supported. When this document +was generated the version was 2.6.

+
+

Code object metadata is specified by the NT_AMDGPU_METADATA note +record (see Code Object V3 to V4 Note Records).

+

The metadata is represented as Message Pack formatted binary data (see +[MsgPack]). The top level is a Message Pack map that includes the keys +defined in table AMDPAL Code Object Metadata Map +and referenced tables.

+

Additional information can be added to the maps. To avoid conflicts, any +key names should be prefixed by “vendor-name.” where vendor-name +can be the name of the vendor and specific vendor tool that generates the +information. The prefix is abbreviated to simply “.” when it appears +within a map that has been added by the same vendor-name.

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + +
AMDPAL Code Object Metadata Map

String Key

Value Type

Required?

Description

“amdpal.version”

sequence of +2 integers

Required

PAL code object metadata (major, minor) version. The current values +are defined by Util::Abi::PipelineMetadata(Major|Minor)Version.

“amdpal.pipelines”

sequence of +map

Required

Per-pipeline metadata. See +AMDPAL Code Object Pipeline Metadata Map for the +definition of the keys included in that map.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDPAL Code Object Pipeline Metadata Map

String Key

Value Type

Required?

Description

“.name”

string

Source name of the pipeline.

“.type”

string

Pipeline type, e.g. VsPs. Values include:

+
+
    +
  • “VsPs”

  • +
  • “Gs”

  • +
  • “Cs”

  • +
  • “Ngg”

  • +
  • “Tess”

  • +
  • “GsTess”

  • +
  • “NggTess”

  • +
+
+

“.internal_pipeline_hash”

sequence of +2 integers

Required

Internal compiler hash for this pipeline. Lower +64 bits is the “stable” portion of the hash, used +for e.g. shader replacement lookup. Upper 64 bits +is the “unique” portion of the hash, used for +e.g. pipeline cache lookup. The value is +implementation defined, and can not be relied on +between different builds of the compiler.

“.shaders”

map

Per-API shader metadata. See +AMDPAL Code Object Shader Map +for the definition of the keys included in that +map.

“.hardware_stages”

map

Per-hardware stage metadata. See +AMDPAL Code Object Hardware Stage Map +for the definition of the keys included in that +map.

“.shader_functions”

map

Per-shader function metadata. See +AMDPAL Code Object Shader Function Map +for the definition of the keys included in that +map.

“.registers”

map

Required

Hardware register configuration. See +AMDPAL Code Object Register Map +for the definition of the keys included in that +map.

“.user_data_limit”

integer

Number of user data entries accessed by this +pipeline.

“.spill_threshold”

integer

The user data spill threshold. 0xFFFF for +NoUserDataSpilling.

“.uses_viewport_array_index”

boolean

Indicates whether or not the pipeline uses the +viewport array index feature. Pipelines which use +this feature can render into all 16 viewports, +whereas pipelines which do not use it are +restricted to viewport #0.

“.es_gs_lds_size”

integer

Size in bytes of LDS space used internally for +handling data-passing between the ES and GS +shader stages. This can be zero if the data is +passed using off-chip buffers. This value should +be used to program all user-SGPRs which have been +marked with “UserDataMapping::EsGsLdsSize” +(typically only the GS and VS HW stages will ever +have a user-SGPR so marked).

“.nggSubgroupSize”

integer

Explicit maximum subgroup size for NGG shaders +(maximum number of threads in a subgroup).

“.num_interpolants”

integer

Graphics only. Number of PS interpolants.

“.mesh_scratch_memory_size”

integer

Max mesh shader scratch memory used.

“.api”

string

Name of the client graphics API.

“.api_create_info”

binary

Graphics API shader create info binary blob. Can +be defined by the driver using the compiler if +they want to be able to correlate API-specific +information used during creation at a later time.

+
+
+
+ +++++ + + + + + + + + + + + + +
AMDPAL Code Object Shader Map

String Key

Value Type

Description

    +
  • “.compute”

  • +
  • “.vertex”

  • +
  • “.hull”

  • +
  • “.domain”

  • +
  • “.geometry”

  • +
  • “.pixel”

  • +
+

map

See AMDPAL Code Object API Shader Metadata Map +for the definition of the keys included in that map.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + +
AMDPAL Code Object API Shader Metadata Map

String Key

Value Type

Required?

Description

“.api_shader_hash”

sequence of +2 integers

Required

Input shader hash, typically passed in from the client. The value +is implementation defined, and can not be relied on between +different builds of the compiler.

“.hardware_mapping”

sequence of +string

Required

Flags indicating the HW stages this API shader maps to. Values +include:

+
+
    +
  • “.ls”

  • +
  • “.hs”

  • +
  • “.es”

  • +
  • “.gs”

  • +
  • “.vs”

  • +
  • “.ps”

  • +
  • “.cs”

  • +
+
+
+
+
+
+ +++++ + + + + + + + + + + + + +
AMDPAL Code Object Hardware Stage Map

String Key

Value Type

Description

    +
  • “.ls”

  • +
  • “.hs”

  • +
  • “.es”

  • +
  • “.gs”

  • +
  • “.vs”

  • +
  • “.ps”

  • +
  • “.cs”

  • +
+

map

See AMDPAL Code Object Hardware Stage Metadata Map +for the definition of the keys included in that map.

+
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDPAL Code Object Hardware Stage Metadata Map

String Key

Value Type

Required?

Description

“.entry_point”

string

The ELF symbol pointing to this pipeline’s stage entry point.

“.scratch_memory_size”

integer

Scratch memory size in bytes.

“.lds_size”

integer

Local Data Share size in bytes.

“.perf_data_buffer_size”

integer

Performance data buffer size in bytes.

“.vgpr_count”

integer

Number of VGPRs used.

“.sgpr_count”

integer

Number of SGPRs used.

“.vgpr_limit”

integer

If non-zero, indicates the shader was compiled with a +directive to instruct the compiler to limit the VGPR usage to +be less than or equal to the specified value (only set if +different from HW default).

“.sgpr_limit”

integer

SGPR count upper limit (only set if different from HW +default).

“.threadgroup_dimensions”

sequence of +3 integers

Thread-group X/Y/Z dimensions (Compute only).

“.wavefront_size”

integer

Wavefront size (only set if different from HW default).

“.uses_uavs”

boolean

The shader reads or writes UAVs.

“.uses_rovs”

boolean

The shader reads or writes ROVs.

“.writes_uavs”

boolean

The shader writes to one or more UAVs.

“.writes_depth”

boolean

The shader writes out a depth value.

“.uses_append_consume”

boolean

The shader uses append and/or consume operations, either +memory or GDS.

“.uses_prim_id”

boolean

The shader uses PrimID.

+
+
+
+ +++++ + + + + + + + + + + + + +
AMDPAL Code Object Shader Function Map

String Key

Value Type

Description

symbol name

map

symbol name is the ELF symbol name of the shader function code +entry address. The value is the function’s metadata. See +AMDPAL Code Object Shader Function Metadata Map.

+
+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDPAL Code Object Shader Function Metadata Map

String Key

Value Type

Description

“.api_shader_hash”

sequence of +2 integers

Input shader hash, typically passed in from the client. The value +is implementation defined, and can not be relied on between +different builds of the compiler.

“.scratch_memory_size”

integer

Size in bytes of scratch memory used by the shader.

“.lds_size”

integer

Size in bytes of LDS memory.

“.vgpr_count”

integer

Number of VGPRs used by the shader.

“.sgpr_count”

integer

Number of SGPRs used by the shader.

“.stack_frame_size_in_bytes”

integer

Amount of stack size used by the shader.

“.shader_subtype”

string

Shader subtype/kind. Values include:

+
+
    +
  • “Unknown”

  • +
+
+
+
+
+
+ +++++ + + + + + + + + + + + + +
AMDPAL Code Object Register Map

32-bit Integer Key

Value Type

Description

reg offset

32-bit integer

reg offset is the dword offset into the GFXIP register space of +a GRBM register (i.e., driver accessible GPU register number, not +shader GPR register number). The driver is required to program each +specified register to the corresponding specified value when +executing this pipeline. Typically, the reg offsets are the +uint16_t offsets to each register as defined by the hardware +chip headers. The register is set to the provided value. However, a +reg offset that specifies a user data register (e.g., +COMPUTE_USER_DATA_0) needs special treatment. See +User Data section for more +information.

+
+
+
User Data
+

Each hardware stage has a set of 32-bit physical SPI user data registers +(either 16 or 32 based on graphics IP and the stage) which can be +written from a command buffer and then loaded into SGPRs when waves are +launched via a subsequent dispatch or draw operation. This is the way +most arguments are passed from the application/runtime to a hardware +shader.

+

PAL abstracts this functionality by exposing a set of 128 user data +entries per pipeline a client can use to pass arguments from a command +buffer to one or more shaders in that pipeline. The ELF code object must +specify a mapping from virtualized user data entries to physical user +data registers, and PAL is responsible for implementing that mapping, +including spilling overflow user data entries to memory if needed.

+

Since the user data registers are GRBM-accessible SPI registers, this +mapping is actually embedded in the .registers metadata entry. For +most registers, the value in that map is a literal 32-bit value that +should be written to the register by the driver. However, when the +register is a user data register (any USER_DATA register e.g., +SPI_SHADER_USER_DATA_PS_5), the value is instead an encoding that tells +the driver to write either a user data entry value or one of several +driver-internal values to the register. This encoding is described in +the following table:

+
+

Note

+

Currently, user data registers 0 and 1 (e.g., SPI_SHADER_USER_DATA_PS_0, +and SPI_SHADER_USER_DATA_PS_1) are reserved. User data register 0 must +always be programmed to the address of the GlobalTable, and user data +register 1 must always be programmed to the address of the PerShaderTable.

+
+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDPAL User Data Mapping

Value

Name

Description

0..127

User Data Entry

32-bit value of user_data_entry[N] as specified via CmdSetUserData()

0x10000000

GlobalTable

32-bit pointer to GPU memory containing the global internal table (should +always point to user data register 0).

0x10000001

PerShaderTable

32-bit pointer to GPU memory containing the per-shader internal table. See +Per-Shader Table +for more detail (should always point to user data register 1).

0x10000002

SpillTable

32-bit pointer to GPU memory containing the user data spill table. See +Spill Table for +more detail.

0x10000003

BaseVertex

Vertex offset (32-bit unsigned integer). Not needed if the pipeline doesn’t +reference the draw index in the vertex shader. Only supported by the first +stage in a graphics pipeline.

0x10000004

BaseInstance

Instance offset (32-bit unsigned integer). Only supported by the first stage in +a graphics pipeline.

0x10000005

DrawIndex

Draw index (32-bit unsigned integer). Only supported by the first stage in a +graphics pipeline.

0x10000006

Workgroup

Thread group count (32-bit unsigned integer). Low half of a 64-bit address of +a buffer containing the grid dimensions for a Compute dispatch operation. The +high half of the address is stored in the next sequential user-SGPR. Only +supported by compute pipelines.

0x1000000A

EsGsLdsSize

Indicates that PAL will program this user-SGPR to contain the amount of LDS +space used for the ES/GS pseudo-ring-buffer for passing data between shader +stages.

0x1000000B

ViewId

View id (32-bit unsigned integer) identifies a view of graphics +pipeline instancing.

0x1000000C

StreamOutTable

32-bit pointer to GPU memory containing the stream out target SRD table. This +can only appear for one shader stage per pipeline.

0x1000000D

PerShaderPerfData

32-bit pointer to GPU memory containing the per-shader performance data buffer.

0x1000000F

VertexBufferTable

32-bit pointer to GPU memory containing the vertex buffer SRD table. This can +only appear for one shader stage per pipeline.

0x10000010

UavExportTable

32-bit pointer to GPU memory containing the UAV export SRD table. This can +only appear for one shader stage per pipeline (PS). These replace color targets +and are completely separate from any UAVs used by the shader. This is optional, +and only used by the PS when UAV exports are used to replace color-target +exports to optimize specific shaders.

0x10000011

NggCullingData

64-bit pointer to GPU memory containing the hardware register data needed by +some NGG pipelines to perform culling. This value contains the address of the +first of two consecutive registers which provide the full GPU address.

0x10000015

FetchShaderPtr

64-bit pointer to GPU memory containing the fetch shader subroutine.

+
+
+
Per-Shader Table
+

Low 32 bits of the GPU address for an optional buffer in the .data +section of the ELF. The high 32 bits of the address match the high 32 bits +of the shader’s program counter.

+

The buffer can be anything the shader compiler needs it for, and +allows each shader to have its own region of the .data section. +Typically, this could be a table of buffer SRD’s and the data pointed to +by the buffer SRD’s, but it could be a flat-address region of memory as +well. Its layout and usage are defined by the shader compiler.

+

Each shader’s table in the .data section is referenced by the symbol +_amdgpu_xs_shdr_intrl_data where xs corresponds with the +hardware shader stage the data is for. E.g., +_amdgpu_cs_shdr_intrl_data for the compute shader hardware stage.

+
+
+
Spill Table
+

It is possible for a hardware shader to need access to more user data +entries than there are slots available in user data registers for one +or more hardware shader stages. In that case, the PAL runtime expects +the necessary user data entries to be spilled to GPU memory and use +one user data register to point to the spilled user data memory. The +value of the user data entry must then represent the location where +a shader expects to read the low 32-bits of the table’s GPU virtual +address. The spill table itself represents a set of 32-bit values +managed by the PAL runtime in GPU-accessible memory that can be made +indirectly accessible to a hardware shader.

+
+
+
+
+
+

Unspecified OS

+

This section provides code conventions used when the target triple OS is +empty (see Target Triples).

+
+

Trap Handler ABI

+

For code objects generated by AMDGPU backend for non-amdhsa OS, the runtime does +not install a trap handler. The llvm.trap and llvm.debugtrap +instructions are handled as follows:

+
+
+ +++++ + + + + + + + + + + + + + + + + +
AMDGPU Trap Handler for Non-AMDHSA OS

Usage

Code Sequence

Description

llvm.trap

s_endpgm

Causes wavefront to be terminated.

llvm.debugtrap

none

Compiler warning given that there is no +trap handler installed.

+
+
+
+
+
+

Source Languages

+
+

OpenCL

+

When the language is OpenCL the following differences occur:

+
    +
  1. The OpenCL memory model is used (see Memory Model).

  2. +
  3. The AMDGPU backend appends additional arguments to the kernel’s explicit +arguments for the AMDHSA OS (see +OpenCL kernel implicit arguments appended for AMDHSA OS).

  4. +
  5. Additional metadata is generated +(see Code Object Metadata).

  6. +
+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
OpenCL kernel implicit arguments appended for AMDHSA OS

Position

Byte +Size

Byte +Alignment

Description

1

8

8

OpenCL Global Offset X

2

8

8

OpenCL Global Offset Y

3

8

8

OpenCL Global Offset Z

4

8

8

OpenCL address of printf buffer

5

8

8

OpenCL address of virtual queue used by +enqueue_kernel.

6

8

8

OpenCL address of AqlWrap struct used by +enqueue_kernel.

7

8

8

Pointer argument used for Multi-grid +synchronization.

+
+
+
+

HCC

+

When the language is HCC the following differences occur:

+
    +
  1. The HSA memory model is used (see Memory Model).

  2. +
+
+
+

Assembler

+

AMDGPU backend has LLVM-MC based assembler which is currently in development. +It supports AMDGCN GFX6-GFX10.

+

This section describes general syntax for instructions and operands.

+
+

Instructions

+

An instruction has the following syntax:

+
+
+
<opcode> <operand0>, <operand1>,... +<modifier0> <modifier1>...
+
+
+

Operands are comma-separated while +modifiers are space-separated.

+

The order of operands and modifiers is fixed. +Most modifiers are optional and may be omitted.

+

Links to detailed instruction syntax description may be found in the following +table. Note that features under development are not included +in this description.

+
+
++++ + + + + + + + + + + + + + + + + + + + +

Core ISA

ISA Extensions

GFX7

-

GFX8

-

GFX9

gfx900

+

gfx902

+

gfx904

+

gfx906

+

gfx908

+

gfx909

+

gfx90a

+

GFX10

gfx1011

+

gfx1012

+
+
+

For more information about instructions, their semantics and supported +combinations of operands, refer to one of the instruction set architecture manuals +[AMD-GCN-GFX6], [AMD-GCN-GFX7], [AMD-GCN-GFX8], +[AMD-GCN-GFX900-GFX904-VEGA], [AMD-GCN-GFX906-VEGA7NM], +[AMD-GCN-GFX908-CDNA1], [AMD-GCN-GFX10-RDNA1] and [AMD-GCN-GFX10-RDNA2].

+
+
+

Operands

+

Detailed description of operands may be found here.

+
+
+

Modifiers

+

Detailed description of modifiers may be found +here.

+
+
+

Instruction Examples

+
+
DS
+
ds_add_u32 v2, v4 offset:16
+ds_write_src2_b64 v2 offset0:4 offset1:8
+ds_cmpst_f32 v2, v4, v6
+ds_min_rtn_f64 v[8:9], v2, v[4:5]
+
+
+

For full list of supported instructions, refer to “LDS/GDS instructions” in ISA +Manual.

+
+
+
FLAT
+
flat_load_dword v1, v[3:4]
+flat_store_dwordx3 v[3:4], v[5:7]
+flat_atomic_swap v1, v[3:4], v5 glc
+flat_atomic_cmpswap v1, v[3:4], v[5:6] glc slc
+flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc
+
+
+

For full list of supported instructions, refer to “FLAT instructions” in ISA +Manual.

+
+
+
MUBUF
+
buffer_load_dword v1, off, s[4:7], s1
+buffer_store_dwordx4 v[1:4], v2, ttmp[4:7], s1 offen offset:4 glc tfe
+buffer_store_format_xy v[1:2], off, s[4:7], s1
+buffer_wbinvl1
+buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 slc
+
+
+

For full list of supported instructions, refer to “MUBUF Instructions” in ISA +Manual.

+
+
+
SMRD/SMEM
+
s_load_dword s1, s[2:3], 0xfc
+s_load_dwordx8 s[8:15], s[2:3], s4
+s_load_dwordx16 s[88:103], s[2:3], s4
+s_dcache_inv_vol
+s_memtime s[4:5]
+
+
+

For full list of supported instructions, refer to “Scalar Memory Operations” in +ISA Manual.

+
+
+
SOP1
+
s_mov_b32 s1, s2
+s_mov_b64 s[0:1], 0x80000000
+s_cmov_b32 s1, 200
+s_wqm_b64 s[2:3], s[4:5]
+s_bcnt0_i32_b64 s1, s[2:3]
+s_swappc_b64 s[2:3], s[4:5]
+s_cbranch_join s[4:5]
+
+
+

For full list of supported instructions, refer to “SOP1 Instructions” in ISA +Manual.

+
+
+
SOP2
+
s_add_u32 s1, s2, s3
+s_and_b64 s[2:3], s[4:5], s[6:7]
+s_cselect_b32 s1, s2, s3
+s_andn2_b32 s2, s4, s6
+s_lshr_b64 s[2:3], s[4:5], s6
+s_ashr_i32 s2, s4, s6
+s_bfm_b64 s[2:3], s4, s6
+s_bfe_i64 s[2:3], s[4:5], s6
+s_cbranch_g_fork s[4:5], s[6:7]
+
+
+

For full list of supported instructions, refer to “SOP2 Instructions” in ISA +Manual.

+
+
+
SOPC
+
s_cmp_eq_i32 s1, s2
+s_bitcmp1_b32 s1, s2
+s_bitcmp0_b64 s[2:3], s4
+s_setvskip s3, s5
+
+
+

For full list of supported instructions, refer to “SOPC Instructions” in ISA +Manual.

+
+
+
SOPP
+
s_barrier
+s_nop 2
+s_endpgm
+s_waitcnt 0 ; Wait for all counters to be 0
+s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0) ; Equivalent to above
+s_waitcnt vmcnt(1) ; Wait for vmcnt counter to be 1.
+s_sethalt 9
+s_sleep 10
+s_sendmsg 0x1
+s_sendmsg sendmsg(MSG_INTERRUPT)
+s_trap 1
+
+
+

For full list of supported instructions, refer to “SOPP Instructions” in ISA +Manual.

+

Unless otherwise mentioned, little verification is performed on the operands +of SOPP Instructions, so it is up to the programmer to be familiar with the +range of acceptable values.

+
+
+
VALU
+

For vector ALU instruction opcodes (VOP1, VOP2, VOP3, VOPC, VOP_DPP, VOP_SDWA), +the assembler will automatically use optimal encoding based on its operands. To +force specific encoding, one can add a suffix to the opcode of the instruction:

+
    +
  • _e32 for 32-bit VOP1/VOP2/VOPC

  • +
  • _e64 for 64-bit VOP3

  • +
  • _dpp for VOP_DPP

  • +
  • _sdwa for VOP_SDWA

  • +
+

VOP1/VOP2/VOP3/VOPC examples:

+
v_mov_b32 v1, v2
+v_mov_b32_e32 v1, v2
+v_nop
+v_cvt_f64_i32_e32 v[1:2], v2
+v_floor_f32_e32 v1, v2
+v_bfrev_b32_e32 v1, v2
+v_add_f32_e32 v1, v2, v3
+v_mul_i32_i24_e64 v1, v2, 3
+v_mul_i32_i24_e32 v1, -3, v3
+v_mul_i32_i24_e32 v1, -100, v3
+v_addc_u32 v1, s[0:1], v2, v3, s[2:3]
+v_max_f16_e32 v1, v2, v3
+
+
+

VOP_DPP examples:

+
v_mov_b32 v0, v0 quad_perm:[0,2,1,1]
+v_sin_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+v_mov_b32 v0, v0 wave_shl:1
+v_mov_b32 v0, v0 row_mirror
+v_mov_b32 v0, v0 row_bcast:31
+v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0
+v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+v_max_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0
+
+
+

VOP_SDWA examples:

+
v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD
+v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
+v_sin_f32 v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1
+v_fract_f32 v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+v_cmpx_le_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0
+
+
+

For full list of supported instructions, refer to “Vector ALU instructions”.

+
+
+
+

Code Object V2 Predefined Symbols

+
+

Warning

+

Code object V2 is not the default code object version emitted by +this version of LLVM.

+
+

The AMDGPU assembler defines and updates some symbols automatically. These +symbols do not affect code generation.

+
+
.option.machine_version_major
+

Set to the GFX major generation number of the target being assembled for. For +example, when assembling for a “GFX9” target this will be set to the integer +value “9”. The possible GFX major generation numbers are presented in +Processors.

+
+
+
.option.machine_version_minor
+

Set to the GFX minor generation number of the target being assembled for. For +example, when assembling for a “GFX810” target this will be set to the integer +value “1”. The possible GFX minor generation numbers are presented in +Processors.

+
+
+
.option.machine_version_stepping
+

Set to the GFX stepping generation number of the target being assembled for. +For example, when assembling for a “GFX704” target this will be set to the +integer value “4”. The possible GFX stepping generation numbers are presented +in Processors.

+
+
+
.kernel.vgpr_count
+

Set to zero each time a +.amdgpu_hsa_kernel (name) directive is +encountered. At each instruction, if the current value of this symbol is less +than or equal to the maximum VGPR number explicitly referenced within that +instruction then the symbol value is updated to equal that VGPR number plus +one.

+
+
+
.kernel.sgpr_count
+

Set to zero each time a +.amdgpu_hsa_kernel (name) directive is +encountered. At each instruction, if the current value of this symbol is less +than or equal to the maximum SGPR number explicitly referenced within that +instruction then the symbol value is updated to equal that SGPR number plus +one.

+
+
+
+

Code Object V2 Directives

+
+

Warning

+

Code object V2 is not the default code object version emitted by +this version of LLVM.

+
+

AMDGPU ABI defines auxiliary data in output code object. In assembly source, +one can specify them with assembler directives.

+
+
.hsa_code_object_version major, minor
+

major and minor are integers that specify the version of the HSA code +object that will be generated by the assembler.

+
+
+
.hsa_code_object_isa [major, minor, stepping, vendor, arch]
+

major, minor, and stepping are all integers that describe the instruction +set architecture (ISA) version of the assembly program.

+

vendor and arch are quoted strings. vendor should always be equal to +“AMD” and arch should always be equal to “AMDGPU”.

+

By default, the assembler will derive the ISA version, vendor, and arch +from the value of the -mcpu option that is passed to the assembler.

+
+
+
.amdgpu_hsa_kernel (name)
+

This directive specifies that the symbol with given name is a kernel entry +point (label) and the object should contain corresponding symbol of type +STT_AMDGPU_HSA_KERNEL.

+
+
+
.amd_kernel_code_t
+

This directive marks the beginning of a list of key / value pairs that are used +to specify the amd_kernel_code_t object that will be emitted by the assembler. +The list must be terminated by the .end_amd_kernel_code_t directive. For any +amd_kernel_code_t values that are unspecified a default value will be used. The +default value for all keys is 0, with the following exceptions:

+
    +
  • amd_code_version_major defaults to 1.

  • +
  • amd_kernel_code_version_minor defaults to 2.

  • +
  • amd_machine_kind defaults to 1.

  • +
  • amd_machine_version_major, amd_machine_version_minor, and +amd_machine_version_stepping are derived from the value of the -mcpu option +that is passed to the assembler.

  • +
  • kernel_code_entry_byte_offset defaults to 256.

  • +
  • wavefront_size defaults to 6 for all targets before GFX10. For GFX10 onwards +defaults to 6 if target feature wavefrontsize64 is enabled, otherwise 5. +Note that wavefront size is specified as a power of two, so a value of n +means a size of 2^n.

  • +
  • call_convention defaults to -1.

  • +
  • kernarg_segment_alignment, group_segment_alignment, and +private_segment_alignment default to 4. Note that alignments are specified +as a power of 2, so a value of n means an alignment of 2^n.

  • +
  • enable_tg_split defaults to 1 if target feature tgsplit is enabled for +GFX90A onwards.

  • +
  • enable_wgp_mode defaults to 1 if target feature cumode is disabled for +GFX10 onwards.

  • +
  • enable_mem_ordered defaults to 1 for GFX10 onwards.

  • +
+

The .amd_kernel_code_t directive must be placed immediately after the +function label and before any instructions.

+

For a full list of amd_kernel_code_t keys, refer to AMDGPU ABI document, +comments in lib/Target/AMDGPU/AmdKernelCodeT.h and test/CodeGen/AMDGPU/hsa.s.

+
+
+
+

Code Object V2 Example Source Code

+
+

Warning

+

Code Object V2 is not the default code object version emitted by +this version of LLVM.

+
+

Here is an example of a minimal assembly source file, defining one HSA kernel:

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
.hsa_code_object_version 1,0
+.hsa_code_object_isa
+
+.hsatext
+.globl  hello_world
+.p2align 8
+.amdgpu_hsa_kernel hello_world
+
+hello_world:
+
+   .amd_kernel_code_t
+      enable_sgpr_kernarg_segment_ptr = 1
+      is_ptr64 = 1
+      compute_pgm_rsrc1_vgprs = 0
+      compute_pgm_rsrc1_sgprs = 0
+      compute_pgm_rsrc2_user_sgpr = 2
+      compute_pgm_rsrc1_wgp_mode = 0
+      compute_pgm_rsrc1_mem_ordered = 0
+      compute_pgm_rsrc1_fwd_progress = 1
+  .end_amd_kernel_code_t
+
+  s_load_dwordx2 s[0:1], s[0:1] 0x0
+  v_mov_b32 v0, 3.14159
+  s_waitcnt lgkmcnt(0)
+  v_mov_b32 v1, s0
+  v_mov_b32 v2, s1
+  flat_store_dword v[1:2], v0
+  s_endpgm
+.Lfunc_end0:
+     .size   hello_world, .Lfunc_end0-hello_world
+
+
+
+
+

Code Object V3 to V4 Predefined Symbols

+

The AMDGPU assembler defines and updates some symbols automatically. These +symbols do not affect code generation.

+
+
.amdgcn.gfx_generation_number
+

Set to the GFX major generation number of the target being assembled for. For +example, when assembling for a “GFX9” target this will be set to the integer +value “9”. The possible GFX major generation numbers are presented in +Processors.

+
+
+
.amdgcn.gfx_generation_minor
+

Set to the GFX minor generation number of the target being assembled for. For +example, when assembling for a “GFX810” target this will be set to the integer +value “1”. The possible GFX minor generation numbers are presented in +Processors.

+
+
+
.amdgcn.gfx_generation_stepping
+

Set to the GFX stepping generation number of the target being assembled for. +For example, when assembling for a “GFX704” target this will be set to the +integer value “4”. The possible GFX stepping generation numbers are presented +in Processors.

+
+
+
.amdgcn.next_free_vgpr
+

Set to zero before assembly begins. At each instruction, if the current value +of this symbol is less than or equal to the maximum VGPR number explicitly +referenced within that instruction then the symbol value is updated to equal +that VGPR number plus one.

+

May be used to set the .amdhsa_next_free_vgpr directive in +AMDHSA Kernel Assembler Directives.

+

May be set at any time, e.g. manually set to zero at the start of each kernel.

+
+
+
.amdgcn.next_free_sgpr
+

Set to zero before assembly begins. At each instruction, if the current value +of this symbol is less than or equal to the maximum SGPR number explicitly +referenced within that instruction then the symbol value is updated to equal +that SGPR number plus one.

+

May be used to set the .amdhsa_next_free_sgpr directive in +AMDHSA Kernel Assembler Directives.

+

May be set at any time, e.g. manually set to zero at the start of each kernel.

+
+
+
+

Code Object V3 to V4 Directives

+

Directives which begin with .amdgcn are valid for all amdgcn +architecture processors, and are not OS-specific. Directives which begin with +.amdhsa are specific to amdgcn architecture processors when the +amdhsa OS is specified. See Target Triples and +Processors.

+
+
.amdgcn_target <target-triple> “-” <target-id>
+

Optional directive which declares the <target-triple>-<target-id> supported +by the containing assembler source file. Used by the assembler to validate +command-line options such as -triple, -mcpu, and +--offload-arch=<target-id>. A non-canonical target ID is allowed. See +Target Triples and Target ID.

+
+

Note

+

The target ID syntax used for code object V2 to V3 for this directive differs +from that used elsewhere. See Code Object V2 to V3 Target ID.

+
+
+
+
.amdhsa_kernel <name>
+

Creates a correctly aligned AMDHSA kernel descriptor and a symbol, +<name>.kd, in the current location of the current section. Only valid when +the OS is amdhsa. <name> must be a symbol that labels the first +instruction to execute, and does not need to be previously defined.

+

Marks the beginning of a list of directives used to generate the bytes of a +kernel descriptor, as described in Kernel Descriptor. +Directives which may appear in this list are described in +AMDHSA Kernel Assembler Directives. Directives may appear in any order, must +be valid for the target being assembled for, and cannot be repeated. Directives +support the range of values specified by the field they reference in +Kernel Descriptor. If a directive is not specified, it is +assumed to have its default value, unless it is marked as “Required”, in which +case it is an error to omit the directive. This list of directives is +terminated by an .end_amdhsa_kernel directive.

+
+
+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AMDHSA Kernel Assembler Directives

Directive

Default

Supported On

Description

.amdhsa_group_segment_fixed_size

0

GFX6-GFX10

Controls GROUP_SEGMENT_FIXED_SIZE in +Code Object V3 Kernel Descriptor.

.amdhsa_private_segment_fixed_size

0

GFX6-GFX10

Controls PRIVATE_SEGMENT_FIXED_SIZE in +Code Object V3 Kernel Descriptor.

.amdhsa_kernarg_size

0

GFX6-GFX10

Controls KERNARG_SIZE in +Code Object V3 Kernel Descriptor.

.amdhsa_user_sgpr_private_segment_buffer

0

GFX6-GFX10

Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in +Code Object V3 Kernel Descriptor.

.amdhsa_user_sgpr_dispatch_ptr

0

GFX6-GFX10

Controls ENABLE_SGPR_DISPATCH_PTR in +Code Object V3 Kernel Descriptor.

.amdhsa_user_sgpr_queue_ptr

0

GFX6-GFX10

Controls ENABLE_SGPR_QUEUE_PTR in +Code Object V3 Kernel Descriptor.

.amdhsa_user_sgpr_kernarg_segment_ptr

0

GFX6-GFX10

Controls ENABLE_SGPR_KERNARG_SEGMENT_PTR in +Code Object V3 Kernel Descriptor.

.amdhsa_user_sgpr_dispatch_id

0

GFX6-GFX10

Controls ENABLE_SGPR_DISPATCH_ID in +Code Object V3 Kernel Descriptor.

.amdhsa_user_sgpr_flat_scratch_init

0

GFX6-GFX10

Controls ENABLE_SGPR_FLAT_SCRATCH_INIT in +Code Object V3 Kernel Descriptor.

.amdhsa_user_sgpr_private_segment_size

0

GFX6-GFX10

Controls ENABLE_SGPR_PRIVATE_SEGMENT_SIZE in +Code Object V3 Kernel Descriptor.

.amdhsa_wavefront_size32

Target +Feature +Specific +(wavefrontsize64)

GFX10

Controls ENABLE_WAVEFRONT_SIZE32 in +Code Object V3 Kernel Descriptor.

.amdhsa_system_sgpr_private_segment_wavefront_offset

0

GFX6-GFX10

Controls ENABLE_PRIVATE_SEGMENT in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_system_sgpr_workgroup_id_x

1

GFX6-GFX10

Controls ENABLE_SGPR_WORKGROUP_ID_X in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_system_sgpr_workgroup_id_y

0

GFX6-GFX10

Controls ENABLE_SGPR_WORKGROUP_ID_Y in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_system_sgpr_workgroup_id_z

0

GFX6-GFX10

Controls ENABLE_SGPR_WORKGROUP_ID_Z in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_system_sgpr_workgroup_info

0

GFX6-GFX10

Controls ENABLE_SGPR_WORKGROUP_INFO in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_system_vgpr_workitem_id

0

GFX6-GFX10

Controls ENABLE_VGPR_WORKITEM_ID in +compute_pgm_rsrc2 for GFX6-GFX10. +Possible values are defined in +System VGPR Work-Item ID Enumeration Values.

.amdhsa_next_free_vgpr

Required

GFX6-GFX10

Maximum VGPR number explicitly referenced, plus one. +Used to calculate GRANULATED_WORKITEM_VGPR_COUNT in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_next_free_sgpr

Required

GFX6-GFX10

Maximum SGPR number explicitly referenced, plus one. +Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_accum_offset

Required

GFX90A

Offset of the first AccVGPR in the unified register file. +Used to calculate ACCUM_OFFSET in +compute_pgm_rsrc3 for GFX90A.

.amdhsa_reserve_vcc

1

GFX6-GFX10

Whether the kernel may use the special VCC SGPR. +Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_reserve_flat_scratch

1

GFX7-GFX10

Whether the kernel may use flat instructions to access +scratch memory. Used to calculate +GRANULATED_WAVEFRONT_SGPR_COUNT in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_reserve_xnack_mask

Target +Feature +Specific +(xnack)

GFX8-GFX10

Whether the kernel may trigger XNACK replay. +Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_float_round_mode_32

0

GFX6-GFX10

Controls FLOAT_ROUND_MODE_32 in +compute_pgm_rsrc1 for GFX6-GFX10. +Possible values are defined in +Floating Point Rounding Mode Enumeration Values.

.amdhsa_float_round_mode_16_64

0

GFX6-GFX10

Controls FLOAT_ROUND_MODE_16_64 in +compute_pgm_rsrc1 for GFX6-GFX10. +Possible values are defined in +Floating Point Rounding Mode Enumeration Values.

.amdhsa_float_denorm_mode_32

0

GFX6-GFX10

Controls FLOAT_DENORM_MODE_32 in +compute_pgm_rsrc1 for GFX6-GFX10. +Possible values are defined in +Floating Point Denorm Mode Enumeration Values.

.amdhsa_float_denorm_mode_16_64

3

GFX6-GFX10

Controls FLOAT_DENORM_MODE_16_64 in +compute_pgm_rsrc1 for GFX6-GFX10. +Possible values are defined in +Floating Point Denorm Mode Enumeration Values.

.amdhsa_dx10_clamp

1

GFX6-GFX10

Controls ENABLE_DX10_CLAMP in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_ieee_mode

1

GFX6-GFX10

Controls ENABLE_IEEE_MODE in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_fp16_overflow

0

GFX9-GFX10

Controls FP16_OVFL in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_tg_split

Target +Feature +Specific +(tgsplit)

GFX90A

Controls TG_SPLIT in +compute_pgm_rsrc3 for GFX90A.

.amdhsa_workgroup_processor_mode

Target +Feature +Specific +(cumode)

GFX10

Controls ENABLE_WGP_MODE in +Code Object V3 Kernel Descriptor.

.amdhsa_memory_ordered

1

GFX10

Controls MEM_ORDERED in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_forward_progress

0

GFX10

Controls FWD_PROGRESS in +compute_pgm_rsrc1 for GFX6-GFX10.

.amdhsa_exception_fp_ieee_invalid_op

0

GFX6-GFX10

Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_exception_fp_denorm_src

0

GFX6-GFX10

Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_exception_fp_ieee_div_zero

0

GFX6-GFX10

Controls ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_exception_fp_ieee_overflow

0

GFX6-GFX10

Controls ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_exception_fp_ieee_underflow

0

GFX6-GFX10

Controls ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_exception_fp_ieee_inexact

0

GFX6-GFX10

Controls ENABLE_EXCEPTION_IEEE_754_FP_INEXACT in +compute_pgm_rsrc2 for GFX6-GFX10.

.amdhsa_exception_int_div_zero

0

GFX6-GFX10

Controls ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO in +compute_pgm_rsrc2 for GFX6-GFX10.

+
+
+
+
.amdgpu_metadata
+

Optional directive which declares the contents of the NT_AMDGPU_METADATA +note record (see AMDGPU Code Object V3 to V4 ELF Note Records).

+

The contents must be in the [YAML] markup format, with the same structure and +semantics described in Code Object V3 Metadata or +Code Object V4 Metadata.

+

This directive is terminated by an .end_amdgpu_metadata directive.

+
+
+
+

Code Object V3 to V4 Example Source Code

+

Here is an example of a minimal assembly source file, defining one HSA kernel:

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
.amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" // optional
+
+.text
+.globl hello_world
+.p2align 8
+.type hello_world,@function
+hello_world:
+  s_load_dwordx2 s[0:1], s[0:1] 0x0
+  v_mov_b32 v0, 3.14159
+  s_waitcnt lgkmcnt(0)
+  v_mov_b32 v1, s0
+  v_mov_b32 v2, s1
+  flat_store_dword v[1:2], v0
+  s_endpgm
+.Lfunc_end0:
+  .size   hello_world, .Lfunc_end0-hello_world
+
+.rodata
+.p2align 6
+.amdhsa_kernel hello_world
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr
+  .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr
+.end_amdhsa_kernel
+
+.amdgpu_metadata
+---
+amdhsa.version:
+  - 1
+  - 0
+amdhsa.kernels:
+  - .name: hello_world
+    .symbol: hello_world.kd
+    .kernarg_segment_size: 48
+    .group_segment_fixed_size: 0
+    .private_segment_fixed_size: 0
+    .kernarg_segment_align: 4
+    .wavefront_size: 64
+    .sgpr_count: 2
+    .vgpr_count: 3
+    .max_flat_workgroup_size: 256
+    .args:
+      - .size: 8
+        .offset: 0
+        .value_kind: global_buffer
+        .address_space: global
+        .actual_access: write_only
+//...
+.end_amdgpu_metadata
+
+
+

This kernel is equivalent to the following HIP program:

+
1
+2
+3
__global__ void hello_world(float *p) {
+    *p = 3.14159f;
+}
+
+
+

If an assembly source file contains multiple kernels and/or functions, the +.amdgcn.next_free_vgpr and +.amdgcn.next_free_sgpr symbols may be reset using +the .set <symbol>, <expression> directive. For example, in the case of two +kernels, where function1 is only called from kernel1 it is sufficient +to group the function with the kernel that calls it and reset the symbols +between the two connected components:

+
 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
.amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" // optional
+
+// gpr tracking symbols are implicitly set to zero
+
+.text
+.globl kern0
+.p2align 8
+.type kern0,@function
+kern0:
+  // ...
+  s_endpgm
+.Lkern0_end:
+  .size   kern0, .Lkern0_end-kern0
+
+.rodata
+.p2align 6
+.amdhsa_kernel kern0
+  // ...
+  .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr
+  .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr
+.end_amdhsa_kernel
+
+// reset symbols to begin tracking usage in func1 and kern1
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.text
+.hidden func1
+.global func1
+.p2align 2
+.type func1,@function
+func1:
+  // ...
+  s_setpc_b64 s[30:31]
+.Lfunc1_end:
+.size func1, .Lfunc1_end-func1
+
+.globl kern1
+.p2align 8
+.type kern1,@function
+kern1:
+  // ...
+  s_getpc_b64 s[4:5]
+  s_add_u32 s4, s4, func1@rel32@lo+4
+  s_addc_u32 s5, s5, func1@rel32@hi+12
+  s_swappc_b64 s[30:31], s[4:5]
+  // ...
+  s_endpgm
+.Lkern1_end:
+  .size   kern1, .Lkern1_end-kern1
+
+.rodata
+.p2align 6
+.amdhsa_kernel kern1
+  // ...
+  .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr
+  .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr
+.end_amdhsa_kernel
+
+
+

These symbols cannot identify connected components in order to automatically +track the usage for each kernel. However, in some cases careful organization of +the kernels and functions in the source file means there is minimal additional +effort required to accurately calculate GPR usage.

+
+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Atomics.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Atomics.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Atomics.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Atomics.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,716 @@ + + + + + + + + + LLVM Atomic Instructions and Concurrency Guide — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Atomic Instructions and Concurrency Guide

+ +
+

Introduction

+

LLVM supports instructions which are well-defined in the presence of threads and +asynchronous signals.

+

The atomic instructions are designed specifically to provide readable IR and +optimized code generation for the following:

+ +

Atomic and volatile in the IR are orthogonal; “volatile” is the C/C++ volatile, +which ensures that every volatile load and store happens and is performed in the +stated order. A couple of examples: if a SequentiallyConsistent store is +immediately followed by another SequentiallyConsistent store to the same +address, the first store can be erased. This transformation is not allowed for a +pair of volatile stores. On the other hand, a non-volatile non-atomic load can +be moved across a volatile load freely, but not an Acquire load.

+

This document is intended to provide anyone either writing a frontend +for LLVM or working on optimization passes for LLVM with a guide for how to deal +with instructions with special semantics in the presence of concurrency. This +is not intended to be a precise guide to the semantics; the details can get +extremely complicated and unreadable, and are not usually necessary.

+
+
+

Optimization outside atomic

+

The basic 'load' and 'store' allow a variety of optimizations, but can +lead to undefined results in a concurrent environment; see NotAtomic. This +section specifically goes into the one optimizer restriction which applies in +concurrent environments, which gets a bit more of an extended description +because any optimization dealing with stores needs to be aware of it.

+

From the optimizer’s point of view, the rule is that if there are not any +instructions with atomic ordering involved, concurrency does not matter, with +one exception: if a variable might be visible to another thread or signal +handler, a store cannot be inserted along a path where it might not execute +otherwise. Take the following example:

+
/* C code, for readability; run through clang -O2 -S -emit-llvm to get
+    equivalent IR */
+ int x;
+ void f(int* a) {
+   for (int i = 0; i < 100; i++) {
+     if (a[i])
+       x += 1;
+   }
+ }
+
+
+

The following is equivalent in non-concurrent situations:

+
int x;
+void f(int* a) {
+  int xtemp = x;
+  for (int i = 0; i < 100; i++) {
+    if (a[i])
+      xtemp += 1;
+  }
+  x = xtemp;
+}
+
+
+

However, LLVM is not allowed to transform the former to the latter: it could +indirectly introduce undefined behavior if another thread can access x at +the same time. That thread would read undef instead of the value it was +expecting, which can lead to undefined behavior down the line. (This example is +particularly of interest because before the concurrency model was implemented, +LLVM would perform this transformation.)

+

Note that speculative loads are allowed; a load which is part of a race returns +undef, but does not have undefined behavior.

+
+
+

Atomic instructions

+

For cases where simple loads and stores are not sufficient, LLVM provides +various atomic instructions. The exact guarantees provided depend on the +ordering; see Atomic orderings.

+

load atomic and store atomic provide the same basic functionality as +non-atomic loads and stores, but provide additional guarantees in situations +where threads and signals are involved.

+

cmpxchg and atomicrmw are essentially like an atomic load followed by an +atomic store (where the store is conditional for cmpxchg), but no other +memory operation can happen on any thread between the load and store.

+

A fence provides Acquire and/or Release ordering which is not part of +another operation; it is normally used along with Monotonic memory operations. +A Monotonic load followed by an Acquire fence is roughly equivalent to an +Acquire load, and a Monotonic store following a Release fence is roughly +equivalent to a Release store. SequentiallyConsistent fences behave as both +an Acquire and a Release fence, and offer some additional complicated +guarantees, see the C++11 standard for details.

+

Frontends generating atomic instructions generally need to be aware of the +target to some degree; atomic instructions are guaranteed to be lock-free, and +therefore an instruction which is wider than the target natively supports can be +impossible to generate.

+
+
+

Atomic orderings

+

In order to achieve a balance between performance and necessary guarantees, +there are six levels of atomicity. They are listed in order of strength; each +level includes all the guarantees of the previous level except for +Acquire/Release. (See also LangRef Ordering.)

+
+

NotAtomic

+

NotAtomic is the obvious, a load or store which is not atomic. (This isn’t +really a level of atomicity, but is listed here for comparison.) This is +essentially a regular load or store. If there is a race on a given memory +location, loads from that location return undef.

+
+
Relevant standard

This is intended to match shared variables in C/C++, and to be used in any +other context where memory access is necessary, and a race is impossible. (The +precise definition is in LangRef Memory Model.)

+
+
Notes for frontends

The rule is essentially that all memory accessed with basic loads and stores +by multiple threads should be protected by a lock or other synchronization; +otherwise, you are likely to run into undefined behavior. If your frontend is +for a “safe” language like Java, use Unordered to load and store any shared +variable. Note that NotAtomic volatile loads and stores are not properly +atomic; do not try to use them as a substitute. (Per the C/C++ standards, +volatile does provide some limited guarantees around asynchronous signals, but +atomics are generally a better solution.)

+
+
Notes for optimizers

Introducing loads to shared variables along a codepath where they would not +otherwise exist is allowed; introducing stores to shared variables is not. See +Optimization outside atomic.

+
+
Notes for code generation

The one interesting restriction here is that it is not allowed to write to +bytes outside of the bytes relevant to a store. This is mostly relevant to +unaligned stores: it is not allowed in general to convert an unaligned store +into two aligned stores of the same width as the unaligned store. Backends are +also expected to generate an i8 store as an i8 store, and not an instruction +which writes to surrounding bytes. (If you are writing a backend for an +architecture which cannot satisfy these restrictions and cares about +concurrency, please send an email to llvm-dev.)

+
+
+
+
+

Unordered

+

Unordered is the lowest level of atomicity. It essentially guarantees that races +produce somewhat sane results instead of having undefined behavior. It also +guarantees the operation to be lock-free, so it does not depend on the data +being part of a special atomic structure or depend on a separate per-process +global lock. Note that code generation will fail for unsupported atomic +operations; if you need such an operation, use explicit locking.

+
+
Relevant standard

This is intended to match the Java memory model for shared variables.

+
+
Notes for frontends

This cannot be used for synchronization, but is useful for Java and other +“safe” languages which need to guarantee that the generated code never +exhibits undefined behavior. Note that this guarantee is cheap on common +platforms for accesses of a native width, but can be expensive or unavailable for +wider accesses, like a 64-bit store on ARM. (A frontend for Java or other “safe” +languages would normally split a 64-bit store on ARM into two 32-bit unordered +stores.)

+
+
Notes for optimizers

In terms of the optimizer, this prohibits any transformation that transforms a +single load into multiple loads, transforms a store into multiple stores, +narrows a store, or stores a value which would not be stored otherwise. Some +examples of unsafe optimizations are narrowing an assignment into a bitfield, +rematerializing a load, and turning loads and stores into a memcpy +call. Reordering unordered operations is safe, though, and optimizers should +take advantage of that because unordered operations are common in languages +that need them.

+
+
Notes for code generation

These operations are required to be atomic in the sense that if you use +unordered loads and unordered stores, a load cannot see a value which was +never stored. A normal load or store instruction is usually sufficient, but +note that an unordered load or store cannot be split into multiple +instructions (or an instruction which does multiple memory operations, like +LDRD on ARM without LPAE, or not naturally-aligned LDRD on LPAE ARM).

+
+
+
+
+

Monotonic

+

Monotonic is the weakest level of atomicity that can be used in synchronization +primitives, although it does not provide any general synchronization. It +essentially guarantees that if you take all the operations affecting a specific +address, a consistent ordering exists.

+
+
Relevant standard

This corresponds to the C++11/C11 memory_order_relaxed; see those +standards for the exact definition.

+
+
Notes for frontends

If you are writing a frontend which uses this directly, use with caution. The +guarantees in terms of synchronization are very weak, so make sure these are +only used in a pattern which you know is correct. Generally, these would +either be used for atomic operations which do not protect other memory (like +an atomic counter), or along with a fence.

+
+
Notes for optimizers

In terms of the optimizer, this can be treated as a read+write on the relevant +memory location (and alias analysis will take advantage of that). In addition, +it is legal to reorder non-atomic and Unordered loads around Monotonic +loads. CSE/DSE and a few other optimizations are allowed, but Monotonic +operations are unlikely to be used in ways which would make those +optimizations useful.

+
+
Notes for code generation

Code generation is essentially the same as that for unordered for loads and +stores. No fences are required. cmpxchg and atomicrmw are required +to appear as a single operation.

+
+
+
+
+

Acquire

+

Acquire provides a barrier of the sort necessary to acquire a lock to access +other memory with normal loads and stores.

+
+
Relevant standard

This corresponds to the C++11/C11 memory_order_acquire. It should also be +used for C++11/C11 memory_order_consume.

+
+
Notes for frontends

If you are writing a frontend which uses this directly, use with caution. +Acquire only provides a semantic guarantee when paired with a Release +operation.

+
+
Notes for optimizers

Optimizers not aware of atomics can treat this like a nothrow call. It is +also possible to move stores from before an Acquire load or read-modify-write +operation to after it, and move non-Acquire loads from before an Acquire +operation to after it.

+
+
Notes for code generation

Architectures with weak memory ordering (essentially everything relevant today +except x86 and SPARC) require some sort of fence to maintain the Acquire +semantics. The precise fences required vary widely by architecture, but for +a simple implementation, most architectures provide a barrier which is strong +enough for everything (dmb on ARM, sync on PowerPC, etc.). Putting +such a fence after the equivalent Monotonic operation is sufficient to +maintain Acquire semantics for a memory operation.

+
+
+
+
+

Release

+

Release is similar to Acquire, but with a barrier of the sort necessary to +release a lock.

+
+
Relevant standard

This corresponds to the C++11/C11 memory_order_release.

+
+
Notes for frontends

If you are writing a frontend which uses this directly, use with caution. +Release only provides a semantic guarantee when paired with an Acquire +operation.

+
+
Notes for optimizers

Optimizers not aware of atomics can treat this like a nothrow call. It is +also possible to move loads from after a Release store or read-modify-write +operation to before it, and move non-Release stores from after a Release +operation to before it.

+
+
Notes for code generation

See the section on Acquire; a fence before the relevant operation is usually +sufficient for Release. Note that a store-store fence is not sufficient to +implement Release semantics; store-store fences are generally not exposed to +IR because they are extremely difficult to use correctly.

+
+
+
+
+

AcquireRelease

+

AcquireRelease (acq_rel in IR) provides both an Acquire and a Release +barrier (for fences and operations which both read and write memory).

+
+
Relevant standard

This corresponds to the C++11/C11 memory_order_acq_rel.

+
+
Notes for frontends

If you are writing a frontend which uses this directly, use with caution. +Acquire only provides a semantic guarantee when paired with a Release +operation, and vice versa.

+
+
Notes for optimizers

In general, optimizers should treat this like a nothrow call; the possible +optimizations are usually not interesting.

+
+
Notes for code generation

This operation has Acquire and Release semantics; see the sections on Acquire +and Release.

+
+
+
+
+

SequentiallyConsistent

+

SequentiallyConsistent (seq_cst in IR) provides Acquire semantics for loads +and Release semantics for stores. Additionally, it guarantees that a total +ordering exists between all SequentiallyConsistent operations.

+
+
Relevant standard

This corresponds to the C++11/C11 memory_order_seq_cst, Java volatile, and +the gcc-compatible __sync_* builtins which do not specify otherwise.

+
+
Notes for frontends

If a frontend is exposing atomic operations, these are much easier to reason +about for the programmer than other kinds of operations, and using them is +generally a practical performance tradeoff.

+
+
Notes for optimizers

Optimizers not aware of atomics can treat this like a nothrow call. For +SequentiallyConsistent loads and stores, the same reorderings are allowed as +for Acquire loads and Release stores, except that SequentiallyConsistent +operations may not be reordered.

+
+
Notes for code generation

SequentiallyConsistent loads minimally require the same barriers as Acquire +operations and SequentiallyConsistent stores require Release +barriers. Additionally, the code generator must enforce ordering between +SequentiallyConsistent stores followed by SequentiallyConsistent loads. This +is usually done by emitting either a full fence before the loads or a full +fence after the stores; which is preferred varies by architecture.

+
+
+
+
+
+

Atomics and IR optimization

+

Predicates for optimizer writers to query:

+
    +
  • isSimple(): A load or store which is not volatile or atomic. This is +what, for example, memcpyopt would check for operations it might transform.

  • +
  • isUnordered(): A load or store which is not volatile and at most +Unordered. This would be checked, for example, by LICM before hoisting an +operation.

  • +
  • mayReadFromMemory()/mayWriteToMemory(): Existing predicate, but note +that they return true for any operation which is volatile or at least +Monotonic.

  • +
  • isStrongerThan / isAtLeastOrStrongerThan: These are predicates on +orderings. They can be useful for passes that are aware of atomics, for +example to do DSE across a single atomic access, but not across a +release-acquire pair (see MemoryDependencyAnalysis for an example of this)

  • +
  • Alias analysis: Note that AA will return ModRef for anything Acquire or +Release, and for the address accessed by any Monotonic operation.

  • +
+

To support optimizing around atomic operations, make sure you are using the +right predicates; everything should work if that is done. If your pass should +optimize some atomic operations (Unordered operations in particular), make sure +it doesn’t replace an atomic load or store with a non-atomic operation.

+

Some examples of how optimizations interact with various kinds of atomic +operations:

+
    +
  • memcpyopt: An atomic operation cannot be optimized into part of a +memcpy/memset, including unordered loads/stores. It can pull operations +across some atomic operations.

  • +
  • LICM: Unordered loads/stores can be moved out of a loop. It just treats +monotonic operations like a read+write to a memory location, and anything +stricter than that like a nothrow call.

  • +
  • DSE: Unordered stores can be DSE’ed like normal stores. Monotonic stores can +be DSE’ed in some cases, but it’s tricky to reason about, and not especially +important. It is possible in some cases for DSE to operate across a stronger +atomic operation, but it is fairly tricky. DSE delegates this reasoning to +MemoryDependencyAnalysis (which is also used by other passes like GVN).

  • +
  • Folding a load: Any atomic load from a constant global can be constant-folded, +because it cannot be observed. Similar reasoning allows sroa with +atomic loads and stores.

  • +
+
+
+

Atomics and Codegen

+

Atomic operations are represented in the SelectionDAG with ATOMIC_* opcodes. +On architectures which use barrier instructions for all atomic ordering (like +ARM), appropriate fences can be emitted by the AtomicExpand Codegen pass if +setInsertFencesForAtomic() was used.

+

The MachineMemOperand for all atomic operations is currently marked as volatile; +this is not correct in the IR sense of volatile, but CodeGen handles anything +marked volatile very conservatively. This should get fixed at some point.

+

One very important property of the atomic operations is that if your backend +supports any inline lock-free atomic operations of a given size, you should +support ALL operations of that size in a lock-free manner.

+

When the target implements atomic cmpxchg or LL/SC instructions (as most do) +this is trivial: all the other operations can be implemented on top of those +primitives. However, on many older CPUs (e.g. ARMv5, SparcV8, Intel 80386) there +are atomic load and store instructions, but no cmpxchg or LL/SC. As it is +invalid to implement atomic load using the native instruction, but +cmpxchg using a library call to a function that uses a mutex, atomic +load must also expand to a library call on such architectures, so that it +can remain atomic with regards to a simultaneous cmpxchg, by using the same +mutex.

+

AtomicExpandPass can help with that: it will expand all atomic operations to the +proper __atomic_* libcalls for any size above the maximum set by +setMaxAtomicSizeInBitsSupported (which defaults to 0).

+

On x86, all atomic loads generate a MOV. SequentiallyConsistent stores +generate an XCHG, other stores generate a MOV. SequentiallyConsistent +fences generate an MFENCE, other fences do not cause any code to be +generated. cmpxchg uses the LOCK CMPXCHG instruction. atomicrmw xchg +uses XCHG, atomicrmw add and atomicrmw sub use XADD, and all +other atomicrmw operations generate a loop with LOCK CMPXCHG. Depending +on the users of the result, some atomicrmw operations can be translated into +operations like LOCK AND, but that does not work in general.

+

On ARM (before v8), MIPS, and many other RISC architectures, Acquire, Release, +and SequentiallyConsistent semantics require barrier instructions for every such +operation. Loads and stores generate normal instructions. cmpxchg and +atomicrmw can be represented using a loop with LL/SC-style instructions +which take some sort of exclusive lock on a cache line (LDREX and STREX +on ARM, etc.).

+

It is often easiest for backends to use AtomicExpandPass to lower some of the +atomic constructs. Here are some lowerings it can do:

+
    +
  • cmpxchg -> loop with load-linked/store-conditional +by overriding shouldExpandAtomicCmpXchgInIR(), emitLoadLinked(), +emitStoreConditional()

  • +
  • large loads/stores -> ll-sc/cmpxchg +by overriding shouldExpandAtomicStoreInIR()/shouldExpandAtomicLoadInIR()

  • +
  • strong atomic accesses -> monotonic accesses + fences by overriding +shouldInsertFencesForAtomic(), emitLeadingFence(), and +emitTrailingFence()

  • +
  • atomic rmw -> loop with cmpxchg or load-linked/store-conditional +by overriding expandAtomicRMWInIR()

  • +
  • expansion to __atomic_* libcalls for unsupported sizes.

  • +
  • part-word atomicrmw/cmpxchg -> target-specific intrinsic by overriding +shouldExpandAtomicRMWInIR, emitMaskedAtomicRMWIntrinsic, +shouldExpandAtomicCmpXchgInIR, and emitMaskedAtomicCmpXchgIntrinsic.

  • +
+

For an example of these look at the ARM (first five lowerings) or RISC-V (last +lowering) backend.

+

AtomicExpandPass supports two strategies for lowering atomicrmw/cmpxchg to +load-linked/store-conditional (LL/SC) loops. The first expands the LL/SC loop +in IR, calling target lowering hooks to emit intrinsics for the LL and SC +operations. However, many architectures have strict requirements for LL/SC +loops to ensure forward progress, such as restrictions on the number and type +of instructions in the loop. It isn’t possible to enforce these restrictions +when the loop is expanded in LLVM IR, and so affected targets may prefer to +expand to LL/SC loops at a very late stage (i.e. after register allocation). +AtomicExpandPass can help support lowering of part-word atomicrmw or cmpxchg +using this strategy by producing IR for any shifting and masking that can be +performed outside of the LL/SC loop.

+
+
+

Libcalls: __atomic_*

+

There are two kinds of atomic library calls that are generated by LLVM. Please +note that both sets of library functions somewhat confusingly share the names of +builtin functions defined by clang. Despite this, the library functions are +not directly related to the builtins: it is not the case that __atomic_* +builtins lower to __atomic_* library calls and __sync_* builtins lower +to __sync_* library calls.

+

The first set of library functions are named __atomic_*. This set has been +“standardized” by GCC, and is described below. (See also GCC’s documentation)

+

LLVM’s AtomicExpandPass will translate atomic operations on data sizes above +MaxAtomicSizeInBitsSupported into calls to these functions.

+

There are four generic functions, which can be called with data of any size or +alignment:

+
void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
+void __atomic_store(size_t size, void *ptr, void *val, int ordering)
+void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int ordering)
+bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void *desired, int success_order, int failure_order)
+
+
+

There are also size-specialized versions of the above functions, which can only +be used with naturally-aligned pointers of the appropriate size. In the +signatures below, “N” is one of 1, 2, 4, 8, and 16, and “iN” is the appropriate +integer type of that size; if no such integer type exists, the specialization +cannot be used:

+
iN __atomic_load_N(iN *ptr, int ordering)
+void __atomic_store_N(iN *ptr, iN val, int ordering)
+iN __atomic_exchange_N(iN *ptr, iN val, int ordering)
+bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, int success_order, int failure_order)
+
+
+

Finally there are some read-modify-write functions, which are only available in +the size-specific variants (any other sizes use a __atomic_compare_exchange +loop):

+
iN __atomic_fetch_add_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_sub_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_and_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_or_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_xor_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_nand_N(iN *ptr, iN val, int ordering)
+
+
+

This set of library functions have some interesting implementation requirements +to take note of:

+
    +
  • They support all sizes and alignments – including those which cannot be +implemented natively on any existing hardware. Therefore, they will certainly +use mutexes for some sizes/alignments.

  • +
  • As a consequence, they cannot be shipped in a statically linked +compiler-support library, as they have state which must be shared amongst all +DSOs loaded in the program. They must be provided in a shared library used by +all objects.

  • +
  • The set of atomic sizes supported lock-free must be a superset of the sizes +any compiler can emit. That is: if a new compiler introduces support for +inline-lock-free atomics of size N, the __atomic_* functions must also have a +lock-free implementation for size N. This is a requirement so that code +produced by an old compiler (which will have called the __atomic_* function) +interoperates with code produced by the new compiler (which will use the +native atomic instruction).

  • +
+

Note that it’s possible to write an entirely target-independent implementation +of these library functions by using the compiler atomic builtins themselves to +implement the operations on naturally-aligned pointers of supported sizes, and a +generic mutex implementation otherwise.

+
+
+

Libcalls: __sync_*

+

Some targets or OS/target combinations can support lock-free atomics, but for +various reasons, it is not practical to emit the instructions inline.

+

There are two typical examples of this.

+

Some CPUs support multiple instruction sets which can be switched back and forth +on function-call boundaries. For example, MIPS supports the MIPS16 ISA, which +has a smaller instruction encoding than the usual MIPS32 ISA. ARM, similarly, +has the Thumb ISA. In MIPS16 and earlier versions of Thumb, the atomic +instructions are not encodable. However, those instructions are available via a +function call to a function with the longer encoding.

+

Additionally, a few OS/target pairs provide kernel-supported lock-free +atomics. ARM/Linux is an example of this: the kernel provides a +function which on older CPUs contains a “magically-restartable” atomic sequence +(which looks atomic so long as there’s only one CPU), and contains actual atomic +instructions on newer multicore models. This sort of functionality can typically +be provided on any architecture, if all CPUs which are missing atomic +compare-and-swap support are uniprocessor (no SMP). This is almost always the +case. The only common architecture without that property is SPARC – SPARCV8 SMP +systems were common, yet it doesn’t support any sort of compare-and-swap +operation.

+

In either of these cases, the Target in LLVM can claim support for atomics of an +appropriate size, and then implement some subset of the operations via libcalls +to a __sync_* function. Such functions must not use locks in their +implementation, because unlike the __atomic_* routines used by +AtomicExpandPass, these may be mixed-and-matched with native instructions by the +target lowering.

+

Further, these routines do not need to be shared, as they are stateless. So, +there is no issue with having multiple copies included in one binary. Thus, +typically these routines are implemented by the statically-linked compiler +runtime support library.

+

LLVM will emit a call to an appropriate __sync_* routine if the target +ISelLowering code has set the corresponding ATOMIC_CMPXCHG, ATOMIC_SWAP, +or ATOMIC_LOAD_* operation to “Expand”, and if it has opted-into the +availability of those library functions via a call to initSyncLibcalls().

+

The full set of functions that may be called by LLVM is (for N being 1, 2, +4, 8, or 16):

+
iN __sync_val_compare_and_swap_N(iN *ptr, iN expected, iN desired)
+iN __sync_lock_test_and_set_N(iN *ptr, iN val)
+iN __sync_fetch_and_add_N(iN *ptr, iN val)
+iN __sync_fetch_and_sub_N(iN *ptr, iN val)
+iN __sync_fetch_and_and_N(iN *ptr, iN val)
+iN __sync_fetch_and_or_N(iN *ptr, iN val)
+iN __sync_fetch_and_xor_N(iN *ptr, iN val)
+iN __sync_fetch_and_nand_N(iN *ptr, iN val)
+iN __sync_fetch_and_max_N(iN *ptr, iN val)
+iN __sync_fetch_and_umax_N(iN *ptr, iN val)
+iN __sync_fetch_and_min_N(iN *ptr, iN val)
+iN __sync_fetch_and_umin_N(iN *ptr, iN val)
+
+
+

This list doesn’t include any function for atomic load or store; all known +architectures support atomic loads and stores directly (possibly by emitting a +fence on either side of a normal load or store.)

+

There’s also, somewhat separately, the possibility to lower ATOMIC_FENCE to +__sync_synchronize(). This may happen or not happen independent of all the +above, controlled purely by setOperationAction(ISD::ATOMIC_FENCE, ...).

+

On AArch64, a variant of the __sync_* routines is used which contains the memory +order as part of the function name. These routines may determine at runtime +whether the single-instruction atomic operations which were introduced as part +of the AArch64 Large System Extensions “LSE” instruction set are available, or if +they need to fall back to an LL/SC loop. The following helper functions are +implemented in both compiler-rt and libgcc libraries +(N is one of 1, 2, 4, and 8; M is one of 1, 2, 4, 8, and 16; and +ORDER is one of ‘relax’, ‘acq’, ‘rel’, ‘acq_rel’):

+
iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
+iN __aarch64_swpN_ORDER(iN val, iN *ptr)
+iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
+iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
+iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
+iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
+
+
+

Please note that if the LSE instruction set is specified for the AArch64 target, then +out-of-line atomics calls are not generated and single-instruction atomic +operations are used instead.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Benchmarking.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Benchmarking.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Benchmarking.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Benchmarking.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,226 @@ + + + + + + + + + Benchmarking tips — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Benchmarking tips

+
+

Introduction

+

For benchmarking a patch we want to reduce all possible sources of +noise as much as possible. How to do that is very OS dependent.

+

Note that low noise is required, but not sufficient. It does not +exclude measurement bias. See +https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf for +example.

+
+
+

General

+
    +
  • Use a high resolution timer, e.g. perf under linux.

  • +
  • Run the benchmark multiple times to be able to recognize noise.

  • +
  • Disable as many processes or services as possible on the target system.

  • +
  • Disable frequency scaling, turbo boost and address space +randomization (see OS specific section).

  • +
  • Static link if the OS supports it. That avoids any variation that +might be introduced by loading dynamic libraries. This can be done +by passing -DLLVM_BUILD_STATIC=ON to cmake.

  • +
  • Try to avoid storage. On some systems you can use tmpfs. Putting the +program, inputs and outputs on tmpfs avoids touching a real storage +system, which can have a pretty big variability.

    +

    To mount it (on linux and freebsd at least):

    +
    mount -t tmpfs -o size=<XX>g none dir_to_mount
    +
    +
    +
  • +
+
+
+

Linux

+
    +
  • Disable address space randomization:

    +
    echo 0 > /proc/sys/kernel/randomize_va_space
    +
    +
    +
  • +
  • Set scaling_governor to performance:

    +
    for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
    +do
    +  echo performance > "$i"
    +done
    +
    +
    +
  • +
  • Use https://github.com/lpechacek/cpuset to reserve cpus for just the +program you are benchmarking. If using perf, leave at least 2 cores +so that perf runs in one and your program in another:

    +
    cset shield -c N1,N2 -k on
    +
    +
    +

    This will move all threads out of N1 and N2. The -k on means +that even kernel threads are moved out.

    +
  • +
  • Disable the SMT pair of the cpus you will use for the benchmark. The +pair of cpu N can be found in +/sys/devices/system/cpu/cpuN/topology/thread_siblings_list and +disabled with:

    +
    echo 0 > /sys/devices/system/cpu/cpuX/online
    +
    +
    +
  • +
  • Run the program with:

    +
    cset shield --exec -- perf stat -r 10 <cmd>
    +
    +
    +

    This will run the command after -- in the isolated cpus. The +particular perf command runs the <cmd> 10 times and reports +statistics.

    +
  • +
+

With these in place you can expect perf variations of less than 0.1%.

+
+

Linux Intel

+
    +
  • Disable turbo mode:

    +
    echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
    +
    +
    +
  • +
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BigEndianNEON.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BigEndianNEON.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BigEndianNEON.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BigEndianNEON.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,349 @@ + + + + + + + + + Using ARM NEON instructions in big endian mode — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Using ARM NEON instructions in big endian mode

+ +
+

Introduction

+

Generating code for big endian ARM processors is for the most part straightforward. NEON loads and stores however have some interesting properties that make code generation decisions less obvious in big endian mode.

+

The aim of this document is to explain the problem with NEON loads and stores, and the solution that has been implemented in LLVM.

+

In this document the term “vector” refers to what the ARM ABI calls a “short vector”, which is a sequence of items that can fit in a NEON register. This sequence can be 64 or 128 bits in length, and can consist of 8, 16, 32 or 64 bit items. This document refers to A64 instructions throughout, but is almost entirely applicable to the A32/ARMv7 instruction sets as well. The ABI format for passing vectors in A32 is slightly different from that in A64. Apart from that, the same concepts apply.

+
+

Example: C-level intrinsics -> assembly

+

It may be helpful first to illustrate how C-level ARM NEON intrinsics are lowered to instructions.

+

This trivial C function takes a vector of four ints and sets the zero’th lane to the value “42”:

+
#include <arm_neon.h>
+int32x4_t f(int32x4_t p) {
+    return vsetq_lane_s32(42, p, 0);
+}
+
+
+

arm_neon.h intrinsics generate “generic” IR where possible (that is, normal IR instructions not llvm.arm.neon.* intrinsic calls). The above generates:

+
define <4 x i32> @f(<4 x i32> %p) {
+  %vset_lane = insertelement <4 x i32> %p, i32 42, i32 0
+  ret <4 x i32> %vset_lane
+}
+
+
+

Which then becomes the following trivial assembly:

+
f:                                      // @f
+        movz        w8, #0x2a
+        ins         v0.s[0], w8
+        ret
+
+
+
+
+
+

Problem

+

The main problem is how vectors are represented in memory and in registers.

+

First, a recap. The “endianness” of an item affects its representation in memory only. In a register, a number is just a sequence of bits - 64 bits in the case of AArch64 general purpose registers. Memory, however, is a sequence of addressable units of 8 bits in size. Any number greater than 8 bits must therefore be split up into 8-bit chunks, and endianness describes the order in which these chunks are laid out in memory.

+

A “little endian” layout has the least significant byte first (lowest in memory address). A “big endian” layout has the most significant byte first. This means that when loading an item from big endian memory, the lowest 8-bits in memory must go in the most significant 8-bits, and so forth.

+
+
+

LDR and LD1

+
+_images/ARM-BE-ldr.png +

Big endian vector load using LDR.

+
+

A vector is a consecutive sequence of items that are operated on simultaneously. To load a 64-bit vector, 64 bits need to be read from memory. In little endian mode, we can do this by just performing a 64-bit load - LDR d0, [foo]. However if we try this in big endian mode, because of the byte swapping the lane indices end up being swapped! The zero’th item as laid out in memory becomes the n’th lane in the vector.

+
+_images/ARM-BE-ld1.png +

Big endian vector load using LD1. Note that the lanes retain the correct ordering.

+
+

Because of this, the instruction LD1 performs a vector load but performs byte swapping not on the entire 64 bits, but on the individual items within the vector. This means that the register content is the same as it would have been on a little endian system.

+

It may seem that LD1 should suffice to perform vector loads on a big endian machine. However there are pros and cons to the two approaches that make it less than simple to decide which register format to pick.

+

There are two options:

+
+
    +
  1. The content of a vector register is the same as if it had been loaded with an LDR instruction.

  2. +
  3. The content of a vector register is the same as if it had been loaded with an LD1 instruction.

  4. +
+
+

Because LD1 == LDR + REV and similarly LDR == LD1 + REV (on a big endian system), we can simulate either type of load with the other type of load plus a REV instruction. So we’re not deciding which instructions to use, but which format to use (which will then influence which instruction is best to use).

+
+

Note that throughout this section we only mention loads. Stores have exactly the same problems as their associated loads, so have been skipped for brevity.

+
+
+
+

Considerations

+
+

LLVM IR Lane ordering

+

LLVM IR has first class vector types. In LLVM IR, the zero’th element of a vector resides at the lowest memory address. The optimizer relies on this property in certain areas, for example when concatenating vectors together. The intention is for arrays and vectors to have identical memory layouts - [4 x i8] and <4 x i8> should be represented the same in memory. Without this property there would be many special cases that the optimizer would have to cleverly handle.

+

Use of LDR would break this lane ordering property. This doesn’t preclude the use of LDR, but we would have to do one of two things:

+
+
    +
  1. Insert a REV instruction to reverse the lane order after every LDR.

  2. +
  3. Disable all optimizations that rely on lane layout, and for every access to an individual lane (insertelement/extractelement/shufflevector) reverse the lane index.

  4. +
+
+
+
+

AAPCS

+

The ARM procedure call standard (AAPCS) defines the ABI for passing vectors between functions in registers. It states:

+
+

When a short vector is transferred between registers and memory it is treated as an opaque object. That is a short vector is stored in memory as if it were stored with a single STR of the entire register; a short vector is loaded from memory using the corresponding LDR instruction. On a little-endian system this means that element 0 will always contain the lowest addressed element of a short vector; on a big-endian system element 0 will contain the highest-addressed element of a short vector.

+

—Procedure Call Standard for the ARM 64-bit Architecture (AArch64), 4.1.2 Short Vectors

+
+

The use of LDR and STR as the ABI defines has at least one advantage over LD1 and ST1. LDR and STR are oblivious to the size of the individual lanes of a vector. LD1 and ST1 are not - the lane size is encoded within them. This is important across an ABI boundary, because it would become necessary to know the lane width the callee expects. Consider the following code:

+
<callee.c>
+void callee(uint32x2_t v) {
+  ...
+}
+
+<caller.c>
+extern void callee(uint32x2_t);
+void caller() {
+  callee(...);
+}
+
+
+

If callee changed its signature to uint16x4_t, which is equivalent in register content, then passing the vector in the LD1 format would break this code until caller was updated and recompiled.

+

There is an argument that if the signatures of the two functions are different then the behaviour should be undefined. But there may be functions that are agnostic to the lane layout of the vector, and treating the vector as an opaque value (just loading it and storing it) would be impossible without a common format across ABI boundaries.

+

So to preserve ABI compatibility, we need to use the LDR lane layout across function calls.

+
+
+

Alignment

+

In strict alignment mode, LDR qX requires its address to be 128-bit aligned, whereas LD1 only requires it to be as aligned as the lane size. If we canonicalised on using LDR, we’d still need to use LD1 in some places to avoid alignment faults (the result of the LD1 would then need to be reversed with REV).

+

Most operating systems however do not run with alignment faults enabled, so this is often not an issue.

+
+
+

Summary

+

The following table summarises the instructions that are required to be emitted for each property mentioned above for each of the two solutions.

+ +++++ + + + + + + + + + + + + + + + + + + + + +

LDR layout

LD1 layout

Lane ordering

LDR + REV

LD1

AAPCS

LDR

LD1 + REV

Alignment for strict mode

LDR / LD1 + REV

LD1

+

Neither approach is perfect, and choosing one boils down to choosing the lesser of two evils. It was decided that resolving the lane ordering issue under the LDR layout would require changing target-agnostic compiler passes and would result in a strange IR in which lane indices were reversed. It was decided that this was worse than the changes that would have to be made to support LD1, so LD1 was chosen as the canonical vector load instruction (and by inference, ST1 for vector stores).

+
+
+
+

Implementation

+

There are 3 parts to the implementation:

+
+
    +
  1. Predicate LDR and STR instructions so that they are never allowed to be selected to generate vector loads and stores. The exception is one-lane vectors (see footnote 1) - these by definition cannot have lane ordering problems so are fine to use LDR/STR.

  2. +
  3. Create code generation patterns for bitconverts that create REV instructions.

  4. +
  5. Make sure appropriate bitconverts are created so that vector values get passed over call boundaries as 1-element vectors (which is the same as if they were loaded with LDR).

  6. +
+
+
+

Bitconverts

+_images/ARM-BE-bitcastfail.png +

The main problem with the LD1 solution is dealing with bitconverts (or bitcasts, or reinterpret casts). These are pseudo instructions that only change the compiler’s interpretation of data, not the underlying data itself. A requirement is that if data is loaded and then saved again (called a “round trip”), the memory contents should be the same after the store as before the load. If a vector is loaded and is then bitconverted to a different vector type before storing, the round trip will currently be broken.

+

Take for example this code sequence:

+
%0 = load <4 x i32> %x
+%1 = bitcast <4 x i32> %0 to <2 x i64>
+     store <2 x i64> %1, <2 x i64>* %y
+
+
+

This would produce a code sequence such as that in the figure on the right. The mismatched LD1 and ST1 cause the stored data to differ from the loaded data.

+
+

When we see a bitcast from type X to type Y, what we need to do is to change the in-register representation of the data to be as if it had just been loaded by a LD1 of type Y.

+
+_images/ARM-BE-bitcastsuccess.png +

Conceptually this is simple - we can insert a REV undoing the LD1 of type X (converting the in-register representation to the same as if it had been loaded by LDR) and then insert another REV to change the representation to be as if it had been loaded by an LD1 of type Y.

+

For the previous example, this would be:

+
LD1   v0.4s, [x]
+
+REV64 v0.4s, v0.4s                  // There is no REV128 instruction, so it must be synthesized
+EXT   v0.16b, v0.16b, v0.16b, #8    // with a REV64 then an EXT to swap the two 64-bit elements.
+
+REV64 v0.2d, v0.2d
+EXT   v0.16b, v0.16b, v0.16b, #8
+
+ST1   v0.2d, [y]
+
+
+

It turns out that these REV pairs can, in almost all cases, be squashed together into a single REV. For the example above, a REV128 4s + REV128 2d is actually a REV64 4s, as shown in the figure on the right.

+
+
1
+

One lane vectors may seem useless as a concept but they serve to distinguish between values held in general purpose registers and values held in NEON/VFP registers. For example, an i64 would live in an x register, but <1 x i64> would live in a d register.

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BitCodeFormat.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BitCodeFormat.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BitCodeFormat.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BitCodeFormat.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,1355 @@ + + + + + + + + + LLVM Bitcode File Format — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Bitcode File Format

+
+ +
+
+

Abstract

+

This document describes the LLVM bitstream file format and the encoding of the +LLVM IR into it.

+
+
+

Overview

+

What is commonly known as the LLVM bitcode file format (also, sometimes +anachronistically known as bytecode) is actually two things: a bitstream +container format and an encoding of LLVM IR into the container format.

+

The bitstream format is an abstract encoding of structured data, very similar to +XML in some ways. Like XML, bitstream files contain tags, and nested +structures, and you can parse the file without having to understand the tags. +Unlike XML, the bitstream format is a binary encoding, and unlike XML it +provides a mechanism for the file to self-describe “abbreviations”, which are +effectively size optimizations for the content.

+

LLVM IR files may be optionally embedded into a wrapper structure, or in a +native object file. Both of these mechanisms make it easy to embed extra +data along with LLVM IR files.

+

This document first describes the LLVM bitstream format, describes the wrapper +format, then describes the record structure used by LLVM IR files.

+
+
+

Bitstream Format

+

The bitstream format is literally a stream of bits, with a very simple +structure. This structure consists of the following concepts:

+
    +
  • A “magic number” that identifies the contents of the stream.

  • +
  • Encoding primitives like variable bit-rate integers.

  • +
  • Blocks, which define nested content.

  • +
  • Data Records, which describe entities within the file.

  • +
  • Abbreviations, which specify compression optimizations for the file.

  • +
+

Note that the llvm-bcanalyzer tool can be +used to dump and inspect arbitrary bitstreams, which is very useful for +understanding the encoding.

+
+

Magic Numbers

+

The first four bytes of a bitstream are used as an application-specific magic +number. Generic bitcode tools may look at the first four bytes to determine +whether the stream is a known stream type. However, these tools should not +determine whether a bitstream is valid based on its magic number alone. New +application-specific bitstream formats are being developed all the time; tools +should not reject them just because they have a hitherto unseen magic number.

+
+
+

Primitives

+

A bitstream literally consists of a stream of bits, which are read in order +starting with the least significant bit of each byte. The stream is made up of +a number of primitive values that encode a stream of unsigned integer values. +These integers are encoded in two ways: either as Fixed Width Integers or as +Variable Width Integers.

+
+

Fixed Width Integers

+

Fixed-width integer values have their low bits emitted directly to the file. +For example, a 3-bit integer value encodes 1 as 001. Fixed width integers are +used when there is a well-known number of options for a field. For example, +boolean values are usually encoded with a 1-bit wide integer.

+
+
+

Variable Width Integers

+

Variable-width integer (VBR) values encode values of arbitrary size, optimizing +for the case where the values are small. Given a 4-bit VBR field, any 3-bit +value (0 through 7) is encoded directly, with the high bit set to zero. Values +larger than N-1 bits emit their bits in a series of N-1 bit chunks, where all +but the last set the high bit.

+

For example, the value 30 (0x1E) is encoded as 62 (0b0011’1110) when emitted as +a vbr4 value. The first set of four bits starting from the least significant +indicates the value 6 (110) with a continuation piece (indicated by a high bit +of 1). The next set of four bits indicates a value of 24 (011 << 3) with no +continuation. The sum (6+24) yields the value 30.

+
+
+

6-bit characters

+

6-bit characters encode common characters into a fixed 6-bit field. They +represent the following characters with the following 6-bit values:

+
'a' .. 'z' ---  0 .. 25
+'A' .. 'Z' --- 26 .. 51
+'0' .. '9' --- 52 .. 61
+       '.' --- 62
+       '_' --- 63
+
+
+

This encoding is only suitable for encoding characters and strings that consist +only of the above characters. It is completely incapable of encoding characters +not in the set.

+
+
+

Word Alignment

+

Occasionally, it is useful to emit zero bits until the bitstream is a multiple +of 32 bits. This ensures that the bit position in the stream can be represented +as a multiple of 32-bit words.

+
+
+
+

Abbreviation IDs

+

A bitstream is a sequential series of Blocks and Data Records. Both of +these start with an abbreviation ID encoded as a fixed-bitwidth field. The +width is specified by the current block, as described below. The value of the +abbreviation ID specifies either a builtin ID (which have special meanings, +defined below) or one of the abbreviation IDs defined for the current block by +the stream itself.

+

The set of builtin abbrev IDs is:

+
    +
  • 0 - END_BLOCK — This abbrev ID marks the end of the current block.

  • +
  • 1 - ENTER_SUBBLOCK — This abbrev ID marks the beginning of a new +block.

  • +
  • 2 - DEFINE_ABBREV — This defines a new abbreviation.

  • +
  • 3 - UNABBREV_RECORD — This ID specifies the definition of an +unabbreviated record.

  • +
+

Abbreviation IDs 4 and above are defined by the stream itself, and specify an +abbreviated record encoding.

+
+
+

Blocks

+

Blocks in a bitstream denote nested regions of the stream, and are identified by +a content-specific id number (for example, LLVM IR uses an ID of 12 to represent +function bodies). Block IDs 0-7 are reserved for standard blocks whose +meaning is defined by Bitcode; block IDs 8 and greater are application +specific. Nested blocks capture the hierarchical structure of the data encoded +in it, and various properties are associated with blocks as the file is parsed. +Block definitions allow the reader to efficiently skip blocks in constant time +if the reader wants a summary of blocks, or if it wants to efficiently skip data +it does not understand. The LLVM IR reader uses this mechanism to skip function +bodies, lazily reading them on demand.

+

When reading and encoding the stream, several properties are maintained for the +block. In particular, each block maintains:

+
    +
  1. A current abbrev id width. This value starts at 2 at the beginning of the +stream, and is set every time a block record is entered. The block entry +specifies the abbrev id width for the body of the block.

  2. +
  3. A set of abbreviations. Abbreviations may be defined within a block, in +which case they are only defined in that block (neither subblocks nor +enclosing blocks see the abbreviation). Abbreviations can also be defined +inside a BLOCKINFO block, in which case they are defined in all blocks +that match the ID that the BLOCKINFO block is describing.

  4. +
+

As sub blocks are entered, these properties are saved and the new sub-block has +its own set of abbreviations, and its own abbrev id width. When a sub-block is +popped, the saved values are restored.

+
+

ENTER_SUBBLOCK Encoding

+

+[ENTER_SUBBLOCK, blockid_vbr8, newabbrevlen_vbr4, <align32bits>, blocklen_32] +

+

The ENTER_SUBBLOCK abbreviation ID specifies the start of a new block +record. The blockid value is encoded as an 8-bit VBR identifier, and +indicates the type of block being entered, which can be a standard block or +an application-specific block. The newabbrevlen value is a 4-bit VBR, which +specifies the abbrev id width for the sub-block. The blocklen value is a +32-bit aligned value that specifies the size of the subblock in 32-bit +words. This value allows the reader to skip over the entire block in one jump.

+
+
+

END_BLOCK Encoding

+

[END_BLOCK, <align32bits>]

+

The END_BLOCK abbreviation ID specifies the end of the current block record. +Its end is aligned to 32-bits to ensure that the size of the block is an even +multiple of 32-bits.

+
+
+
+

Data Records

+

Data records consist of a record code and a number of (up to) 64-bit integer +values. The interpretation of the code and values is application specific and +may vary between different block types. Records can be encoded either using an +unabbrev record, or with an abbreviation. In the LLVM IR format, for example, +there is a record which encodes the target triple of a module. The code is +MODULE_CODE_TRIPLE, and the values of the record are the ASCII codes for the +characters in the string.

+
+

UNABBREV_RECORD Encoding

+

+[UNABBREV_RECORD, code_vbr6, numops_vbr6, op0_vbr6, op1_vbr6, …] +

+

An UNABBREV_RECORD provides a default fallback encoding, which is both +completely general and extremely inefficient. It can describe an arbitrary +record by emitting the code and operands as VBRs.

+

For example, emitting an LLVM IR target triple as an unabbreviated record +requires emitting the UNABBREV_RECORD abbrevid, a vbr6 for the +MODULE_CODE_TRIPLE code, a vbr6 for the length of the string, which is equal +to the number of operands, and a vbr6 for each character. Because there are no +letters with values less than 32, each letter would need to be emitted as at +least a two-part VBR, which means that each letter would require at least 12 +bits. This is not an efficient encoding, but it is fully general.

+
+
+

Abbreviated Record Encoding

+

[<abbrevid>, fields...]

+

An abbreviated record is an abbreviation id followed by a set of fields that are +encoded according to the abbreviation definition. This allows records to be +encoded significantly more densely than records encoded with the +UNABBREV_RECORD type, and allows the abbreviation types to be specified in +the stream itself, which allows the files to be completely self describing. The +actual encoding of abbreviations is defined below.

+

The record code, which is the first field of an abbreviated record, may be +encoded in the abbreviation definition (as a literal operand) or supplied in the +abbreviated record (as a Fixed or VBR operand value).

+
+
+
+

Abbreviations

+

Abbreviations are an important form of compression for bitstreams. The idea is +to specify a dense encoding for a class of records once, then use that encoding +to emit many records. It takes space to emit the encoding into the file, but +the space is recouped (hopefully plus some) when the records that use it are +emitted.

+

Abbreviations can be determined dynamically per client, per file. Because the +abbreviations are stored in the bitstream itself, different streams of the same +format can contain different sets of abbreviations according to the needs of the +specific stream. As a concrete example, LLVM IR files usually emit an +abbreviation for binary operators. If a specific LLVM module contained no or +few binary operators, the abbreviation does not need to be emitted.

+
+

DEFINE_ABBREV Encoding

+

+[DEFINE_ABBREV, numabbrevops_vbr5, abbrevop0, abbrevop1, …] +

+

A DEFINE_ABBREV record adds an abbreviation to the list of currently defined +abbreviations in the scope of this block. This definition only exists inside +this immediate block — it is not visible in subblocks or enclosing blocks. +Abbreviations are implicitly assigned IDs sequentially starting from 4 (the +first application-defined abbreviation ID). Any abbreviations defined in a +BLOCKINFO record for the particular block type receive IDs first, in order, +followed by any abbreviations defined within the block itself. Abbreviated data +records reference this ID to indicate what abbreviation they are invoking.

+

An abbreviation definition consists of the DEFINE_ABBREV abbrevid followed +by a VBR that specifies the number of abbrev operands, then the abbrev operands +themselves. Abbreviation operands come in three forms. They all start with a +single bit that indicates whether the abbrev operand is a literal operand (when +the bit is 1) or an encoding operand (when the bit is 0).

+
    +
  1. Literal operands — [1_1, litvalue_vbr8] — Literal operands specify that the value in +the result is always a single specific value. This specific value is emitted +as a vbr8 after the bit indicating that it is a literal operand.

  2. +
  3. Encoding info without data — [0_1, encoding_3] — Operand encodings that do not have extra data +are just emitted as their code.

  4. +
  5. Encoding info with data — [0_1, encoding_3, value_vbr5] — Operand encodings that do +have extra data are emitted as their code, followed by the extra data.

  6. +
+

The possible operand encodings are:

+
    +
  • Fixed (code 1): The field should be emitted as a fixed-width value, whose +width is specified by the operand’s extra data.

  • +
  • VBR (code 2): The field should be emitted as a variable-width value, whose +width is specified by the operand’s extra data.

  • +
  • Array (code 3): This field is an array of values. The array operand has no +extra data, but expects another operand to follow it, indicating the element +type of the array. When reading an array in an abbreviated record, the first +integer is a vbr6 that indicates the array length, followed by the encoded +elements of the array. An array may only occur as the last operand of an +abbreviation (except for the one final operand that gives the array’s +type).

  • +
  • Char6 (code 4): This field should be emitted as a char6-encoded value. +This operand type takes no extra data. Char6 encoding is normally used as an +array element type.

  • +
  • Blob (code 5): This field is emitted as a vbr6, followed by padding to a +32-bit boundary (for alignment) and an array of 8-bit objects. The array of +bytes is further followed by tail padding to ensure that its total length is a +multiple of 4 bytes. This makes it very efficient for the reader to decode +the data without having to make a copy of it: it can use a pointer to the data +in the mapped in file and poke directly at it. A blob may only occur as the +last operand of an abbreviation.

  • +
+

For example, target triples in LLVM modules are encoded as a record of the form +[TRIPLE, 'a', 'b', 'c', 'd']. Consider if the bitstream emitted the +following abbrev entry:

+
[0, Fixed, 4]
+[0, Array]
+[0, Char6]
+
+
+

When emitting a record with this abbreviation, the above entry would be emitted +as:

+

+[4_abbrevwidth, 2_4, 4_vbr6, 0_6, 1_6, 2_6, 3_6] +

+

These values are:

+
    +
  1. The first value, 4, is the abbreviation ID for this abbreviation.

  2. +
  3. The second value, 2, is the record code for TRIPLE records within LLVM IR +file MODULE_BLOCK blocks.

  4. +
  5. The third value, 4, is the length of the array.

  6. +
  7. The rest of the values are the char6 encoded values for "abcd".

  8. +
+

With this abbreviation, the triple is emitted with only 37 bits (assuming an +abbrev id width of 3). Without the abbreviation, significantly more space would +be required to emit the target triple. Also, because the TRIPLE value is +not emitted as a literal in the abbreviation, the abbreviation can also be used +for any other string value.

+
+
+
+

Standard Blocks

+

In addition to the basic block structure and record encodings, the bitstream +also defines specific built-in block types. These block types specify how the +stream is to be decoded or other metadata. In the future, new standard blocks +may be added. Block IDs 0-7 are reserved for standard blocks.

+
+

#0 - BLOCKINFO Block

+

The BLOCKINFO block allows the description of metadata for other blocks. +The currently specified records are:

+
[SETBID (#1), blockid]
+[DEFINE_ABBREV, ...]
+[BLOCKNAME, ...name...]
+[SETRECORDNAME, RecordID, ...name...]
+
+
+

The SETBID record (code 1) indicates which block ID is being described. +SETBID records can occur multiple times throughout the block to change which +block ID is being described. There must be a SETBID record prior to any +other records.

+

Standard DEFINE_ABBREV records can occur inside BLOCKINFO blocks, but +unlike their occurrence in normal blocks, the abbreviation is defined for blocks +matching the block ID we are describing, not the BLOCKINFO block +itself. The abbreviations defined in BLOCKINFO blocks receive abbreviation +IDs as described in DEFINE_ABBREV.

+

The BLOCKNAME record (code 2) can optionally occur in this block. The +elements of the record are the bytes of the string name of the block. +llvm-bcanalyzer can use this to dump out bitcode files symbolically.

+

The SETRECORDNAME record (code 3) can also optionally occur in this block. +The first operand value is a record ID number, and the rest of the elements of +the record are the bytes for the string name of the record. llvm-bcanalyzer can +use this to dump out bitcode files symbolically.

+

Note that although the data in BLOCKINFO blocks is described as “metadata,” +the abbreviations they contain are essential for parsing records from the +corresponding blocks. It is not safe to skip them.

+
+
+
+
+

Bitcode Wrapper Format

+

Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper +structure. This structure contains a simple header that indicates the offset +and size of the embedded BC file. This allows additional information to be +stored alongside the BC file. The structure of this file header is:

+

+[Magic_32, Version_32, Offset_32, Size_32, CPUType_32] +

+

Each of the fields is a 32-bit field stored in little endian form (as with the +rest of the bitcode file fields). The Magic number is always 0x0B17C0DE and +the version is currently always 0. The Offset field is the offset in bytes +to the start of the bitcode stream in the file, and the Size field is the size +in bytes of the stream. CPUType is a target-specific value that can be used to +encode the CPU of the target.

+
+
+

Native Object File Wrapper Format

+

Bitcode files for LLVM IR may also be wrapped in a native object file +(i.e. ELF, COFF, Mach-O). The bitcode must be stored in a section of the object +file named __LLVM,__bitcode for MachO and .llvmbc for the other object +formats. This wrapper format is useful for accommodating LTO in compilation +pipelines where intermediate objects must be native object files which contain +metadata in other sections.

+

Not all tools support this format.

+
+
+

LLVM IR Encoding

+

LLVM IR is encoded into a bitstream by defining blocks and records. It uses +blocks for things like constant pools, functions, symbol tables, etc. It uses +records for things like instructions, global variable descriptors, type +descriptions, etc. This document does not describe the set of abbreviations +that the writer uses, as these are fully self-described in the file, and the +reader is not allowed to build in any knowledge of this.

+
+

Basics

+
+

LLVM IR Magic Number

+

The magic number for LLVM IR files is:

+

+['B'_8, 'C'_8, 0x0_4, 0xC_4, 0xE_4, 0xD_4] +

+
+
+

Signed VBRs

+

Variable Width Integer encoding is an efficient way to encode arbitrary sized +unsigned values, but is extremely inefficient for encoding signed values, as +signed values are otherwise treated as maximally large unsigned values.

+

As such, signed VBR values of a specific width are emitted as follows:

+
    +
  • Positive values are emitted as VBRs of the specified width, but with their +value shifted left by one.

  • +
  • Negative values are emitted as VBRs of the specified width, but the negated +value is shifted left by one, and the low bit is set.

  • +
+

With this encoding, small positive and small negative values can both be emitted +efficiently. Signed VBR encoding is used in CST_CODE_INTEGER and +CST_CODE_WIDE_INTEGER records within CONSTANTS_BLOCK blocks. +It is also used for phi instruction operands in MODULE_CODE_VERSION 1.

+
+
+

LLVM IR Blocks

+

LLVM IR is defined with the following blocks:

+
    +
  • 8 — MODULE_BLOCK — This is the top-level block that contains the entire +module, and describes a variety of per-module information.

  • +
  • 9 — PARAMATTR_BLOCK — This enumerates the parameter attributes.

  • +
  • 10 — PARAMATTR_GROUP_BLOCK — This describes the attribute group table.

  • +
  • 11 — CONSTANTS_BLOCK — This describes constants for a module or +function.

  • +
  • 12 — FUNCTION_BLOCK — This describes a function body.

  • +
  • 14 — VALUE_SYMTAB_BLOCK — This describes a value symbol table.

  • +
  • 15 — METADATA_BLOCK — This describes metadata items.

  • +
  • 16 — METADATA_ATTACHMENT — This contains records associating metadata +with function instruction values.

  • +
  • 17 — TYPE_BLOCK — This describes all of the types in the module.

  • +
  • 23 — STRTAB_BLOCK — The bitcode file’s string table.

  • +
+
+
+
+

MODULE_BLOCK Contents

+

The MODULE_BLOCK block (id 8) is the top-level block for LLVM bitcode files, +and each bitcode file must contain exactly one. In addition to records +(described below) containing information about the module, a MODULE_BLOCK +block may contain the following sub-blocks:

+ +
+

MODULE_CODE_VERSION Record

+

[VERSION, version#]

+

The VERSION record (code 1) contains a single value indicating the format +version. Versions 0, 1 and 2 are supported at this time. The difference between +version 0 and 1 is in the encoding of instruction operands in +each FUNCTION_BLOCK.

+

In version 0, each value defined by an instruction is assigned an ID +unique to the function. Function-level value IDs are assigned starting from +NumModuleValues since they share the same namespace as module-level +values. The value enumerator resets after each function. When a value is +an operand of an instruction, the value ID is used to represent the operand. +For large functions or large modules, these operand values can be large.

+

The encoding in version 1 attempts to avoid large operand values +in common cases. Instead of using the value ID directly, operands are +encoded as relative to the current instruction. Thus, if an operand +is the value defined by the previous instruction, the operand +will be encoded as 1.

+

For example, instead of

+
#n = load #n-1
+#n+1 = icmp eq #n, #const0
+br #n+1, label #(bb1), label #(bb2)
+
+
+

version 1 will encode the instructions as

+
#n = load #1
+#n+1 = icmp eq #1, (#n+1)-#const0
+br #1, label #(bb1), label #(bb2)
+
+
+

Note in the example that operands which are constants also use +the relative encoding, while operands like basic block labels +do not use the relative encoding.

+

Forward references will result in a negative value. +This can be inefficient, as operands are normally encoded +as unsigned VBRs. However, forward references are rare, except in the +case of phi instructions. For phi instructions, operands are encoded as +Signed VBRs to deal with forward references.

+

In version 2, the meaning of module records FUNCTION, GLOBALVAR, +ALIAS, IFUNC and COMDAT change such that the first two operands +specify an offset and size of a string in a string table (see STRTAB_BLOCK +Contents), the function name is removed from the FNENTRY record in the +value symbol table, and the top-level VALUE_SYMTAB_BLOCK may only contain +FNENTRY records.

+
+
+

MODULE_CODE_TRIPLE Record

+

[TRIPLE, ...string...]

+

The TRIPLE record (code 2) contains a variable number of values representing +the bytes of the target triple specification string.

+
+
+

MODULE_CODE_DATALAYOUT Record

+

[DATALAYOUT, ...string...]

+

The DATALAYOUT record (code 3) contains a variable number of values +representing the bytes of the target datalayout specification string.

+
+
+

MODULE_CODE_ASM Record

+

[ASM, ...string...]

+

The ASM record (code 4) contains a variable number of values representing +the bytes of module asm strings, with individual assembly blocks separated +by newline (ASCII 10) characters.

+
+
+

MODULE_CODE_SECTIONNAME Record

+

[SECTIONNAME, ...string...]

+

The SECTIONNAME record (code 5) contains a variable number of values +representing the bytes of a single section name string. There should be one +SECTIONNAME record for each section name referenced (e.g., in global +variable or function section attributes) within the module. These records +can be referenced by the 1-based index in the section fields of GLOBALVAR +or FUNCTION records.

+
+
+

MODULE_CODE_DEPLIB Record

+

[DEPLIB, ...string...]

+

The DEPLIB record (code 6) contains a variable number of values representing +the bytes of a single dependent library name string, one of the libraries +mentioned in a deplibs declaration. There should be one DEPLIB record +for each library name referenced.

+
+
+

MODULE_CODE_GLOBALVAR Record

+

[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat, attributes, preemptionspecifier]

+

The GLOBALVAR record (code 7) marks the declaration or definition of a +global variable. The operand fields are:

+
    +
  • strtab offset, strtab size: Specifies the name of the global variable. +See STRTAB_BLOCK Contents.

  • +
  • pointer type: The type index of the pointer type used to point to this +global variable

  • +
  • isconst: Non-zero if the variable is treated as constant within the module, +or zero if it is not

  • +
  • initid: If non-zero, the value index of the initializer for this variable, +plus 1.

  • +
+
    +
  • linkage: An encoding of the linkage type for this variable:

    +
      +
    • external: code 0

    • +
    • weak: code 1

    • +
    • appending: code 2

    • +
    • internal: code 3

    • +
    • linkonce: code 4

    • +
    • dllimport: code 5

    • +
    • dllexport: code 6

    • +
    • extern_weak: code 7

    • +
    • common: code 8

    • +
    • private: code 9

    • +
    • weak_odr: code 10

    • +
    • linkonce_odr: code 11

    • +
    • available_externally: code 12

    • +
    • deprecated : code 13

    • +
    • deprecated : code 14

    • +
    +
  • +
  • alignment: The logarithm base 2 of the variable’s requested alignment, plus 1

  • +
  • section: If non-zero, the 1-based section index in the table of +MODULE_CODE_SECTIONNAME entries.

  • +
+
    +
  • visibility: If present, an encoding of the visibility of this variable:

    +
      +
    • default: code 0

    • +
    • hidden: code 1

    • +
    • protected: code 2

    • +
    +
  • +
+
    +
  • threadlocal: If present, an encoding of the thread local storage mode of the +variable:

    +
      +
    • not thread local: code 0

    • +
    • thread local; default TLS model: code 1

    • +
    • localdynamic: code 2

    • +
    • initialexec: code 3

    • +
    • localexec: code 4

    • +
    +
  • +
+
    +
  • unnamed_addr: If present, an encoding of the unnamed_addr attribute of this +variable:

    +
      +
    • not unnamed_addr: code 0

    • +
    • unnamed_addr: code 1

    • +
    • local_unnamed_addr: code 2

    • +
    +
  • +
+
    +
  • dllstorageclass: If present, an encoding of the DLL storage class of this variable:

    +
      +
    • default: code 0

    • +
    • dllimport: code 1

    • +
    • dllexport: code 2

    • +
    +
  • +
  • comdat: An encoding of the COMDAT of this variable

  • +
  • attributes: If nonzero, the 1-based index into the table of AttributeLists.

  • +
+
    +
  • preemptionspecifier: If present, an encoding of the runtime preemption specifier of this variable:

    +
      +
    • dso_preemptable: code 0

    • +
    • dso_local: code 1

    • +
    +
  • +
+
+
+

MODULE_CODE_FUNCTION Record

+

[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn, preemptionspecifier]

+

The FUNCTION record (code 8) marks the declaration or definition of a +function. The operand fields are:

+
    +
  • strtab offset, strtab size: Specifies the name of the function. +See STRTAB_BLOCK Contents.

  • +
  • type: The type index of the function type describing this function

  • +
  • callingconv: The calling convention number: +* ccc: code 0 +* fastcc: code 8 +* coldcc: code 9 +* webkit_jscc: code 12 +* anyregcc: code 13 +* preserve_mostcc: code 14 +* preserve_allcc: code 15 +* swiftcc : code 16 +* cxx_fast_tlscc: code 17 +* tailcc : code 18 +* cfguard_checkcc : code 19 +* swifttailcc : code 20 +* x86_stdcallcc: code 64 +* x86_fastcallcc: code 65 +* arm_apcscc: code 66 +* arm_aapcscc: code 67 +* arm_aapcs_vfpcc: code 68

  • +
  • isproto: Non-zero if this entry represents a declaration rather than a +definition

  • +
  • linkage: An encoding of the linkage type for this function

  • +
  • paramattr: If nonzero, the 1-based parameter attribute index into the table +of PARAMATTR_CODE_ENTRY entries.

  • +
  • alignment: The logarithm base 2 of the function’s requested alignment, plus +1

  • +
  • section: If non-zero, the 1-based section index in the table of +MODULE_CODE_SECTIONNAME entries.

  • +
  • visibility: An encoding of the visibility of this function

  • +
  • gc: If present and nonzero, the 1-based garbage collector index in the table +of MODULE_CODE_GCNAME entries.

  • +
  • unnamed_addr: If present, an encoding of the +unnamed_addr attribute of this function

  • +
  • prologuedata: If non-zero, the value index of the prologue data for this function, +plus 1.

  • +
  • dllstorageclass: An encoding of the +dllstorageclass of this function

  • +
  • comdat: An encoding of the COMDAT of this function

  • +
  • prefixdata: If non-zero, the value index of the prefix data for this function, +plus 1.

  • +
  • personalityfn: If non-zero, the value index of the personality function for this function, +plus 1.

  • +
  • preemptionspecifier: If present, an encoding of the runtime preemption specifier of this function.

  • +
+
+
+

MODULE_CODE_ALIAS Record

+

[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr, preemptionspecifier]

+

The ALIAS record (code 9) marks the definition of an alias. The operand +fields are

+
    +
  • strtab offset, strtab size: Specifies the name of the alias. +See STRTAB_BLOCK Contents.

  • +
  • alias type: The type index of the alias

  • +
  • aliasee val#: The value index of the aliased value

  • +
  • linkage: An encoding of the linkage type for this alias

  • +
  • visibility: If present, an encoding of the visibility of the alias

  • +
  • dllstorageclass: If present, an encoding of the +dllstorageclass of the alias

  • +
  • threadlocal: If present, an encoding of the +thread local property of the alias

  • +
  • unnamed_addr: If present, an encoding of the +unnamed_addr attribute of this alias

  • +
  • preemptionspecifier: If present, an encoding of the runtime preemption specifier of this alias.

  • +
+
+
+

MODULE_CODE_GCNAME Record

+

[GCNAME, ...string...]

+

The GCNAME record (code 11) contains a variable number of values +representing the bytes of a single garbage collector name string. There should +be one GCNAME record for each garbage collector name referenced in function +gc attributes within the module. These records can be referenced by 1-based +index in the gc fields of FUNCTION records.

+
+
+
+

PARAMATTR_BLOCK Contents

+

The PARAMATTR_BLOCK block (id 9) contains a table of entries describing the +attributes of function parameters. These entries are referenced by 1-based index +in the paramattr field of module block FUNCTION records, or within the +attr field of function block INST_INVOKE and INST_CALL records.

+

Entries within PARAMATTR_BLOCK are constructed to ensure that each is unique +(i.e., no two indices represent equivalent attribute lists).

+
+

PARAMATTR_CODE_ENTRY Record

+

[ENTRY, attrgrp0, attrgrp1, ...]

+

The ENTRY record (code 2) contains a variable number of values describing a +unique set of function parameter attributes. Each attrgrp value is used as a +key with which to look up an entry in the attribute group table described +in the PARAMATTR_GROUP_BLOCK block.

+
+
+

PARAMATTR_CODE_ENTRY_OLD Record

+
+

Note

+

This is a legacy encoding for attributes, produced by LLVM versions 3.2 and +earlier. It is guaranteed to be understood by the current LLVM version, as +specified in the IR Backwards Compatibility policy.

+
+

[ENTRY, paramidx0, attr0, paramidx1, attr1...]

+

The ENTRY record (code 1) contains an even number of values describing a +unique set of function parameter attributes. Each paramidx value indicates +which set of attributes is represented, with 0 representing the return value +attributes, 0xFFFFFFFF representing function attributes, and other values +representing 1-based function parameters. Each attr value is a bitmap with the +following interpretation:

+
    +
  • bit 0: zeroext

  • +
  • bit 1: signext

  • +
  • bit 2: noreturn

  • +
  • bit 3: inreg

  • +
  • bit 4: sret

  • +
  • bit 5: nounwind

  • +
  • bit 6: noalias

  • +
  • bit 7: byval

  • +
  • bit 8: nest

  • +
  • bit 9: readnone

  • +
  • bit 10: readonly

  • +
  • bit 11: noinline

  • +
  • bit 12: alwaysinline

  • +
  • bit 13: optsize

  • +
  • bit 14: ssp

  • +
  • bit 15: sspreq

  • +
  • bits 16-31: align n

  • +
  • bit 32: nocapture

  • +
  • bit 33: noredzone

  • +
  • bit 34: noimplicitfloat

  • +
  • bit 35: naked

  • +
  • bit 36: inlinehint

  • +
  • bits 37-39: alignstack n, represented as the logarithm +base 2 of the requested alignment, plus 1

  • +
+
+
+
+

PARAMATTR_GROUP_BLOCK Contents

+

The PARAMATTR_GROUP_BLOCK block (id 10) contains a table of entries +describing the attribute groups present in the module. These entries can be +referenced within PARAMATTR_CODE_ENTRY entries.

+
+

PARAMATTR_GRP_CODE_ENTRY Record

+

[ENTRY, grpid, paramidx, attr0, attr1, ...]

+

The ENTRY record (code 3) contains grpid and paramidx values, followed +by a variable number of values describing a unique group of attributes. The +grpid value is a unique key for the attribute group, which can be referenced +within PARAMATTR_CODE_ENTRY entries. The paramidx value indicates which +set of attributes is represented, with 0 representing the return value +attributes, 0xFFFFFFFF representing function attributes, and other values +representing 1-based function parameters.

+

Each attr is itself represented as a variable number of values:

+

kind, key [, ...], [value [, ...]]

+

Each attribute is either a well-known LLVM attribute (possibly with an integer +value associated with it), or an arbitrary string (possibly with an arbitrary +string value associated with it). The kind value is an integer code +distinguishing between these possibilities:

+
    +
  • code 0: well-known attribute

  • +
  • code 1: well-known attribute with an integer value

  • +
  • code 3: string attribute

  • +
  • code 4: string attribute with a string value

  • +
+

For well-known attributes (code 0 or 1), the key value is an integer code +identifying the attribute. For attributes with an integer argument (code 1), +the value value indicates the argument.

+

For string attributes (code 3 or 4), the key value is actually a variable +number of values representing the bytes of a null-terminated string. For +attributes with a string argument (code 4), the value value is similarly a +variable number of values representing the bytes of a null-terminated string.

+

The integer codes are mapped to well-known attributes as follows.

+
    +
  • code 1: align(<n>)

  • +
  • code 2: alwaysinline

  • +
  • code 3: byval

  • +
  • code 4: inlinehint

  • +
  • code 5: inreg

  • +
  • code 6: minsize

  • +
  • code 7: naked

  • +
  • code 8: nest

  • +
  • code 9: noalias

  • +
  • code 10: nobuiltin

  • +
  • code 11: nocapture

  • +
  • code 12: noduplicate

  • +
  • code 13: noimplicitfloat

  • +
  • code 14: noinline

  • +
  • code 15: nonlazybind

  • +
  • code 16: noredzone

  • +
  • code 17: noreturn

  • +
  • code 18: nounwind

  • +
  • code 19: optsize

  • +
  • code 20: readnone

  • +
  • code 21: readonly

  • +
  • code 22: returned

  • +
  • code 23: returns_twice

  • +
  • code 24: signext

  • +
  • code 25: alignstack(<n>)

  • +
  • code 26: ssp

  • +
  • code 27: sspreq

  • +
  • code 28: sspstrong

  • +
  • code 29: sret

  • +
  • code 30: sanitize_address

  • +
  • code 31: sanitize_thread

  • +
  • code 32: sanitize_memory

  • +
  • code 33: uwtable

  • +
  • code 34: zeroext

  • +
  • code 35: builtin

  • +
  • code 36: cold

  • +
  • code 37: optnone

  • +
  • code 38: inalloca

  • +
  • code 39: nonnull

  • +
  • code 40: jumptable

  • +
  • code 41: dereferenceable(<n>)

  • +
  • code 42: dereferenceable_or_null(<n>)

  • +
  • code 43: convergent

  • +
  • code 44: safestack

  • +
  • code 45: argmemonly

  • +
  • code 46: swiftself

  • +
  • code 47: swifterror

  • +
  • code 48: norecurse

  • +
  • code 49: inaccessiblememonly

  • +
  • code 50: inaccessiblemem_or_argmemonly

  • +
  • code 51: allocsize(<EltSizeParam>[, <NumEltsParam>])

  • +
  • code 52: writeonly

  • +
  • code 53: speculatable

  • +
  • code 54: strictfp

  • +
  • code 55: sanitize_hwaddress

  • +
  • code 56: nocf_check

  • +
  • code 57: optforfuzzing

  • +
  • code 58: shadowcallstack

  • +
  • code 59: speculative_load_hardening

  • +
  • code 60: immarg

  • +
  • code 61: willreturn

  • +
  • code 62: nofree

  • +
  • code 63: nosync

  • +
  • code 64: sanitize_memtag

  • +
  • code 65: preallocated

  • +
  • code 66: nomerge

  • +
  • code 67: null_pointer_is_valid

  • +
  • code 68: noundef

  • +
  • code 69: byref

  • +
  • code 70: mustprogress

  • +
  • code 74: vscale_range(<Min>[, <Max>])

  • +
  • code 75: swiftasync

  • +
  • code 76: nosanitize_coverage

  • +
  • code 77: elementtype

  • +
  • code 78: disable_sanitizer_instrumentation

  • +
+
+

Note

+

The allocsize attribute has a special encoding for its arguments. Its two +arguments, which are 32-bit integers, are packed into one 64-bit integer value +(i.e. (EltSizeParam << 32) | NumEltsParam), with NumEltsParam taking on +the sentinel value -1 if it is not specified.

+
+
+

Note

+

The vscale_range attribute has a special encoding for its arguments. Its two +arguments, which are 32-bit integers, are packed into one 64-bit integer value +(i.e. (Min << 32) | Max), with Max taking on the value of Min if +it is not specified.

+
+
+
+
+

TYPE_BLOCK Contents

+

The TYPE_BLOCK block (id 17) contains records which constitute a table of +type operator entries used to represent types referenced within an LLVM +module. Each record (with the exception of NUMENTRY) generates a single type +table entry, which may be referenced by 0-based index from instructions, +constants, metadata, type symbol table entries, or other type operator records.

+

Entries within TYPE_BLOCK are constructed to ensure that each entry is +unique (i.e., no two indices represent structurally equivalent types).

+
+

TYPE_CODE_NUMENTRY Record

+

[NUMENTRY, numentries]

+

The NUMENTRY record (code 1) contains a single value which indicates the +total number of type code entries in the type table of the module. If present, +NUMENTRY should be the first record in the block.

+
+
+

TYPE_CODE_VOID Record

+

[VOID]

+

The VOID record (code 2) adds a void type to the type table.

+
+
+

TYPE_CODE_HALF Record

+

[HALF]

+

The HALF record (code 10) adds a half (16-bit floating point) type to +the type table.

+
+
+

TYPE_CODE_BFLOAT Record

+

[BFLOAT]

+

The BFLOAT record (code 23) adds a bfloat (16-bit brain floating point) +type to the type table.

+
+
+

TYPE_CODE_FLOAT Record

+

[FLOAT]

+

The FLOAT record (code 3) adds a float (32-bit floating point) type to +the type table.

+
+
+

TYPE_CODE_DOUBLE Record

+

[DOUBLE]

+

The DOUBLE record (code 4) adds a double (64-bit floating point) type to +the type table.

+
+
+

TYPE_CODE_LABEL Record

+

[LABEL]

+

The LABEL record (code 5) adds a label type to the type table.

+
+
+

TYPE_CODE_OPAQUE Record

+

[OPAQUE]

+

The OPAQUE record (code 6) adds an opaque type to the type table, with +a name defined by a previously encountered STRUCT_NAME record. Note that +distinct opaque types are not unified.

+
+
+

TYPE_CODE_INTEGER Record

+

[INTEGER, width]

+

The INTEGER record (code 7) adds an integer type to the type table. The +single width field indicates the width of the integer type.

+
+
+

TYPE_CODE_POINTER Record

+

[POINTER, pointee type, address space]

+

The POINTER record (code 8) adds a pointer type to the type table. The +operand fields are

+
    +
  • pointee type: The type index of the pointed-to type

  • +
  • address space: If supplied, the target-specific numbered address space where +the pointed-to object resides. Otherwise, the default address space is zero.

  • +
+
+
+

TYPE_CODE_FUNCTION_OLD Record

+
+

Note

+

This is a legacy encoding for functions, produced by LLVM versions 3.0 and +earlier. It is guaranteed to be understood by the current LLVM version, as +specified in the IR Backwards Compatibility policy.

+
+

[FUNCTION_OLD, vararg, ignored, retty, ...paramty... ]

+

The FUNCTION_OLD record (code 9) adds a function type to the type table. +The operand fields are

+
    +
  • vararg: Non-zero if the type represents a varargs function

  • +
  • ignored: This value field is present for backward compatibility only, and is +ignored

  • +
  • retty: The type index of the function’s return type

  • +
  • paramty: Zero or more type indices representing the parameter types of the +function

  • +
+
+
+

TYPE_CODE_ARRAY Record

+

[ARRAY, numelts, eltty]

+

The ARRAY record (code 11) adds an array type to the type table. The +operand fields are

+
    +
  • numelts: The number of elements in arrays of this type

  • +
  • eltty: The type index of the array element type

  • +
+
+
+

TYPE_CODE_VECTOR Record

+

[VECTOR, numelts, eltty]

+

The VECTOR record (code 12) adds a vector type to the type table. The +operand fields are

+
    +
  • numelts: The number of elements in vectors of this type

  • +
  • eltty: The type index of the vector element type

  • +
+
+
+

TYPE_CODE_X86_FP80 Record

+

[X86_FP80]

+

The X86_FP80 record (code 13) adds an x86_fp80 (80-bit floating point) +type to the type table.

+
+
+

TYPE_CODE_FP128 Record

+

[FP128]

+

The FP128 record (code 14) adds an fp128 (128-bit floating point) type +to the type table.

+
+
+

TYPE_CODE_PPC_FP128 Record

+

[PPC_FP128]

+

The PPC_FP128 record (code 15) adds a ppc_fp128 (128-bit floating point) +type to the type table.

+
+
+

TYPE_CODE_METADATA Record

+

[METADATA]

+

The METADATA record (code 16) adds a metadata type to the type table.

+
+
+

TYPE_CODE_X86_MMX Record

+

[X86_MMX]

+

The X86_MMX record (code 17) adds an x86_mmx type to the type table.

+
+
+

TYPE_CODE_STRUCT_ANON Record

+

[STRUCT_ANON, ispacked, ...eltty...]

+

The STRUCT_ANON record (code 18) adds a literal struct type to the type +table. The operand fields are

+
    +
  • ispacked: Non-zero if the type represents a packed structure

  • +
  • eltty: Zero or more type indices representing the element types of the +structure

  • +
+
+
+

TYPE_CODE_STRUCT_NAME Record

+

[STRUCT_NAME, ...string...]

+

The STRUCT_NAME record (code 19) contains a variable number of values +representing the bytes of a struct name. The next OPAQUE or +STRUCT_NAMED record will use this name.

+
+
+

TYPE_CODE_STRUCT_NAMED Record

+

[STRUCT_NAMED, ispacked, ...eltty...]

+

The STRUCT_NAMED record (code 20) adds an identified struct type to the +type table, with a name defined by a previously encountered STRUCT_NAME +record. The operand fields are

+
    +
  • ispacked: Non-zero if the type represents a packed structure

  • +
  • eltty: Zero or more type indices representing the element types of the +structure

  • +
+
+
+

TYPE_CODE_FUNCTION Record

+

[FUNCTION, vararg, retty, ...paramty... ]

+

The FUNCTION record (code 21) adds a function type to the type table. The +operand fields are

+
    +
  • vararg: Non-zero if the type represents a varargs function

  • +
  • retty: The type index of the function’s return type

  • +
  • paramty: Zero or more type indices representing the parameter types of the +function

  • +
+
+
+

TYPE_CODE_X86_AMX Record

+

[X86_AMX]

+

The X86_AMX record (code 24) adds an x86_amx type to the type table.

+
+
+
+

CONSTANTS_BLOCK Contents

+

The CONSTANTS_BLOCK block (id 11) …

+
+
+

FUNCTION_BLOCK Contents

+

The FUNCTION_BLOCK block (id 12) …

+

In addition to the record types described below, a FUNCTION_BLOCK block may +contain the following sub-blocks:

+ +
+
+

VALUE_SYMTAB_BLOCK Contents

+

The VALUE_SYMTAB_BLOCK block (id 14) …

+
+
+

METADATA_BLOCK Contents

+

The METADATA_BLOCK block (id 15) …

+
+
+

METADATA_ATTACHMENT Contents

+

The METADATA_ATTACHMENT block (id 16) …

+
+
+

STRTAB_BLOCK Contents

+

The STRTAB block (id 23) contains a single record (STRTAB_BLOB, id 1) +with a single blob operand containing the bitcode file’s string table.

+

Strings in the string table are not null terminated. A record’s strtab +offset and strtab size operands specify the byte offset and size of a +string within the string table.

+

The string table is used by all preceding blocks in the bitcode file that are +not succeeded by another intervening STRTAB block. Normally a bitcode +file will have a single string table, but it may have more than one if it +was created by binary concatenation of multiple bitcode files.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BlockFrequencyTerminology.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BlockFrequencyTerminology.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BlockFrequencyTerminology.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BlockFrequencyTerminology.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,258 @@ + + + + + + + + + LLVM Block Frequency Terminology — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Block Frequency Terminology

+ +
+

Introduction

+

Block Frequency is a metric for estimating the relative frequency of different +basic blocks. This document describes the terminology that the +BlockFrequencyInfo and MachineBlockFrequencyInfo analysis passes use.

+
+
+

Branch Probability

+

Blocks with multiple successors have probabilities associated with each +outgoing edge. These are called branch probabilities. For a given block, the +sum of its outgoing branch probabilities should be 1.0.

+
+
+

Branch Weight

+

Rather than storing fractions on each edge, we store an integer weight. +Weights are relative to the other edges of a given predecessor block. The +branch probability associated with a given edge is its own weight divided by +the sum of the weights on the predecessor’s outgoing edges.

+

For example, consider this IR:

+
define void @foo() {
+    ; ...
+    A:
+        br i1 %cond, label %B, label %C, !prof !0
+    ; ...
+}
+!0 = metadata !{metadata !"branch_weights", i32 7, i32 8}
+
+
+

and this simple graph representation:

+
A -> B  (edge-weight: 7)
+A -> C  (edge-weight: 8)
+
+
+

The probability of branching from block A to block B is 7/15, and the +probability of branching from block A to block C is 8/15.

+

See LLVM Branch Weight Metadata for details about the branch weight IR +representation.

+
+
+

Block Frequency

+

Block frequency is a relative metric that represents the number of times a +block executes. The ratio of a block frequency to the entry block frequency is +the expected number of times the block will execute per entry to the function.

+

Block frequency is the main output of the BlockFrequencyInfo and +MachineBlockFrequencyInfo analysis passes.

+
+
+

Implementation: a series of DAGs

+

The implementation of the block frequency calculation analyses each loop, +bottom-up, ignoring backedges; i.e., as a DAG. After each loop is processed, +it’s packaged up to act as a pseudo-node in its parent loop’s (or the +function’s) DAG analysis.

+
+
+

Block Mass

+

For each DAG, the entry node is assigned a mass of UINT64_MAX and mass is +distributed to successors according to branch weights. Block Mass uses a +fixed-point representation where UINT64_MAX represents 1.0 and 0 +represents a number just above 0.0.

+

After mass is fully distributed, in any cut of the DAG that separates the exit +nodes from the entry node, the sum of the block masses of the nodes succeeded +by a cut edge should equal UINT64_MAX. In other words, mass is conserved +as it “falls” through the DAG.

+

If a function’s basic block graph is a DAG, then block masses are valid block +frequencies. This works poorly in practice though, since downstream users rely +on adding block frequencies together without hitting the maximum.

+
+
+

Loop Scale

+

Loop scale is a metric that indicates how many times a loop iterates per entry. +As mass is distributed through the loop’s DAG, the (otherwise ignored) backedge +mass is collected. This backedge mass is used to compute the exit frequency, +and thus the loop scale.

+
+
+

Implementation: Getting from mass and scale to frequency

+

After analysing the complete series of DAGs, each block has a mass (local to +its containing loop, if any), and each loop pseudo-node has a loop scale and +its own mass (from its parent’s DAG).

+

We can get an initial frequency assignment (with entry frequency of 1.0) by +multiplying these masses and loop scales together. A given block’s frequency +is the product of its mass, the mass of containing loops’ pseudo nodes, and the +containing loops’ loop scales.

+

Since downstream users need integers (not floating point), this initial +frequency assignment is shifted as necessary into the range of uint64_t.

+
+
+

Block Bias

+

Block bias is a proposed absolute metric to indicate a bias toward or away +from a given block during a function’s execution. The idea is that bias can be +used in isolation to indicate whether a block is relatively hot or cold, or to +compare two blocks to indicate whether one is hotter or colder than the other.

+

The proposed calculation involves calculating a reference block frequency, +where:

+
    +
  • every branch weight is assumed to be 1 (i.e., every branch probability +distribution is even) and

  • +
  • loop scales are ignored.

  • +
+

This reference frequency represents what the block frequency would be in an +unbiased graph.

+

The bias is the ratio of the block frequency to this reference block frequency.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BranchWeightMetadata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BranchWeightMetadata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BranchWeightMetadata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BranchWeightMetadata.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,356 @@ + + + + + + + + + LLVM Branch Weight Metadata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Branch Weight Metadata

+ +
+

Introduction

+

Branch Weight Metadata represents branch weights as their likelihood of being taken +(see LLVM Block Frequency Terminology). Metadata is assigned to an +Instruction that is a terminator as an MDNode of the MD_prof kind. +The first operand is always an MDString node with the string +“branch_weights”. The number of operands depends on the terminator type.

+

Branch weights might be fetched from the profiling file, or generated based on the +__builtin_expect and __builtin_expect_with_probability instructions.

+

All weights are represented as unsigned 32-bit values, where a higher value +indicates a greater chance of being taken.

+
+
+

Supported Instructions

+
+

BranchInst

+

Metadata is only assigned to the conditional branches. There are two extra +operands for the true and the false branch.

+
!0 = metadata !{
+  metadata !"branch_weights",
+  i32 <TRUE_BRANCH_WEIGHT>,
+  i32 <FALSE_BRANCH_WEIGHT>
+}
+
+
+
+
+

SwitchInst

+

Branch weights are assigned to every case (including the default case which +is always case #0).

+
!0 = metadata !{
+  metadata !"branch_weights",
+  i32 <DEFAULT_BRANCH_WEIGHT>
+  [ , i32 <CASE_BRANCH_WEIGHT> ... ]
+}
+
+
+
+
+

IndirectBrInst

+

Branch weights are assigned to every destination.

+
!0 = metadata !{
+  metadata !"branch_weights",
+  i32 <LABEL_BRANCH_WEIGHT>
+  [ , i32 <LABEL_BRANCH_WEIGHT> ... ]
+}
+
+
+
+
+

CallInst

+

Calls may have branch weight metadata, containing the execution count of +the call. It is currently used in SamplePGO mode only, to augment the +block and entry counts which may not be accurate with sampling.

+
!0 = metadata !{
+  metadata !"branch_weights",
+  i32 <CALL_BRANCH_WEIGHT>
+}
+
+
+
+
+

InvokeInst

+

An invoke instruction may have branch weight metadata with one or two weights. +The second weight is optional and corresponds to the unwind branch. +If only one weight is set then it contains the execution count of the call +and is used in SamplePGO mode only, as described for the call instruction. If both +weights are specified then the second weight contains the count of unwind branches +taken and the first weight contains the execution count of the call minus +the count of unwind branches taken. Both weights specified are used to calculate +BranchProbability as for BranchInst, and for SamplePGO the sum of both weights +is used.

+
!0 = metadata !{
+  metadata !"branch_weights",
+  i32 <INVOKE_NORMAL_WEIGHT>
+  [ , i32 <INVOKE_UNWIND_WEIGHT> ]
+}
+
+
+
+
+

Other

+

Other terminator instructions are not allowed to contain Branch Weight Metadata.

+
+
+
+

Built-in expect Instructions

+

__builtin_expect(long exp, long c) instruction provides branch prediction +information. The return value is the value of exp.

+

It is especially useful in conditional statements. Currently Clang supports two +conditional statements:

+
+

if statement

+

The exp parameter is the condition. The c parameter is the expected +comparison value. If it is equal to 1 (true), the condition is likely to be +true; otherwise the condition is likely to be false. For example:

+
if (__builtin_expect(x > 0, 1)) {
+  // This block is likely to be taken.
+}
+
+
+
+
+

switch statement

+

The exp parameter is the value. The c parameter is the expected +value. If the expected value doesn’t show on the cases list, the default +case is assumed to be likely taken.

+
switch (__builtin_expect(x, 5)) {
+default: break;
+case 0:  // ...
+case 3:  // ...
+case 5:  // This case is likely to be taken.
+}
+
+
+
+
+
+

Built-in expect.with.probability Instruction

+

__builtin_expect_with_probability(long exp, long c, double probability) has +the same semantics as __builtin_expect, but the caller provides the +probability that exp == c. The last argument probability must be a +constant floating-point expression in the range [0.0, 1.0] inclusive. +The usage is also similar to that of __builtin_expect, for example:

+
+

if statement

+

If the expected comparison value c is equal to 1 (true), and the probability +value probability is set to 0.8, that means the probability of the condition +being true is 80% while that of it being false is 20%.

+
if (__builtin_expect_with_probability(x > 0, 1, 0.8)) {
+  // This block is likely to be taken with probability 80%.
+}
+
+
+
+
+

switch statement

+

This is basically the same as the switch statement in __builtin_expect. +The probability that exp is equal to the expected value is given in +the third argument probability, while the probability of each other value is +the remaining probability (1.0 - probability) divided evenly among the other cases. For example:

+
switch (__builtin_expect_with_probability(x, 5, 0.7)) {
+default: break;  // Take this case with probability 10%
+case 0:  break;  // Take this case with probability 10%
+case 3:  break;  // Take this case with probability 10%
+case 5:  break;  // This case is likely to be taken with probability 70%
+}
+
+
+
+
+
+

CFG Modifications

+

Branch Weight Metadata is not proof against CFG changes. If terminator operands +are changed some action should be taken. Otherwise some misoptimizations may +occur due to incorrect branch prediction information.

+
+
+

Function Entry Counts

+

To allow comparing different functions during inter-procedural analysis and +optimization, MD_prof nodes can also be assigned to a function definition. +The first operand is a string indicating the name of the associated counter.

+

Currently, one counter is supported: “function_entry_count”. The second operand +is a 64-bit counter that indicates the number of times that this function was +invoked (in the case of instrumentation-based profiles). In the case of +sampling-based profiles, this operand is an approximation of how many times +the function was invoked.

+

For example, in the code below, the instrumentation for function foo() +indicates that it was called 2,590 times at runtime.

+
define i32 @foo() !prof !1 {
+  ret i32 0
+}
+!1 = !{!"function_entry_count", i64 2590}
+
+
+

If “function_entry_count” has more than 2 operands, the later operands are +the GUIDs of the functions that need to be imported by ThinLTO. This is only +set by sampling based profile. It is needed because the sampling based profile +was collected on a binary that had already imported and inlined these functions, +and we need to ensure the IR matches in the ThinLTO backends for profile +annotation. The reason why we cannot annotate this on the callsite is that it +can only go down 1 level in the call chain. For the cases where +foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels +in the call chain to import both bar_in_b_cc and baz_in_c_cc.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BugLifeCycle.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BugLifeCycle.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BugLifeCycle.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BugLifeCycle.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,264 @@ + + + + + + + + + LLVM Bug Life Cycle — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Bug Life Cycle

+ +
+

Introduction - Achieving consistency in how we deal with bug reports

+

We aim to achieve a basic level of consistency in how reported bugs evolve from +being reported, to being worked on, and finally getting closed out. The +consistency helps reporters, developers and others to gain a better +understanding of what a particular bug state actually means and what to expect +to happen next.

+

At the same time, we aim to not over-specify the life cycle of bugs in the +LLVM Bug Tracking System, as the +overall goal is to make it easier to work with and understand the bug reports.

+

The main parts of the life cycle documented here are:

+
    +
  1. Reporting

  2. +
  3. Triaging

  4. +
  5. Actively working on fixing

  6. +
  7. Closing

  8. +
+

Furthermore, some of the metadata in the bug tracker, such as who to notify on +newly reported bugs or what breakdown into products & components we use, +needs to be maintained. See the following for details:

+
    +
  1. Maintenance of Bug products/component metadata

  2. +
  3. Maintenance of cc-by-default settings

  4. +
+
+
+

Reporting bugs

+

See How to submit an LLVM bug report for further details on how to submit good bug reports.

+

Make sure that you have one or more people on cc on the bug report that you +think will react to it. We aim to automatically add specific people on cc for +most products/components, but may not always succeed in doing so.

+

If you know the area of LLVM code the root cause of the bug is in, good +candidates to add as cc may be the same people you’d ask for a code review in +that area. See Finding potential reviewers for more details.

+
+
+

Triaging bugs

+

Bugs with status NEW indicate that they still need to be triaged. +When triage is complete, the status of the bug is moved to CONFIRMED.

+

The goal of triaging a bug is to make sure a newly reported bug ends up in a +good, actionable, state. Try to answer the following questions while triaging.

+
    +
  • Is the reported behavior actually wrong?

    +
      +
    • E.g. does a miscompile example depend on undefined behavior?

    • +
    +
  • +
  • Can you easily reproduce the bug?

    +
      +
    • If not, are there reasonable excuses why it cannot easily be reproduced?

    • +
    +
  • +
  • Is it related to an already reported bug?

    +
      +
    • Use the “See also”/“depends on”/“blocks” fields if so.

    • +
    • Close it as a duplicate if so, pointing to the issue it duplicates.

    • +
    +
  • +
  • Are the following fields filled in correctly?

    +
      +
    • Product

    • +
    • Component

    • +
    • Title

    • +
    +
  • +
  • CC others not already cc’ed that you happen to know would be good to pull in.

  • +
  • Add the “beginner” keyword if you think this would be a good bug to be fixed +by someone new to LLVM.

  • +
+
+
+

Actively working on fixing bugs

+

Please remember to assign the bug to yourself if you’re actively working on +fixing it and to unassign it when you’re no longer actively working on it. You +unassign a bug by setting the Assignee field to “unassignedbugs@nondot.org”.

+
+
+

Resolving/Closing bugs

+

For simplicity, we only have 1 status for all resolved or closed bugs: +RESOLVED.

+

Resolving bugs is good! Make sure to properly record the reason for resolving. +Examples of reasons for resolving are:

+
    +
  • Revision NNNNNN fixed the bug.

  • +
  • The bug cannot be reproduced with revision NNNNNN.

  • +
  • The circumstances for the bug don’t apply anymore.

  • +
  • There is a sound reason for not fixing it (WONTFIX).

  • +
  • There is a specific and plausible reason to think that a given bug is +otherwise inapplicable or obsolete.

    +
      +
    • One example is an old open bug that doesn’t contain enough information to +clearly understand the problem being reported (e.g. not reproducible). It is +fine to resolve such a bug e.g. with resolution WORKSFORME and leaving a +comment to encourage the reporter to reopen the bug with more information +if it’s still reproducible on their end.

    • +
    +
  • +
+

If a bug is resolved, please fill in the revision number it was fixed in in the +“Fixed by Commit(s)” field.

+
+
+

Maintenance of products/components metadata

+

Please raise a bug against “Bugzilla Admin”/“Products” to request any changes +to be made to the breakdown of products & components modeled in Bugzilla.

+
+
+

Maintenance of cc-by-default settings

+

Please raise a bug against “Bugzilla Admin”/“Products” to request any changes +to be made to the cc-by-default settings for specific components.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Bugpoint.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Bugpoint.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Bugpoint.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Bugpoint.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,343 @@ + + + + + + + + + LLVM bugpoint tool: design and usage — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM bugpoint tool: design and usage

+ +
+

Description

+

bugpoint narrows down the source of problems in LLVM tools and passes. It +can be used to debug three types of failures: optimizer crashes, miscompilations +by optimizers, or bad native code generation (including problems in the static +and JIT compilers). It aims to reduce large test cases to small, useful ones. +For example, if opt crashes while optimizing a file, it will identify the +optimization (or combination of optimizations) that causes the crash, and reduce +the file down to a small example which triggers the crash.

+

For detailed case scenarios, such as debugging opt, or one of the LLVM code +generators, see How to submit an LLVM bug report.

+
+
+

Design Philosophy

+

bugpoint is designed to be a useful tool without requiring any hooks into +the LLVM infrastructure at all. It works with any and all LLVM passes and code +generators, and does not need to “know” how they work. Because of this, it may +appear to do stupid things or miss obvious simplifications. bugpoint is +also designed to trade off programmer time for computer time in the +compiler-debugging process; consequently, it may take a long period of +(unattended) time to reduce a test case, but we feel it is still worth it. Note +that bugpoint is generally very quick unless debugging a miscompilation +where each test of the program (which requires executing it) takes a long time.

+
+

Automatic Debugger Selection

+

bugpoint reads each .bc or .ll file specified on the command line +and links them together into a single module, called the test program. If any +LLVM passes are specified on the command line, it runs these passes on the test +program. If any of the passes crash, or if they produce malformed output (which +causes the verifier to abort), bugpoint starts the crash debugger.

+

Otherwise, if the -output option was not specified, bugpoint runs the +test program with the “safe” backend (which is assumed to generate good code) to +generate a reference output. Once bugpoint has a reference output for the +test program, it tries executing it with the selected code generator. If the +selected code generator crashes, bugpoint starts the crash debugger on +the code generator. Otherwise, if the resulting output differs from the +reference output, it assumes the difference resulted from a code generator +failure, and starts the code generator debugger.

+

Finally, if the output of the selected code generator matches the reference +output, bugpoint runs the test program after all of the LLVM passes have +been applied to it. If its output differs from the reference output, it assumes +the difference resulted from a failure in one of the LLVM passes, and enters the +miscompilation debugger. Otherwise, there is no problem bugpoint can +debug.

+
+
+

Crash debugger

+

If an optimizer or code generator crashes, bugpoint will try as hard as it +can to reduce the list of passes (for optimizer crashes) and the size of the +test program. First, bugpoint figures out which combination of optimizer +passes triggers the bug. This is useful when debugging a problem exposed by +opt, for example, because it runs over 38 passes.

+

Next, bugpoint tries removing functions from the test program, to reduce its +size. Usually it is able to reduce a test program to a single function, when +debugging intraprocedural optimizations. Once the number of functions has been +reduced, it attempts to delete various edges in the control flow graph, to +reduce the size of the function as much as possible. Finally, bugpoint +deletes any individual LLVM instructions whose absence does not eliminate the +failure. At the end, bugpoint should tell you what passes crash, give you a +bitcode file, and give you instructions on how to reproduce the failure with +opt or llc.

+
+
+

Code generator debugger

+

The code generator debugger attempts to narrow down the amount of code that is +being miscompiled by the selected code generator. To do this, it takes the test +program and partitions it into two pieces: one piece which it compiles with the +“safe” backend (into a shared object), and one piece which it runs with either +the JIT or the static LLC compiler. It uses several techniques to reduce the +amount of code pushed through the LLVM code generator, to reduce the potential +scope of the problem. After it is finished, it emits two bitcode files (called +“test” [to be compiled with the code generator] and “safe” [to be compiled with +the “safe” backend], respectively), and instructions for reproducing the +problem. The code generator debugger assumes that the “safe” backend produces +good code.

+
+
+

Miscompilation debugger

+

The miscompilation debugger works similarly to the code generator debugger. It +works by splitting the test program into two pieces, running the optimizations +specified on one piece, linking the two pieces back together, and then executing +the result. It attempts to narrow down the list of passes to the one (or few) +which are causing the miscompilation, then reduce the portion of the test +program which is being miscompiled. The miscompilation debugger assumes that +the selected code generator is working properly.

+
+
+
+

Advice for using bugpoint

+

bugpoint can be a remarkably useful tool, but it sometimes works in +non-obvious ways. Here are some hints and tips:

+
    +
  • In the code generator and miscompilation debuggers, bugpoint only works +with programs that have deterministic output. Thus, if the program outputs +argv[0], the date, time, or any other “random” data, bugpoint may +misinterpret differences in these data, when output, as the result of a +miscompilation. Programs should be temporarily modified to disable outputs +that are likely to vary from run to run.

  • +
  • In the crash debugger, bugpoint does not distinguish different crashes +during reduction. Thus, if a new crash or miscompilation happens, bugpoint +will continue with the new crash instead. If you would like to stick to +a particular crash, you should write check scripts to validate the error +message; see -compile-command in bugpoint - automatic test case reduction tool.

  • +
  • In the code generator and miscompilation debuggers, debugging will go faster +if you manually modify the program or its inputs to reduce the runtime, but +still exhibit the problem.

  • +
  • bugpoint is extremely useful when working on a new optimization: it helps +track down regressions quickly. To avoid having to relink bugpoint every +time you change your optimization however, have bugpoint dynamically load +your optimization with the -load option.

  • +
  • bugpoint can generate a lot of output and run for a long period of time. +It is often useful to capture the output of the program to file. For example, +in the C shell, you can run:

    +
    $ bugpoint  ... |& tee bugpoint.log
    +
    +
    +

    to get a copy of bugpoint’s output in the file bugpoint.log, as well +as on your terminal.

    +
  • +
  • bugpoint cannot debug problems with the LLVM linker. If bugpoint +crashes before you see its “All input ok” message, you might try llvm-link +-v on the same set of input files. If that also crashes, you may be +experiencing a linker bug.

  • +
  • bugpoint is useful for proactively finding bugs in LLVM. Invoking +bugpoint with the -find-bugs option will cause the list of specified +optimizations to be randomized and applied to the program. This process will +repeat until a bug is found or the user kills bugpoint.

  • +
  • bugpoint can produce IR which contains long names. Run opt +-metarenamer over the IR to rename everything using easy-to-read, +metasyntactic names. Alternatively, run opt -strip -instnamer to rename +everything with very short (often purely numeric) names.

  • +
+
+
+

What to do when bugpoint isn’t enough

+

Sometimes, bugpoint is not enough. In particular, InstCombine and +TargetLowering both have visitor structured code with lots of potential +transformations. If the process of using bugpoint has left you with still too +much code to figure out and the problem seems to be in instcombine, the +following steps may help. These same techniques are useful with TargetLowering +as well.

+

Turn on -debug-only=instcombine and see which transformations within +instcombine are firing by selecting out lines with “IC” in them.

+

At this point, you have a decision to make. Is the number of transformations +small enough to step through them using a debugger? If so, then try that.

+

If there are too many transformations, then a source modification approach may +be helpful. In this approach, you can modify the source code of instcombine to +disable just those transformations that are being performed on your test input +and perform a binary search over the set of transformations. One set of places +to modify are the “visit*” methods of InstCombiner (e.g. +visitICmpInst) by adding a “return false” as the first line of the +method.

+

If that still doesn’t remove enough, then change the caller of +InstCombiner::DoOneIteration, InstCombiner::runOnFunction to limit the +number of iterations.

+

You may also find it useful to use “-stats” now to see what parts of +instcombine are firing. This can guide where to put additional reporting code.

+

At this point, if the amount of transformations is still too large, then +inserting code to limit whether or not to execute the body of the code in the +visit function can be helpful. Add a static counter which is incremented on +every invocation of the function. Then add code which simply returns false on +desired ranges. For example:

+
static int calledCount = 0;
+calledCount++;
+LLVM_DEBUG(if (calledCount < 212) return false);
+LLVM_DEBUG(if (calledCount > 217) return false);
+LLVM_DEBUG(if (calledCount == 213) return false);
+LLVM_DEBUG(if (calledCount == 214) return false);
+LLVM_DEBUG(if (calledCount == 215) return false);
+LLVM_DEBUG(if (calledCount == 216) return false);
+LLVM_DEBUG(dbgs() << "visitXOR calledCount: " << calledCount << "\n");
+LLVM_DEBUG(dbgs() << "I: "; I->dump());
+
+
+

could be added to visitXOR to limit visitXOR to being applied only to +calls 212 and 217. This is from an actual test case and raises an important +point—a simple binary search may not be sufficient, as transformations that +interact may require isolating more than one call. In TargetLowering, use +return SDNode(); instead of return false;.

+

Now that the number of transformations is down to a manageable number, try +examining the output to see if you can figure out which transformations are +being done. If that can be figured out, then do the usual debugging. If which +code corresponds to the transformation being performed isn’t obvious, set a +breakpoint after the call count based disabling and step through the code. +Alternatively, you can use “printf” style debugging to report waypoints.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BugpointRedesign.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BugpointRedesign.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BugpointRedesign.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BugpointRedesign.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,229 @@ + + + + + + + + + Bugpoint Redesign — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Bugpoint Redesign

+

Author: Diego Treviño (diegotf@google.com)

+

Date: 2019-06-05

+

Status: Draft

+
+

Introduction

+

As use of bugpoint has grown several areas of improvement have been identified +through years of use: confusing to use, slow, it doesn’t always produce high +quality test cases, etc. This document proposes a new approach with a narrower +focus: minimization of IR test cases.

+
+
+

Proposed New Design

+
+

Narrow focus: test-case reduction

+

The main focus will be a code reduction strategy to obtain much smaller test +cases that still have the same property as the original one. This will be done +via classic delta debugging and by adding some IR-specific reductions (e.g. +replacing globals, removing unused instructions, etc), similar to what +already exists, but with more in-depth minimization.

+

Granted, if the community differs on this proposal, the legacy code could still +be present in the tool, but with the caveat of still being documented and +designed towards delta reduction.

+
+
+

Command-Line Options

+

We are proposing to reduce the plethora of bugpoint’s options to just two: an +interesting-ness test and the arguments for said test, similar to other delta +reduction tools such as CReduce, Delta, and Lithium; the tool should feel less +cluttered, and there should also be no uncertainty about how to operate it.

+

The interesting-ness test that’s going to be run to reduce the code is given +by name: +--test=<test_name> +If a --test option is not given, the program exits; this option is similar +to bugpoint’s current -compile-custom option, which lets the user run a +custom script.

+

The interesting-ness test would be defined as a script that returns 0 when the +IR achieves a user-defined behaviour (e.g. failure to compile on clang) and a +nonzero value when otherwise. Leaving the user the freedom to determine what is +and isn’t interesting to the tool, and thus, streamlining the process of +reducing a test-case.

+

If the test accepts any arguments (excluding the input ll/bc file), they are +given via the following flag: +--test_args=<test_arguments> +If unspecified, the test is run as given. It’s worth noting that the input file +would be passed as a parameter to the test, similar to how -compile-custom +currently operates.

+
+
+

Implementation

+

The tool would behave similar to CReduce’s functionality in that it would have a +list of passes that try to minimize the given test-case. We should be able to +modularize the tool’s behavior, as well as making it easier to maintain and +expand.

+

The first version of this redesign would try to:

+
    +
  • Discard functions, instructions and metadata that don’t influence the +interesting-ness test

  • +
  • Remove unused parameters from functions

  • +
  • Eliminate unvisited conditional paths

  • +
  • Rename variables to more regular ones (such as “a”, “b”, “c”, etc.)

  • +
+

Once these passes are implemented, more meaningful reductions (such as type +reduction) would be added to the tool, to even further reduce IR.

+
+
+
+

Background on historical bugpoint issues

+
+

Root Cause Analysis

+

Presently, bugpoint takes a long time to find the source problem in a given IR +file, mainly due to the fact that it tries to debug the input by running +various strategies to classify the bug, which in turn run multiple optimizer +and compilation passes over the input, taking up a lot of time. Furthermore, +when the IR crashes, it tries to reduce it by performing some sub-optimal +passes (e.g. a lot of unreachable blocks), and sometimes even fails to minimize +at all.

+
+
+

“Quirky” Interface

+

Bugpoint’s current interface overwhelms and confuses the user; the help screen +alone ends up confusing rather than providing guidance. And, not only are there +numerous features and options, but some of them also work in unexpected ways +and most of the time the user ends up using a custom script. Pruning and +simplifying the interface will be worth considering in order to make the tool +more useful in the general case and easier to maintain.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/.buildinfo llvm-toolchain-13-13.0.0/llvm/docs/_build/html/.buildinfo --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/.buildinfo 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/.buildinfo 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. +config: 2dee7551b0e2197f1dcd1ce272caaf4b +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BuildingADistribution.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BuildingADistribution.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/BuildingADistribution.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/BuildingADistribution.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,369 @@ + + + + + + + + + Building a Distribution of LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Building a Distribution of LLVM

+ +
+

Introduction

+

This document is geared toward people who want to build and package LLVM and any +combination of LLVM sub-project tools for distribution. This document covers +useful features of the LLVM build system as well as best practices and general +information about packaging LLVM.

+

If you are new to CMake you may find the Building LLVM with CMake or CMake Primer +documentation useful. Some of the things covered in this document are the inner +workings of the builds described in the Advanced Build Configurations document.

+
+
+

General Distribution Guidance

+

When building a distribution of a compiler it is generally advised to perform a +bootstrap build of the compiler. That means building a “stage 1” compiler with +your host toolchain, then building the “stage 2” compiler using the “stage 1” +compiler. This is done so that the compiler you distribute benefits from all the +bug fixes, performance optimizations and general improvements provided by the +new compiler.

+

In deciding how to build your distribution there are a few trade-offs that you +will need to evaluate. The big two are:

+
    +
  1. Compile time of the distribution against performance of the built compiler

  2. +
  3. Binary size of the distribution against performance of the built compiler

  4. +
+

The guidance for maximizing performance of the generated compiler is to use LTO, +PGO, and statically link everything. This will result in an overall larger +distribution, and it will take longer to generate, but it provides the most +opportunity for the compiler to optimize.

+

The guidance for minimizing distribution size is to dynamically link LLVM and +Clang libraries into the tools to reduce code duplication. This will come at a +substantial performance penalty to the generated binary both because it reduces +optimization opportunity, and because dynamic linking requires resolving symbols +at process launch time, which can be very slow for C++ code.

+
+

Warning

+

One very important note: Distributions should never be built using the +BUILD_SHARED_LIBS CMake option. That option exists for optimizing developer +workflow only. Due to design and implementation decisions, LLVM relies on +global data which can end up being duplicated across shared libraries +resulting in bugs. As such this is not a safe way to distribute LLVM or +LLVM-based tools.

+
+

The simplest example of building a distribution with reasonable performance is +captured in the DistributionExample CMake cache file located at +clang/cmake/caches/DistributionExample.cmake. The following command will perform +and install the distribution build:

+
$ cmake -G Ninja -C <path to clang>/cmake/caches/DistributionExample.cmake <path to LLVM source>
+$ ninja stage2-distribution
+$ ninja stage2-install-distribution
+
+
+
+

Difference between install and install-distribution

+

One subtle but important thing to note is the difference between the install +and install-distribution targets. The install target is expected to +install every part of LLVM that your build is configured to generate except the +LLVM testing tools. Alternatively the install-distribution target, which is +recommended for building distributions, only installs specific parts of LLVM as +specified at configuration time by LLVM_DISTRIBUTION_COMPONENTS.

+

Additionally by default the install target will install the LLVM testing +tools as the public tools. This can be changed by setting +LLVM_INSTALL_TOOLCHAIN_ONLY to On. The LLVM tools are intended for +development and testing of LLVM, and should only be included in distributions +that support LLVM development.

+

When building with LLVM_DISTRIBUTION_COMPONENTS the build system also +generates a distribution target which builds all the components specified in +the list. This is a convenience build target to allow building just the +distributed pieces without needing to build all configured targets.

+
+
+

Multi-distribution configurations

+

The install-distribution target described above is for building a single +distribution. LLVM’s build system also supports building multiple distributions, +which can be used to e.g. have one distribution containing just tools and +another for libraries (to enable development). These are configured by setting +the LLVM_DISTRIBUTIONS variable to hold a list of all distribution names +(which conventionally start with an uppercase letter, e.g. “Development”), and +then setting the LLVM_<distribution>_DISTRIBUTION_COMPONENTS variable to the +list of targets for that distribution. For each distribution, the build system +generates an install-${distribution}-distribution target, where +${distribution} is the name of the distribution in lowercase, to install +that distribution. Each target can only be in one distribution.

+

Each distribution creates its own set of CMake exports, and the target to +install the CMake exports for a particular distribution for a project is named +${project}-${distribution}-cmake-exports, where ${project} is the name +of the project in lowercase and ${distribution} is the name of the +distribution in lowercase, unless the project is LLVM, in which case the target +is just named ${distribution}-cmake-exports. These targets need to be +explicitly included in the LLVM_<distribution>_DISTRIBUTION_COMPONENTS +variable in order to be included as part of the distribution.

+

Unlike with the single distribution setup, when building multiple distributions, +any components specified in LLVM_RUNTIME_DISTRIBUTION_COMPONENTS are not +automatically added to any distribution. Instead, you must include the targets +explicitly in some LLVM_<distribution>_DISTRIBUTION_COMPONENTS list.

+

We strongly encourage looking at clang/cmake/caches/MultiDistributionExample.cmake +as an example of configuring multiple distributions.

+
+
+

Special Notes for Library-only Distributions

+

One of the most powerful features of LLVM is its library-first design mentality +and the way you can compose a wide variety of tools using different portions of +LLVM. Even in this situation using BUILD_SHARED_LIBS is not supported. If you +want to distribute LLVM as a shared library for use in a tool, the recommended +method is using LLVM_BUILD_LLVM_DYLIB, and you can use LLVM_DYLIB_COMPONENTS +to configure which LLVM components are part of libLLVM. +Note: LLVM_BUILD_LLVM_DYLIB is not available on Windows.

+
+
+
+

Options for Optimizing LLVM

+

There are four main build optimizations that our CMake build system supports. +When performing a bootstrap build it is not beneficial to do anything other than +setting CMAKE_BUILD_TYPE to Release for the stage-1 compiler. This is +because the more intensive optimizations are expensive to perform and the +stage-1 compiler is thrown away. All of the further options described should be +set on the stage-2 compiler either using a CMake cache file, or by prefixing the +option with BOOTSTRAP_.

+

The first and simplest to use is the compiler optimization level by setting the +CMAKE_BUILD_TYPE option. The main values of interest are Release or +RelWithDebInfo. By default the Release option uses the -O3 +optimization level, and RelWithDebInfo uses -O2. If you want to generate +debug information and use -O3 you can override the +CMAKE_<LANG>_FLAGS_RELWITHDEBINFO option for C and CXX. +DistributionExample.cmake does this.

+

Another easy to use option is Link-Time-Optimization. You can set the +LLVM_ENABLE_LTO option on your stage-2 build to Thin or Full to enable +building LLVM with LTO. These options will significantly increase link time of +the binaries in the distribution, but they will create much faster binaries. This +option should not be used if your distribution includes static archives, as the +objects inside the archive will be LLVM bitcode, which is not portable.

+

The Advanced Build Configurations documentation describes the built-in tooling for +generating LLVM profiling information to drive Profile-Guided-Optimization. The +in-tree profiling tests are very limited, and generating the profile takes a +significant amount of time, but it can result in a significant improvement in +the performance of the generated binaries.

+

In addition to PGO profiling we also have limited support in-tree for generating +linker order files. These files provide the linker with a suggested ordering for +functions in the final binary layout. This can measurably speed up clang by +physically grouping functions that are called temporally close to each other. +The current tooling is only available on Darwin systems with dtrace(1). It +is worth noting that dtrace is non-deterministic, and so the order file +generation using dtrace is also non-deterministic.

+
+
+

Options for Reducing Size

+
+

Warning

+

Any steps taken to reduce the binary size will come at a cost of runtime +performance in the generated binaries.

+
+

The simplest and least significant way to reduce binary size is to set the +CMAKE_BUILD_TYPE variable to MinSizeRel, which will set the compiler +optimization level to -Os which optimizes for binary size. This will have +both the least benefit to size and the least impact on performance.

+

The most impactful way to reduce binary size is to dynamically link LLVM into +all the tools. This reduces code size by decreasing duplication of common code +between the LLVM-based tools. This can be done by setting the following two +CMake options to On: LLVM_BUILD_LLVM_DYLIB and LLVM_LINK_LLVM_DYLIB.

+
+

Warning

+

Distributions should never be built using the BUILD_SHARED_LIBS CMake +option. (See the warning above for more explanation.)

+
+
+
+

Relevant CMake Options

+

This section provides documentation of the CMake options that are intended to +help construct distributions. This is not an exhaustive list, and many +additional options are documented in the Building LLVM with CMake page. Some key options +that are already documented include: LLVM_TARGETS_TO_BUILD, +LLVM_ENABLE_PROJECTS, LLVM_BUILD_LLVM_DYLIB, and LLVM_LINK_LLVM_DYLIB.

+
+
LLVM_ENABLE_RUNTIMES:STRING

When building a distribution that includes LLVM runtime projects (i.e. libcxx, +compiler-rt, libcxxabi, libunwind…), it is important to build those projects +with the just-built compiler.

+
+
LLVM_DISTRIBUTION_COMPONENTS:STRING

This variable can be set to a semi-colon separated list of LLVM build system +components to install. All LLVM-based tools are components, as well as most +of the libraries and runtimes. Component names match the names of the build +system targets.

+
+
LLVM_DISTRIBUTIONS:STRING

This variable can be set to a semi-colon separated list of distributions. See +the Multi-distribution configurations section above for details on this +and other CMake variables to configure multiple distributions.

+
+
LLVM_RUNTIME_DISTRIBUTION_COMPONENTS:STRING

This variable can be set to a semi-colon separated list of runtime library +components. This is used in conjunction with LLVM_ENABLE_RUNTIMES to specify +components of runtime libraries that you want to include in your distribution. +Just like with LLVM_DISTRIBUTION_COMPONENTS, component names match the names +of the build system targets.

+
+
LLVM_DYLIB_COMPONENTS:STRING

This variable can be set to a semi-colon separated list of LLVM library +components. LLVM library components are either library names with the LLVM +prefix removed (e.g. Support, Demangle…), LLVM target names, or special +purpose component names. The special purpose component names are:

+
    +
  1. all - All LLVM available component libraries

  2. +
  3. Native - The LLVM target for the Native system

  4. +
  5. AllTargetsAsmParsers - All the included target ASM parsers libraries

  6. +
  7. AllTargetsDescs - All the included target descriptions libraries

  8. +
  9. AllTargetsDisassemblers - All the included target disassemblers libraries

  10. +
  11. AllTargetsInfos - All the included target info libraries

  12. +
+
+
LLVM_INSTALL_TOOLCHAIN_ONLY:BOOL

This option defaults to Off: when set to On it removes many of the +LLVM development and testing tools as well as component libraries from the +default install target. Including the development tools is not recommended +for distributions as many of the LLVM tools are only intended for development +and testing use.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CFIVerify.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CFIVerify.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CFIVerify.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CFIVerify.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,230 @@ + + + + + + + + + Control Flow Verification Tool Design Document — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Control Flow Verification Tool Design Document

+ +
+

Objective

+

This document provides an overview of an external tool to verify the protection +mechanisms implemented by Clang’s Control Flow Integrity (CFI) schemes +(-fsanitize=cfi). This tool, provided a binary or DSO, should infer whether +indirect control flow operations are protected by CFI, and should output these +results in a human-readable form.

+

This tool should also be added as part of Clang’s continuous integration testing +framework, where modifications to the compiler ensure that CFI protection +schemes are still present in the final binary.

+
+
+

Location

+

This tool will be present as a part of the LLVM toolchain, and will reside in +the “/llvm/tools/llvm-cfi-verify” directory, relative to the LLVM trunk. It will +be tested in two methods:

+
    +
  • Unit tests to validate code sections, present in +“/llvm/unittests/tools/llvm-cfi-verify”.

  • +
  • Integration tests, present in “/llvm/tools/clang/test/LLVMCFIVerify”. These +integration tests are part of clang as part of a continuous integration +framework, ensuring updates to the compiler that reduce CFI coverage on +indirect control flow instructions are identified.

  • +
+
+
+

Background

+

This tool will continuously validate that CFI directives are properly +implemented around all indirect control flows by analysing the output machine +code. The analysis of machine code is important as it ensures that any bugs +present in linker or compiler do not subvert CFI protections in the final +shipped binary.

+

Unprotected indirect control flow instructions will be flagged for manual +review. These unexpected control flows may simply have not been accounted for in +the compiler implementation of CFI (e.g. indirect jumps to facilitate switch +statements may not be fully protected).

+

It may be possible in the future to extend this tool to flag unnecessary CFI +directives (e.g. CFI directives around a static call to a non-polymorphic base +type). This type of directive has no security implications, but may present +performance impacts.

+
+
+

Design Ideas

+

This tool will disassemble binaries and DSOs from their machine code format and +analyse the disassembled machine code. The tool will inspect virtual calls and +indirect function calls. This tool will also inspect indirect jumps, as inlined +functions and jump tables should also be subject to CFI protections. Non-virtual +calls (-fsanitize=cfi-nvcall) and cast checks (-fsanitize=cfi-*cast*) +are not implemented due to a lack of information provided by the bytecode.

+

The tool would operate by searching for indirect control flow instructions in +the disassembly. A control flow graph would be generated from a small buffer of +the instructions surrounding the ‘target’ control flow instruction. If the +target instruction is branched-to, the fallthrough of the branch should be the +CFI trap (on x86, this is a ud2 instruction). If the target instruction is +the fallthrough (i.e. immediately succeeds) of a conditional jump, the +conditional jump target should be the CFI trap. If an indirect control flow +instruction does not conform to one of these formats, the target will be noted +as being CFI-unprotected.

+

Note that in the second case outlined above (where the target instruction is the +fallthrough of a conditional jump), if the target represents a vcall that takes +arguments, these arguments may be pushed to the stack after the branch but +before the target instruction. In these cases, a secondary ‘spill graph’ is +constructed, to ensure the register argument used by the indirect jump/call is +not spilled from the stack at any point in the interim period. If there are no +spills that affect the target register, the target is marked as CFI-protected.

+
+

Other Design Notes

+

Only machine code sections that are marked as executable will be subject to this +analysis. Non-executable sections do not require analysis as any execution +present in these sections has already violated the control flow integrity.

+

Suitable extensions may be made at a later date to include analysis for indirect +control flow operations across DSO boundaries. Currently, these CFI features are +only experimental with an unstable ABI, making them unsuitable for analysis.

+

The tool currently only supports the x86, x86_64, and AArch64 architectures.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CMake.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CMake.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CMake.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CMake.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,976 @@ + + + + + + + + + Building LLVM with CMake — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Building LLVM with CMake

+ +
+

Introduction

+

CMake is a cross-platform build-generator tool. CMake +does not build the project, it generates the files needed by your build tool +(GNU make, Visual Studio, etc.) for building LLVM.

+

If you are a new contributor, please start with the Getting Started with the LLVM System +page. This page is geared for existing contributors moving from the +legacy configure/make system.

+

If you are really anxious about getting a functional LLVM build, go to the +Quick start section. If you are a CMake novice, start with Basic CMake usage +and then go back to the Quick start section once you know what you are doing. The +Options and variables section is a reference for customizing your build. If +you already have experience with CMake, this is the recommended starting point.

+

This page is geared towards users of the LLVM CMake build. If you’re looking for +information about modifying the LLVM CMake build system you may want to see the +CMake Primer page. It has a basic overview of the CMake language.

+
+
+

Quick start

+

We use here the command-line, non-interactive CMake interface.

+
    +
  1. Download and install +CMake. Version 3.13.4 is the minimum required.

  2. +
  3. Open a shell. Your development tools must be reachable from this shell +through the PATH environment variable.

  4. +
  5. Create a build directory. Building LLVM in the source +directory is not supported. cd to this directory:

    +
    $ mkdir mybuilddir
    +$ cd mybuilddir
    +
    +
    +
  6. +
  7. Execute this command in the shell replacing path/to/llvm/source/root with +the path to the root of your LLVM source tree:

    +
    $ cmake path/to/llvm/source/root
    +
    +
    +

    CMake will detect your development environment, perform a series of tests, and +generate the files required for building LLVM. CMake will use default values +for all build parameters. See the Options and variables section for +a list of build parameters that you can modify.

    +

    This can fail if CMake can’t detect your toolset, or if it thinks that the +environment is not sane enough. In this case, make sure that the toolset that +you intend to use is the only one reachable from the shell, and that the shell +itself is the correct one for your development environment. CMake will refuse +to build MinGW makefiles if you have a POSIX shell reachable through the PATH +environment variable, for instance. You can force CMake to use a given build +tool; for instructions, see the Usage section, below. You may +also wish to control which targets LLVM enables, or which LLVM +components are built; see the Frequently Used LLVM-related +variables below.

    +
  8. +
  9. After CMake has finished running, proceed to use IDE project files, or start +the build from the build directory:

    +
    $ cmake --build .
    +
    +
    +

    The --build option tells cmake to invoke the underlying build +tool (make, ninja, xcodebuild, msbuild, etc.)

    +

    The underlying build tool can be invoked directly, of course, but +the --build option is portable.

    +
  10. +
  11. After LLVM has finished building, install it from the build directory:

    +
    $ cmake --build . --target install
    +
    +
    +

    The --target option with install parameter in addition to +the --build option tells cmake to build the install target.

    +

    It is possible to set a different install prefix at installation time +by invoking the cmake_install.cmake script generated in the +build directory:

    +
    $ cmake -DCMAKE_INSTALL_PREFIX=/tmp/llvm -P cmake_install.cmake
    +
    +
    +
  12. +
+
+
+

Basic CMake usage

+

This section explains basic aspects of CMake +which you may need in your day-to-day usage.

+

CMake comes with extensive documentation, in the form of html files, and as +online help accessible via the cmake executable itself. Execute cmake +--help for further help options.

+

CMake allows you to specify a build tool (e.g., GNU make, Visual Studio, +or Xcode). If not specified on the command line, CMake tries to guess which +build tool to use, based on your environment. Once it has identified your +build tool, CMake uses the corresponding Generator to create files for your +build tool (e.g., Makefiles or Visual Studio or Xcode project files). You can +explicitly specify the generator with the command line option -G "Name of the +generator". To see a list of the available generators on your system, execute

+
$ cmake --help
+
+
+

This will list the generator names at the end of the help text.

+

Generators’ names are case-sensitive, and may contain spaces. For this reason, +you should enter them exactly as they are listed in the cmake --help +output, in quotes. For example, to generate project files specifically for +Visual Studio 12, you can execute:

+
$ cmake -G "Visual Studio 12" path/to/llvm/source/root
+
+
+

For a given development platform there can be more than one adequate +generator. If you use Visual Studio, “NMake Makefiles” is a generator you can use +for building with NMake. By default, CMake chooses the most specific generator +supported by your development environment. If you want an alternative generator, +you must tell this to CMake with the -G option.

+
+
+

Options and variables

+

Variables customize how the build will be generated. Options are boolean +variables, with possible values ON/OFF. Options and variables are defined on the +CMake command line like this:

+
$ cmake -DVARIABLE=value path/to/llvm/source
+
+
+

You can set a variable after the initial CMake invocation to change its +value. You can also undefine a variable:

+
$ cmake -UVARIABLE path/to/llvm/source
+
+
+

Variables are stored in the CMake cache. This is a file named CMakeCache.txt +stored at the root of your build directory that is generated by cmake. +Editing it yourself is not recommended.

+

Variables are listed in the CMake cache and later in this document with +the variable name and type separated by a colon. You can also specify the +variable and type on the CMake command line:

+
$ cmake -DVARIABLE:TYPE=value path/to/llvm/source
+
+
+
+

Frequently-used CMake variables

+

Here are some of the CMake variables that are used often, along with a +brief explanation. For full documentation, consult the CMake manual, +or execute cmake --help-variable VARIABLE_NAME. See Frequently +Used LLVM-related Variables below for information about commonly +used variables that control features of LLVM and enabled subprojects.

+
+
CMAKE_BUILD_TYPE:STRING

Sets the build type for make-based generators. Possible values are +Release, Debug, RelWithDebInfo and MinSizeRel. If you are using an IDE such as +Visual Studio, you should use the IDE settings to set the build type. +Be aware that Release and RelWithDebInfo use different optimization levels on +most platforms, and that the default value of LLVM_ENABLE_ASSERTIONS +is affected.

+
+
CMAKE_INSTALL_PREFIX:PATH

Path where LLVM will be installed when the “install” target is built.

+
+
CMAKE_{C,CXX}_FLAGS:STRING

Extra flags to use when compiling C and C++ source files respectively.

+
+
CMAKE_{C,CXX}_COMPILER:STRING

Specify the C and C++ compilers to use. If you have multiple +compilers installed, CMake might not default to the one you wish to +use.

+
+
+
+ +
+

Rarely-used CMake variables

+

Here are some of the CMake variables that are rarely used, along with a brief +explanation and LLVM-related notes. For full documentation, consult the CMake +manual, or execute cmake --help-variable VARIABLE_NAME.

+
+
CMAKE_CXX_STANDARD:STRING

Sets the C++ standard to conform to when building LLVM. Possible values are +14, 17, 20. LLVM requires C++14 or higher. This defaults to 14.

+
+
+
+ +
+
+

CMake Caches

+

Recently LLVM and Clang have been adding some more complicated build system +features. Utilizing these new features often involves a complicated chain of +CMake variables passed on the command line. Clang provides a collection of CMake +cache scripts to make these features more approachable.

+

CMake cache files are utilized using CMake’s -C flag:

+
$ cmake -C <path to cache file> <path to sources>
+
+
+

CMake cache scripts are processed in an isolated scope; only cached variables +remain set when the main configuration runs. CMake cached variables do not reset +variables that are already set unless the FORCE option is specified.

+

A few notes about CMake Caches:

+
    +
  • Order of command line arguments is important

    +
      +
    • -D arguments specified before -C are set before the cache is processed and +can be read inside the cache file

    • +
    • -D arguments specified after -C are set after the cache is processed and +are unset inside the cache file

    • +
    +
  • +
  • All -D arguments will override cache file settings

  • +
  • CMAKE_TOOLCHAIN_FILE is evaluated after both the cache file and the command +line arguments

  • +
  • It is recommended that all -D options should be specified before -C

  • +
+

For more information about some of the advanced build configurations supported +via Cache files see Advanced Build Configurations.

+
+
+

Executing the Tests

+

Testing is performed when the check-all target is built. For instance, if you are +using Makefiles, execute this command in the root of your build directory:

+
$ make check-all
+
+
+

On Visual Studio, you may run tests by building the project “check-all”. +For more information about testing, see the LLVM Testing Infrastructure Guide.

+
+
+

Cross compiling

+

See this wiki page for +generic instructions on how to cross-compile with CMake. It goes into detailed +explanations and may seem daunting, but it is not. On the wiki page there are +several examples including toolchain files. Go directly to the +Information how to set up various cross compiling toolchains section +for a quick solution.

+

Also see the LLVM-related variables section for variables used when +cross-compiling.

+
+
+

Embedding LLVM in your project

+

From LLVM 3.5 onwards the CMake build system exports LLVM libraries as +importable CMake targets. This means that clients of LLVM can now reliably use +CMake to develop their own LLVM-based projects against an installed version of +LLVM regardless of how it was built.

+

Here is a simple example of a CMakeLists.txt file that imports the LLVM libraries +and uses them to build a simple application simple-tool.

+
cmake_minimum_required(VERSION 3.13.4)
+project(SimpleProject)
+
+find_package(LLVM REQUIRED CONFIG)
+
+message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
+message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
+
+# Set your project compile flags.
+# E.g. if using the C++ header files
+# you will need to enable C++14 support
+# for your compiler.
+
+include_directories(${LLVM_INCLUDE_DIRS})
+separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
+add_definitions(${LLVM_DEFINITIONS_LIST})
+
+# Now build our tools
+add_executable(simple-tool tool.cpp)
+
+# Find the libraries that correspond to the LLVM components
+# that we wish to use
+llvm_map_components_to_libnames(llvm_libs support core irreader)
+
+# Link against LLVM libraries
+target_link_libraries(simple-tool ${llvm_libs})
+
+
+

The find_package(...) directive when used in CONFIG mode (as in the above +example) will look for the LLVMConfig.cmake file in various locations (see +cmake manual for details). It creates a LLVM_DIR cache entry to save the +directory where LLVMConfig.cmake is found or allows the user to specify the +directory (e.g. by passing -DLLVM_DIR=/usr/lib/cmake/llvm to +the cmake command or by setting it directly in ccmake or cmake-gui).

+

This file is available in two different locations.

+
    +
  • <INSTALL_PREFIX>/lib/cmake/llvm/LLVMConfig.cmake where +<INSTALL_PREFIX> is the install prefix of an installed version of LLVM. +On Linux typically this is /usr/lib/cmake/llvm/LLVMConfig.cmake.

  • +
  • <LLVM_BUILD_ROOT>/lib/cmake/llvm/LLVMConfig.cmake where +<LLVM_BUILD_ROOT> is the root of the LLVM build tree. Note: this is only +available when building LLVM with CMake.

  • +
+

If LLVM is installed in your operating system’s normal installation prefix (e.g. +on Linux this is usually /usr/) find_package(LLVM ...) will +automatically find LLVM if it is installed correctly. If LLVM is not installed +or you wish to build directly against the LLVM build tree you can use +LLVM_DIR as previously mentioned.

+

The LLVMConfig.cmake file sets various useful variables. Notable variables +include

+
+
LLVM_CMAKE_DIR

The path to the LLVM CMake directory (i.e. the directory containing +LLVMConfig.cmake).

+
+
LLVM_DEFINITIONS

A list of preprocessor defines that should be used when building against LLVM.

+
+
LLVM_ENABLE_ASSERTIONS

This is set to ON if LLVM was built with assertions, otherwise OFF.

+
+
LLVM_ENABLE_EH

This is set to ON if LLVM was built with exception handling (EH) enabled, +otherwise OFF.

+
+
LLVM_ENABLE_RTTI

This is set to ON if LLVM was built with run time type information (RTTI), +otherwise OFF.

+
+
LLVM_INCLUDE_DIRS

A list of include paths to directories containing LLVM header files.

+
+
LLVM_PACKAGE_VERSION

The LLVM version. This string can be used with CMake conditionals, e.g., if +(${LLVM_PACKAGE_VERSION} VERSION_LESS "3.5").

+
+
LLVM_TOOLS_BINARY_DIR

The path to the directory containing the LLVM tools (e.g. llvm-as).

+
+
+

Notice that in the above example we link simple-tool against several LLVM +libraries. The list of libraries is determined by using the +llvm_map_components_to_libnames() CMake function. For a list of available +components look at the output of running llvm-config --components.

+

Note that for LLVM < 3.5 llvm_map_components_to_libraries() was +used instead of llvm_map_components_to_libnames(). This is now deprecated +and will be removed in a future version of LLVM.

+
+

Developing LLVM passes out of source

+

It is possible to develop LLVM passes out of LLVM’s source tree (i.e. against an +installed or built LLVM). An example of a project layout is provided below.

+
<project dir>/
+    |
+    CMakeLists.txt
+    <pass name>/
+        |
+        CMakeLists.txt
+        Pass.cpp
+        ...
+
+
+

Contents of <project dir>/CMakeLists.txt:

+
find_package(LLVM REQUIRED CONFIG)
+
+separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
+add_definitions(${LLVM_DEFINITIONS_LIST})
+include_directories(${LLVM_INCLUDE_DIRS})
+
+add_subdirectory(<pass name>)
+
+
+

Contents of <project dir>/<pass name>/CMakeLists.txt:

+
add_library(LLVMPassname MODULE Pass.cpp)
+
+
+

Note if you intend for this pass to be merged into the LLVM source tree at some +point in the future it might make more sense to use LLVM’s internal +add_llvm_library function with the MODULE argument instead by…

+

Adding the following to <project dir>/CMakeLists.txt (after +find_package(LLVM ...))

+
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
+include(AddLLVM)
+
+
+

And then changing <project dir>/<pass name>/CMakeLists.txt to

+
add_llvm_library(LLVMPassname MODULE
+  Pass.cpp
+  )
+
+
+

When you are done developing your pass, you may wish to integrate it +into the LLVM source tree. You can achieve it in two easy steps:

+
    +
  1. Copying <pass name> folder into <LLVM root>/lib/Transform directory.

  2. +
  3. Adding add_subdirectory(<pass name>) line into +<LLVM root>/lib/Transform/CMakeLists.txt.

  4. +
+
+
+
+

Compiler/Platform-specific topics

+

Notes for specific compilers and/or platforms.

+
+

Microsoft Visual C++

+
+
LLVM_COMPILER_JOBS:STRING

Specifies the maximum number of parallel compiler jobs to use per project +when building with msbuild or Visual Studio. Only supported for the Visual +Studio 2010 CMake generator. 0 means use all processors. Default is 0.

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CMakePrimer.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CMakePrimer.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CMakePrimer.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CMakePrimer.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,550 @@ + + + + + + + + + CMake Primer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

CMake Primer

+ +
+

Warning

+

Disclaimer: This documentation is written by LLVM project contributors not +anyone affiliated with the CMake project. This document may contain +inaccurate terminology, phrasing, or technical details. It is provided with +the best intentions.

+
+
+

Introduction

+

The LLVM project and many of the core projects built on LLVM build using CMake. +This document aims to provide a brief overview of CMake for developers modifying +LLVM projects or building their own projects on top of LLVM.

+

The official CMake language reference is available in the cmake-language +manpage and the cmake-language online documentation.

+
+
+

10,000 ft View

+

CMake is a tool that reads script files in its own language that describe how a +software project builds. As CMake evaluates the scripts it constructs an +internal representation of the software project. Once the scripts have been +fully processed, if there are no errors, CMake will generate build files to +actually build the project. CMake supports generating build files for a variety +of command line build tools as well as for popular IDEs.

+

When a user runs CMake it performs a variety of checks similar to how autoconf +worked historically. During the checks and the evaluation of the build +description scripts CMake caches values into the CMakeCache. This is useful +because it allows the build system to skip long-running checks during +incremental development. CMake caching also has some drawbacks, but that will be +discussed later.

+
+
+

Scripting Overview

+

CMake’s scripting language has a very simple grammar. Every language construct +is a command that matches the pattern _name_(_args_). Commands come in three +primary types: language-defined (commands implemented in C++ in CMake), defined +functions, and defined macros. The CMake distribution also contains a suite of +CMake modules that contain definitions for useful functionality.

+

The example below is the full CMake build for building a C++ “Hello World” +program. The example uses only CMake language-defined functions.

+
cmake_minimum_required(VERSION 3.15)
+project(HelloWorld)
+add_executable(HelloWorld HelloWorld.cpp)
+
+
+

The CMake language provides control flow constructs in the form of foreach loops +and if blocks. To make the example above more complicated you could add an if +block to define “APPLE” when targeting Apple platforms:

+
cmake_minimum_required(VERSION 3.15)
+project(HelloWorld)
+add_executable(HelloWorld HelloWorld.cpp)
+if(APPLE)
+  target_compile_definitions(HelloWorld PUBLIC APPLE)
+endif()
+
+
+
+
+

Variables, Types, and Scope

+
+

Dereferencing

+

In CMake variables are “stringly” typed. All variables are represented as +strings throughout evaluation. Wrapping a variable in ${} dereferences it +and results in a literal substitution of the name for the value. CMake refers to +this as “variable evaluation” in their documentation. Dereferences are performed +before the command being called receives the arguments. This means +dereferencing a list results in multiple separate arguments being passed to the +command.

+

Variable dereferences can be nested and be used to model complex data. For +example:

+
set(var_name var1)
+set(${var_name} foo) # same as "set(var1 foo)"
+set(${${var_name}}_var bar) # same as "set(foo_var bar)"
+
+
+

Dereferencing an unset variable results in an empty expansion. It is a common +pattern in CMake to set a variable conditionally, knowing that it will also be used in +code paths where the variable isn’t set. There are examples of this throughout +the LLVM CMake build system.

+

An example of variable empty expansion is:

+
if(APPLE)
+  set(extra_sources Apple.cpp)
+endif()
+add_executable(HelloWorld HelloWorld.cpp ${extra_sources})
+
+
+

In this example the extra_sources variable is only defined if you’re +targeting an Apple platform. For all other targets the extra_sources will be +evaluated as empty before add_executable is given its arguments.

+
+
+

Lists

+

In CMake lists are semi-colon delimited strings, and it is strongly advised that +you avoid using semi-colons in lists; it doesn’t go smoothly. A few examples of +defining lists:

+
# Creates a list with members a, b, c, and d
+set(my_list a b c d)
+set(my_list "a;b;c;d")
+
+# Creates a string "a b c d"
+set(my_string "a b c d")
+
+
+
+
+

Lists of Lists

+

One of the more complicated patterns in CMake is lists of lists. Because a list +cannot contain an element with a semi-colon to construct a list of lists you +make a list of variable names that refer to other lists. For example:

+
set(list_of_lists a b c)
+set(a 1 2 3)
+set(b 4 5 6)
+set(c 7 8 9)
+
+
+

With this layout you can iterate through the list of lists printing each value +with the following code:

+
foreach(list_name IN LISTS list_of_lists)
+  foreach(value IN LISTS ${list_name})
+    message(${value})
+  endforeach()
+endforeach()
+
+
+

You’ll notice that the inner foreach loop’s list is doubly dereferenced. This is +because the first dereference turns list_name into the name of the sub-list +(a, b, or c in the example), then the second dereference is to get the value of +the list.

+

This pattern is used throughout CMake, the most common example is the compiler +flags options, which CMake refers to using the following variable expansions: +CMAKE_${LANGUAGE}_FLAGS and CMAKE_${LANGUAGE}_FLAGS_${CMAKE_BUILD_TYPE}.

+
+
+

Other Types

+

Variables that are cached or specified on the command line can have types +associated with them. The variable’s type is used by CMake’s UI tool to display +the right input field. A variable’s type generally doesn’t impact evaluation, +however CMake does have special handling for some variables such as PATH. +You can read more about the special handling in CMake’s set documentation.

+
+
+

Scope

+

CMake inherently has directory-based scoping. Setting a variable in a +CMakeLists file will set the variable for that file and all subdirectories. +Variables set in a CMake module that is included in a CMakeLists file will be +set in the scope they are included from, and all subdirectories.

+

When a variable that is already set is set again in a subdirectory it overrides +the value in that scope and any deeper subdirectories.

+

The CMake set command provides two scope-related options. PARENT_SCOPE sets a +variable into the parent scope, and not the current scope. The CACHE option sets +the variable in the CMakeCache, which results in it being set in all scopes. The +CACHE option will not set a variable that already exists in the CACHE unless the +FORCE option is specified.

+

In addition to directory-based scope, CMake functions also have their own scope. +This means variables set inside functions do not bleed into the parent scope. +This is not true of macros, and it is for this reason LLVM prefers functions +over macros whenever reasonable.

+
+

Note

+

Unlike C-based languages, CMake’s loop and control flow blocks do not have +their own scopes.

+
+
+
+
+

Control Flow

+

CMake features the same basic control flow constructs you would expect in any +scripting language, but there are a few quirks because, as with everything in +CMake, control flow constructs are commands.

+
+

If, ElseIf, Else

+
+

Note

+

For the full documentation on the CMake if command go +here. That resource is +far more complete.

+
+

In general CMake if blocks work the way you’d expect:

+
if(<condition>)
+  message("do stuff")
+elseif(<condition>)
+  message("do other stuff")
+else()
+  message("do other other stuff")
+endif()
+
+
+

The single most important thing to know about CMake’s if blocks coming from a C +background is that they do not have their own scope. Variables set inside +conditional blocks persist after the endif().

+
+
+

Loops

+

The most common form of the CMake foreach block is:

+
foreach(var ...)
+  message("do stuff")
+endforeach()
+
+
+

The variable argument portion of the foreach block can contain dereferenced +lists, values to iterate, or a mix of both:

+
foreach(var foo bar baz)
+  message(${var})
+endforeach()
+# prints:
+#  foo
+#  bar
+#  baz
+
+set(my_list 1 2 3)
+foreach(var ${my_list})
+  message(${var})
+endforeach()
+# prints:
+#  1
+#  2
+#  3
+
+foreach(var ${my_list} out_of_bounds)
+  message(${var})
+endforeach()
+# prints:
+#  1
+#  2
+#  3
+#  out_of_bounds
+
+
+

There is also a more modern CMake foreach syntax. The code below is equivalent +to the code above:

+
foreach(var IN ITEMS foo bar baz)
+  message(${var})
+endforeach()
+# prints:
+#  foo
+#  bar
+#  baz
+
+set(my_list 1 2 3)
+foreach(var IN LISTS my_list)
+  message(${var})
+endforeach()
+# prints:
+#  1
+#  2
+#  3
+
+foreach(var IN LISTS my_list ITEMS out_of_bounds)
+  message(${var})
+endforeach()
+# prints:
+#  1
+#  2
+#  3
+#  out_of_bounds
+
+
+

Similar to the conditional statements, these generally behave how you would +expect, and they do not have their own scope.

+

CMake also supports while loops, although they are not widely used in LLVM.

+
+
+
+

Modules, Functions and Macros

+
+

Modules

+

Modules are CMake’s vehicle for enabling code reuse. CMake modules are just +CMake script files. They can contain code to execute on include as well as +definitions for commands.

+

In CMake macros and functions are universally referred to as commands, and they +are the primary method of defining code that can be called multiple times.

+

In LLVM we have several CMake modules that are included as part of our +distribution for developers who don’t build our project from source. Those +modules are the fundamental pieces needed to build LLVM-based projects with +CMake. We also rely on modules as a way of organizing the build system’s +functionality for maintainability and re-use within LLVM projects.

+
+
+

Argument Handling

+

When defining a CMake command handling arguments is very useful. The examples +in this section will all use the CMake function block, but this all applies +to the macro block as well.

+

CMake commands can have named arguments that are required at every call site. In +addition, all commands will implicitly accept a variable number of extra +arguments (In C parlance, all commands are varargs functions). When a command is +invoked with extra arguments (beyond the named ones) CMake will store the full +list of arguments (both named and unnamed) in a list named ARGV, and the +sublist of unnamed arguments in ARGN. Below is a trivial example of +providing a wrapper function for CMake’s built in function add_dependencies.

+
function(add_deps target)
+  add_dependencies(${target} ${ARGN})
+endfunction()
+
+
+

This example defines a new function named add_deps which takes a required first +argument, and just calls another function passing through the first argument and +all trailing arguments.

+

CMake provides a module CMakeParseArguments which provides an implementation +of advanced argument parsing. We use this all over LLVM, and it is recommended +for any function that has complex argument-based behaviors or optional +arguments. CMake’s official documentation for the module is in the +cmake-modules manpage, and is also available at the +cmake-modules online documentation.

+
+

Note

+

As of CMake 3.5 the cmake_parse_arguments command has become a native command +and the CMakeParseArguments module is empty and only left around for +compatibility.

+
+
+
+

Functions Vs Macros

+

Functions and Macros look very similar in how they are used, but there is one +fundamental difference between the two. Functions have their own scope, and +macros don’t. This means variables set in macros will bleed out into the calling +scope. That makes macros suitable for defining very small bits of functionality +only.

+

The other difference between CMake functions and macros is how arguments are +passed. Arguments to macros are not set as variables; instead, dereferences to +the parameters are resolved across the macro before executing it. This can +result in some unexpected behavior if using non-dereferenced variables. For example:

+
macro(print_list my_list)
+  foreach(var IN LISTS my_list)
+    message("${var}")
+  endforeach()
+endmacro()
+
+set(my_list a b c d)
+set(my_list_of_numbers 1 2 3 4)
+print_list(my_list_of_numbers)
+# prints:
+# a
+# b
+# c
+# d
+
+
+

Generally speaking this issue is uncommon because it requires using +non-dereferenced variables with names that overlap in the parent scope, but it +is important to be aware of because it can lead to subtle bugs.

+
+
+
+

LLVM Project Wrappers

+

LLVM projects provide lots of wrappers around critical CMake built-in commands. +We use these wrappers to provide consistent behaviors across LLVM components +and to reduce code duplication.

+

We generally (but not always) follow the convention that commands prefaced with +llvm_ are intended to be used only as building blocks for other commands. +Wrapper commands that are intended for direct use are generally named +with the project in the middle of the command name (e.g. add_llvm_executable +is the wrapper for add_executable). The LLVM add_* wrapper functions are +all defined in AddLLVM.cmake which is installed as part of the LLVM +distribution. It can be included and used by any LLVM sub-project that requires +LLVM.

+
+

Note

+

Not all LLVM projects require LLVM for all use cases. For example compiler-rt +can be built without LLVM, and the compiler-rt sanitizer libraries are used +with GCC.

+
+
+
+

Useful Built-in Commands

+

CMake has a bunch of useful built-in commands. This document isn’t going to +go into details about them because the CMake project has excellent +documentation. To highlight a few useful functions see:

+ +

The full documentation for CMake commands is in the cmake-commands manpage +and available on CMake’s website

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodeGenerator.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodeGenerator.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodeGenerator.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodeGenerator.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,2590 @@ + + + + + + + + + The LLVM Target-Independent Code Generator — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The LLVM Target-Independent Code Generator

+
+ +
+
+

Warning

+

This is a work in progress.

+
+
+

Introduction

+

The LLVM target-independent code generator is a framework that provides a suite +of reusable components for translating the LLVM internal representation to the +machine code for a specified target—either in assembly form (suitable for a +static compiler) or in binary machine code format (usable for a JIT +compiler). The LLVM target-independent code generator consists of six main +components:

+
    +
  1. Abstract target description interfaces which capture important properties +about various aspects of the machine, independently of how they will be used. +These interfaces are defined in include/llvm/Target/.

  2. +
  3. Classes used to represent the code being generated for a target. These +classes are intended to be abstract enough to represent the machine code for +any target machine. These classes are defined in +include/llvm/CodeGen/. At this level, concepts like “constant pool +entries” and “jump tables” are explicitly exposed.

  4. +
  5. Classes and algorithms used to represent code at the object file level, the +MC Layer. These classes represent assembly level constructs like labels, +sections, and instructions. At this level, concepts like “constant pool +entries” and “jump tables” don’t exist.

  6. +
  7. Target-independent algorithms used to implement various phases of native +code generation (register allocation, scheduling, stack frame representation, +etc). This code lives in lib/CodeGen/.

  8. +
  9. Implementations of the abstract target description interfaces for +particular targets. These machine descriptions make use of the components +provided by LLVM, and can optionally provide custom target-specific passes, +to build complete code generators for a specific target. Target descriptions +live in lib/Target/.

  10. +
  11. The target-independent JIT components. The LLVM JIT is completely target +independent (it uses the TargetJITInfo structure to interface for +target-specific issues). The code for the target-independent JIT lives in +lib/ExecutionEngine/JIT.

  12. +
+

Depending on which part of the code generator you are interested in working on, +different pieces of this will be useful to you. In any case, you should be +familiar with the target description and machine code representation +classes. If you want to add a backend for a new target, you will need to +implement the target description classes for your new target and understand +the LLVM code representation. If you are interested in +implementing a new code generation algorithm, it should only depend on the +target-description and machine code representation classes, ensuring that it is +portable.

+
+

Required components in the code generator

+

The two pieces of the LLVM code generator are the high-level interface to the +code generator and the set of reusable components that can be used to build +target-specific backends. The two most important interfaces ( +TargetMachine and DataLayout +) are the only ones that are required to be defined for a +backend to fit into the LLVM system, but the others must be defined if the +reusable code generator components are going to be used.

+

This design has two important implications. The first is that LLVM can support +completely non-traditional code generation targets. For example, the C backend +does not require register allocation, instruction selection, or any of the other +standard components provided by the system. As such, it only implements these +two interfaces, and does its own thing. Note that the C backend was removed from +trunk as of the LLVM 3.1 release. Another example of a code generator like this is a +(purely hypothetical) backend that converts LLVM to the GCC RTL form and uses +GCC to emit machine code for a target.

+

This design also implies that it is possible to design and implement radically +different code generators in the LLVM system that do not make use of any of the +built-in components. Doing so is not recommended at all, but could be required +for radically different targets that do not fit into the LLVM machine +description model: FPGAs for example.

+
+
+

The high-level design of the code generator

+

The LLVM target-independent code generator is designed to support efficient and +quality code generation for standard register-based microprocessors. Code +generation in this model is divided into the following stages:

+
    +
  1. Instruction Selection — This phase determines an efficient way to +express the input LLVM code in the target instruction set. This stage +produces the initial code for the program in the target instruction set, then +makes use of virtual registers in SSA form and physical registers that +represent any required register assignments due to target constraints or +calling conventions. This step turns the LLVM code into a DAG of target +instructions.

  2. +
  3. Scheduling and Formation — This phase takes the DAG of target +instructions produced by the instruction selection phase, determines an +ordering of the instructions, then emits the instructions as +MachineInstrs with that ordering. Note that we +describe this in the instruction selection section because it operates on +a SelectionDAG.

  4. +
  5. SSA-based Machine Code Optimizations — This optional stage consists of a +series of machine-code optimizations that operate on the SSA-form produced by +the instruction selector. Optimizations like modulo-scheduling or peephole +optimization work here.

  6. +
  7. Register Allocation — The target code is transformed from an infinite +virtual register file in SSA form to the concrete register file used by the +target. This phase introduces spill code and eliminates all virtual register +references from the program.

  8. +
  9. Prolog/Epilog Code Insertion — Once the machine code has been generated +for the function and the amount of stack space required is known (used for +LLVM alloca’s and spill slots), the prolog and epilog code for the function +can be inserted and “abstract stack location references” can be eliminated. +This stage is responsible for implementing optimizations like frame-pointer +elimination and stack packing.

  10. +
  11. Late Machine Code Optimizations — Optimizations that operate on “final” +machine code can go here, such as spill code scheduling and peephole +optimizations.

  12. +
  13. Code Emission — The final stage actually puts out the code for the +current function, either in the target assembler format or in machine +code.

  14. +
+

The code generator is based on the assumption that the instruction selector will +use an optimal pattern matching selector to create high-quality sequences of +native instructions. Alternative code generator designs based on pattern +expansion and aggressive iterative peephole optimization are much slower. This +design permits efficient compilation (important for JIT environments) and +aggressive optimization (used when generating code offline) by allowing +components of varying levels of sophistication to be used for any step of +compilation.

+

In addition to these stages, target implementations can insert arbitrary +target-specific passes into the flow. For example, the X86 target uses a +special pass to handle the 80x87 floating point stack architecture. Other +targets with unusual requirements can be supported with custom passes as needed.

+
+
+

Using TableGen for target description

+

The target description classes require a detailed description of the target +architecture. These target descriptions often have a large amount of common +information (e.g., an add instruction is almost identical to a sub +instruction). In order to allow the maximum amount of commonality to be +factored out, the LLVM code generator uses the +TableGen Overview tool to describe big chunks of the +target machine, which allows the use of domain-specific and target-specific +abstractions to reduce the amount of repetition.

+

As LLVM continues to be developed and refined, we plan to move more and more of +the target description to the .td form. Doing so gives us a number of +advantages. The most important is that it makes it easier to port LLVM because +it reduces the amount of C++ code that has to be written, and the surface area +of the code generator that needs to be understood before someone can get +something working. Second, it makes it easier to change things. In particular, +if tables and other things are all emitted by tblgen, we only need a change +in one place (tblgen) to update all of the targets to a new interface.

+
+
+
+

Target description classes

+

The LLVM target description classes (located in the include/llvm/Target +directory) provide an abstract description of the target machine independent of +any particular client. These classes are designed to capture the abstract +properties of the target (such as the instructions and registers it has), and do +not incorporate any particular pieces of code generation algorithms.

+

All of the target description classes (except the DataLayout + class) are designed to be subclassed by the concrete target +implementation, and have virtual methods implemented. To get to these +implementations, the TargetMachine class +provides accessors that should be implemented by the target.

+
+

The TargetMachine class

+

The TargetMachine class provides virtual methods that are used to access the +target-specific implementations of the various target description classes via +the get*Info methods (getInstrInfo, getRegisterInfo, +getFrameInfo, etc.). This class is designed to be specialized by a concrete +target implementation (e.g., X86TargetMachine) which implements the various +virtual methods. The only required target description class is the + DataLayout class, but if the code +generator components are to be used, the other interfaces should be implemented +as well.

+
+
+

The DataLayout class

+

The DataLayout class is the only required target description class, and it +is the only class that is not extensible (you cannot derive a new class from +it). DataLayout specifies information about how the target lays out memory +for structures, the alignment requirements for various data types, the size of +pointers in the target, and whether the target is little-endian or +big-endian.

+
+
+

The TargetLowering class

+

The TargetLowering class is used by SelectionDAG based instruction selectors +primarily to describe how LLVM code should be lowered to SelectionDAG +operations. Among other things, this class indicates:

+
    +
  • an initial register class to use for various ValueTypes,

  • +
  • which operations are natively supported by the target machine,

  • +
  • the return type of setcc operations,

  • +
  • the type to use for shift amounts, and

  • +
  • various high-level characteristics, like whether it is profitable to turn +division by a constant into a multiplication sequence.

  • +
+
+
+

The TargetRegisterInfo class

+

The TargetRegisterInfo class is used to describe the register file of the +target and any interactions between the registers.

+

Registers are represented in the code generator by unsigned integers. Physical +registers (those that actually exist in the target description) are unique +small numbers, and virtual registers are generally large. Note that +register #0 is reserved as a flag value.

+

Each register in the processor description has an associated +TargetRegisterDesc entry, which provides a textual name for the register +(used for assembly output and debugging dumps) and a set of aliases (used to +indicate whether one register overlaps with another).

+

In addition to the per-register description, the TargetRegisterInfo class +exposes a set of processor specific register classes (instances of the +TargetRegisterClass class). Each register class contains sets of registers +that have the same properties (for example, they are all 32-bit integer +registers). Each SSA virtual register created by the instruction selector has +an associated register class. When the register allocator runs, it replaces +virtual registers with a physical register in the set.

+

The target-specific implementations of these classes are auto-generated from a +TableGen Overview description of the register file.

+
+
+

The TargetInstrInfo class

+

The TargetInstrInfo class is used to describe the machine instructions +supported by the target. Descriptions define things like the mnemonic for +the opcode, the number of operands, the list of implicit register uses and defs, +whether the instruction has certain target-independent properties (accesses +memory, is commutable, etc), and holds any target-specific flags.

+
+
+

The TargetFrameLowering class

+

The TargetFrameLowering class is used to provide information about the stack +frame layout of the target. It holds the direction of stack growth, the known +stack alignment on entry to each function, and the offset to the local area. +The offset to the local area is the offset from the stack pointer on function +entry to the first location where function data (local variables, spill +locations) can be stored.

+
+
+

The TargetSubtarget class

+

The TargetSubtarget class is used to provide information about the specific +chip set being targeted. A sub-target informs code generation of which +instructions are supported, instruction latencies and instruction execution +itinerary; i.e., which processing units are used, in what order, and for how +long.

+
+
+

The TargetJITInfo class

+

The TargetJITInfo class exposes an abstract interface used by the +Just-In-Time code generator to perform target-specific activities, such as +emitting stubs. If a TargetMachine supports JIT code generation, it should +provide one of these objects through the getJITInfo method.

+
+
+
+

Machine code description classes

+

At the high-level, LLVM code is translated to a machine specific representation +formed out of MachineFunction , + MachineBasicBlock , and +MachineInstr instances (defined in +include/llvm/CodeGen). This representation is completely target agnostic, +representing instructions in their most abstract form: an opcode and a series of +operands. This representation is designed to support both an SSA representation +for machine code, as well as a register allocated, non-SSA form.

+
+

The MachineInstr class

+

Target machine instructions are represented as instances of the MachineInstr +class. This class is an extremely abstract way of representing machine +instructions. In particular, it only keeps track of an opcode number and a set +of operands.

+

The opcode number is a simple unsigned integer that only has meaning to a +specific backend. All of the instructions for a target should be defined in the +*InstrInfo.td file for the target. The opcode enum values are auto-generated +from this description. The MachineInstr class does not have any information +about how to interpret the instruction (i.e., what the semantics of the +instruction are); for that you must refer to the +TargetInstrInfo class.

+

The operands of a machine instruction can be of several different types: a +register reference, a constant integer, a basic block reference, etc. In +addition, a machine operand should be marked as a def or a use of the value +(though only registers are allowed to be defs).

+

By convention, the LLVM code generator orders instruction operands so that all +register definitions come before the register uses, even on architectures that +are normally printed in other orders. For example, the SPARC add instruction: +“add %i1, %i2, %i3” adds the “%i1”, and “%i2” registers and stores the +result into the “%i3” register. In the LLVM code generator, the operands should +be stored as “%i3, %i1, %i2”: with the destination first.

+

Keeping destination (definition) operands at the beginning of the operand list +has several advantages. In particular, the debugging printer will print the +instruction like this:

+
%i3 = add %i1, %i2
+
+
+

Also if the first operand is a def, it is easier to create instructions whose +only def is the first operand.

+
+

Using the MachineInstrBuilder.h functions

+

Machine instructions are created by using the BuildMI functions, located in +the include/llvm/CodeGen/MachineInstrBuilder.h file. The BuildMI +functions make it easy to build arbitrary machine instructions. Usage of the +BuildMI functions looks like this:

+
// Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42')
+// instruction and insert it at the end of the given MachineBasicBlock.
+const TargetInstrInfo &TII = ...
+MachineBasicBlock &MBB = ...
+DebugLoc DL;
+MachineInstr *MI = BuildMI(MBB, DL, TII.get(X86::MOV32ri), DestReg).addImm(42);
+
+// Create the same instr, but insert it before a specified iterator point.
+MachineBasicBlock::iterator MBBI = ...
+BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), DestReg).addImm(42);
+
+// Create a 'cmp Reg, 42' instruction, no destination reg.
+MI = BuildMI(MBB, DL, TII.get(X86::CMP32ri8)).addReg(Reg).addImm(42);
+
+// Create an 'sahf' instruction which takes no operands and stores nothing.
+MI = BuildMI(MBB, DL, TII.get(X86::SAHF));
+
+// Create a self looping branch instruction.
+BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(&MBB);
+
+
+

If you need to add a definition operand (other than the optional destination +register), you must explicitly mark it as such:

+
MI.addReg(Reg, RegState::Define);
+
+
+
+
+

Fixed (preassigned) registers

+

One important issue that the code generator needs to be aware of is the presence +of fixed registers. In particular, there are often places in the instruction +stream where the register allocator must arrange for a particular value to be +in a particular register. This can occur due to limitations of the instruction +set (e.g., the X86 can only do a 32-bit divide with the EAX/EDX +registers), or external factors like calling conventions. In any case, the +instruction selector should emit code that copies a virtual register into or out +of a physical register when needed.

+

For example, consider this simple LLVM example:

+
define i32 @test(i32 %X, i32 %Y) {
+  %Z = sdiv i32 %X, %Y
+  ret i32 %Z
+}
+
+
+

The X86 instruction selector might produce this machine code for the div and +ret:

+
;; Start of div
+%EAX = mov %reg1024           ;; Copy X (in reg1024) into EAX
+%reg1027 = sar %reg1024, 31
+%EDX = mov %reg1027           ;; Sign extend X into EDX
+idiv %reg1025                 ;; Divide by Y (in reg1025)
+%reg1026 = mov %EAX           ;; Read the result (Z) out of EAX
+
+;; Start of ret
+%EAX = mov %reg1026           ;; 32-bit return value goes in EAX
+ret
+
+
+

By the end of code generation, the register allocator would coalesce the +registers and delete the resultant identity moves producing the following +code:

+
;; X is in EAX, Y is in ECX
+mov %EAX, %EDX
+sar %EDX, 31
+idiv %ECX
+ret
+
+
+

This approach is extremely general (if it can handle the X86 architecture, it +can handle anything!) and allows all of the target specific knowledge about the +instruction stream to be isolated in the instruction selector. Note that +physical registers should have a short lifetime for good code generation, and +all physical registers are assumed dead on entry to and exit from basic blocks +(before register allocation). Thus, if you need a value to be live across basic +block boundaries, it must live in a virtual register.

+
+
+

Call-clobbered registers

+

Some machine instructions, like calls, clobber a large number of physical +registers. Rather than adding <def,dead> operands for all of them, it is +possible to use an MO_RegisterMask operand instead. The register mask +operand holds a bit mask of preserved registers, and everything else is +considered to be clobbered by the instruction.

+
+
+

Machine code in SSA form

+

MachineInstr’s are initially selected in SSA-form, and are maintained in +SSA-form until register allocation happens. For the most part, this is +trivially simple since LLVM is already in SSA form; LLVM PHI nodes become +machine code PHI nodes, and virtual registers are only allowed to have a single +definition.

+

After register allocation, machine code is no longer in SSA-form because there +are no virtual registers left in the code.

+
+
+
+

The MachineBasicBlock class

+

The MachineBasicBlock class contains a list of machine instructions +( MachineInstr instances). It roughly +corresponds to the LLVM code input to the instruction selector, but there can be +a one-to-many mapping (i.e. one LLVM basic block can map to multiple machine +basic blocks). The MachineBasicBlock class has a “getBasicBlock” method, +which returns the LLVM basic block that it comes from.

+
+
+

The MachineFunction class

+

The MachineFunction class contains a list of machine basic blocks +( MachineBasicBlock instances). It +corresponds one-to-one with the LLVM function input to the instruction selector. +In addition to a list of basic blocks, the MachineFunction contains a +MachineConstantPool, a MachineFrameInfo, a MachineFunctionInfo, and +a MachineRegisterInfo. See include/llvm/CodeGen/MachineFunction.h for +more information.

+
+
+

MachineInstr Bundles

+

LLVM code generator can model sequences of instructions as MachineInstr +bundles. A MI bundle can model a VLIW group / pack which contains an arbitrary +number of parallel instructions. It can also be used to model a sequential list +of instructions (potentially with data dependencies) that cannot be legally +separated (e.g. ARM Thumb2 IT blocks).

+

Conceptually a MI bundle is a MI with a number of other MIs nested within:

+
--------------
+|   Bundle   | ---------
+--------------          \
+       |           ----------------
+       |           |      MI      |
+       |           ----------------
+       |                   |
+       |           ----------------
+       |           |      MI      |
+       |           ----------------
+       |                   |
+       |           ----------------
+       |           |      MI      |
+       |           ----------------
+       |
+--------------
+|   Bundle   | --------
+--------------         \
+       |           ----------------
+       |           |      MI      |
+       |           ----------------
+       |                   |
+       |           ----------------
+       |           |      MI      |
+       |           ----------------
+       |                   |
+       |                  ...
+       |
+--------------
+|   Bundle   | --------
+--------------         \
+       |
+      ...
+
+
+

MI bundle support does not change the physical representations of +MachineBasicBlock and MachineInstr. All the MIs (including top level and nested +ones) are stored as a sequential list of MIs. The “bundled” MIs are marked with +the ‘InsideBundle’ flag. A top level MI with the special BUNDLE opcode is used +to represent the start of a bundle. It’s legal to mix BUNDLE MIs with individual +MIs that are not inside bundles nor represent bundles.

+

MachineInstr passes should operate on a MI bundle as a single unit. Member +methods have been taught to correctly handle bundles and MIs inside bundles. +The MachineBasicBlock iterator has been modified to skip over bundled MIs to +enforce the bundle-as-a-single-unit concept. An alternative iterator +instr_iterator has been added to MachineBasicBlock to allow passes to iterate +over all of the MIs in a MachineBasicBlock, including those which are nested +inside bundles. The top level BUNDLE instruction must have the correct set of +register MachineOperand’s that represent the cumulative inputs and outputs of +the bundled MIs.

+

Packing / bundling of MachineInstrs for VLIW architectures should +generally be done as part of the register allocation super-pass. More +specifically, the pass which determines what MIs should be bundled +together should be done after code generator exits SSA form +(i.e. after two-address pass, PHI elimination, and copy coalescing). +Such bundles should be finalized (i.e. adding BUNDLE MIs and input and +output register MachineOperands) after virtual registers have been +rewritten into physical registers. This eliminates the need to add +virtual register operands to BUNDLE instructions which would +effectively double the virtual register def and use lists. Bundles may +use virtual registers and be formed in SSA form, but may not be +appropriate for all use cases.

+
+
+
+

The “MC” Layer

+

The MC Layer is used to represent and process code at the raw machine code +level, devoid of “high level” information like “constant pools”, “jump tables”, +“global variables” or anything like that. At this level, LLVM handles things +like label names, machine instructions, and sections in the object file. The +code in this layer is used for a number of important purposes: the tail end of +the code generator uses it to write a .s or .o file, and it is also used by the +llvm-mc tool to implement standalone machine code assemblers and disassemblers.

+

This section describes some of the important classes. There are also a number +of important subsystems that interact at this layer, they are described later in +this manual.

+
+

The MCStreamer API

+

MCStreamer is best thought of as an assembler API. It is an abstract API which +is implemented in different ways (e.g. to output a .s file, output an ELF .o +file, etc) but whose API corresponds directly to what you see in a .s file. +MCStreamer has one method per directive, such as EmitLabel, EmitSymbolAttribute, +SwitchSection, EmitValue (for .byte, .word), etc, which directly correspond to +assembly level directives. It also has an EmitInstruction method, which is used +to output an MCInst to the streamer.

+

This API is most important for two clients: the llvm-mc stand-alone assembler is +effectively a parser that parses a line, then invokes a method on MCStreamer. In +the code generator, the Code Emission phase of the code generator lowers +higher level LLVM IR and Machine* constructs down to the MC layer, emitting +directives through MCStreamer.

+

On the implementation side of MCStreamer, there are two major implementations: +one for writing out a .s file (MCAsmStreamer), and one for writing out a .o +file (MCObjectStreamer). MCAsmStreamer is a straightforward implementation +that prints out a directive for each method (e.g. EmitValue -> .byte), but +MCObjectStreamer implements a full assembler.

+

For target specific directives, the MCStreamer has a MCTargetStreamer instance. +Each target that needs it defines a class that inherits from it and is a lot +like MCStreamer itself: It has one method per directive and two classes that +inherit from it, a target object streamer and a target asm streamer. The target +asm streamer just prints it (emitFnStart -> .fnstart), and the object +streamer implements the assembler logic for it.

+

To make llvm use these classes, the target initialization must call +TargetRegistry::RegisterAsmStreamer and TargetRegistry::RegisterMCObjectStreamer +passing callbacks that allocate the corresponding target streamer and pass it +to createAsmStreamer or to the appropriate object streamer constructor.

+
+
+

The MCContext class

+

The MCContext class is the owner of a variety of uniqued data structures at the +MC layer, including symbols, sections, etc. As such, this is the class that you +interact with to create symbols and sections. This class can not be subclassed.

+
+
+

The MCSymbol class

+

The MCSymbol class represents a symbol (aka label) in the assembly file. There +are two interesting kinds of symbols: assembler temporary symbols, and normal +symbols. Assembler temporary symbols are used and processed by the assembler +but are discarded when the object file is produced. The distinction is usually +represented by adding a prefix to the label, for example “L” labels are +assembler temporary labels in MachO.

+

MCSymbols are created by MCContext and uniqued there. This means that MCSymbols +can be compared for pointer equivalence to find out if they are the same symbol. +Note that pointer inequality does not guarantee the labels will end up at +different addresses though. It’s perfectly legal to output something like this +to the .s file:

+
foo:
+bar:
+  .byte 4
+
+
+

In this case, both the foo and bar symbols will have the same address.

+
+
+

The MCSection class

+

The MCSection class represents an object-file specific section. It is +subclassed by object file specific implementations (e.g. MCSectionMachO, +MCSectionCOFF, MCSectionELF) and these are created and uniqued by +MCContext. The MCStreamer has a notion of the current section, which can be +changed with the SwitchSection method (which corresponds to a “.section” +directive in a .s file).

+
+
+

The MCInst class

+

The MCInst class is a target-independent representation of an instruction. +It is a simple class (much more so than MachineInstr) that holds a +target-specific opcode and a vector of MCOperands. MCOperand, in turn, is a +simple discriminated union of three cases: 1) a simple immediate, 2) a target +register ID, 3) a symbolic expression (e.g. “Lfoo-Lbar+42”) as an MCExpr.

+

MCInst is the common currency used to represent machine instructions at the MC +layer. It is the type used by the instruction encoder, the instruction printer, +and the type generated by the assembly parser and disassembler.

+
+
+
+

Target-independent code generation algorithms

+

This section documents the phases described in the high-level design of the +code generator. It explains how they work and some of the rationale behind +their design.

+
+

Instruction Selection

+

Instruction Selection is the process of translating LLVM code presented to the +code generator into target-specific machine instructions. There are several +well-known ways to do this in the literature. LLVM uses a SelectionDAG based +instruction selector.

+

Portions of the DAG instruction selector are generated from the target +description (*.td) files. Our goal is for the entire instruction selector +to be generated from these .td files, though currently there are still +things that require custom C++ code.

+

GlobalISel is another +instruction selection framework.

+
+

Introduction to SelectionDAGs

+

The SelectionDAG provides an abstraction for code representation in a way that +is amenable to instruction selection using automatic techniques +(e.g. dynamic-programming based optimal pattern matching selectors). It is also +well-suited to other phases of code generation; in particular, instruction +scheduling (SelectionDAG’s are very close to scheduling DAGs post-selection). +Additionally, the SelectionDAG provides a host representation where a large +variety of very-low-level (but target-independent) optimizations may be +performed; ones which require extensive information about the instructions +efficiently supported by the target.

+

The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the +SDNode class. The primary payload of the SDNode is its operation code +(Opcode) that indicates what operation the node performs and the operands to the +operation. The various operation node types are described at the top of the +include/llvm/CodeGen/ISDOpcodes.h file.

+

Although most operations define a single value, each node in the graph may +define multiple values. For example, a combined div/rem operation will define +both the quotient and the remainder. Many other situations require multiple +values as well. Each node also has some number of operands, which are edges to +the node defining the used value. Because nodes may define multiple values, +edges are represented by instances of the SDValue class, which is a +<SDNode, unsigned> pair, indicating the node and result value being used, +respectively. Each value produced by an SDNode has an associated MVT +(Machine Value Type) indicating what the type of the value is.

+

SelectionDAGs contain two different kinds of values: those that represent data +flow and those that represent control flow dependencies. Data values are simple +edges with an integer or floating point value type. Control edges are +represented as “chain” edges which are of type MVT::Other. These edges +provide an ordering between nodes that have side effects (such as loads, stores, +calls, returns, etc). All nodes that have side effects should take a token +chain as input and produce a new one as output. By convention, token chain +inputs are always operand #0, and chain results are always the last value +produced by an operation. However, after instruction selection, the +machine nodes have their chain after the instruction’s operands, and +may be followed by glue nodes.

+

A SelectionDAG has designated “Entry” and “Root” nodes. The Entry node is +always a marker node with an Opcode of ISD::EntryToken. The Root node is +the final side-effecting node in the token chain. For example, in a single basic +block function it would be the return node.

+

One important concept for SelectionDAGs is the notion of a “legal” vs. +“illegal” DAG. A legal DAG for a target is one that only uses supported +operations and supported types. On a 32-bit PowerPC, for example, a DAG with a +value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a +SREM or UREM operation. The legalize types and legalize operations phases +are responsible for turning an illegal DAG into a legal DAG.

+
+
+

SelectionDAG Instruction Selection Process

+

SelectionDAG-based instruction selection consists of the following steps:

+
    +
  1. Build initial DAG — This stage performs a simple translation from the +input LLVM code to an illegal SelectionDAG.

  2. +
  3. Optimize SelectionDAG — This stage performs simple optimizations on the +SelectionDAG to simplify it, and recognize meta instructions (like rotates +and div/rem pairs) for targets that support these meta operations. +This makes the resultant code more efficient and the select instructions +from DAG phase (below) simpler.

  4. +
  5. Legalize SelectionDAG Types — This stage transforms SelectionDAG nodes +to eliminate any types that are unsupported on the target.

  6. +
  7. Optimize SelectionDAG — The SelectionDAG optimizer is run to clean up +redundancies exposed by type legalization.

  8. +
  9. Legalize SelectionDAG Ops — This stage transforms SelectionDAG nodes to +eliminate any operations that are unsupported on the target.

  10. +
  11. Optimize SelectionDAG — The SelectionDAG optimizer is run to eliminate +inefficiencies introduced by operation legalization.

  12. +
  13. Select instructions from DAG — Finally, the target instruction selector +matches the DAG operations to target instructions. This process translates +the target-independent input DAG into another DAG of target instructions.

  14. +
  15. SelectionDAG Scheduling and Formation — The last phase assigns a linear +order to the instructions in the target-instruction DAG and emits them into +the MachineFunction being compiled. This step uses traditional prepass +scheduling techniques.

  16. +
+

After all of these steps are complete, the SelectionDAG is destroyed and the +rest of the code generation passes are run.

+

One great way to visualize what is going on here is to take advantage of a few +LLC command line options. The following options pop up a window displaying the +SelectionDAG at specific times (if you only get errors printed to the console +while using this, you probably need to configure your +system to add support for it).

+
    +
  • -view-dag-combine1-dags displays the DAG after being built, before the +first optimization pass.

  • +
  • -view-legalize-dags displays the DAG before Legalization.

  • +
  • -view-dag-combine2-dags displays the DAG before the second optimization +pass.

  • +
  • -view-isel-dags displays the DAG before the Select phase.

  • +
  • -view-sched-dags displays the DAG before Scheduling.

  • +
+

The -view-sunit-dags displays the Scheduler’s dependency graph. This graph +is based on the final SelectionDAG, with nodes that must be scheduled together +bundled into a single scheduling-unit node, and with immediate operands and +other nodes that aren’t relevant for scheduling omitted.

+

The option -filter-view-dags lets you select the name of the basic block +that you are interested in visualizing and filters all the previous +view-*-dags options.

+
+
+

Initial SelectionDAG Construction

+

The initial SelectionDAG is naïvely peephole expanded from +the LLVM input by the SelectionDAGBuilder class. The intent of this pass +is to expose as many low-level, target-specific details to the SelectionDAG as +possible. This pass is mostly hard-coded (e.g. an LLVM add turns into an +SDNode add while a getelementptr is expanded into the obvious +arithmetic). This pass requires target-specific hooks to lower calls, returns, +varargs, etc. For these features, the TargetLowering + interface is used.

+
+
+

SelectionDAG LegalizeTypes Phase

+

The Legalize phase is in charge of converting a DAG to only use the types that +are natively supported by the target.

+

There are two main ways of converting values of unsupported scalar types to +values of supported types: converting small types to larger types (“promoting”), +and breaking up large integer types into smaller ones (“expanding”). For +example, a target might require that all f32 values are promoted to f64 and that +all i1/i8/i16 values are promoted to i32. The same target might require that +all i64 values be expanded into pairs of i32 values. These changes can insert +sign and zero extensions as needed to make sure that the final code has the same +behavior as the input.

+

There are two main ways of converting values of unsupported vector types to +values of supported types: splitting vector types, multiple times if necessary, +until a legal type is found, and extending vector types by adding elements to +the end to round them out to legal types (“widening”). If a vector gets split +all the way down to single-element parts with no supported vector type being +found, the elements are converted to scalars (“scalarizing”).

+

A target implementation tells the legalizer which types are supported (and which +register class to use for them) by calling the addRegisterClass method in +its TargetLowering constructor.

+
+
+

SelectionDAG Legalize Phase

+

The Legalize phase is in charge of converting a DAG to only use the operations +that are natively supported by the target.

+

Targets often have weird constraints, such as not supporting every operation on +every supported datatype (e.g. X86 does not support byte conditional moves and +PowerPC does not support sign-extending loads from a 16-bit memory location). +Legalize takes care of this by open-coding another sequence of operations to +emulate the operation (“expansion”), by promoting one type to a larger type that +supports the operation (“promotion”), or by using a target-specific hook to +implement the legalization (“custom”).

+

A target implementation tells the legalizer which operations are not supported +(and which of the above three actions to take) by calling the +setOperationAction method in its TargetLowering constructor.

+

If a target has legal vector types, it is expected to produce efficient machine +code for common forms of the shufflevector IR instruction using those types. +This may require custom legalization for SelectionDAG vector operations that +are created from the shufflevector IR. The shufflevector forms that should be +handled include:

+
    +
  • Vector select — Each element of the vector is chosen from either of the +corresponding elements of the 2 input vectors. This operation may also be +known as a “blend” or “bitwise select” in target assembly. This type of shuffle +maps directly to the shuffle_vector SelectionDAG node.

  • +
  • Insert subvector — A vector is placed into a longer vector type starting +at index 0. This type of shuffle maps directly to the insert_subvector +SelectionDAG node with the index operand set to 0.

  • +
  • Extract subvector — A vector is pulled from a longer vector type starting +at index 0. This type of shuffle maps directly to the extract_subvector +SelectionDAG node with the index operand set to 0.

  • +
  • Splat — All elements of the vector have identical scalar elements. This +operation may also be known as a “broadcast” or “duplicate” in target assembly. +The shufflevector IR instruction may change the vector length, so this operation +may map to multiple SelectionDAG nodes including shuffle_vector, +concat_vectors, insert_subvector, and extract_subvector.

  • +
+

Prior to the existence of the Legalize passes, we required that every target +selector supported and handled every operator and type even if they are not +natively supported. The introduction of the Legalize phases allows all of the +canonicalization patterns to be shared across targets, and makes it very easy to +optimize the canonicalized code because it is still in the form of a DAG.

+
+
+

SelectionDAG Optimization Phase: the DAG Combiner

+

The SelectionDAG optimization phase is run multiple times for code generation, +immediately after the DAG is built and once after each legalization. The first +run of the pass allows the initial code to be cleaned up (e.g. performing +optimizations that depend on knowing that the operators have restricted type +inputs). Subsequent runs of the pass clean up the messy code generated by the +Legalize passes, which allows Legalize to be very simple (it can focus on making +code legal instead of focusing on generating good and legal code).

+

One important class of optimizations performed is optimizing inserted sign and +zero extension instructions. We currently use ad-hoc techniques, but could move +to more rigorous techniques in the future. Here are some good papers on the +subject:

+

Widening integer arithmetic
+Kevin Redwine and Norman Ramsey
+International Conference on Compiler Construction (CC) 2004

+

Effective sign extension elimination
+Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani
+Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language Design +and Implementation.

+
+
+

SelectionDAG Select Phase

+

The Select phase is the bulk of the target-specific code for instruction +selection. This phase takes a legal SelectionDAG as input, pattern matches the +instructions supported by the target to this DAG, and produces a new DAG of +target code. For example, consider the following LLVM fragment:

+
%t1 = fadd float %W, %X
+%t2 = fmul float %t1, %Y
+%t3 = fadd float %t2, %Z
+
+
+

This LLVM code corresponds to a SelectionDAG that looks basically like this:

+
(fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z)
+
+
+

If a target supports floating point multiply-and-add (FMA) operations, one of +the adds can be merged with the multiply. On the PowerPC, for example, the +output of the instruction selector might look like this DAG:

+
(FMADDS (FADDS W, X), Y, Z)
+
+
+

The FMADDS instruction is a ternary instruction that multiplies its first +two operands and adds the third (as single-precision floating-point numbers). +The FADDS instruction is a simple binary single-precision add instruction. +To perform this pattern match, the PowerPC backend includes the following +instruction definitions:

+
def FMADDS : AForm_1<59, 29,
+                    (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                    "fmadds $FRT, $FRA, $FRC, $FRB",
+                    [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+                                           F4RC:$FRB))]>;
+def FADDS : AForm_2<59, 21,
+                    (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+                    "fadds $FRT, $FRA, $FRB",
+                    [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+
+
+

The highlighted portion of the instruction definitions indicates the pattern +used to match the instructions. The DAG operators (like fmul/fadd) +are defined in the include/llvm/Target/TargetSelectionDAG.td file. +“F4RC” is the register class of the input and result values.

+

The TableGen DAG instruction selector generator reads the instruction patterns +in the .td file and automatically builds parts of the pattern matching code +for your target. It has the following strengths:

+
    +
  • At compiler-compile time, it analyzes your instruction patterns and tells you +if your patterns make sense or not.

  • +
  • It can handle arbitrary constraints on operands for the pattern match. In +particular, it is straight-forward to say things like “match any immediate +that is a 13-bit sign-extended value”. For examples, see the immSExt16 +and related tblgen classes in the PowerPC backend.

  • +
  • It knows several important identities for the patterns defined. For example, +it knows that addition is commutative, so it allows the FMADDS pattern +above to match “(fadd X, (fmul Y, Z))” as well as “(fadd (fmul X, Y), +Z)”, without the target author having to specially handle this case.

  • +
  • It has a full-featured type-inferencing system. In particular, you should +rarely have to explicitly tell the system what type parts of your patterns +are. In the FMADDS case above, we didn’t have to tell tblgen that all +of the nodes in the pattern are of type ‘f32’. It was able to infer and +propagate this knowledge from the fact that F4RC has type ‘f32’.

  • +
  • Targets can define their own (and rely on built-in) “pattern fragments”. +Pattern fragments are chunks of reusable patterns that get inlined into your +patterns during compiler-compile time. For example, the integer “(not +x)” operation is actually defined as a pattern fragment that expands as +“(xor x, -1)”, since the SelectionDAG does not have a native ‘not’ +operation. Targets can define their own short-hand fragments as they see fit. +See the definition of ‘not’ and ‘ineg’ for examples.

  • +
  • In addition to instructions, targets can specify arbitrary patterns that map +to one or more instructions using the ‘Pat’ class. For example, the PowerPC +has no way to load an arbitrary integer immediate into a register in one +instruction. To tell tblgen how to do this, it defines:

    +
    // Arbitrary immediate support.  Implement in terms of LIS/ORI.
    +def : Pat<(i32 imm:$imm),
    +          (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
    +
    +
    +

    If none of the single-instruction patterns for loading an immediate into a +register match, this will be used. This rule says “match an arbitrary i32 +immediate, turning it into an ORI (‘or a 16-bit immediate’) and an LIS +(‘load 16-bit immediate, where the immediate is shifted to the left 16 bits’) +instruction”. To make this work, the LO16/HI16 node transformations +are used to manipulate the input immediate (in this case, take the high or low +16-bits of the immediate).

    +
  • +
  • When using the ‘Pat’ class to map a pattern to an instruction that has one +or more complex operands (like e.g. X86 addressing mode), the pattern may +either specify the operand as a whole using a ComplexPattern, or else it +may specify the components of the complex operand separately. The latter is +done e.g. for pre-increment instructions by the PowerPC back end:

    +
    def STWU  : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst),
    +                "stwu $rS, $dst", LdStStoreUpd, []>,
    +                RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
    +
    +def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff),
    +          (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>;
    +
    +
    +

    Here, the pair of ptroff and ptrreg operands is matched onto the +complex operand dst of class memri in the STWU instruction.

    +
  • +
  • While the system does automate a lot, it still allows you to write custom C++ +code to match special cases if there is something that is hard to +express.

  • +
+

While it has many strengths, the system currently has some limitations, +primarily because it is a work in progress and is not yet finished:

+
    +
  • Overall, there is no way to define or match SelectionDAG nodes that define +multiple values (e.g. SMUL_LOHI, LOAD, CALL, etc). This is the +biggest reason that you currently still have to write custom C++ code +for your instruction selector.

  • +
  • There is no great way to support matching complex addressing modes yet. In +the future, we will extend pattern fragments to allow them to define multiple +values (e.g. the four operands of the X86 addressing mode, which are +currently matched with custom C++ code). In addition, we’ll extend fragments +so that a fragment can match multiple different patterns.

  • +
  • We don’t automatically infer flags like isStore/isLoad yet.

  • +
  • We don’t automatically generate the set of supported registers and operations +for the Legalizer yet.

  • +
  • We don’t have a way of tying in custom legalized nodes yet.

  • +
+

Despite these limitations, the instruction selector generator is still quite +useful for most of the binary and logical operations in typical instruction +sets. If you run into any problems or can’t figure out how to do something, +please let Chris know!

+
+
+

SelectionDAG Scheduling and Formation Phase

+

The scheduling phase takes the DAG of target instructions from the selection +phase and assigns an order. The scheduler can pick an order depending on +various constraints of the machines (i.e. order for minimal register pressure or +try to cover instruction latencies). Once an order is established, the DAG is +converted to a list of MachineInstrs and +the SelectionDAG is destroyed.

+

Note that this phase is logically separate from the instruction selection phase, +but is tied to it closely in the code because it operates on SelectionDAGs.

+
+
+

Future directions for the SelectionDAG

+
    +
  1. Optional function-at-a-time selection.

  2. +
  3. Auto-generate entire selector from .td file.

  4. +
+
+
+ +
+

Live Intervals

+

Live Intervals are the ranges (intervals) where a variable is live. They are +used by some register allocator passes to determine if two or more virtual +registers which require the same physical register are live at the same point in +the program (i.e., they conflict). When this situation occurs, one virtual +register must be spilled.

+
+

Live Variable Analysis

+

The first step in determining the live intervals of variables is to calculate +the set of registers that are immediately dead after the instruction (i.e., the +instruction calculates the value, but it is never used) and the set of registers +that are used by the instruction, but are never used after the instruction +(i.e., they are killed). Live variable information is computed for +each virtual register and register allocatable physical register +in the function. This is done in a very efficient manner because it uses SSA to +sparsely compute lifetime information for virtual registers (which are in SSA +form) and only has to track physical registers within a block. Before register +allocation, LLVM can assume that physical registers are only live within a +single basic block. This allows it to do a single, local analysis to resolve +physical register lifetimes within each basic block. If a physical register is +not register allocatable (e.g., a stack pointer or condition codes), it is not +tracked.

+

Physical registers may be live in to or out of a function. Live in values are +typically arguments in registers. Live out values are typically return values in +registers. Live in values are marked as such, and are given a dummy “defining” +instruction during live intervals analysis. If the last basic block of a +function is a return, then it’s marked as using all live out values in the +function.

+

PHI nodes need to be handled specially, because the calculation of the live +variable information from a depth first traversal of the CFG of the function +won’t guarantee that a virtual register used by the PHI node is defined +before it’s used. When a PHI node is encountered, only the definition is +handled, because the uses will be handled in other basic blocks.

+

For each PHI node of the current basic block, we simulate an assignment at +the end of the current basic block and traverse the successor basic blocks. If a +successor basic block has a PHI node and one of the PHI node’s operands +is coming from the current basic block, then the variable is marked as alive +within the current basic block and all of its predecessor basic blocks, until +the basic block with the defining instruction is encountered.

+
+
+

Live Intervals Analysis

+

We now have the information available to perform the live intervals analysis and +build the live intervals themselves. We start off by numbering the basic blocks +and machine instructions. We then handle the “live-in” values. These are in +physical registers, so the physical register is assumed to be killed by the end +of the basic block. Live intervals for virtual registers are computed for some +ordering of the machine instructions [1, N]. A live interval is an interval +[i, j), where 1 <= i <= j < N, for which a variable is live.

+
+

Note

+

More to come…

+
+
+
+
+

Register Allocation

+

The Register Allocation problem consists in mapping a program + Pv, that can use an unbounded +number of virtual registers, to a program Pp that contains a finite (possibly small) number of physical +registers. Each target architecture has a different number of physical +registers. If the number of physical registers is not enough to accommodate all +the virtual registers, some of them will have to be mapped into memory. These +virtuals are called spilled virtuals.

+
+

How registers are represented in LLVM

+

In LLVM, physical registers are denoted by integer numbers that normally range +from 1 to 1023. To see how this numbering is defined for a particular +architecture, you can read the GenRegisterNames.inc file for that +architecture. For instance, by inspecting +lib/Target/X86/X86GenRegisterInfo.inc we see that the 32-bit register +EAX is denoted by 43, and the MMX register MM0 is mapped to 65.

+

Some architectures contain registers that share the same physical location. A +notable example is the X86 platform. For instance, in the X86 architecture, the +registers EAX, AX and AL share the first eight bits. These physical +registers are marked as aliased in LLVM. Given a particular architecture, you +can check which registers are aliased by inspecting its RegisterInfo.td +file. Moreover, the class MCRegAliasIterator enumerates all the physical +registers aliased to a register.

+

Physical registers, in LLVM, are grouped in Register Classes. Elements in the +same register class are functionally equivalent, and can be interchangeably +used. Each virtual register can only be mapped to physical registers of a +particular class. For instance, in the X86 architecture, some virtuals can only +be allocated to 8 bit registers. A register class is described by +TargetRegisterClass objects. To discover if a virtual register is +compatible with a given physical, this code can be used:

+
bool RegMapping_Fer::compatible_class(MachineFunction &mf,
+                                      unsigned v_reg,
+                                      unsigned p_reg) {
+  assert(TargetRegisterInfo::isPhysicalRegister(p_reg) &&
+         "Target register must be physical");
+  const TargetRegisterClass *trc = mf.getRegInfo().getRegClass(v_reg);
+  return trc->contains(p_reg);
+}
+
+
+

Sometimes, mostly for debugging purposes, it is useful to change the number of +physical registers available in the target architecture. This must be done +statically, inside the TargetRegisterInfo.td file. Just grep for +RegisterClass, the last parameter of which is a list of registers. Just +commenting some out is one simple way to avoid them being used. A more polite +way is to explicitly exclude some registers from the allocation order. See the +definition of the GR8 register class in +lib/Target/X86/X86RegisterInfo.td for an example of this.

+

Virtual registers are also denoted by integer numbers. Contrary to physical +registers, different virtual registers never share the same number. Whereas +physical registers are statically defined in a TargetRegisterInfo.td file +and cannot be created by the application developer, that is not the case with +virtual registers. In order to create new virtual registers, use the method +MachineRegisterInfo::createVirtualRegister(). This method will return a new +virtual register. Use an IndexedMap<Foo, VirtReg2IndexFunctor> to hold +information per virtual register. If you need to enumerate all virtual +registers, use the function TargetRegisterInfo::index2VirtReg() to find the +virtual register numbers:

+
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+  unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i);
+  stuff(VirtReg);
+}
+
+
+

Before register allocation, the operands of an instruction are mostly virtual +registers, although physical registers may also be used. In order to check if a +given machine operand is a register, use the boolean function +MachineOperand::isRegister(). To obtain the integer code of a register, use +MachineOperand::getReg(). An instruction may define or use a register. For +instance, ADD reg:1026 := reg:1025 reg:1024 defines the register 1026, and +uses registers 1025 and 1024. Given a register operand, the method +MachineOperand::isUse() informs if that register is being used by the +instruction. The method MachineOperand::isDef() informs if that register is +being defined.

+

We will call physical registers present in the LLVM bitcode before register +allocation pre-colored registers. Pre-colored registers are used in many +different situations, for instance, to pass parameters of function calls, and +to store results of particular instructions. There are two types of pre-colored +registers: the ones implicitly defined, and those explicitly +defined. Explicitly defined registers are normal operands, and can be accessed +with MachineInstr::getOperand(int)::getReg(). In order to check which +registers are implicitly defined by an instruction, use the +TargetInstrInfo::get(opcode)::ImplicitDefs, where opcode is the opcode +of the target instruction. One important difference between explicit and +implicit physical registers is that the latter are defined statically for each +instruction, whereas the former may vary depending on the program being +compiled. For example, an instruction that represents a function call will +always implicitly define or use the same set of physical registers. To read the +registers implicitly used by an instruction, use +TargetInstrInfo::get(opcode)::ImplicitUses. Pre-colored registers impose +constraints on any register allocation algorithm. The register allocator must +make sure that none of them are overwritten by the values of virtual registers +while still alive.

+
+
+

Mapping virtual registers to physical registers

+

There are two ways to map virtual registers to physical registers (or to memory +slots). The first way, that we will call direct mapping, is based on the use +of methods of the classes TargetRegisterInfo, and MachineOperand. The +second way, that we will call indirect mapping, relies on the VirtRegMap +class in order to insert loads and stores sending and getting values to and from +memory.

+

The direct mapping provides more flexibility to the developer of the register +allocator; however, it is more error prone, and demands more implementation +work. Basically, the programmer will have to specify where load and store +instructions should be inserted in the target function being compiled in order +to get and store values in memory. To assign a physical register to a virtual +register present in a given operand, use MachineOperand::setReg(p_reg). To +insert a store instruction, use TargetInstrInfo::storeRegToStackSlot(...), +and to insert a load instruction, use TargetInstrInfo::loadRegFromStackSlot.

+

The indirect mapping shields the application developer from the complexities of +inserting load and store instructions. In order to map a virtual register to a +physical one, use VirtRegMap::assignVirt2Phys(vreg, preg). In order to map +a certain virtual register to memory, use +VirtRegMap::assignVirt2StackSlot(vreg). This method will return the stack +slot where vreg’s value will be located. If it is necessary to map another +virtual register to the same stack slot, use +VirtRegMap::assignVirt2StackSlot(vreg, stack_location). One important point +to consider when using the indirect mapping, is that even if a virtual register +is mapped to memory, it still needs to be mapped to a physical register. This +physical register is the location where the virtual register is supposed to be +found before being stored or after being reloaded.

+

If the indirect strategy is used, after all the virtual registers have been +mapped to physical registers or stack slots, it is necessary to use a spiller +object to place load and store instructions in the code. Every virtual that has +been mapped to a stack slot will be stored to memory after being defined and will +be loaded before being used. The implementation of the spiller tries to recycle +load/store instructions, avoiding unnecessary instructions. For an example of +how to invoke the spiller, see RegAllocLinearScan::runOnMachineFunction in +lib/CodeGen/RegAllocLinearScan.cpp.

+
+
+

Handling two address instructions

+

With very rare exceptions (e.g., function calls), the LLVM machine code +instructions are three address instructions. That is, each instruction is +expected to define at most one register, and to use at most two registers. +However, some architectures use two address instructions. In this case, the +defined register is also one of the used registers. For instance, an instruction +such as ADD %EAX, %EBX, in X86 is actually equivalent to %EAX = %EAX + +%EBX.

+

In order to produce correct code, LLVM must convert three address instructions +that represent two address instructions into true two address instructions. LLVM +provides the pass TwoAddressInstructionPass for this specific purpose. It +must be run before register allocation takes place. After its execution, the +resulting code may no longer be in SSA form. This happens, for instance, in +situations where an instruction such as %a = ADD %b %c is converted to two +instructions such as:

+
%a = MOVE %b
+%a = ADD %a %c
+
+
+

Notice that, internally, the second instruction is represented as ADD +%a[def/use] %c. I.e., the register operand %a is both used and defined by +the instruction.

+
+
+

The SSA deconstruction phase

+

An important transformation that happens during register allocation is called +the SSA Deconstruction Phase. The SSA form simplifies many analyses that are +performed on the control flow graph of programs. However, traditional +instruction sets do not implement PHI instructions. Thus, in order to generate +executable code, compilers must replace PHI instructions with other instructions +that preserve their semantics.

+

There are many ways in which PHI instructions can safely be removed from the +target code. The most traditional PHI deconstruction algorithm replaces PHI +instructions with copy instructions. That is the strategy adopted by LLVM. The +SSA deconstruction algorithm is implemented in +lib/CodeGen/PHIElimination.cpp. In order to invoke this pass, the identifier +PHIEliminationID must be marked as required in the code of the register +allocator.

+
+
+

Instruction folding

+

Instruction folding is an optimization performed during register allocation +that removes unnecessary copy instructions. For instance, a sequence of +instructions such as:

+
%EBX = LOAD %mem_address
+%EAX = COPY %EBX
+
+
+

can be safely substituted by the single instruction:

+
%EAX = LOAD %mem_address
+
+
+

Instructions can be folded with the +TargetInstrInfo::foldMemoryOperand(...) method. Care must be taken when +folding instructions; a folded instruction can be quite different from the +original instruction. See LiveIntervals::addIntervalsForSpills in +lib/CodeGen/LiveIntervalAnalysis.cpp for an example of its use.

+
+
+

Built in register allocators

+

The LLVM infrastructure provides the application developer with three different +register allocators:

+
    +
  • Fast — This register allocator is the default for debug builds. It +allocates registers on a basic block level, attempting to keep values in +registers and reusing registers as appropriate.

  • +
  • Basic — This is an incremental approach to register allocation. Live +ranges are assigned to registers one at a time in an order that is driven by +heuristics. Since code can be rewritten on-the-fly during allocation, this +framework allows interesting allocators to be developed as extensions. It is +not itself a production register allocator but is a potentially useful +stand-alone mode for triaging bugs and as a performance baseline.

  • +
  • Greedy — The default allocator. This is a highly tuned implementation of +the Basic allocator that incorporates global live range splitting. This +allocator works hard to minimize the cost of spill code.

  • +
  • PBQP — A Partitioned Boolean Quadratic Programming (PBQP) based register +allocator. This allocator works by constructing a PBQP problem representing +the register allocation problem under consideration, solving this using a PBQP +solver, and mapping the solution back to a register assignment.

  • +
+

The type of register allocator used in llc can be chosen with the command +line option -regalloc=...:

+
$ llc -regalloc=linearscan file.bc -o ln.s
+$ llc -regalloc=fast file.bc -o fa.s
+$ llc -regalloc=pbqp file.bc -o pbqp.s
+
+
+
+
+
+

Prolog/Epilog Code Insertion

+

Compact Unwind

+

Throwing an exception requires unwinding out of a function. The information on +how to unwind a given function is traditionally expressed in DWARF unwind +(a.k.a. frame) info. But that format was originally developed for debuggers to +backtrace, and each Frame Description Entry (FDE) requires ~20-30 bytes per +function. There is also the cost of mapping from an address in a function to the +corresponding FDE at runtime. An alternative unwind encoding is called compact +unwind and requires just 4 bytes per function.

+

The compact unwind encoding is a 32-bit value, which is encoded in an +architecture-specific way. It specifies which registers to restore and from +where, and how to unwind out of the function. When the linker creates a final +linked image, it will create a __TEXT,__unwind_info section. This section is +a small and fast way for the runtime to access unwind info for any given +function. If we emit compact unwind info for the function, that compact unwind +info will be encoded in the __TEXT,__unwind_info section. If we emit DWARF +unwind info, the __TEXT,__unwind_info section will contain the offset of the +FDE in the __TEXT,__eh_frame section in the final linked image.

+

For X86, there are three modes for the compact unwind encoding:

+
+
Function with a Frame Pointer (EBP or RBP)

EBP/RBP-based frame, where EBP/RBP is pushed onto the stack +immediately after the return address, then ESP/RSP is moved to +EBP/RBP. Thus to unwind, ESP/RSP is restored with the current +EBP/RBP value, then EBP/RBP is restored by popping the stack, and the +return is done by popping the stack once more into the PC. All non-volatile +registers that need to be restored must have been saved in a small range on +the stack that starts EBP-4 to EBP-1020 (RBP-8 to +RBP-1020). The offset (divided by 4 in 32-bit mode and 8 in 64-bit mode) +is encoded in bits 16-23 (mask: 0x00FF0000). The registers saved are +encoded in bits 0-14 (mask: 0x00007FFF) as five 3-bit entries from the +following table:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Compact Number

i386 Register

x86-64 Register

1

EBX

RBX

2

ECX

R12

3

EDX

R13

4

EDI

R14

5

ESI

R15

6

EBP

RBP

+
+
+
Frameless with a Small Constant Stack Size (EBP or RBP is not used as a frame pointer)

To return, a constant (encoded in the compact unwind encoding) is added to the +ESP/RSP. Then the return is done by popping the stack into the PC. All +non-volatile registers that need to be restored must have been saved on the +stack immediately after the return address. The stack size (divided by 4 in +32-bit mode and 8 in 64-bit mode) is encoded in bits 16-23 (mask: +0x00FF0000). There is a maximum stack size of 1024 bytes in 32-bit mode +and 2048 in 64-bit mode. The number of registers saved is encoded in bits 10-12 +(mask: 0x00001C00). Bits 0-9 (mask: 0x000003FF) contain which +registers were saved and their order. (See the +encodeCompactUnwindRegistersWithoutFrame() function in +lib/Target/X86FrameLowering.cpp for the encoding algorithm.)

+
+
Frameless with a Large Constant Stack Size (EBP or RBP is not used as a frame pointer)

This case is like the “Frameless with a Small Constant Stack Size” case, but +the stack size is too large to encode in the compact unwind encoding. Instead +it requires that the function contains “subl $nnnnnn, %esp” in its +prolog. The compact encoding contains the offset to the $nnnnnn value in +the function in bits 16-23 (mask: 0x00FF0000).

+
+
+
+
+

Late Machine Code Optimizations

+
+

Note

+

To Be Written

+
+
+
+

Code Emission

+

The code emission step of code generation is responsible for lowering from the +code generator abstractions (like MachineFunction, MachineInstr, etc) down +to the abstractions used by the MC layer (MCInst, MCStreamer, etc). This +is done with a combination of several different classes: the (misnamed) +target-independent AsmPrinter class, target-specific subclasses of AsmPrinter +(such as SparcAsmPrinter), and the TargetLoweringObjectFile class.

+

Since the MC layer works at the level of abstraction of object files, it doesn’t +have a notion of functions, global variables etc. Instead, it thinks about +labels, directives, and instructions. A key class used at this time is the +MCStreamer class. This is an abstract API that is implemented in different ways +(e.g. to output a .s file, output an ELF .o file, etc) that is effectively an +“assembler API”. MCStreamer has one method per directive, such as EmitLabel, +EmitSymbolAttribute, SwitchSection, etc, which directly correspond to assembly +level directives.

+

If you are interested in implementing a code generator for a target, there are +three important things that you have to implement for your target:

+
    +
  1. First, you need a subclass of AsmPrinter for your target. This class +implements the general lowering process converting MachineFunction’s into MC +label constructs. The AsmPrinter base class provides a number of useful +methods and routines, and also allows you to override the lowering process in +some important ways. You should get much of the lowering for free if you are +implementing an ELF, COFF, or MachO target, because the +TargetLoweringObjectFile class implements much of the common logic.

  2. +
  3. Second, you need to implement an instruction printer for your target. The +instruction printer takes an MCInst and renders it to a raw_ostream as +text. Most of this is automatically generated from the .td file (when you +specify something like “add $dst, $src1, $src2” in the instructions), but +you need to implement routines to print operands.

  4. +
  5. Third, you need to implement code that lowers a MachineInstr to an MCInst, +usually implemented in “<target>MCInstLower.cpp”. This lowering process is +often target specific, and is responsible for turning jump table entries, +constant pool indices, global variable addresses, etc into MCLabels as +appropriate. This translation layer is also responsible for expanding pseudo +ops used by the code generator into the actual machine instructions they +correspond to. The MCInsts that are generated by this are fed into the +instruction printer or the encoder.

  6. +
+

Finally, at your choosing, you can also implement a subclass of MCCodeEmitter +which lowers MCInst’s into machine code bytes and relocations. This is +important if you want to support direct .o file emission, or would like to +implement an assembler for your target.

+
+

Emitting function stack size information

+

A section containing metadata on function stack sizes will be emitted when +TargetLoweringObjectFile::StackSizesSection is not null, and +TargetOptions::EmitStackSizeSection is set (-stack-size-section). The +section will contain an array of pairs of function symbol values (pointer size) +and stack sizes (unsigned LEB128). The stack size values only include the space +allocated in the function prologue. Functions with dynamic stack allocations are +not included.

+
+
+
+

VLIW Packetizer

+

In a Very Long Instruction Word (VLIW) architecture, the compiler is responsible +for mapping instructions to functional-units available on the architecture. To +that end, the compiler creates groups of instructions called packets or +bundles. The VLIW packetizer in LLVM is a target-independent mechanism to +enable the packetization of machine instructions.

+
+

Mapping from instructions to functional units

+

Instructions in a VLIW target can typically be mapped to multiple functional +units. During the process of packetizing, the compiler must be able to reason +about whether an instruction can be added to a packet. This decision can be +complex since the compiler has to examine all possible mappings of instructions +to functional units. Therefore to alleviate compilation-time complexity, the +VLIW packetizer parses the instruction classes of a target and generates tables +at compiler build time. These tables can then be queried by the provided +machine-independent API to determine if an instruction can be accommodated in a +packet.

+
+
+

How the packetization tables are generated and used

+

The packetizer reads instruction classes from a target’s itineraries and creates +a deterministic finite automaton (DFA) to represent the state of a packet. A DFA +consists of three major elements: inputs, states, and transitions. The set of +inputs for the generated DFA represents the instruction being added to a +packet. The states represent the possible consumption of functional units by +instructions in a packet. In the DFA, transitions from one state to another +occur on the addition of an instruction to an existing packet. If there is a +legal mapping of functional units to instructions, then the DFA contains a +corresponding transition. The absence of a transition indicates that a legal +mapping does not exist and that the instruction cannot be added to the packet.

+

To generate tables for a VLIW target, add TargetGenDFAPacketizer.inc as a +target to the Makefile in the target directory. The exported API provides three +functions: DFAPacketizer::clearResources(), +DFAPacketizer::reserveResources(MachineInstr *MI), and +DFAPacketizer::canReserveResources(MachineInstr *MI). These functions allow +a target packetizer to add an instruction to an existing packet and to check +whether an instruction can be added to a packet. See +llvm/CodeGen/DFAPacketizer.h for more information.

+
+
+
+
+

Implementing a Native Assembler

+

Though you’re probably reading this because you want to write or maintain a +compiler backend, LLVM also fully supports building a native assembler. +We’ve tried hard to automate the generation of the assembler from the .td files +(in particular the instruction syntax and encodings), which means that a large +part of the manual and repetitive data entry can be factored and shared with the +compiler.

+
+

Instruction Parsing

+
+

Note

+

To Be Written

+
+
+
+

Instruction Alias Processing

+

Once the instruction is parsed, it enters the MatchInstructionImpl function. +The MatchInstructionImpl function performs alias processing and then does actual +matching.

+

Alias processing is the phase that canonicalizes different lexical forms of the +same instructions down to one representation. There are several different kinds +of alias that are possible to implement and they are listed below in the order +that they are processed (which is in order from simplest/weakest to most +complex/powerful). Generally you want to use the first alias mechanism that +meets the needs of your instruction, because it will allow a more concise +description.

+
+

Mnemonic Aliases

+

The first phase of alias processing is simple instruction mnemonic remapping for +classes of instructions which are allowed with two different mnemonics. This +phase is a simple and unconditional remapping from one input mnemonic to one +output mnemonic. It isn’t possible for this form of alias to look at the +operands at all, so the remapping must apply for all forms of a given mnemonic. +Mnemonic aliases are defined simply, for example X86 has:

+
def : MnemonicAlias<"cbw",     "cbtw">;
+def : MnemonicAlias<"smovq",   "movsq">;
+def : MnemonicAlias<"fldcww",  "fldcw">;
+def : MnemonicAlias<"fucompi", "fucomip">;
+def : MnemonicAlias<"ud2a",    "ud2">;
+
+
+

… and many others. With a MnemonicAlias definition, the mnemonic is remapped +simply and directly. Though MnemonicAlias’s can’t look at any aspect of the +instruction (such as the operands) they can depend on global modes (the same +ones supported by the matcher), through a Requires clause:

+
def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;
+
+
+

In this example, the mnemonic gets mapped into a different one depending on +the current instruction set.

+
+
+

Instruction Aliases

+

The most general phase of alias processing occurs while matching is happening: +it provides new forms for the matcher to match along with a specific instruction +to generate. An instruction alias has two parts: the string to match and the +instruction to generate. For example:

+
def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8  :$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8  GR32:$dst, GR8  :$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8  GR64:$dst, GR8  :$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)>;
+
+
+

This shows a powerful example of the instruction aliases, matching the same +mnemonic in multiple different ways depending on what operands are present in +the assembly. The result of instruction aliases can include operands in a +different order than the destination instruction, and can use an input multiple +times, for example:

+
def : InstAlias<"clrb $reg", (XOR8rr  GR8 :$reg, GR8 :$reg)>;
+def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
+def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
+def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;
+
+
+

This example also shows that tied operands are only listed once. In the X86 +backend, XOR8rr has two input GR8’s and one output GR8 (where an input is tied +to the output). InstAliases take a flattened operand list without duplicates +for tied operands. The result of an instruction alias can also use immediates +and fixed physical registers which are added as simple immediate operands in the +result, for example:

+
// Fixed Immediate operand.
+def : InstAlias<"aad", (AAD8i8 10)>;
+
+// Fixed register operand.
+def : InstAlias<"fcomi", (COM_FIr ST1)>;
+
+// Simple alias.
+def : InstAlias<"fcomi $reg", (COM_FIr RST:$reg)>;
+
+
+

Instruction aliases can also have a Requires clause to make them subtarget +specific.

+

If the back-end supports it, the instruction printer can automatically emit the +alias rather than what’s being aliased. It typically leads to better, more +readable code. If it’s better to print out what’s being aliased, then pass a ‘0’ +as the third parameter to the InstAlias definition.

+
+
+
+

Instruction Matching

+
+

Note

+

To Be Written

+
+
+
+
+

Target-specific Implementation Notes

+

This section of the document explains features or design decisions that are +specific to the code generator for a particular target. First we start with a +table that summarizes what features are supported by each target.

+
+

Target Feature Matrix

+

Note that this table does not list features that are not supported fully by any +target yet. It considers a feature to be supported if at least one subtarget +supports it. A feature being supported means that it is useful and works for +most cases, it does not indicate that there are zero known bugs in the +implementation. Here is the key:

+

+ + + + + + + + + + + + + + +
UnknownNot ApplicableNo supportPartial SupportComplete Support

+

Here is the table:

+

+ + + + + + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

+ + + + + + + + + + + + +

+

Target
FeatureARMHexagonMSP430MipsNVPTXPowerPCSparcSystemZX86XCoreeBPF
is generally reliable
assembly parser
disassembler
inline asm
jit*
.o file writing
tail calls
segmented stacks *

+
+

Is Generally Reliable

+

This box indicates whether the target is considered to be production quality. +This indicates that the target has been used as a static compiler to compile +large amounts of code by a variety of different people and is in continuous use.

+
+
+

Assembly Parser

+

This box indicates whether the target supports parsing target specific .s files +by implementing the MCAsmParser interface. This is required for llvm-mc to be +able to act as a native assembler and is required for inline assembly support in +the native .o file writer.

+
+
+

Disassembler

+

This box indicates whether the target supports the MCDisassembler API for +disassembling machine opcode bytes into MCInst’s.

+
+
+

Inline Asm

+

This box indicates whether the target supports most popular inline assembly +constraints and modifiers.

+
+
+

JIT Support

+

This box indicates whether the target supports the JIT compiler through the +ExecutionEngine interface.

+

The ARM backend has basic support for integer code in ARM codegen mode, but +lacks NEON and full Thumb support.

+
+
+

.o File Writing

+

This box indicates whether the target supports writing .o files (e.g. MachO, +ELF, and/or COFF) directly from the target. Note that the target also +must include an assembly parser and general inline assembly support for full +inline assembly support in the .o writer.

+

Targets that don’t support this feature can obviously still write out .o files, +they just rely on having an external assembler to translate from a .s file to a +.o file (as is the case for many C compilers).

+
+
+

Tail Calls

+

This box indicates whether the target supports guaranteed tail calls. These are +calls marked “tail” and use the fastcc calling +convention. Please see the tail call section for more details.

+
+
+

Segmented Stacks

+

This box indicates whether the target supports segmented stacks. This replaces +the traditional large C stack with many linked segments. It is compatible with +the gcc implementation used by the Go +front end.

+

Basic support exists on the X86 backend. Currently vararg doesn’t work and the +object files are not marked the way the gold linker expects, but simple Go +programs can be built by dragonegg.

+
+
+
+

Tail call optimization

+

Tail call optimization, callee reusing the stack of the caller, is currently +supported on x86/x86-64, PowerPC, AArch64, and WebAssembly. It is performed on +x86/x86-64, PowerPC, and AArch64 if:

+
    +
  • Caller and callee have the calling convention fastcc, cc 10 (GHC +calling convention), cc 11 (HiPE calling convention), tailcc, or +swifttailcc.

  • +
  • The call is a tail call - in tail position (ret immediately follows call and +ret uses value of call or is void).

  • +
  • Option -tailcallopt is enabled or the calling convention is tailcc.

  • +
  • Platform-specific constraints are met.

  • +
+

x86/x86-64 constraints:

+
    +
  • No variable argument lists are used.

  • +
  • On x86-64 when generating GOT/PIC code only module-local calls (visibility = +hidden or protected) are supported.

  • +
+

PowerPC constraints:

+
    +
  • No variable argument lists are used.

  • +
  • No byval parameters are used.

  • +
  • On ppc32/64 GOT/PIC only module-local calls (visibility = hidden or protected) +are supported.

  • +
+

WebAssembly constraints:

+
    +
  • No variable argument lists are used.

  • +
  • The ‘tail-call’ target attribute is enabled.

  • +
  • The caller and callee’s return types must match. The caller cannot +be void unless the callee is, too.

  • +
+

AArch64 constraints:

+
    +
  • No variable argument lists are used.

  • +
+

Example:

+

Call as llc -tailcallopt test.ll.

+
declare fastcc i32 @tailcallee(i32 inreg %a1, i32 inreg %a2, i32 %a3, i32 %a4)
+
+define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
+  %l1 = add i32 %in1, %in2
+  %tmp = tail call fastcc i32 @tailcallee(i32 inreg %in1, i32 inreg %in2, i32 %in1, i32 %l1)
+  ret i32 %tmp
+}
+
+
+

Implications of -tailcallopt:

+

To support tail call optimization in situations where the callee has more +arguments than the caller a ‘callee pops arguments’ convention is used. This +currently causes each fastcc call that is not tail call optimized (because +one or more of above constraints are not met) to be followed by a readjustment +of the stack. So performance might be worse in such cases.

+
+
+

Sibling call optimization

+

Sibling call optimization is a restricted form of tail call optimization. +Unlike tail call optimization described in the previous section, it can be +performed automatically on any tail calls when -tailcallopt option is not +specified.

+

Sibling call optimization is currently performed on x86/x86-64 when the +following constraints are met:

+
    +
  • Caller and callee have the same calling convention. It can be either c or +fastcc.

  • +
  • The call is a tail call - in tail position (ret immediately follows call and +ret uses value of call or is void).

  • +
  • Caller and callee have matching return type or the callee result is not used.

  • +
  • If any of the callee arguments are being passed in stack, they must be +available in caller’s own incoming argument stack and the frame offsets must +be the same.

  • +
+

Example:

+
declare i32 @bar(i32, i32)
+
+define i32 @foo(i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = tail call i32 @bar(i32 %a, i32 %b)
+  ret i32 %0
+}
+
+
+
+
+

The X86 backend

+

The X86 code generator lives in the lib/Target/X86 directory. This code +generator is capable of targeting a variety of x86-32 and x86-64 processors, and +includes support for ISA extensions such as MMX and SSE.

+
+

X86 Target Triples supported

+

The following are the known target triples that are supported by the X86 +backend. This is not an exhaustive list, and it would be useful to add those +that people test.

+
    +
  • i686-pc-linux-gnu — Linux

  • +
  • i386-unknown-freebsd5.3 — FreeBSD 5.3

  • +
  • i686-pc-cygwin — Cygwin on Win32

  • +
  • i686-pc-mingw32 — MingW on Win32

  • +
  • i386-pc-mingw32msvc — MingW crosscompiler on Linux

  • +
  • i686-apple-darwin* — Apple Darwin on X86

  • +
  • x86_64-unknown-linux-gnu — Linux

  • +
+
+
+

X86 Calling Conventions supported

+

The following target-specific calling conventions are known to the backend:

+
    +
  • x86_StdCall — stdcall calling convention seen on Microsoft Windows +platform (CC ID = 64).

  • +
  • x86_FastCall — fastcall calling convention seen on Microsoft Windows +platform (CC ID = 65).

  • +
  • x86_ThisCall — Similar to X86_StdCall. Passes first argument in ECX, +others via stack. Callee is responsible for stack cleaning. This convention is +used by MSVC by default for methods in its ABI (CC ID = 70).

  • +
+
+
+

Representing X86 addressing modes in MachineInstrs

+

The x86 has a very flexible way of accessing memory. It is capable of forming +memory addresses of the following expression directly in integer instructions +(which use ModR/M addressing):

+
SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32
+
+
+

In order to represent this, LLVM tracks no less than 5 operands for each memory +operand of this form. This means that the “load” form of ‘mov’ has the +following MachineOperands in this order:

+
Index:        0     |    1        2       3           4          5
+Meaning:   DestReg, | BaseReg,  Scale, IndexReg, Displacement Segment
+OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,   SignExtImm  PhysReg
+
+
+

Stores, and all other instructions, treat the five memory operands in the same +way and in the same order. If the segment register is unspecified (regno = 0), +then no segment override is generated. “Lea” operations do not have a segment +register specified, so they only have 4 operands for their memory reference.

+
+
+

X86 address spaces supported

+

x86 has a feature which provides the ability to perform loads and stores to +different address spaces via the x86 segment registers. A segment override +prefix byte on an instruction causes the instruction’s memory access to go to +the specified segment. LLVM address space 0 is the default address space, which +includes the stack, and any unqualified memory accesses in a program. Address +spaces 1-255 are currently reserved for user-defined code. The GS-segment is +represented by address space 256, the FS-segment is represented by address space +257, and the SS-segment is represented by address space 258. Other x86 segments +have yet to be allocated address space numbers.

+

While these address spaces may seem similar to TLS via the thread_local +keyword, and often use the same underlying hardware, there are some fundamental +differences.

+

The thread_local keyword applies to global variables and specifies that they +are to be allocated in thread-local memory. There are no type qualifiers +involved, and these variables can be pointed to with normal pointers and +accessed with normal loads and stores. The thread_local keyword is +target-independent at the LLVM IR level (though LLVM doesn’t yet have +implementations of it for some configurations).

+

Special address spaces, in contrast, apply to static types. Every load and store +has a particular address space in its address operand type, and this is what +determines which address space is accessed. LLVM ignores these special address +space qualifiers on global variables, and does not provide a way to directly +allocate storage in them. At the LLVM IR level, the behavior of these special +address spaces depends in part on the underlying OS or runtime environment, and +they are specific to x86 (and LLVM doesn’t yet handle them correctly in some +cases).

+

Some operating systems and runtime environments use (or may in the future use) +the FS/GS-segment registers for various low-level purposes, so care should be +taken when considering them.

+
+
+

Instruction naming

+

An instruction name consists of the base name, a default operand size, and a +character per operand with an optional special size. For example:

+
ADD8rr      -> add, 8-bit register, 8-bit register
+IMUL16rmi   -> imul, 16-bit register, 16-bit memory, 16-bit immediate
+IMUL16rmi8  -> imul, 16-bit register, 16-bit memory, 8-bit immediate
+MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory
+
+
+
+
+
+

The PowerPC backend

+

The PowerPC code generator lives in the lib/Target/PowerPC directory. The code +generation is retargetable to several variations or subtargets of the PowerPC +ISA; including ppc32, ppc64 and altivec.

+
+

LLVM PowerPC ABI

+

LLVM follows the AIX PowerPC ABI, with two deviations. First, LLVM uses PC relative +(PIC) or static addressing for accessing global values, so no TOC (r2) is +used. Second, r31 is used as a frame pointer to allow dynamic growth of a stack +frame. LLVM takes advantage of having no TOC to provide space to save the frame +pointer in the PowerPC linkage area of the caller frame. Other details of +the PowerPC ABI can be found at PowerPC ABI. Note: This link describes the 32 bit ABI. The 64 bit ABI is similar except +that space for GPRs is 8 bytes wide (not 4) and r13 is reserved for system use.

+
+
+

Frame Layout

+

The size of a PowerPC frame is usually fixed for the duration of a function’s +invocation. Since the frame is fixed size, all references into the frame can be +accessed via fixed offsets from the stack pointer. The exception to this is +when dynamic alloca or variable sized arrays are present, then a base pointer +(r31) is used as a proxy for the stack pointer and stack pointer is free to grow +or shrink. A base pointer is also used if llvm-gcc is not passed the +-fomit-frame-pointer flag. The stack pointer is always aligned to 16 bytes, so +that space allocated for altivec vectors will be properly aligned.

+

An invocation frame is laid out as follows (low memory at top):

+

+ + + + + + + + + + + + + + + + + + + + + +
Linkage

Parameter area

Dynamic area

Locals area

Saved registers area


Previous Frame

+

The linkage area is used by a callee to save special registers prior to +allocating its own frame. Only three entries are relevant to LLVM. The first +entry is the previous stack pointer (sp), aka link. This allows probing tools +like gdb or exception handlers to quickly scan the frames in the stack. A +function epilog can also use the link to pop the frame from the stack. The +third entry in the linkage area is used to save the return address from the lr +register. Finally, as mentioned above, the last entry is used to save the +previous frame pointer (r31.) The entries in the linkage area are the size of a +GPR, thus the linkage area is 24 bytes long in 32 bit mode and 48 bytes in 64 +bit mode.

+

32 bit linkage area:

+

+ + + + + + + + + + + + + + + + + + + + + + + + +
0Saved SP (r1)
4Saved CR
8Saved LR
12Reserved
16Reserved
20Saved FP (r31)

+

64 bit linkage area:

+

+ + + + + + + + + + + + + + + + + + + + + + + + +
0Saved SP (r1)
8Saved CR
16Saved LR
24Reserved
32Reserved
40Saved FP (r31)

+

The parameter area is used to store arguments being passed to a callee +function. Following the PowerPC ABI, the first few arguments are actually +passed in registers, with the space in the parameter area unused. However, if +there are not enough registers or the callee is a thunk or vararg function, +these register arguments can be spilled into the parameter area. Thus, the +parameter area must be large enough to store all the parameters for the largest +call sequence made by the caller. The size must also be minimally large enough +to spill registers r3-r10. This allows callees blind to the call signature, +such as thunks and vararg functions, enough space to cache the argument +registers. Therefore, the parameter area is minimally 32 bytes (64 bytes in 64 +bit mode.) Also note that since the parameter area is a fixed offset from the +top of the frame, a callee can access its spilled arguments using fixed +offsets from the stack pointer (or base pointer.)

+

Combining the information about the linkage, parameter areas and alignment, a +stack frame is minimally 64 bytes in 32 bit mode and 128 bytes in 64 bit mode.

+

The dynamic area starts out as size zero. If a function uses dynamic alloca +then space is added to the stack, the linkage and parameter areas are shifted to +top of stack, and the new space is available immediately below the linkage and +parameter areas. The cost of shifting the linkage and parameter areas is minor +since only the link value needs to be copied. The link value can be easily +fetched by adding the original frame size to the base pointer. Note that +allocations in the dynamic space need to observe 16 byte alignment.

+

The locals area is where the llvm compiler reserves space for local variables.

+

The saved registers area is where the llvm compiler spills callee saved +registers on entry to the callee.

+
+
+

Prolog/Epilog

+

The llvm prolog and epilog are the same as described in the PowerPC ABI, with +the following exceptions. Callee saved registers are spilled after the frame is +created. This allows the llvm epilog/prolog support to be common with other +targets. The base pointer callee saved register r31 is saved in the TOC slot of +linkage area. This simplifies allocation of space for the base pointer and +makes it convenient to locate programmatically and during debugging.

+
+
+

Dynamic Allocation

+
+

Note

+

TODO - More to come.

+
+
+
+
+

The NVPTX backend

+

The NVPTX code generator under lib/Target/NVPTX is an open-source version of +the NVIDIA NVPTX code generator for LLVM. It is contributed by NVIDIA and is +a port of the code generator used in the CUDA compiler (nvcc). It targets the +PTX 3.0/3.1 ISA and can target any compute capability greater than or equal to +2.0 (Fermi).

+

This target is of production quality and should be completely compatible with +the official NVIDIA toolchain.

+

Code Generator Options:

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
OptionDescription
sm_20Set shader model/compute capability to 2.0
sm_21Set shader model/compute capability to 2.1
sm_30Set shader model/compute capability to 3.0
sm_35Set shader model/compute capability to 3.5
ptx30Target PTX 3.0
ptx31Target PTX 3.1

+
+
+

The extended Berkeley Packet Filter (eBPF) backend

+

Extended BPF (or eBPF) is similar to the original (“classic”) BPF (cBPF) used +to filter network packets. The +bpf() system call +performs a range of operations related to eBPF. For both cBPF and eBPF +programs, the Linux kernel statically analyzes the programs before loading +them, in order to ensure that they cannot harm the running system. eBPF is +a 64-bit RISC instruction set designed for one to one mapping to 64-bit CPUs. +Opcodes are 8-bit encoded, and 87 instructions are defined. There are 10 +registers, grouped by function as outlined below.

+
R0        return value from in-kernel functions; exit value for eBPF program
+R1 - R5   function call arguments to in-kernel functions
+R6 - R9   callee-saved registers preserved by in-kernel functions
+R10       stack frame pointer (read only)
+
+
+
+

Instruction encoding (arithmetic and jump)

+

eBPF is reusing most of the opcode encoding from classic to simplify conversion +of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit ‘code’ +field is divided into three parts:

+
+----------------+--------+--------------------+
+|   4 bits       |  1 bit |   3 bits           |
+| operation code | source | instruction class  |
++----------------+--------+--------------------+
+(MSB)                                      (LSB)
+
+
+

Three LSB bits store instruction class which is one of:

+
BPF_LD     0x0
+BPF_LDX    0x1
+BPF_ST     0x2
+BPF_STX    0x3
+BPF_ALU    0x4
+BPF_JMP    0x5
+(unused)   0x6
+BPF_ALU64  0x7
+
+
+

When BPF_CLASS(code) == BPF_ALU or BPF_ALU64 or BPF_JMP, +4th bit encodes source operand

+
BPF_X     0x1  use src_reg register as source operand
+BPF_K     0x0  use 32 bit immediate as source operand
+
+
+

and four MSB bits store operation code

+
BPF_ADD   0x0  add
+BPF_SUB   0x1  subtract
+BPF_MUL   0x2  multiply
+BPF_DIV   0x3  divide
+BPF_OR    0x4  bitwise logical OR
+BPF_AND   0x5  bitwise logical AND
+BPF_LSH   0x6  left shift
+BPF_RSH   0x7  right shift (zero extended)
+BPF_NEG   0x8  arithmetic negation
+BPF_MOD   0x9  modulo
+BPF_XOR   0xa  bitwise logical XOR
+BPF_MOV   0xb  move register to register
+BPF_ARSH  0xc  right shift (sign extended)
+BPF_END   0xd  endianness conversion
+
+
+

If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of

+
BPF_JA    0x0  unconditional jump
+BPF_JEQ   0x1  jump ==
+BPF_JGT   0x2  jump >
+BPF_JGE   0x3  jump >=
+BPF_JSET  0x4  jump if (DST & SRC)
+BPF_JNE   0x5  jump !=
+BPF_JSGT  0x6  jump signed >
+BPF_JSGE  0x7  jump signed >=
+BPF_CALL  0x8  function call
+BPF_EXIT  0x9  function return
+
+
+
+
+

Instruction encoding (load, store)

+

For load and store instructions the 8-bit ‘code’ field is divided as:

+
+--------+--------+-------------------+
+| 3 bits | 2 bits |   3 bits          |
+|  mode  |  size  | instruction class |
++--------+--------+-------------------+
+(MSB)                             (LSB)
+
+
+

Size modifier is one of

+
BPF_W       0x0  word
+BPF_H       0x1  half word
+BPF_B       0x2  byte
+BPF_DW      0x3  double word
+
+
+

Mode modifier is one of

+
BPF_IMM     0x0  immediate
+BPF_ABS     0x1  used to access packet data
+BPF_IND     0x2  used to access packet data
+BPF_MEM     0x3  memory
+(reserved)  0x4
+(reserved)  0x5
+BPF_XADD    0x6  exclusive add
+
+
+
+
+

Packet data access (BPF_ABS, BPF_IND)

+

eBPF has two non-generic instructions, (BPF_ABS | <size> | BPF_LD) and +(BPF_IND | <size> | BPF_LD), which are used to access packet data. +Register R6 is an implicit input that must contain a pointer to sk_buff. +Register R0 is an implicit output which contains the data fetched +from the packet. Registers R1-R5 are scratch registers and must not +be used to store the data across BPF_ABS | BPF_LD or BPF_IND | BPF_LD +instructions. These instructions have an implicit program exit condition +as well. When an eBPF program tries to access data beyond +the packet boundary, the interpreter will abort the execution of the program.

+
+
BPF_IND | BPF_W | BPF_LD is equivalent to:

R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))

+
+
+
+
+

eBPF maps

+

eBPF maps are provided for sharing data between kernel and user-space. +Currently implemented types are hash and array, with potential extension to +support bloom filters, radix trees, etc. A map is defined by its type, +maximum number of elements, key size and value size in bytes. eBPF syscall +supports create, update, find and delete functions on maps.

+
+
+

Function calls

+

Function call arguments are passed using up to five registers (R1 - R5). +The return value is passed in a dedicated register (R0). Four additional +registers (R6 - R9) are callee-saved, and the values in these registers +are preserved within kernel functions. R0 - R5 are scratch registers within +kernel functions, and eBPF programs must therefore store/restore values in +these registers if needed across function calls. The stack can be accessed +using the read-only frame pointer R10. eBPF registers map 1:1 to hardware +registers on x86_64 and other 64-bit architectures. For example, x86_64 +in-kernel JIT maps them as

+
R0 - rax
+R1 - rdi
+R2 - rsi
+R3 - rdx
+R4 - rcx
+R5 - r8
+R6 - rbx
+R7 - r13
+R8 - r14
+R9 - r15
+R10 - rbp
+
+
+

since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing +and rbx, r12 - r15 are callee saved.

+
+
+

Program start

+

An eBPF program receives a single argument and contains +a single eBPF main routine; the program does not contain eBPF functions. +Function calls are limited to a predefined set of kernel functions. The size +of a program is limited to 4K instructions: this ensures fast termination and +a limited number of kernel function calls. Prior to running an eBPF program, +a verifier performs static analysis to prevent loops in the code and +to ensure valid register usage and operand types.

+
+
+
+

The AMDGPU backend

+

The AMDGPU code generator lives in the lib/Target/AMDGPU +directory. This code generator is capable of targeting a variety of +AMD GPU processors. Refer to User Guide for AMDGPU Backend for more information.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodeOfConduct.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodeOfConduct.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodeOfConduct.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodeOfConduct.html 2021-09-19 16:16:32.000000000 +0000 @@ -0,0 +1,236 @@ + + + + + + + + + LLVM Community Code of Conduct — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Community Code of Conduct

+
+

Note

+

This document is currently a DRAFT document while it is being discussed +by the community.

+
+

The LLVM community has always worked to be a welcoming and respectful +community, and we want to ensure that doesn’t change as we grow and evolve. To +that end, we have a few ground rules that we ask people to adhere to:

+ +

This isn’t an exhaustive list of things that you can’t do. Rather, take it in +the spirit in which it’s intended - a guide to make it easier to communicate +and participate in the community.

+

This code of conduct applies to all spaces managed by the LLVM project or The +LLVM Foundation. This includes IRC channels, mailing lists, bug trackers, LLVM +events such as the developer meetings and socials, and any other forums created +by the project that the community uses for communication. It applies to all of +your communication and conduct in these spaces, including emails, chats, things +you say, slides, videos, posters, signs, or even t-shirts you display in these +spaces. In addition, violations of this code outside these spaces may, in rare +cases, affect a person’s ability to participate within them, when the conduct +amounts to an egregious violation of this code.

+

If you believe someone is violating the code of conduct, we ask that you report +it by emailing conduct@llvm.org. For more details please see our +Reporting Guide.

+
    +
  • Be friendly and patient.

  • +
+
    +
  • Be welcoming. We strive to be a community that welcomes and supports +people of all backgrounds and identities. This includes, but is not limited +to members of any race, ethnicity, culture, national origin, colour, +immigration status, social and economic class, educational level, sex, sexual +orientation, gender identity and expression, age, size, family status, +political belief, religion or lack thereof, and mental and physical ability.

  • +
+
    +
  • Be considerate. Your work will be used by other people, and you in turn +will depend on the work of others. Any decision you take will affect users +and colleagues, and you should take those consequences into account. Remember +that we’re a world-wide community, so you might not be communicating in +someone else’s primary language.

  • +
+
    +
  • Be respectful. Not all of us will agree all the time, but disagreement is +no excuse for poor behavior and poor manners. We might all experience some +frustration now and then, but we cannot allow that frustration to turn into +a personal attack. It’s important to remember that a community where people +feel uncomfortable or threatened is not a productive one. Members of the LLVM +community should be respectful when dealing with other members as well as +with people outside the LLVM community.

  • +
+
    +
  • Be careful in the words that you choose and be kind to others. Do not +insult or put down other participants. Harassment and other exclusionary +behavior aren’t acceptable. This includes, but is not limited to:

    +
      +
    • Violent threats or language directed against another person.

    • +
    • Discriminatory jokes and language.

    • +
    • Posting sexually explicit or violent material.

    • +
    • Posting (or threatening to post) other people’s personally identifying +information (“doxing”).

    • +
    • Personal insults, especially those using racist or sexist terms.

    • +
    • Unwelcome sexual attention.

    • +
    • Advocating for, or encouraging, any of the above behavior.

    • +
    +

    In general, if someone asks you to stop, then stop. Persisting in such +behavior after being asked to stop is considered harassment.

    +
  • +
+
    +
  • When we disagree, try to understand why. Disagreements, both social and +technical, happen all the time and LLVM is no exception. It is important that +we resolve disagreements and differing views constructively. Remember that +we’re different. The strength of LLVM comes from its varied community, people +from a wide range of backgrounds. Different people have different +perspectives on issues. Being unable to understand why someone holds +a viewpoint doesn’t mean that they’re wrong. Don’t forget that it is human to +err and blaming each other doesn’t get us anywhere. Instead, focus on helping +to resolve issues and learning from mistakes.

  • +
+
+

Questions?

+

If you have questions, please feel free to contact the LLVM Foundation Code of +Conduct Advisory Committee by emailing conduct@llvm.org.

+

(This text is based on the Django Project Code of Conduct, which is in turn +based on wording from the Speak Up! project.)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodeReview.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodeReview.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodeReview.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodeReview.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,361 @@ + + + + + + + + + LLVM Code-Review Policy and Practices — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Code-Review Policy and Practices

+

LLVM’s code-review policy and practices help maintain high code quality across +the project. Specifically, our code review process aims to:

+
+
    +
  • Improve readability and maintainability.

  • +
  • Improve robustness and prevent the introduction of defects.

  • +
  • Best leverage the experience of other contributors for each proposed change.

  • +
  • Help grow and develop new contributors, through mentorship by community leaders.

  • +
+
+

It is important for all contributors to understand our code-review +practices and participate in the code-review process.

+
+

General Policies

+
+

What Code Should Be Reviewed?

+

All developers are required to have significant changes reviewed before they +are committed to the repository.

+
+
+

Must Code Be Reviewed Prior to Being Committed?

+

Code can be reviewed either before it is committed or after. We expect +significant patches to be reviewed before being committed. Smaller patches +(or patches where the developer owns the component) that meet +likely-community-consensus requirements (as apply to all patch approvals) can +be committed prior to an explicit review. In situations where there is any +uncertainty, a patch should be reviewed prior to being committed.

+

Please note that the developer responsible for a patch is also +responsible for making all necessary review-related changes, including +those requested during any post-commit review.

+
+
+

Can Code Be Reviewed After It Is Committed?

+

Post-commit review is encouraged, and can be accomplished using any of the +tools detailed below. There is a strong expectation that authors respond +promptly to post-commit feedback and address it. Failure to do so is cause for +the patch to be reverted.

+

If a community member expresses a concern about a recent commit, and this +concern would have been significant enough to warrant a conversation during +pre-commit review (including around the need for more design discussions), +they may ask the original author for a revert, and the author is responsible for reverting +the patch promptly. Developers often disagree, and erring on the side of the +developer asking for more review prevents any lingering disagreement over +code in the tree. This does not indicate any fault from the patch author; +it is inherent to our post-commit review practices. +Reverting a patch ensures that design discussions can happen without blocking +other development; it’s entirely possible the patch will end up being reapplied +essentially as-is once concerns have been resolved.

+

Before being recommitted, the patch generally should undergo further review. +The community member who identified the problem is expected to engage +actively in the review. In cases where the problem is identified by a buildbot, +a community member with access to hardware similar to that on the buildbot is +expected to engage in the review.

+

Please note: The bar for post-commit feedback is not higher than for pre-commit +feedback. Don’t delay unnecessarily in providing feedback. However, if you see +something after code has been committed about which you would have commented +pre-commit (had you noticed it earlier), please feel free to provide that +feedback at any time.

+

That having been said, if a substantial period of time has passed since the +original change was committed, it may be better to create a new patch to +address the issues than comment on the original commit. The original patch +author, for example, might no longer be an active contributor to the project.

+
+
+

What Tools Are Used for Code Review?

+

Pre-commit code reviews are conducted on our web-based code-review tool (see +Code Reviews with Phabricator). Post-commit reviews can be done on Phabricator, by email +on the relevant project’s commit mailing list, on the project’s development +list, or on the bug tracker.

+
+
+

When Is an RFC Required?

+

Some changes are too significant for just a code review. Changes that should +change the LLVM Language Reference (e.g., adding new target-independent +intrinsics), adding language extensions in Clang, and so on, require an RFC +(Request for Comment) email on the project’s *-dev mailing list first. For +changes that promise significant impact on users and/or downstream code bases, +reviewers can request an RFC achieving consensus before proceeding with code +review. That having been said, posting initial patches can help with +discussions on an RFC.

+
+
+
+

Code-Review Workflow

+

Code review can be an iterative process, which continues until the patch is +ready to be committed. Specifically, once a patch is sent out for review, it +needs an explicit approval before it is committed. Do not assume silent +approval, or solicit objections to a patch with a deadline.

+
+

Acknowledge All Reviewer Feedback

+

All comments by reviewers should be acknowledged by the patch author. It is +generally expected that suggested changes will be incorporated into a future +revision of the patch unless the author and/or other reviewers can articulate a +good reason to do otherwise (and then the reviewers must agree). If a new patch +does not address all outstanding feedback, the author should explicitly state +that when providing the updated patch. When using the web-based code-review +tool, such notes can be provided in the “Diff” description (which is different +from the description of the “Differential Revision” as a whole used for the +commit message).

+

If you suggest changes in a code review, but don’t wish the suggestion to be +interpreted this strongly, please state so explicitly.

+
+
+

Aim to Make Efficient Use of Everyone’s Time

+

Aim to limit the number of iterations in the review process. For example, when +suggesting a change, if you want the author to make a similar set of changes at +other places in the code, please explain the requested set of changes so that +the author can make all of the changes at once. If a patch will require +multiple steps prior to approval (e.g., splitting, refactoring, posting data +from specific performance tests), please explain as many of these up front as +possible. This allows the patch author and reviewers to make the most efficient +use of their time.

+
+
+

LGTM - How a Patch Is Accepted

+

A patch is approved to be committed when a reviewer accepts it, and this is +almost always associated with a message containing the text “LGTM” (which +stands for Looks Good To Me). Only approval from a single reviewer is required.

+

When providing an unqualified LGTM (approval to commit), it is the +responsibility of the reviewer to have reviewed all of the discussion and +feedback from all reviewers ensuring that all feedback has been addressed and +that all other reviewers will almost surely be satisfied with the patch being +approved. If unsure, the reviewer should provide a qualified approval, (e.g., +“LGTM, but please wait for @someone, @someone_else”). You may also do this if +you are fairly certain that a particular community member will wish to review, +even if that person hasn’t done so yet.

+

Note that, if a reviewer has requested a particular community member to review, +and after a week that community member has yet to respond, feel free to ping +the patch (which literally means submitting a comment on the patch with the +word, “Ping.”), or alternatively, ask the original reviewer for further +suggestions.

+

If it is likely that others will want to review a recently-posted patch, +especially if there might be objections, but no one else has done so yet, it is +also polite to provide a qualified approval (e.g., “LGTM, but please wait for a +couple of days in case others wish to review”). If approval is received very +quickly, a patch author may also elect to wait before committing (and this is +certainly considered polite for non-trivial patches). Especially given the +global nature of our community, this waiting time should be at least 24 hours. +Please also be mindful of weekends and major holidays.

+

Our goal is to ensure community consensus around design decisions and +significant implementation choices, and one responsibility of a reviewer, when +providing an overall approval for a patch, is to be reasonably sure that such +consensus exists. If you’re not familiar enough with the community to know, +then you shouldn’t be providing final approval to commit. A reviewer providing +final approval should have commit access to the LLVM project.

+

Every patch should be reviewed by at least one technical expert in the areas of +the project affected by the change.

+
+
+

Splitting Requests and Conditional Acceptance

+

Reviewers may request certain aspects of a patch to be broken out into separate +patches for independent review. Reviewers may also accept a patch +conditioned on the author providing a follow-up patch addressing some +particular issue or concern (although no committed patch should leave the +project in a broken state). Moreover, reviewers can accept a patch conditioned on +the author applying some set of minor updates prior to committing, and when +applicable, it is polite for reviewers to do so.

+
+
+

Don’t Unintentionally Block a Review

+

If you review a patch, but don’t intend for the review process to block on your +approval, please state that explicitly. Out of courtesy, we generally wait on +committing a patch until all reviewers are satisfied, and if you don’t intend +to look at the patch again in a timely fashion, please communicate that fact in +the review.

+
+
+
+

Who Can/Should Review Code?

+
+

Non-Experts Should Review Code

+

You do not need to be an expert in some area of the code base to review patches; +it’s fine to ask questions about what some piece of code is doing. If it’s not +clear to you what is going on, you’re unlikely to be the only one. Please +remember that it is not in the long-term best interest of the community to have +components that are only understood well by a small number of people. Extra +comments and/or test cases can often help (and asking for comments in the test +cases is fine as well).

+

Moreover, authors are encouraged to interpret questions as a reason to reexamine +the readability of the code in question. Structural changes, or further +comments, may be appropriate.

+

If you’re new to the LLVM community, you might also find this presentation helpful: +How to Contribute to LLVM, A 2019 LLVM Developers’ Meeting Presentation (https://youtu.be/C5Y977rLqpw).

+

A good way for new contributors to increase their knowledge of the code base is +to review code. It is perfectly acceptable to review code and explicitly +defer to others for approval decisions.

+
+
+

Experts Should Review Code

+

If you are an expert in an area of the compiler affected by a proposed patch, +then you are highly encouraged to review the code. If you are a relevant code +owner, and no other experts are reviewing a patch, you must either help arrange +for an expert to review the patch or review it yourself.

+
+
+

Code Reviews, Speed, and Reciprocity

+

Sometimes code reviews will take longer than you might hope, especially for +larger features. Common ways to speed up review times for your patches are:

+
    +
  • Review other people’s patches. If you help out, everybody will be more +willing to do the same for you; goodwill is our currency.

  • +
  • Ping the patch. If it is urgent, provide reasons why it is important to you to +get this patch landed and ping it every couple of days. If it is +not urgent, the common courtesy ping rate is one week. Remember that you’re +asking for valuable time from other professional developers.

  • +
  • Ask for help on IRC. Developers on IRC will be able to either help you +directly, or tell you who might be a good reviewer.

  • +
  • Split your patch into multiple smaller patches that build on each other. The +smaller your patch is, the higher the probability that somebody will take a quick +look at it. When doing this, it is helpful to add “[N/M]” (for 1 <= N <= M) to +the title of each patch in the series, so it is clear that there is an order +and what that order is.

  • +
+

Developers should participate in code reviews as both reviewers and +authors. If someone is kind enough to review your code, you should return the +favor for someone else. Note that anyone is welcome to review and give feedback +on a patch, but approval of patches should be consistent with the policy above.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodingStandards.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodingStandards.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CodingStandards.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CodingStandards.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,1673 @@ + + + + + + + + + LLVM Coding Standards — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Coding Standards

+
+ +
+
+

Introduction

+

This document describes coding standards that are used in the LLVM project. +Although no coding standards should be regarded as absolute requirements to be +followed in all instances, coding standards are +particularly important for large-scale code bases that follow a library-based +design (like LLVM).

+

While this document may provide guidance for some mechanical formatting issues, +whitespace, or other “microscopic details”, these are not fixed standards. +Always follow the golden rule:

+
+

If you are extending, enhancing, or bug fixing already implemented code, +use the style that is already being used so that the source is uniform and +easy to follow.

+
+

Note that some code bases (e.g. libc++) have special reasons to deviate +from the coding standards. For example, in the case of libc++, this is +because the naming and other conventions are dictated by the C++ standard.

+

There are some conventions that are not uniformly followed in the code base +(e.g. the naming convention). This is because they are relatively new, and a +lot of code was written before they were put in place. Our long term goal is +for the entire codebase to follow the convention, but we explicitly do not +want patches that do large-scale reformatting of existing code. On the other +hand, it is reasonable to rename the methods of a class if you’re about to +change it in some other way. Please commit such changes separately to +make code review easier.

+

The ultimate goal of these guidelines is to increase the readability and +maintainability of our common source base.

+
+
+

Languages, Libraries, and Standards

+

Most source code in LLVM and other LLVM projects using these coding standards +is C++ code. There are some places where C code is used either due to +environment restrictions, historical restrictions, or due to third-party source +code imported into the tree. Generally, our preference is for standards +conforming, modern, and portable C++ code as the implementation language of +choice.

+
+

C++ Standard Versions

+

Unless otherwise documented, LLVM subprojects are written using standard C++14 +code and avoid unnecessary vendor-specific extensions.

+

Nevertheless, we restrict ourselves to features which are available in the +major toolchains supported as host compilers (see Getting Started with the LLVM System page, +section Software).

+

Each toolchain provides a good reference for what it accepts:

+ +
+
+

C++ Standard Library

+

Instead of implementing custom data structures, we encourage the use of C++ +standard library facilities or LLVM support libraries whenever they are +available for a particular task. LLVM and related projects emphasize and rely +on the standard library facilities and the LLVM support libraries as much as +possible.

+

LLVM support libraries (for example, ADT) +implement specialized data structures or functionality missing in the standard +library. Such libraries are usually implemented in the llvm namespace and +follow the expected standard interface, when there is one.

+

When both C++ and the LLVM support libraries provide similar functionality, and +there isn’t a specific reason to favor the C++ implementation, it is generally +preferable to use the LLVM library. For example, llvm::DenseMap should +almost always be used instead of std::map or std::unordered_map, and +llvm::SmallVector should usually be used instead of std::vector.

+

We explicitly avoid some standard facilities, like the I/O streams, and instead +use LLVM’s streams library (raw_ostream). More detailed information on these +subjects is available in the LLVM Programmer’s Manual.

+

For more information about LLVM’s data structures and the tradeoffs they make, +please consult the “Picking the Right Data Structure for a Task” section of the +programmer’s manual (https://llvm.org/docs/ProgrammersManual.html#picking-the-right-data-structure-for-a-task).

+
+
+

Guidelines for Go code

+

Any code written in the Go programming language is not subject to the +formatting rules below. Instead, we adopt the formatting rules enforced by +the gofmt tool.

+

Go code should strive to be idiomatic. Two good sets of guidelines for what +this means are Effective Go and Go Code Review Comments.

+
+
+
+

Mechanical Source Issues

+
+

Source Code Formatting

+
+

Commenting

+

Comments are important for readability and maintainability. When writing comments, +write them as English prose, using proper capitalization, punctuation, etc. +Aim to describe what the code is trying to do and why, not how it does it at +a micro level. Here are a few important things to document:

+
+
File Headers
+

Every source file should have a header on it that describes the basic purpose of +the file. The standard header looks like this:

+
//===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the Instruction class, which is the
+/// base class for all of the VM instructions.
+///
+//===----------------------------------------------------------------------===//
+
+
+

A few things to note about this particular format: The “-*- C++ -*-” string +on the first line is there to tell Emacs that the source file is a C++ file, not +a C file (Emacs assumes .h files are C files by default).

+
+

Note

+

This tag is not necessary in .cpp files. The name of the file is also +on the first line, along with a very short description of the purpose of the +file.

+
+

The next section in the file is a concise note that defines the license that the +file is released under. This makes it perfectly clear what terms the source +code can be distributed under and should not be modified in any way.

+

The main body is a Doxygen comment (identified by +the /// comment marker instead of the usual //) describing the purpose +of the file. The first sentence (or a passage beginning with \brief) is +used as an abstract. Any additional information should be separated by a blank +line. If an algorithm is based on a paper or is described in another source, +provide a reference.

+
+
+
Header Guard
+

The header file’s guard should be the all-caps path that a user of this header +would #include, using ‘_’ instead of path separator and extension marker. +For example, the header file +llvm/include/llvm/Analysis/Utils/Local.h would be #include-ed as +#include "llvm/Analysis/Utils/Local.h", so its guard is +LLVM_ANALYSIS_UTILS_LOCAL_H.

+
+
+
Class overviews
+

Classes are a fundamental part of an object-oriented design. As such, a +class definition should have a comment block that explains what the class is +used for and how it works. Every non-trivial class is expected to have a +doxygen comment block.

+
+
+
Method information
+

Methods and global functions should also be documented. A quick note about +what it does and a description of the edge cases is all that is necessary here. +The reader should be able to understand how to use interfaces without reading +the code itself.

+

A good thing to document here is what happens when something unexpected +occurs; for instance, does the method return null?

+
+
+
+

Comment Formatting

+

In general, prefer C++-style comments (// for normal comments, /// for +doxygen documentation comments). There are a few cases when it is +useful to use C-style (/* */) comments however:

+
    +
  1. When writing C code to be compatible with C89.

  2. +
  3. When writing a header file that may be #included by a C source file.

  4. +
  5. When writing a source file that is used by a tool that only accepts C-style +comments.

  6. +
  7. When documenting the significance of constants used as actual parameters in +a call. This is most helpful for bool parameters, or passing 0 or +nullptr. The comment should contain the parameter name, which ought to be +meaningful. For example, it’s not clear what the parameter means in this call:

    +
    Object.emitName(nullptr);
    +
    +
    +

    An in-line C-style comment makes the intent obvious:

    +
    Object.emitName(/*Prefix=*/nullptr);
    +
    +
    +
  8. +
+

Commenting out large blocks of code is discouraged, but if you really have to do +this (for documentation purposes or as a suggestion for debug printing), use +#if 0 and #endif. These nest properly and are better behaved in general +than C style comments.

+
+
+

Doxygen Use in Documentation Comments

+

Use the \file command to turn the standard file header into a file-level +comment.

+

Include descriptive paragraphs for all public interfaces (public classes, +member and non-member functions). Avoid restating the information that can +be inferred from the API name. The first sentence (or a paragraph beginning +with \brief) is used as an abstract. Try to keep the abstract to a single sentence, since an explicit +\brief adds visual clutter. Put detailed discussion into separate +paragraphs.

+

To refer to parameter names inside a paragraph, use the \p name command. +Don’t use the \arg name command since it starts a new paragraph that +contains documentation for the parameter.

+

Wrap non-inline code examples in \code ... \endcode.

+

To document a function parameter, start a new paragraph with the +\param name command. If the parameter is used as an out or an in/out +parameter, use the \param [out] name or \param [in,out] name command, +respectively.

+

To describe a function’s return value, start a new paragraph with the \returns +command.

+

A minimal documentation comment:

+
/// Sets the xyzzy property to \p Baz.
+void setXyzzy(bool Baz);
+
+
+

A documentation comment that uses all Doxygen features in a preferred way:

+
/// Does foo and bar.
+///
+/// Does not do foo the usual way if \p Baz is true.
+///
+/// Typical usage:
+/// \code
+///   fooBar(false, "quux", Res);
+/// \endcode
+///
+/// \param Quux kind of foo to do.
+/// \param [out] Result filled with bar sequence on foo success.
+///
+/// \returns true on success.
+bool fooBar(bool Baz, StringRef Quux, std::vector<int> &Result);
+
+
+

Don’t duplicate the documentation comment in the header file and in the +implementation file. Put the documentation comments for public APIs into the +header file. Documentation comments for private APIs can go to the +implementation file. In any case, implementation files can include additional +comments (not necessarily in Doxygen markup) to explain implementation details +as needed.

+

Don’t duplicate function or class name at the beginning of the comment. +For humans it is obvious which function or class is being documented; +automatic documentation processing tools are smart enough to bind the comment +to the correct declaration.

+

Avoid:

+
// Example.h:
+
+// example - Does something important.
+void example();
+
+// Example.cpp:
+
+// example - Does something important.
+void example() { ... }
+
+
+

Preferred:

+
// Example.h:
+
+/// Does something important.
+void example();
+
+// Example.cpp:
+
+/// Builds a B-tree in order to do foo.  See paper by...
+void example() { ... }
+
+
+
+
+

Error and Warning Messages

+

Clear diagnostic messages are important to help users identify and fix issues in +their inputs. Use succinct but correct English prose that gives the user the +context needed to understand what went wrong. Also, to match error message +styles commonly produced by other tools, start the first sentence with a +lower-case letter, and finish the last sentence without a period, if it would +end in one otherwise. Sentences which end with different punctuation, such as +“did you forget ‘;’?”, should still do so.

+

For example this is a good error message:

+
error: file.o: section header 3 is corrupt. Size is 10 when it should be 20
+
+
+

This is a bad message, since it does not provide useful information and uses the +wrong style:

+
error: file.o: Corrupt section header.
+
+
+

As with other coding standards, individual projects, such as the Clang Static +Analyzer, may have preexisting styles that do not conform to this. If a +different formatting scheme is used consistently throughout the project, use +that style instead. Otherwise, this standard applies to all LLVM tools, +including clang, clang-tidy, and so on.

+

If the tool or project does not have existing functions to emit warnings or +errors, use the error and warning handlers provided in Support/WithColor.h +to ensure they are printed in the appropriate style, rather than printing to +stderr directly.

+

When using report_fatal_error, follow the same standards for the message as +regular error messages. Assertion messages and llvm_unreachable calls do not +necessarily need to follow these same styles as they are automatically +formatted, and thus these guidelines may not be suitable.

+
+
+

#include Style

+

Immediately after the header file comment (and include guards if working on a +header file), the minimal list of #includes required by the file should be +listed. We prefer these #includes to be listed in this order:

+
    +
  1. Main Module Header

  2. +
  3. Local/Private Headers

  4. +
  5. LLVM project/subproject headers (clang/..., lldb/..., llvm/..., etc)

  6. +
  7. System #includes

  8. +
+

and each category should be sorted lexicographically by the full path.

+

The Main Module Header file applies to .cpp files which implement an +interface defined by a .h file. This #include should always be included +first regardless of where it lives on the file system. By including a +header file first in the .cpp files that implement the interfaces, we ensure +that the header does not have any hidden dependencies which are not explicitly +#included in the header, but should be. It is also a form of documentation +in the .cpp file to indicate where the interfaces it implements are defined.

+

LLVM project and subproject headers should be grouped from most specific to least +specific, for the same reasons described above. For example, LLDB depends on +both clang and LLVM, and clang depends on LLVM. So an LLDB source file should +include lldb headers first, followed by clang headers, followed by +llvm headers, to reduce the possibility (for example) of an LLDB header +accidentally picking up a missing include due to the previous inclusion of that +header in the main source file or some earlier header file. clang should +similarly include its own headers before including llvm headers. This rule +applies to all LLVM subprojects.

+
+
+

Source Code Width

+

Write your code to fit within 80 columns.

+

There must be some limit to the width of the code in +order to allow developers to have multiple files side-by-side in +windows on a modest display. If you are going to pick a width limit, it is +somewhat arbitrary but you might as well pick something standard. Going with 90 +columns (for example) instead of 80 columns wouldn’t add any significant value +and would be detrimental to printing out code. Also many other projects have +standardized on 80 columns, so some people have already configured their editors +for it (vs something else, like 90 columns).

+
+
+

Whitespace

+

In all cases, prefer spaces to tabs in source files. People have different +preferred indentation levels, and different styles of indentation that they +like; this is fine. What isn’t fine is that different editors/viewers expand +tabs out to different tab stops. This can cause your code to look completely +unreadable, and it is not worth dealing with.

+

As always, follow the Golden Rule above: follow the style of existing code +if you are modifying and extending it.

+

Do not add trailing whitespace. Some common editors will automatically remove +trailing whitespace when saving a file which causes unrelated changes to appear +in diffs and commits.

+
+
Format Lambdas Like Blocks Of Code
+

When formatting a multi-line lambda, format it like a block of code. If there +is only one multi-line lambda in a statement, and there are no expressions +lexically after it in the statement, drop the indent to the standard two space +indent for a block of code, as if it were an if-block opened by the preceding +part of the statement:

+
std::sort(foo.begin(), foo.end(), [&](Foo a, Foo b) -> bool {
+  if (a.blah < b.blah)
+    return true;
+  if (a.baz < b.baz)
+    return true;
+  return a.bam < b.bam;
+});
+
+
+

To take best advantage of this formatting, if you are designing an API which +accepts a continuation or single callable argument (be it a function object, or +a std::function), it should be the last argument if at all possible.

+

If there are multiple multi-line lambdas in a statement, or additional +parameters after the lambda, indent the block two spaces from the indent of the +[]:

+
dyn_switch(V->stripPointerCasts(),
+           [] (PHINode *PN) {
+             // process phis...
+           },
+           [] (SelectInst *SI) {
+             // process selects...
+           },
+           [] (LoadInst *LI) {
+             // process loads...
+           },
+           [] (AllocaInst *AI) {
+             // process allocas...
+           });
+
+
+
+
+
Braced Initializer Lists
+

Starting from C++11, there are significantly more uses of braced lists to +perform initialization. For example, they can be used to construct aggregate +temporaries in expressions. They now have a natural way of ending up nested +within each other and within function calls in order to build up aggregates +(such as option structs) from local variables.

+

The historically common formatting of braced initialization of aggregate +variables does not mix cleanly with deep nesting, general expression contexts, +function arguments, and lambdas. We suggest new code use a simple rule for +formatting braced initialization lists: act as-if the braces were parentheses +in a function call. The formatting rules exactly match those already well +understood for formatting nested function calls. Examples:

+
foo({a, b, c}, {1, 2, 3});
+
+llvm::Constant *Mask[] = {
+    llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 0),
+    llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 1),
+    llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 2)};
+
+
+

This formatting scheme also makes it particularly easy to get predictable, +consistent, and automatic formatting with tools like Clang Format.

+
+
+
+
+

Language and Compiler Issues

+
+

Treat Compiler Warnings Like Errors

+

Compiler warnings are often useful and help improve the code. Those that are +not useful can often be suppressed with a small code change. For example, an +assignment in the if condition is often a typo:

+
if (V = getValue()) {
+  ...
+}
+
+
+

Several compilers will print a warning for the code above. It can be suppressed +by adding parentheses:

+
if ((V = getValue())) {
+  ...
+}
+
+
+
+
+

Write Portable Code

+

In almost all cases, it is possible to write completely portable code. When +you need to rely on non-portable code, put it behind a well-defined and +well-documented interface.

+
+
+

Do not use RTTI or Exceptions

+

In an effort to reduce code and executable size, LLVM does not use exceptions +or RTTI (runtime type information, for example, +dynamic_cast<>).

+

That said, LLVM does make extensive use of a hand-rolled form of RTTI that uses +templates like isa<>, cast<>, and dyn_cast<>. +This form of RTTI is opt-in and can be +added to any class.

+
+
+

Do not use Static Constructors

+

Static constructors and destructors (e.g., global variables whose types have a +constructor or destructor) should not be added to the code base, and should be +removed wherever possible.

+

Globals in different source files are initialized in arbitrary order +(see https://yosefk.com/c++fqa/ctors.html#fqa-10.12), making the code more +difficult to reason about.

+

Static constructors have negative impact on launch time of programs that use +LLVM as a library. We would really like for there to be zero cost for linking +in an additional LLVM target or other library into an application, but static +constructors undermine this goal.

+
+
+

Use of class and struct Keywords

+

In C++, the class and struct keywords can be used almost +interchangeably. The only difference is when they are used to declare a class: +class makes all members private by default while struct makes all +members public by default.

+
    +
  • All declarations and definitions of a given class or struct must use +the same keyword. For example:

  • +
+
// Avoid if `Example` is defined as a struct.
+class Example;
+
+// OK.
+struct Example;
+
+struct Example { ... };
+
+
+
    +
  • struct should be used when all members are declared public.

  • +
+
// Avoid using `struct` here, use `class` instead.
+struct Foo {
+private:
+  int Data;
+public:
+  Foo() : Data(0) { }
+  int getData() const { return Data; }
+  void setData(int D) { Data = D; }
+};
+
+// OK to use `struct`: all members are public.
+struct Bar {
+  int Data;
+  Bar() : Data(0) { }
+};
+
+
+
+
+

Do not use Braced Initializer Lists to Call a Constructor

+

Starting from C++11 there is a “generalized initialization syntax” which allows +calling constructors using braced initializer lists. Do not use these to call +constructors with non-trivial logic or if you care that you’re calling some +particular constructor. Those should look like function calls using +parentheses rather than like aggregate initialization. Similarly, if you need +to explicitly name the type and call its constructor to create a temporary, +don’t use a braced initializer list. Instead, use a braced initializer list +(without any type for temporaries) when doing aggregate initialization or +something notionally equivalent. Examples:

+
class Foo {
+public:
+  // Construct a Foo by reading data from the disk in the whizbang format, ...
+  Foo(std::string filename);
+
+  // Construct a Foo by looking up the Nth element of some global data ...
+  Foo(int N);
+
+  // ...
+};
+
+// The Foo constructor call is reading a file, don't use braces to call it.
+std::fill(foo.begin(), foo.end(), Foo("name"));
+
+// The pair is being constructed like an aggregate, use braces.
+bar_map.insert({my_key, my_value});
+
+
+

If you use a braced initializer list when initializing a variable, use an equals sign before the open curly brace:

+
int data[] = {0, 1, 2, 3};
+
+
+
+
+

Use auto Type Deduction to Make Code More Readable

+

Some are advocating a policy of “almost always auto” in C++11, however LLVM +uses a more moderate stance. Use auto if and only if it makes the code more +readable or easier to maintain. Don’t “almost always” use auto, but do use +auto with initializers like cast<Foo>(...) or other places where the +type is already obvious from the context. Another time when auto works well +for these purposes is when the type would have been abstracted away anyways, +often behind a container’s typedef such as std::vector<T>::iterator.

+

Similarly, C++14 adds generic lambda expressions where parameter types can be +auto. Use these where you would have used a template.

+
+
+

Beware unnecessary copies with auto

+

The convenience of auto makes it easy to forget that its default behavior +is a copy. Particularly in range-based for loops, careless copies are +expensive.

+

Use auto & for values and auto * for pointers unless you need to make a +copy.

+
// Typically there's no reason to copy.
+for (const auto &Val : Container) observe(Val);
+for (auto &Val : Container) Val.change();
+
+// Remove the reference if you really want a new copy.
+for (auto Val : Container) { Val.change(); saveSomewhere(Val); }
+
+// Copy pointers, but make it clear that they're pointers.
+for (const auto *Ptr : Container) observe(*Ptr);
+for (auto *Ptr : Container) Ptr->change();
+
+
+
+
+

Beware of non-determinism due to ordering of pointers

+

In general, there is no relative ordering among pointers. As a result, +when unordered containers like sets and maps are used with pointer keys +the iteration order is undefined. Hence, iterating such containers may +result in non-deterministic code generation. While the generated code +might work correctly, non-determinism can make it harder to reproduce bugs and +debug the compiler.

+

In case an ordered result is expected, remember to +sort an unordered container before iteration. Or use ordered containers +like vector/MapVector/SetVector if you want to iterate pointer +keys.

+
+
+

Beware of non-deterministic sorting order of equal elements

+

std::sort uses a non-stable sorting algorithm in which the order of equal +elements is not guaranteed to be preserved. Thus using std::sort for a +container having equal elements may result in non-deterministic behavior. +To uncover such instances of non-determinism, LLVM has introduced a new +llvm::sort wrapper function. For an EXPENSIVE_CHECKS build this will randomly +shuffle the container before sorting. Default to using llvm::sort instead +of std::sort.

+
+
+
+
+

Style Issues

+
+

The High-Level Issues

+
+

Self-contained Headers

+

Header files should be self-contained (compile on their own) and end in .h. +Non-header files that are meant for inclusion should end in .inc and be +used sparingly.

+

All header files should be self-contained. Users and refactoring tools should +not have to adhere to special conditions to include the header. Specifically, a +header should have header guards and include all other headers it needs.

+

There are rare cases where a file designed to be included is not +self-contained. These are typically intended to be included at unusual +locations, such as the middle of another file. They might not use header +guards, and might not include their prerequisites. Name such files with the +.inc extension. Use sparingly, and prefer self-contained headers when possible.

+

In general, a header should be implemented by one or more .cpp files. Each +of these .cpp files should include the header that defines their interface +first. This ensures that all of the dependences of the header have been +properly added to the header itself, and are not implicit. System headers +should be included after user headers for a translation unit.

+
+
+

Library Layering

+

A directory of header files (for example include/llvm/Foo) defines a +library (Foo). One library (both +its headers and implementation) should only use things from the libraries +listed in its dependencies.

+

Some of this constraint can be enforced by classic Unix linkers (Mac & Windows +linkers, as well as lld, do not enforce this constraint). A Unix linker +searches left to right through the libraries specified on its command line and +never revisits a library. In this way, no circular dependencies between +libraries can exist.

+

This doesn’t fully enforce all inter-library dependencies, and importantly +doesn’t enforce header file circular dependencies created by inline functions. +A good way to answer the “is this layered correctly” question would be to consider +whether a Unix linker would succeed at linking the program if all inline +functions were defined out-of-line. (And for all valid orderings of dependencies +- since linking resolution is linear, it’s possible that some implicit +dependencies can sneak through: A depends on B and C, so valid orderings are +“C B A” or “B C A”, in both cases the explicit dependencies come before their +use. But in the first case, B could still link successfully if it implicitly +depended on C, or the opposite in the second case.)

+
+
+

#include as Little as Possible

+

#include hurts compile time performance. Don’t do it unless you have to, +especially in header files.

+

But wait! Sometimes you need to have the definition of a class to use it, or to +inherit from it. In these cases go ahead and #include that header file. Be +aware however that there are many cases where you don’t need to have the full +definition of a class. If you are using a pointer or reference to a class, you +don’t need the header file. If you are simply returning a class instance from a +prototyped function or method, you don’t need it. In fact, for most cases, you +simply don’t need the definition of a class. And not #includeing speeds up +compilation.

+

It is easy to go overboard on this recommendation, however. You +must include all of the header files that you are using — you can include +them either directly or indirectly through another header file. To make sure +that you don’t accidentally forget to include a header file in your module +header, make sure to include your module header first in the implementation +file (as mentioned above). This way there won’t be any hidden dependencies that +you’ll find out about later.

+
+
+

Keep “Internal” Headers Private

+

Many modules have a complex implementation that causes them to use more than one +implementation (.cpp) file. It is often tempting to put the internal +communication interface (helper classes, extra functions, etc) in the public +module header file. Don’t do this!

+

If you really need to do something like this, put a private header file in the +same directory as the source files, and include it locally. This ensures that +your private interface remains private and undisturbed by outsiders.

+
+

Note

+

It’s okay to put extra implementation methods in a public class itself. Just +make them private (or protected) and all is well.

+
+
+
+

Use Namespace Qualifiers to Implement Previously Declared Functions

+

When providing an out of line implementation of a function in a source file, do +not open namespace blocks in the source file. Instead, use namespace qualifiers +to help ensure that your definition matches an existing declaration. Do this:

+
// Foo.h
+namespace llvm {
+int foo(const char *s);
+}
+
+// Foo.cpp
+#include "Foo.h"
+using namespace llvm;
+int llvm::foo(const char *s) {
+  // ...
+}
+
+
+

Doing this helps to avoid bugs where the definition does not match the +declaration from the header. For example, the following C++ code defines a new +overload of llvm::foo instead of providing a definition for the existing +function declared in the header:

+
// Foo.cpp
+#include "Foo.h"
+namespace llvm {
+int foo(char *s) { // Mismatch between "const char *" and "char *"
+}
+} // end namespace llvm
+
+
+

This error will not be caught until the build is nearly complete, when the +linker fails to find a definition for any uses of the original function. If the +function were instead defined with a namespace qualifier, the error would have +been caught immediately when the definition was compiled.

+

Class method implementations must already name the class and new overloads +cannot be introduced out of line, so this recommendation does not apply to them.

+
+
+

Use Early Exits and continue to Simplify Code

+

When reading code, keep in mind how much state and how many previous decisions +have to be remembered by the reader to understand a block of code. Aim to +reduce indentation where possible when it doesn’t make it more difficult to +understand the code. One great way to do this is by making use of early exits +and the continue keyword in long loops. Consider this code that does not +use an early exit:

+
Value *doSomething(Instruction *I) {
+  if (!I->isTerminator() &&
+      I->hasOneUse() && doOtherThing(I)) {
+    ... some long code ....
+  }
+
+  return 0;
+}
+
+
+

This code has several problems if the body of the 'if' is large. When +you’re looking at the top of the function, it isn’t immediately clear that this +only does interesting things with non-terminator instructions, and only +applies to things with the other predicates. Second, it is relatively difficult +to describe (in comments) why these predicates are important because the if +statement makes it difficult to lay out the comments. Third, when you’re deep +within the body of the code, it is indented an extra level. Finally, when +reading the top of the function, it isn’t clear what the result is if the +predicate isn’t true; you have to read to the end of the function to know that +it returns null.

+

It is much preferred to format the code like this:

+
Value *doSomething(Instruction *I) {
+  // Terminators never need 'something' done to them because ...
+  if (I->isTerminator())
+    return 0;
+
+  // We conservatively avoid transforming instructions with multiple uses
+  // because goats like cheese.
+  if (!I->hasOneUse())
+    return 0;
+
+  // This is really just here for example.
+  if (!doOtherThing(I))
+    return 0;
+
+  ... some long code ....
+}
+
+
+

This fixes these problems. A similar problem frequently happens in for +loops. A silly example is something like this:

+
for (Instruction &I : BB) {
+  if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
+    Value *LHS = BO->getOperand(0);
+    Value *RHS = BO->getOperand(1);
+    if (LHS != RHS) {
+      ...
+    }
+  }
+}
+
+
+

When you have very, very small loops, this sort of structure is fine. But if it +exceeds 10-15 lines, it becomes difficult for people to read and +understand at a glance. The problem with this sort of code is that it gets very +nested very quickly. Meaning that the reader of the code has to keep a lot of +context in their brain to remember what is going on immediately in the loop, +because they don’t know if/when the if conditions will have elses etc. +It is strongly preferred to structure the loop like this:

+
for (Instruction &I : BB) {
+  auto *BO = dyn_cast<BinaryOperator>(&I);
+  if (!BO) continue;
+
+  Value *LHS = BO->getOperand(0);
+  Value *RHS = BO->getOperand(1);
+  if (LHS == RHS) continue;
+
+  ...
+}
+
+
+

This has all the benefits of using early exits for functions: it reduces nesting +of the loop, it makes it easier to describe why the conditions are true, and it +makes it obvious to the reader that there is no else coming up that they +have to push context into their brain for. If a loop is large, this can be a +big understandability win.

+
+
+

Don’t use else after a return

+

For similar reasons as above (reduction of indentation and easier reading), please +do not use 'else' or 'else if' after something that interrupts control +flow — like return, break, continue, goto, etc. For example:

+
case 'J': {
+  if (Signed) {
+    Type = Context.getsigjmp_bufType();
+    if (Type.isNull()) {
+      Error = ASTContext::GE_Missing_sigjmp_buf;
+      return QualType();
+    } else {
+      break; // Unnecessary.
+    }
+  } else {
+    Type = Context.getjmp_bufType();
+    if (Type.isNull()) {
+      Error = ASTContext::GE_Missing_jmp_buf;
+      return QualType();
+    } else {
+      break; // Unnecessary.
+    }
+  }
+}
+
+
+

It is better to write it like this:

+
case 'J':
+  if (Signed) {
+    Type = Context.getsigjmp_bufType();
+    if (Type.isNull()) {
+      Error = ASTContext::GE_Missing_sigjmp_buf;
+      return QualType();
+    }
+  } else {
+    Type = Context.getjmp_bufType();
+    if (Type.isNull()) {
+      Error = ASTContext::GE_Missing_jmp_buf;
+      return QualType();
+    }
+  }
+  break;
+
+
+

Or better yet (in this case) as:

+
case 'J':
+  if (Signed)
+    Type = Context.getsigjmp_bufType();
+  else
+    Type = Context.getjmp_bufType();
+
+  if (Type.isNull()) {
+    Error = Signed ? ASTContext::GE_Missing_sigjmp_buf :
+                     ASTContext::GE_Missing_jmp_buf;
+    return QualType();
+  }
+  break;
+
+
+

The idea is to reduce indentation and the amount of code you have to keep track +of when reading the code.

+
+
+

Turn Predicate Loops into Predicate Functions

+

It is very common to write small loops that just compute a boolean value. There +are a number of ways that people commonly write these, but an example of this +sort of thing is:

+
bool FoundFoo = false;
+for (unsigned I = 0, E = BarList.size(); I != E; ++I)
+  if (BarList[I]->isFoo()) {
+    FoundFoo = true;
+    break;
+  }
+
+if (FoundFoo) {
+  ...
+}
+
+
+

Instead of this sort of loop, we prefer to use a predicate function (which may +be static) that uses early exits:

+
/// \returns true if the specified list has an element that is a foo.
+static bool containsFoo(const std::vector<Bar*> &List) {
+  for (unsigned I = 0, E = List.size(); I != E; ++I)
+    if (List[I]->isFoo())
+      return true;
+  return false;
+}
+...
+
+if (containsFoo(BarList)) {
+  ...
+}
+
+
+

There are many reasons for doing this: it reduces indentation and factors out +code which can often be shared by other code that checks for the same predicate. +More importantly, it forces you to pick a name for the function, and forces +you to write a comment for it. In this silly example, this doesn’t add much +value. However, if the condition is complex, this can make it a lot easier for +the reader to understand the code that queries for this predicate. Instead of +being faced with the in-line details of how we check to see if the BarList +contains a foo, we can trust the function name and continue reading with better +locality.

+
+
+
+

The Low-Level Issues

+
+

Name Types, Functions, Variables, and Enumerators Properly

+

Poorly-chosen names can mislead the reader and cause bugs. We cannot stress +enough how important it is to use descriptive names. Pick names that match +the semantics and role of the underlying entities, within reason. Avoid +abbreviations unless they are well known. After picking a good name, make sure +to use consistent capitalization for the name, as inconsistency requires clients +to either memorize the APIs or to look it up to find the exact spelling.

+

In general, names should be in camel case (e.g. TextFileReader and +isLValue()). Different kinds of declarations have different rules:

+
    +
  • Type names (including classes, structs, enums, typedefs, etc) should be +nouns and start with an upper-case letter (e.g. TextFileReader).

  • +
  • Variable names should be nouns (as they represent state). The name should +be camel case, and start with an upper case letter (e.g. Leader or +Boats).

  • +
  • Function names should be verb phrases (as they represent actions), and +command-like functions should be imperative. The name should be camel case, +and start with a lower case letter (e.g. openFile() or isFoo()).

  • +
  • Enum declarations (e.g. enum Foo {...}) are types, so they should +follow the naming conventions for types. A common use for enums is as a +discriminator for a union, or an indicator of a subclass. When an enum is +used for something like this, it should have a Kind suffix +(e.g. ValueKind).

  • +
  • Enumerators (e.g. enum { Foo, Bar }) and public member variables +should start with an upper-case letter, just like types. Unless the +enumerators are defined in their own small namespace or inside a class, +enumerators should have a prefix corresponding to the enum declaration name. +For example, enum ValueKind { ... }; may contain enumerators like +VK_Argument, VK_BasicBlock, etc. Enumerators that are just +convenience constants are exempt from the requirement for a prefix. For +instance:

    +
    enum {
    +  MaxSize = 42,
    +  Density = 12
    +};
    +
    +
    +
  • +
+

As an exception, classes that mimic STL classes can have member names in STL’s +style of lower-case words separated by underscores (e.g. begin(), +push_back(), and empty()). Classes that provide multiple +iterators should add a singular prefix to begin() and end() +(e.g. global_begin() and use_begin()).

+

Here are some examples:

+
class VehicleMaker {
+  ...
+  Factory<Tire> F;            // Avoid: a non-descriptive abbreviation.
+  Factory<Tire> Factory;      // Better: more descriptive.
+  Factory<Tire> TireFactory;  // Even better: if VehicleMaker has more than one
+                              // kind of factories.
+};
+
+Vehicle makeVehicle(VehicleType Type) {
+  VehicleMaker M;                         // Might be OK if scope is small.
+  Tire Tmp1 = M.makeTire();               // Avoid: 'Tmp1' provides no information.
+  Light Headlight = M.makeLight("head");  // Good: descriptive.
+  ...
+}
+
+
+
+
+

Assert Liberally

+

Use the “assert” macro to its fullest. Check all of your preconditions and +assumptions, you never know when a bug (not necessarily even yours) might be +caught early by an assertion, which reduces debugging time dramatically. The +“<cassert>” header file is probably already included by the header files you +are using, so it doesn’t cost anything to use it.

+

To further assist with debugging, make sure to put some kind of error message in +the assertion statement, which is printed if the assertion is tripped. This +helps the poor debugger make sense of why an assertion is being made and +enforced, and hopefully what to do about it. Here is one complete example:

+
inline Value *getOperand(unsigned I) {
+  assert(I < Operands.size() && "getOperand() out of range!");
+  return Operands[I];
+}
+
+
+

Here are more examples:

+
assert(Ty->isPointerType() && "Can't allocate a non-pointer type!");
+
+assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!");
+
+assert(idx < getNumSuccessors() && "Successor # out of range!");
+
+assert(V1.getType() == V2.getType() && "Constant types must be identical!");
+
+assert(isa<PHINode>(Succ->front()) && "Only works on PHId BBs!");
+
+
+

You get the idea.

+

In the past, asserts were used to indicate a piece of code that should not be +reached. These were typically of the form:

+
assert(0 && "Invalid radix for integer literal");
+
+
+

This has a few issues, the main one being that some compilers might not +understand the assertion, or warn about a missing return in builds where +assertions are compiled out.

+

Today, we have something much better: llvm_unreachable:

+
llvm_unreachable("Invalid radix for integer literal");
+
+
+

When assertions are enabled, this will print the message if it’s ever reached +and then exit the program. When assertions are disabled (i.e. in release +builds), llvm_unreachable becomes a hint to compilers to skip generating +code for this branch. If the compiler does not support this, it will fall back +to the “abort” implementation.

+

Use llvm_unreachable to mark a specific point in code that should never be +reached. This is especially desirable for addressing warnings about unreachable +branches, etc., but can be used whenever reaching a particular code path is +unconditionally a bug (not originating from user input; see below) of some kind. +Use of assert should always include a testable predicate (as opposed to +assert(false)).

+

If the error condition can be triggered by user input then the +recoverable error mechanism described in LLVM Programmer’s Manual should be +used instead. In cases where this is not practical, report_fatal_error may +be used.

+

Another issue is that values used only by assertions will produce an “unused +value” warning when assertions are disabled. For example, this code will warn:

+
unsigned Size = V.size();
+assert(Size > 42 && "Vector smaller than it should be");
+
+bool NewToSet = Myset.insert(Value);
+assert(NewToSet && "The value shouldn't be in the set yet");
+
+
+

These are two interesting different cases. In the first case, the call to +V.size() is only useful for the assert, and we don’t want it executed when +assertions are disabled. Code like this should move the call into the assert +itself. In the second case, the side effects of the call must happen whether +the assert is enabled or not. In this case, the value should be cast to void to +disable the warning. To be specific, it is preferred to write the code like +this:

+
assert(V.size() > 42 && "Vector smaller than it should be");
+
+bool NewToSet = Myset.insert(Value); (void)NewToSet;
+assert(NewToSet && "The value shouldn't be in the set yet");
+
+
+
+
+

Do Not Use using namespace std

+

In LLVM, we prefer to explicitly prefix all identifiers from the standard +namespace with an “std::” prefix, rather than rely on “using namespace +std;”.

+

In header files, adding a 'using namespace XXX' directive pollutes the +namespace of any source file that #includes the header, creating +maintenance issues.

+

In implementation files (e.g. .cpp files), the rule is more of a stylistic +rule, but is still important. Basically, using explicit namespace prefixes +makes the code clearer, because it is immediately obvious what facilities +are being used and where they are coming from. And more portable, because +namespace clashes cannot occur between LLVM code and other namespaces. The +portability rule is important because different standard library implementations +expose different symbols (potentially ones they shouldn’t), and future revisions +to the C++ standard will add more symbols to the std namespace. As such, we +never use 'using namespace std;' in LLVM.

+

The exception to the general rule (i.e. it’s not an exception for the std +namespace) is for implementation files. For example, all of the code in the +LLVM project implements code that lives in the ‘llvm’ namespace. As such, it is +ok, and actually clearer, for the .cpp files to have a 'using namespace +llvm;' directive at the top, after the #includes. This reduces +indentation in the body of the file for source editors that indent based on +braces, and keeps the conceptual context cleaner. The general form of this rule +is that any .cpp file that implements code in any namespace may use that +namespace (and its parents’), but should not use any others.

+
+
+

Provide a Virtual Method Anchor for Classes in Headers

+

If a class is defined in a header file and has a vtable (either it has virtual +methods or it derives from classes with virtual methods), it must always have at +least one out-of-line virtual method in the class. Without this, the compiler +will copy the vtable and RTTI into every .o file that #includes the +header, bloating .o file sizes and increasing link times.

+
+
+

Don’t use default labels in fully covered switches over enumerations

+

-Wswitch warns if a switch, without a default label, over an enumeration +does not cover every enumeration value. If you write a default label on a fully +covered switch over an enumeration then the -Wswitch warning won’t fire +when new elements are added to that enumeration. To help avoid adding these +kinds of defaults, Clang has the warning -Wcovered-switch-default which is +off by default but turned on when building LLVM with a version of Clang that +supports the warning.

+

A knock-on effect of this stylistic requirement is that when building LLVM with +GCC you may get warnings related to “control may reach end of non-void function” +if you return from each case of a covered switch-over-enum because GCC assumes +that the enum expression may take any representable value, not just those of +individual enumerators. To suppress this warning, use llvm_unreachable after +the switch.

+
+
+

Use range-based for loops wherever possible

+

The introduction of range-based for loops in C++11 means that explicit +manipulation of iterators is rarely necessary. We use range-based for +loops wherever possible for all newly added code. For example:

+
BasicBlock *BB = ...
+for (Instruction &I : *BB)
+  ... use I ...
+
+
+

Usage of std::for_each()/llvm::for_each() functions is discouraged, +unless the callable object already exists.

+
+
+

Don’t evaluate end() every time through a loop

+

In cases where range-based for loops can’t be used and it is necessary +to write an explicit iterator-based loop, pay close attention to whether +end() is re-evaluated on each loop iteration. One common mistake is to +write a loop in this style:

+
BasicBlock *BB = ...
+for (auto I = BB->begin(); I != BB->end(); ++I)
+  ... use I ...
+
+
+

The problem with this construct is that it evaluates “BB->end()” every time +through the loop. Instead of writing the loop like this, we strongly prefer +loops to be written so that they evaluate it once before the loop starts. A +convenient way to do this is like so:

+
BasicBlock *BB = ...
+for (auto I = BB->begin(), E = BB->end(); I != E; ++I)
+  ... use I ...
+
+
+

The observant may quickly point out that these two loops may have different +semantics: if the container (a basic block in this case) is being mutated, then +“BB->end()” may change its value every time through the loop and the second +loop may not in fact be correct. If you actually do depend on this behavior, +please write the loop in the first form and add a comment indicating that you +did it intentionally.

+

Why do we prefer the second form (when correct)? Writing the loop in the first +form has two problems. First it may be less efficient than evaluating it at the +start of the loop. In this case, the cost is probably minor — a few extra +loads every time through the loop. However, if the base expression is more +complex, then the cost can rise quickly. I’ve seen loops where the end +expression was actually something like: “SomeMap[X]->end()” and map lookups +really aren’t cheap. By writing it in the second form consistently, you +eliminate the issue entirely and don’t even have to think about it.

+

The second (even bigger) issue is that writing the loop in the first form hints +to the reader that the loop is mutating the container (a fact that a comment +would handily confirm!). If you write the loop in the second form, it is +immediately obvious without even looking at the body of the loop that the +container isn’t being modified, which makes it easier to read the code and +understand what it does.

+

While the second form of the loop is a few extra keystrokes, we do strongly +prefer it.

+
+
+

#include <iostream> is Forbidden

+

The use of #include <iostream> in library files is hereby forbidden, +because many common implementations transparently inject a static constructor +into every translation unit that includes it.

+

Note that using the other stream headers (<sstream> for example) is not +problematic in this regard — just <iostream>. However, raw_ostream +provides various APIs that are better performing for almost every use than +std::ostream style APIs.

+
+

Note

+

New code should always use raw_ostream for writing, or the +llvm::MemoryBuffer API for reading files.

+
+
+
+

Use raw_ostream

+

LLVM includes a lightweight, simple, and efficient stream implementation in +llvm/Support/raw_ostream.h, which provides all of the common features of +std::ostream. All new code should use raw_ostream instead of +ostream.

+

Unlike std::ostream, raw_ostream is not a template and can be forward +declared as class raw_ostream. Public headers should generally not include +the raw_ostream header, but use forward declarations and constant references +to raw_ostream instances.

+
+
+

Avoid std::endl

+

The std::endl modifier, when used with iostreams, outputs a newline to +the output stream specified. In addition to doing this, however, it also +flushes the output stream. In other words, these are equivalent:

+
std::cout << std::endl;
+std::cout << '\n' << std::flush;
+
+
+

Most of the time, you probably have no reason to flush the output stream, so +it’s better to use a literal '\n'.

+
+
+

Don’t use inline when defining a function in a class definition

+

A member function defined in a class definition is implicitly inline, so don’t +put the inline keyword in this case.

+

Don’t:

+
class Foo {
+public:
+  inline void bar() {
+    // ...
+  }
+};
+
+
+

Do:

+
class Foo {
+public:
+  void bar() {
+    // ...
+  }
+};
+
+
+
+
+
+

Microscopic Details

+

This section describes preferred low-level formatting guidelines along with +reasoning on why we prefer them.

+
+

Spaces Before Parentheses

+

Put a space before an open parenthesis only in control flow statements, but not +in normal function call expressions and function-like macros. For example:

+
if (X) ...
+for (I = 0; I != 100; ++I) ...
+while (LLVMRocks) ...
+
+somefunc(42);
+assert(3 != 4 && "laws of math are failing me");
+
+A = foo(42, 92) + bar(X);
+
+
+

The reason for doing this is not completely arbitrary. This style makes control +flow operators stand out more, and makes expressions flow better.

+
+
+

Prefer Preincrement

+

Hard and fast rule: Preincrement (++X) may be no slower than postincrement +(X++) and could very well be a lot faster than it. Use preincrementation +whenever possible.

+

The semantics of postincrement include making a copy of the value being +incremented, returning it, and then preincrementing the “work value”. For +primitive types, this isn’t a big deal. But for iterators, it can be a huge +issue (for example, some iterators contain stack and set objects in them… +copying an iterator could invoke the copy ctors of these as well). In general, +get in the habit of always using preincrement, and you won’t have a problem.

+
+
+

Namespace Indentation

+

In general, we strive to reduce indentation wherever possible. This is useful +because we want code to fit into 80 columns without excessive wrapping, but +also because it makes it easier to understand the code. To facilitate this and +avoid some insanely deep nesting on occasion, don’t indent namespaces. If it +helps readability, feel free to add a comment indicating what namespace is +being closed by a }. For example:

+
namespace llvm {
+namespace knowledge {
+
+/// This class represents things that Smith can have an intimate
+/// understanding of and contains the data associated with it.
+class Grokable {
+...
+public:
+  explicit Grokable() { ... }
+  virtual ~Grokable() = 0;
+
+  ...
+
+};
+
+} // end namespace knowledge
+} // end namespace llvm
+
+
+

Feel free to skip the closing comment when the namespace being closed is +obvious for any reason. For example, the outer-most namespace in a header file +is rarely a source of confusion. But namespaces both anonymous and named in +source files that are being closed half way through the file probably could use +clarification.

+
+
+

Anonymous Namespaces

+

After talking about namespaces in general, you may be wondering about anonymous +namespaces in particular. Anonymous namespaces are a great language feature +that tells the C++ compiler that the contents of the namespace are only visible +within the current translation unit, allowing more aggressive optimization and +eliminating the possibility of symbol name collisions. Anonymous namespaces are +to C++ as “static” is to C functions and global variables. While “static” +is available in C++, anonymous namespaces are more general: they can make entire +classes private to a file.

+

The problem with anonymous namespaces is that they naturally want to encourage +indentation of their body, and they reduce locality of reference: if you see a +random function definition in a C++ file, it is easy to see if it is marked +static, but seeing if it is in an anonymous namespace requires scanning a big +chunk of the file.

+

Because of this, we have a simple guideline: make anonymous namespaces as small +as possible, and only use them for class declarations. For example:

+
namespace {
+class StringSort {
+...
+public:
+  StringSort(...)
+  bool operator<(const char *RHS) const;
+};
+} // end anonymous namespace
+
+static void runHelper() {
+  ...
+}
+
+bool StringSort::operator<(const char *RHS) const {
+  ...
+}
+
+
+

Avoid putting declarations other than classes into anonymous namespaces:

+
namespace {
+
+// ... many declarations ...
+
+void runHelper() {
+  ...
+}
+
+// ... many declarations ...
+
+} // end anonymous namespace
+
+
+

When you are looking at “runHelper” in the middle of a large C++ file, +you have no immediate way to tell if this function is local to the file. In +contrast, when the function is marked static, you don’t need to cross-reference +faraway places in the file to tell that the function is local.

+
+
+

Don’t Use Braces on Simple Single-Statement Bodies of if/else/loop Statements

+

When writing the body of an if, else, or loop statement, we prefer to +omit the braces to avoid unnecessary line noise. However, braces should be used +in cases where the omission of braces harms the readability and maintainability +of the code.

+

We consider that readability is harmed when omitting the brace in the presence +of a single statement that is accompanied by a comment (assuming the comment +can’t be hoisted above the if or loop statement, see below). +Similarly, braces should be used when a single-statement body is complex enough +that it becomes difficult to see where the block containing the following +statement began. An if/else chain or a loop is considered a single +statement for this rule, and this rule applies recursively.

+

This list is not exhaustive, for example, readability is also harmed if an +if/else chain does not use braced bodies for either all or none of its +members, with complex conditionals, deep nesting, etc. The examples below +intend to provide some guidelines.

+

Maintainability is harmed if the body of an if ends with a (directly or +indirectly) nested if statement with no else. Braces on the outer if +would help to avoid running into a “dangling else” situation.

+
// Omit the braces, since the body is simple and clearly associated with the if.
+if (isa<FunctionDecl>(D))
+  handleFunctionDecl(D);
+else if (isa<VarDecl>(D))
+  handleVarDecl(D);
+
+
+// Here we document the condition itself and not the body.
+if (isa<VarDecl>(D)) {
+  // It is necessary that we explain the situation with this surprisingly long
+  // comment, so it would be unclear without the braces whether the following
+  // statement is in the scope of the `if`.
+  // Because the condition is documented, we can't really hoist this
+  // comment that applies to the body above the if.
+  handleOtherDecl(D);
+}
+
+// Use braces on the outer `if` to avoid a potential dangling else situation.
+if (isa<VarDecl>(D)) {
+  for (auto *A : D.attrs())
+    if (shouldProcessAttr(A))
+      handleAttr(A);
+}
+
+// Use braces for the `if` block to keep it uniform with the else block.
+if (isa<FunctionDecl>(D)) {
+  handleFunctionDecl(D);
+} else {
+  // In this else case, it is necessary that we explain the situation with this
+  // surprisingly long comment, so it would be unclear without the braces whether
+  // the following statement is in the scope of the `if`.
+  handleOtherDecl(D);
+}
+
+// This should also omit braces.  The `for` loop contains only a single statement,
+// so it shouldn't have braces.  The `if` also only contains a single simple
+// statement (the for loop), so it also should omit braces.
+if (isa<FunctionDecl>(D))
+  for (auto *A : D.attrs())
+    handleAttr(A);
+
+// Use braces for the outer `if` since the nested `for` is braced.
+if (isa<FunctionDecl>(D)) {
+  for (auto *A : D.attrs()) {
+    // In this for loop body, it is necessary that we explain the situation
+    // with this surprisingly long comment, forcing braces on the `for` block.
+    handleAttr(A);
+  }
+}
+
+// Use braces on the outer block because there are more than two levels of nesting.
+if (isa<FunctionDecl>(D)) {
+  for (auto *A : D.attrs())
+    for (ssize_t i : llvm::seq<ssize_t>(count))
+       handleAttrOnDecl(D, A, i);
+}
+
+// Use braces on the outer block because of a nested `if`, otherwise the
+// compiler would warn: `add explicit braces to avoid dangling else`
+if (auto *D = dyn_cast<FunctionDecl>(D)) {
+  if (shouldProcess(D))
+    handleVarDecl(D);
+  else
+    markAsIgnored(D);
+}
+
+
+
+
+
+
+

See Also

+

A lot of these comments and recommendations have been culled from other sources. +Two particularly important books for our work are:

+
    +
  1. Effective C++ +by Scott Meyers. Also interesting and useful are “More Effective C++” and +“Effective STL” by the same author.

  2. +
  3. Large-Scale C++ Software Design +by John Lakos

  4. +
+

If you get some free time, and you haven’t read them: do so, you might learn +something.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/bugpoint.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/bugpoint.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/bugpoint.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/bugpoint.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,325 @@ + + + + + + + + + bugpoint - automatic test case reduction tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

bugpoint - automatic test case reduction tool

+
+

SYNOPSIS

+

bugpoint [options] [input LLVM ll/bc files] [LLVM passes] --args +program arguments

+
+
+

DESCRIPTION

+

bugpoint narrows down the source of problems in LLVM tools and passes. It +can be used to debug three types of failures: optimizer crashes, miscompilations +by optimizers, or bad native code generation (including problems in the static +and JIT compilers). It aims to reduce large test cases to small, useful ones. +For more information on the design and inner workings of bugpoint, as well as +advice for using bugpoint, see LLVM bugpoint tool: design and usage in the LLVM +distribution.

+
+
+

OPTIONS

+

--additional-so library

+
+

Load the dynamic shared object library into the test program whenever it is +run. This is useful if you are debugging programs which depend on non-LLVM +libraries (such as the X or curses libraries) to run.

+
+

--append-exit-code={true,false}

+
+

Append the test program’s exit code to the output file so that a change in exit +code is considered a test failure. Defaults to false.

+
+

--args program args

+
+

Pass all arguments specified after --args to the test program whenever it runs. +Note that if any of the program args start with a “-”, you should use:

+
bugpoint [bugpoint args] --args -- [program args]
+
+
+

The “--” right after the --args option tells bugpoint to consider +any options starting with “-” to be part of the --args option, not as +options to bugpoint itself.

+
+

--tool-args tool args

+
+

Pass all arguments specified after --tool-args to the LLVM tool under test +(llc, lli, etc.) whenever it runs. You should use this option in the +following way:

+
bugpoint [bugpoint args] --tool-args -- [tool args]
+
+
+

The “--” right after the --tool-args option tells bugpoint to +consider any options starting with “-” to be part of the --tool-args +option, not as options to bugpoint itself. (See --args, above.)

+
+

--safe-tool-args tool args

+
+

Pass all arguments specified after --safe-tool-args to the “safe” execution +tool.

+
+

--gcc-tool-args gcc tool args

+
+

Pass all arguments specified after --gcc-tool-args to the invocation of +gcc.

+
+

--opt-args opt args

+
+

Pass all arguments specified after --opt-args to the invocation of opt.

+
+

--disable-{dce,simplifycfg}

+
+

Do not run the specified passes to clean up and reduce the size of the test +program. By default, bugpoint uses these passes internally when attempting to +reduce test programs. If you’re trying to find a bug in one of these passes, +bugpoint may crash.

+
+

--enable-valgrind

+
+

Use valgrind to find faults in the optimization phase. This will allow +bugpoint to find otherwise asymptomatic problems caused by memory +mis-management.

+
+

-find-bugs

+
+

Continually randomize the specified passes and run them on the test program +until a bug is found or the user kills bugpoint.

+
+

-help

+
+

Print a summary of command line options.

+
+

--input filename

+
+

Open filename and redirect the standard input of the test program, whenever +it runs, to come from that file.

+
+

--load plugin

+
+

Load the dynamic object plugin into bugpoint itself. This object should +register new optimization passes. Once loaded, the object will add new command +line options to enable various optimizations. To see the new complete list of +optimizations, use the -help and --load options together; for example:

+
bugpoint --load myNewPass.so -help
+
+
+
+

--mlimit megabytes

+
+

Specifies an upper limit on memory usage of the optimization and codegen. Set +to zero to disable the limit.

+
+

--output filename

+
+

Whenever the test program produces output on its standard output stream, it +should match the contents of filename (the “reference output”). If you +do not use this option, bugpoint will attempt to generate a reference output +by compiling the program with the “safe” backend and running it.

+
+

--run-{int,jit,llc,custom}

+
+

Whenever the test program is compiled, bugpoint should generate code for it +using the specified code generator. These options allow you to choose the +interpreter, the JIT compiler, the static native code compiler, or a +custom command (see --exec-command) respectively.

+
+

--safe-{llc,custom}

+
+

When debugging a code generator, bugpoint should use the specified code +generator as the “safe” code generator. This is a known-good code generator +used to generate the “reference output” if it has not been provided, and to +compile portions of the program that are excluded from the testcase. +These options allow you to choose the +static native code compiler, or a custom command (see --exec-command), +respectively. The interpreter and the JIT backends cannot currently +be used as the “safe” backends.

+
+

--exec-command command

+
+

This option defines the command to use with the --run-custom and +--safe-custom options to execute the bitcode testcase. This can +be useful for cross-compilation.

+
+

--compile-command command

+
+

This option defines the command to use with the --compile-custom +option to compile the bitcode testcase. The command should exit with a +failure exit code if the file is “interesting” and should exit with a +success exit code (i.e. 0) otherwise (this is the same as if it crashed on +“interesting” inputs).

+

This can be useful for +testing compiler output without running any link or execute stages. To +generate a reduced unit test, you may add CHECK directives to the +testcase and pass the name of an executable compile-command script in this form:

+
#!/bin/sh
+llc "$@"
+not FileCheck [bugpoint input file].ll < bugpoint-test-program.s
+
+
+

This script will “fail” as long as FileCheck passes. So the result +will be the minimum bitcode that passes FileCheck.

+
+

--safe-path path

+
+

This option defines the path to the command to execute with the +--safe-{int,jit,llc,custom} +option.

+
+

--verbose-errors={true,false}

+
+

The default behavior of bugpoint is to print “<crash>” when it finds a reduced +test that crashes compilation. This flag prints the output of the crashing +program to stderr. This is useful to make sure it is the same error being +tracked down and not a different error that happens to crash the compiler as +well. Defaults to false.

+
+
+
+

EXIT STATUS

+

If bugpoint succeeds in finding a problem, it will exit with 0. Otherwise, +if an error occurs, it will exit with a non-zero value.

+
+
+

SEE ALSO

+

opt(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/clang-tblgen.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/clang-tblgen.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/clang-tblgen.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/clang-tblgen.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + clang-tblgen - Description to C++ Code for Clang — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

clang-tblgen - Description to C++ Code for Clang

+
+

SYNOPSIS

+

clang-tblgen [options] [filename]

+
+
+

DESCRIPTION

+

clang-tblgen is a program that translates compiler-related target +description (.td) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler.

+

Please see tblgen - Description to C++ Code +for a description of the filename argument and options, including the +options common to all *-tblgen programs.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/dsymutil.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/dsymutil.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/dsymutil.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/dsymutil.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,369 @@ + + + + + + + + + dsymutil - manipulate archived DWARF debug symbol files — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

dsymutil - manipulate archived DWARF debug symbol files

+
+

SYNOPSIS

+
+
dsymutil [options] executable
+
+
+
+

DESCRIPTION

+

dsymutil links the DWARF debug information found in the object files +for the given executable by using the debug symbol information contained in +its symbol table. By default, the linked debug information is placed in a +.dSYM bundle with the same name as the executable.

+
+
+

OPTIONS

+
+
+--accelerator=<accelerator type>
+

Specify the desired type of accelerator table. Valid options are ‘Apple’, +‘Dwarf’ and ‘Default’.

+
+ +
+
+--arch <arch>
+

Link DWARF debug information only for specified CPU architecture types. +Architectures may be specified by name. When using this option, an error will +be returned if any architectures can not be properly linked. This option can +be specified multiple times, once for each desired architecture. All CPU +architectures will be linked by default and any architectures that can’t be +properly linked will cause dsymutil to return an error.

+
+ +
+
+--dump-debug-map
+

Dump the executable’s debug-map (the list of the object files containing the +debug information) in YAML format and exit. No DWARF link will take place.

+
+ +
+
+--flat, -f
+

Produce a flat dSYM file. A .dwarf extension will be appended to the +executable name unless the output file is specified using the -o option.

+
+ +
+
+--gen-reproducer
+

Generate a reproducer consisting of the input object files.

+
+ +
+
+--help, -h
+

Print this help output.

+
+ +
+
+--keep-function-for-static
+

Make a static variable keep the enclosing function even if it would have been +omitted otherwise.

+
+ +
+
+--minimize, -z
+

When used while creating a dSYM file, this option will suppress the emission of +the .debug_inlines, .debug_pubnames, and .debug_pubtypes sections since +dsymutil currently has better equivalents: .apple_names and .apple_types. When +used in conjunction with the --update option, this option will cause redundant +accelerator tables to be removed.

+
+ +
+
+--no-odr
+

Do not use ODR (One Definition Rule) for uniquing C++ types.

+
+ +
+
+--no-output
+

Do the link in memory, but do not emit the result file.

+
+ +
+
+--no-swiftmodule-timestamp
+

Don’t check the timestamp for swiftmodule files.

+
+ +
+
+--num-threads <threads>, -j <threads>
+

Specifies the maximum number (n) of simultaneous threads to use when +linking multiple architectures.

+
+ +
+
+--object-prefix-map <prefix=remapped>
+

Remap object file paths (but not source paths) before processing. Use +this for Clang objects where the module cache location was remapped using +-fdebug-prefix-map, to help dsymutil find the Clang module cache.

+
+ +
+
+--oso-prepend-path <path>
+

Specifies a path to prepend to all debug symbol object file paths.

+
+ +
+
+--out <filename>, -o <filename>
+

Specifies an alternate path to place the dSYM bundle. The default dSYM +bundle path is created by appending .dSYM to the executable name.

+
+ +
+
+--papertrail
+

When running dsymutil as part of your build system, it can be desirable for +warnings to be part of the end product, rather than just being emitted to the +output stream. When enabled, warnings are embedded in the linked DWARF debug +information.

+
+ +
+
+--remarks-output-format <format>
+

Specify the format to be used when serializing the linked remarks.

+
+ +
+
+--remarks-prepend-path <path>
+

Specify a directory to prepend the paths of the external remark files.

+
+ +
+
+--statistics
+

Print statistics about the contribution of each object file to the linked +debug info. This prints a table after linking with the object file name, the +size of the debug info in the object file (in bytes) and the size contributed +(in bytes) to the linked dSYM. The table is sorted by the output size listing +the object files with the largest contribution first.

+
+ +
+
+--symbol-map <bcsymbolmap>
+

Update the existing dSYMs in place using the specified symbol map.

+
+ +
+
+-s, --symtab
+

Dumps the symbol table found in executable or object file(s) and exits.

+
+ +
+
+-S
+

Output textual assembly instead of a binary dSYM companion file.

+
+ +
+
+--toolchain <toolchain>
+

Embed the toolchain in the dSYM bundle’s property list.

+
+ +
+
+-u, --update
+

Update an existing dSYM file to contain the latest accelerator tables and +other DWARF optimizations. This option will rebuild the ‘.apple_names’ and +‘.apple_types’ hashed accelerator tables.

+
+ +
+
+--use-reproducer <path>
+

Use the object files from the given reproducer path.

+
+ +
+
+--verbose
+

Display verbose information when linking.

+
+ +
+
+--verify
+

Run the DWARF verifier on the linked DWARF debug info.

+
+ +
+
+-v, --version
+

Display the version of the tool.

+
+ +
+
+-y
+

Treat executable as a YAML debug-map rather than an executable.

+
+ +
+
+

EXIT STATUS

+

dsymutil returns 0 if the DWARF debug information was linked +successfully. Otherwise, it returns 1.

+
+
+

SEE ALSO

+

llvm-dwarfdump(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/FileCheck.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/FileCheck.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/FileCheck.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/FileCheck.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,1027 @@ + + + + + + + + + FileCheck - Flexible pattern matching file verifier — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FileCheck - Flexible pattern matching file verifier

+
+

SYNOPSIS

+

FileCheck match-filename [--check-prefix=XXX] [--strict-whitespace]

+
+
+

DESCRIPTION

+

FileCheck reads two files (one from standard input, and one +specified on the command line) and uses one to verify the other. This +behavior is particularly useful for the testsuite, which wants to verify that +the output of some tool (e.g. llc) contains the expected information +(for example, a movsd from esp or whatever is interesting). This is similar to +using grep, but it is optimized for matching multiple different +inputs in one file in a specific order.

+

The match-filename file specifies the file that contains the patterns to +match. The file to verify is read from standard input unless the +--input-file option is used.

+
+
+

OPTIONS

+

Options are parsed from the environment variable FILECHECK_OPTS +and from the command line.

+
+
+-help
+

Print a summary of command line options.

+
+ +
+
+--check-prefix prefix
+

FileCheck searches the contents of match-filename for patterns to +match. By default, these patterns are prefixed with “CHECK:”. +If you’d like to use a different prefix (e.g. because the same input +file is checking multiple different tools or options), the +--check-prefix argument allows you to specify (without the trailing +“:”) one or more prefixes to match. Multiple prefixes are useful for tests +which might change for different run options, but most lines remain the same.

+

FileCheck does not permit duplicate prefixes, even if one is a check prefix +and one is a comment prefix (see --comment-prefixes below).

+
+ +
+
+--check-prefixes prefix1,prefix2,...
+

An alias of --check-prefix that allows multiple prefixes to be +specified as a comma separated list.

+
+ +
+
+--comment-prefixes prefix1,prefix2,...
+

By default, FileCheck ignores any occurrence in match-filename of any check +prefix if it is preceded on the same line by “COM:” or “RUN:”. See the +section The “COM:” directive for usage details.

+

These default comment prefixes can be overridden by +--comment-prefixes if they are not appropriate for your testing +environment. However, doing so is not recommended in LLVM’s LIT-based test +suites, which should be easier to maintain if they all follow a consistent +comment style. In that case, consider proposing a change to the default +comment prefixes instead.

+
+ +
+
+--allow-unused-prefixes
+

This option controls the behavior when using more than one prefix as specified +by --check-prefix or --check-prefixes, and some of these +prefixes are missing in the test file. If true, this is allowed; if false, +FileCheck will report an error, listing the missing prefixes.

+

It is currently, temporarily, true by default, and will be subsequently +switched to false.

+
+ +
+
+--input-file filename
+

File to check (defaults to stdin).

+
+ +
+
+--match-full-lines
+

By default, FileCheck allows matches anywhere on a line. This +option will require all positive matches to cover an entire +line. Leading and trailing whitespace is ignored, unless +--strict-whitespace is also specified. (Note: negative +matches from CHECK-NOT are not affected by this option!)

+

Passing this option is equivalent to inserting {{^ *}} or +{{^}} before, and {{ *$}} or {{$}} after every positive +check pattern.

+
+ +
+
+--strict-whitespace
+

By default, FileCheck canonicalizes input horizontal whitespace (spaces and +tabs) which causes it to ignore these differences (a space will match a tab). +The --strict-whitespace argument disables this behavior. End-of-line +sequences are canonicalized to UNIX-style \n in all modes.

+
+ +
+
+--ignore-case
+

By default, FileCheck uses case-sensitive matching. This option causes +FileCheck to use case-insensitive matching.

+
+ +
+
+--implicit-check-not check-pattern
+

Adds implicit negative checks for the specified patterns between positive +checks. The option allows writing stricter tests without stuffing them with +CHECK-NOTs.

+

For example, “--implicit-check-not warning:” can be useful when testing +diagnostic messages from tools that don’t have an option similar to clang +-verify. With this option FileCheck will verify that input does not contain +warnings not covered by any CHECK: patterns.

+
+ +
+
+--dump-input <value>
+

Dump input to stderr, adding annotations representing currently enabled +diagnostics. When there are multiple occurrences of this option, the +<value> that appears earliest in the list below has precedence. The +default is fail.

+
    +
  • help - Explain input dump and quit

  • +
  • always - Always dump input

  • +
  • fail - Dump input on failure

  • +
  • never - Never dump input

  • +
+
+ +
+
+--dump-input-context <N>
+

In the dump requested by --dump-input, print <N> input lines before +and <N> input lines after any lines specified by --dump-input-filter. +When there are multiple occurrences of this option, the largest specified +<N> has precedence. The default is 5.

+
+ +
+
+--dump-input-filter <value>
+

In the dump requested by --dump-input, print only input lines of kind +<value> plus any context specified by --dump-input-context. When +there are multiple occurrences of this option, the <value> that appears +earliest in the list below has precedence. The default is error when +--dump-input=fail, and it’s all when --dump-input=always.

+
    +
  • all - All input lines

  • +
  • annotation-full - Input lines with annotations

  • +
  • annotation - Input lines with starting points of annotations

  • +
  • error - Input lines with starting points of error annotations

  • +
+
+ +
+
+--enable-var-scope
+

Enables scope for regex variables.

+

Variables with names that start with $ are considered global and +remain set throughout the file.

+

All other variables get undefined after each encountered CHECK-LABEL.

+
+ +
+
+-D<VAR=VALUE>
+

Sets a filecheck pattern variable VAR with value VALUE that can be +used in CHECK: lines.

+
+ +
+
+-D#<FMT>,<NUMVAR>=<NUMERIC EXPRESSION>
+

Sets a filecheck numeric variable NUMVAR of matching format FMT to +the result of evaluating <NUMERIC EXPRESSION> that can be used in +CHECK: lines. See section +FileCheck Numeric Variables and Expressions for details on supported +numeric expressions.

+
+ +
+
+-version
+

Show the version number of this program.

+
+ +
+
+-v
+

Print good directive pattern matches. However, if -dump-input=fail or +-dump-input=always, add those matches as input annotations instead.

+
+ +
+
+-vv
+

Print information helpful in diagnosing internal FileCheck issues, such as +discarded overlapping CHECK-DAG: matches, implicit EOF pattern matches, +and CHECK-NOT: patterns that do not have matches. Implies -v. +However, if -dump-input=fail or -dump-input=always, just add that +information as input annotations instead.

+
+ +
+
+--allow-deprecated-dag-overlap
+

Enable overlapping among matches in a group of consecutive CHECK-DAG: +directives. This option is deprecated and is only provided for convenience +as old tests are migrated to the new non-overlapping CHECK-DAG: +implementation.

+
+ +
+
+--allow-empty
+

Allow checking empty input. By default, empty input is rejected.

+
+ +
+
+--color
+

Use colors in output (autodetected by default).

+
+ +
+
+

EXIT STATUS

+

If FileCheck verifies that the file matches the expected contents, +it exits with 0. Otherwise, or if an error occurs, it exits with a +non-zero value.

+
+
+

TUTORIAL

+

FileCheck is typically used from LLVM regression tests, being invoked on the RUN +line of the test. A simple example of using FileCheck from a RUN line looks +like this:

+
; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
+
+
+

This syntax says to pipe the current file (“%s”) into llvm-as, pipe +that into llc, then pipe the output of llc into FileCheck. This +means that FileCheck will be verifying its standard input (the llc output) +against the filename argument specified (the original .ll file specified by +“%s”). To see how this works, let’s look at the rest of the .ll file +(after the RUN line):

+
define void @sub1(i32* %p, i32 %v) {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
+        ret void
+}
+
+define void @inc4(i64* %p) {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
+        ret void
+}
+
+
+

Here you can see some “CHECK:” lines specified in comments. Now you can +see how the file is piped into llvm-as, then llc, and the machine code +output is what we are verifying. FileCheck checks the machine code output to +verify that it matches what the “CHECK:” lines specify.

+

The syntax of the “CHECK:” lines is very simple: they are fixed strings that +must occur in order. FileCheck defaults to ignoring horizontal whitespace +differences (e.g. a space is allowed to match a tab) but otherwise, the contents +of the “CHECK:” line are required to match something in the test file exactly.

+

One nice thing about FileCheck (compared to grep) is that it allows merging +test cases together into logical groups. For example, because the test above +is checking for the “sub1:” and “inc4:” labels, it will not match +unless there is a “subl” in between those labels. If it existed somewhere +else in the file, that would not count: “grep subl” matches if “subl” +exists anywhere in the file.

+
+

The FileCheck -check-prefix option

+

The FileCheck -check-prefix option allows multiple test +configurations to be driven from one .ll file. This is useful in many +circumstances, for example, testing different architectural variants with +llc. Here’s a simple example:

+
; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
+; RUN:              | FileCheck %s -check-prefix=X32
+; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \
+; RUN:              | FileCheck %s -check-prefix=X64
+
+define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
+        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
+        ret <4 x i32> %tmp1
+; X32: pinsrd_1:
+; X32:    pinsrd $1, 4(%esp), %xmm0
+
+; X64: pinsrd_1:
+; X64:    pinsrd $1, %edi, %xmm0
+}
+
+
+

In this case, we’re testing that we get the expected code generation with +both 32-bit and 64-bit code generation.

+
+
+

The “COM:” directive

+

Sometimes you want to disable a FileCheck directive without removing it +entirely, or you want to write comments that mention a directive by name. The +“COM:” directive makes it easy to do this. For example, you might have:

+
; X32: pinsrd_1:
+; X32:    pinsrd $1, 4(%esp), %xmm0
+
+; COM: FIXME: X64 isn't working correctly yet for this part of codegen, but
+; COM: X64 will have something similar to X32:
+; COM:
+; COM:   X64: pinsrd_1:
+; COM:   X64:    pinsrd $1, %edi, %xmm0
+
+
+

Without “COM:”, you would need to use some combination of rewording and +directive syntax mangling to prevent FileCheck from recognizing the commented +occurrences of “X32:” and “X64:” above as directives. Moreover, +FileCheck diagnostics have been proposed that might complain about the above +occurrences of “X64” that don’t have the trailing “:” because they look +like directive typos. Dodging all these problems can be tedious for a test +author, and directive syntax mangling can make the purpose of test code unclear. +“COM:” avoids all these problems.

+

A few important usage notes:

+
    +
  • “COM:” within another directive’s pattern does not comment out the +remainder of the pattern. For example:

    +
    ; X32: pinsrd $1, 4(%esp), %xmm0 COM: This is part of the X32 pattern!
    +
    +
    +

    If you need to temporarily comment out part of a directive’s pattern, move it +to another line. The reason is that FileCheck parses “COM:” in the same +manner as any other directive: only the first directive on the line is +recognized as a directive.

    +
  • +
  • For the sake of LIT, FileCheck treats “RUN:” just like “COM:”. If this +is not suitable for your test environment, see --comment-prefixes.

  • +
  • FileCheck does not recognize “COM”, “RUN”, or any user-defined comment +prefix as a comment directive if it’s combined with one of the usual check +directive suffixes, such as “-NEXT:” or “-NOT:”, discussed below. +FileCheck treats such a combination as plain text instead. If it needs to act +as a comment directive for your test environment, define it as such with +--comment-prefixes.

  • +
+
+
+

The “CHECK-NEXT:” directive

+

Sometimes you want to match lines and would like to verify that matches +happen on exactly consecutive lines with no other lines in between them. In +this case, you can use “CHECK:” and “CHECK-NEXT:” directives to specify +this. If you specified a custom check prefix, just use “<PREFIX>-NEXT:”. +For example, something like this works as you’d expect:

+
define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) {
+     %tmp3 = load <2 x double>* %A, align 16
+     %tmp7 = insertelement <2 x double> undef, double %B, i32 0
+     %tmp9 = shufflevector <2 x double> %tmp3,
+                            <2 x double> %tmp7,
+                            <2 x i32> < i32 0, i32 2 >
+     store <2 x double> %tmp9, <2 x double>* %r, align 16
+     ret void
+
+; CHECK:          t2:
+; CHECK:             movl    8(%esp), %eax
+; CHECK-NEXT:        movapd  (%eax), %xmm0
+; CHECK-NEXT:        movhpd  12(%esp), %xmm0
+; CHECK-NEXT:        movl    4(%esp), %eax
+; CHECK-NEXT:        movapd  %xmm0, (%eax)
+; CHECK-NEXT:        ret
+}
+
+
+

“CHECK-NEXT:” directives reject the input unless there is exactly one +newline between it and the previous directive. A “CHECK-NEXT:” cannot be +the first directive in a file.

+
+
+

The “CHECK-SAME:” directive

+

Sometimes you want to match lines and would like to verify that matches happen +on the same line as the previous match. In this case, you can use “CHECK:” +and “CHECK-SAME:” directives to specify this. If you specified a custom +check prefix, just use “<PREFIX>-SAME:”.

+

“CHECK-SAME:” is particularly powerful in conjunction with “CHECK-NOT:” +(described below).

+

For example, the following works like you’d expect:

+
!0 = !DILocation(line: 5, scope: !1, inlinedAt: !2)
+
+; CHECK:       !DILocation(line: 5,
+; CHECK-NOT:               column:
+; CHECK-SAME:              scope: ![[SCOPE:[0-9]+]]
+
+
+

“CHECK-SAME:” directives reject the input if there are any newlines between +it and the previous directive.

+

“CHECK-SAME:” is also useful to avoid writing matchers for irrelevant +fields. For example, suppose you’re writing a test which parses a tool that +generates output like this:

+
Name: foo
+Field1: ...
+Field2: ...
+Field3: ...
+Value: 1
+
+Name: bar
+Field1: ...
+Field2: ...
+Field3: ...
+Value: 2
+
+Name: baz
+Field1: ...
+Field2: ...
+Field3: ...
+Value: 1
+
+
+

To write a test that verifies foo has the value 1, you might first +write this:

+
CHECK: Name: foo
+CHECK: Value: 1{{$}}
+
+
+

However, this would be a bad test: if the value for foo changes, the test +would still pass because the “CHECK: Value: 1” line would match the value +from baz. To fix this, you could add CHECK-NEXT matchers for every +FieldN: line, but that would be verbose, and need to be updated when +Field4 is added. A more succinct way to write the test using the +“CHECK-SAME:” matcher would be as follows:

+
CHECK:      Name: foo
+CHECK:      Value:
+CHECK-SAME:        {{ 1$}}
+
+
+

This verifies that the next time “Value:” appears in the output, it has +the value 1.

+

Note: a “CHECK-SAME:” cannot be the first directive in a file.

+
+
+

The “CHECK-EMPTY:” directive

+

If you need to check that the next line has nothing on it, not even whitespace, +you can use the “CHECK-EMPTY:” directive.

+
declare void @foo()
+
+declare void @bar()
+; CHECK: foo
+; CHECK-EMPTY:
+; CHECK-NEXT: bar
+
+
+

Just like “CHECK-NEXT:” the directive will fail if there is more than one +newline before it finds the next blank line, and it cannot be the first +directive in a file.

+
+
+

The “CHECK-NOT:” directive

+

The “CHECK-NOT:” directive is used to verify that a string doesn’t occur +between two matches (or before the first match, or after the last match). For +example, to verify that a load is removed by a transformation, a test like this +can be used:

+
define i8 @coerce_offset0(i32 %V, i32* %P) {
+  store i32 %V, i32* %P
+
+  %P2 = bitcast i32* %P to i8*
+  %P3 = getelementptr i8* %P2, i32 2
+
+  %A = load i8* %P3
+  ret i8 %A
+; CHECK: @coerce_offset0
+; CHECK-NOT: load
+; CHECK: ret i8
+}
+
+
+
+
+

The “CHECK-COUNT:” directive

+

If you need to match multiple lines with the same pattern over and over again +you can repeat a plain CHECK: as many times as needed. If that looks too +boring you can instead use a counted check “CHECK-COUNT-<num>:”, where +<num> is a positive decimal number. It will match the pattern exactly +<num> times, no more and no less. If you specified a custom check prefix, +just use “<PREFIX>-COUNT-<num>:” for the same effect. +Here is a simple example:

+
Loop at depth 1
+Loop at depth 1
+Loop at depth 1
+Loop at depth 1
+  Loop at depth 2
+    Loop at depth 3
+
+; CHECK-COUNT-6: Loop at depth {{[0-9]+}}
+; CHECK-NOT:     Loop at depth {{[0-9]+}}
+
+
+
+
+

The “CHECK-DAG:” directive

+

If it’s necessary to match strings that don’t occur in a strictly sequential +order, “CHECK-DAG:” could be used to verify them between two matches (or +before the first match, or after the last match). For example, clang emits +vtable globals in reverse order. Using CHECK-DAG:, we can keep the checks +in the natural order:

+
// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s
+
+struct Foo { virtual void method(); };
+Foo f;  // emit vtable
+// CHECK-DAG: @_ZTV3Foo =
+
+struct Bar { virtual void method(); };
+Bar b;
+// CHECK-DAG: @_ZTV3Bar =
+
+
+

CHECK-NOT: directives could be mixed with CHECK-DAG: directives to +exclude strings between the surrounding CHECK-DAG: directives. As a result, +the surrounding CHECK-DAG: directives cannot be reordered, i.e. all +occurrences matching CHECK-DAG: before CHECK-NOT: must not fall behind +occurrences matching CHECK-DAG: after CHECK-NOT:. For example,

+
; CHECK-DAG: BEFORE
+; CHECK-NOT: NOT
+; CHECK-DAG: AFTER
+
+
+

This case will reject input strings where BEFORE occurs after AFTER.

+

With captured variables, CHECK-DAG: is able to match valid topological +orderings of a DAG with edges from the definition of a variable to its use. +It’s useful, e.g., when your test cases need to match different output +sequences from the instruction scheduler. For example,

+
; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2
+; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4
+; CHECK:     mul r5, [[REG1]], [[REG2]]
+
+
+

In this case, any order of those two add instructions will be allowed.

+

If you are defining and using variables in the same CHECK-DAG: block, +be aware that the definition rule can match after its use.

+

So, for instance, the code below will pass:

+
; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0]
+; CHECK-DAG: vmov.32 [[REG2]][1]
+vmov.32 d0[1]
+vmov.32 d0[0]
+
+
+

While this other code will not:

+
; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0]
+; CHECK-DAG: vmov.32 [[REG2]][1]
+vmov.32 d1[1]
+vmov.32 d0[0]
+
+
+

While this can be very useful, it’s also dangerous, because in the case of +register sequence, you must have a strong order (read before write, copy before +use, etc). If the definition your test is looking for doesn’t match (because +of a bug in the compiler), it may match further away from the use, and mask +real bugs away.

+

In those cases, to enforce the order, use a non-DAG directive between DAG-blocks.

+

A CHECK-DAG: directive skips matches that overlap the matches of any +preceding CHECK-DAG: directives in the same CHECK-DAG: block. Not only +is this non-overlapping behavior consistent with other directives, but it’s +also necessary to handle sets of non-unique strings or patterns. For example, +the following directives look for unordered log entries for two tasks in a +parallel program, such as the OpenMP runtime:

+
// CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin
+// CHECK-DAG: [[THREAD_ID]]: task_end
+//
+// CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin
+// CHECK-DAG: [[THREAD_ID]]: task_end
+
+
+

The second pair of directives is guaranteed not to match the same log entries +as the first pair even though the patterns are identical and even if the text +of the log entries is identical because the thread ID manages to be reused.

+
+
+

The “CHECK-LABEL:” directive

+

Sometimes in a file containing multiple tests divided into logical blocks, one +or more CHECK: directives may inadvertently succeed by matching lines in a +later block. While an error will usually eventually be generated, the check +flagged as causing the error may not actually bear any relationship to the +actual source of the problem.

+

In order to produce better error messages in these cases, the “CHECK-LABEL:” +directive can be used. It is treated identically to a normal CHECK +directive except that FileCheck makes an additional assumption that a line +matched by the directive cannot also be matched by any other check present in +match-filename; this is intended to be used for lines containing labels or +other unique identifiers. Conceptually, the presence of CHECK-LABEL divides +the input stream into separate blocks, each of which is processed independently, +preventing a CHECK: directive in one block matching a line in another block. +If --enable-var-scope is in effect, all local variables are cleared at the +beginning of the block.

+

For example,

+
define %struct.C* @C_ctor_base(%struct.C* %this, i32 %x) {
+entry:
+; CHECK-LABEL: C_ctor_base:
+; CHECK: mov [[SAVETHIS:r[0-9]+]], r0
+; CHECK: bl A_ctor_base
+; CHECK: mov r0, [[SAVETHIS]]
+  %0 = bitcast %struct.C* %this to %struct.A*
+  %call = tail call %struct.A* @A_ctor_base(%struct.A* %0)
+  %1 = bitcast %struct.C* %this to %struct.B*
+  %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x)
+  ret %struct.C* %this
+}
+
+define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) {
+entry:
+; CHECK-LABEL: D_ctor_base:
+
+
+

The use of CHECK-LABEL: directives in this case ensures that the three +CHECK: directives only accept lines corresponding to the body of the +@C_ctor_base function, even if the patterns match lines found later in +the file. Furthermore, if one of these three CHECK: directives fails, +FileCheck will recover by continuing to the next block, allowing multiple test +failures to be detected in a single invocation.

+

There is no requirement that CHECK-LABEL: directives contain strings that +correspond to actual syntactic labels in a source or output language: they must +simply uniquely match a single line in the file being verified.

+

CHECK-LABEL: directives cannot contain variable definitions or uses.

+
+
+

Directive modifiers

+

A directive modifier can be appended to a directive by following the directive +with {<modifier>} where the only supported value for <modifier> is +LITERAL.

+

The LITERAL directive modifier can be used to perform a literal match. The +modifier results in the directive not recognizing any syntax to perform regex +matching, variable capture or any substitutions. This is useful when the text +to match would require excessive escaping otherwise. For example, the +following will perform literal matches rather than considering these as +regular expressions:

+
Input: [[[10, 20]], [[30, 40]]]
+Output %r10: [[10, 20]]
+Output %r10: [[30, 40]]
+
+; CHECK{LITERAL}: [[[10, 20]], [[30, 40]]]
+; CHECK-DAG{LITERAL}: [[30, 40]]
+; CHECK-DAG{LITERAL}: [[10, 20]]
+
+
+
+
+

FileCheck Regex Matching Syntax

+

All FileCheck directives take a pattern to match. +For most uses of FileCheck, fixed string matching is perfectly sufficient. For +some things, a more flexible form of matching is desired. To support this, +FileCheck allows you to specify regular expressions in matching strings, +surrounded by double braces: {{yourregex}}. FileCheck implements a POSIX +regular expression matcher; it supports Extended POSIX regular expressions +(ERE). Because we want to use fixed string matching for a majority of what we +do, FileCheck has been designed to support mixing and matching fixed string +matching with regular expressions. This allows you to write things like this:

+
; CHECK: movhpd      {{[0-9]+}}(%esp), {{%xmm[0-7]}}
+
+
+

In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed.

+

Because regular expressions are enclosed with double braces, they are +visually distinct, and you don’t need to use escape characters within the double +braces like you would in C. In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +{{[}][}]}} as your pattern. Or if you are using the repetition count +syntax, for example [[:xdigit:]]{8} to match exactly 8 hex digits, you +would need to add parentheses like this {{([[:xdigit:]]{8})}} to avoid +confusion with FileCheck’s closing double-brace.

+
+
+

FileCheck String Substitution Blocks

+

It is often useful to match a pattern and then verify that it occurs again +later in the file. For codegen tests, this can be useful to allow any +register, but verify that that register is used consistently later. To do +this, FileCheck supports string substitution blocks that allow +string variables to be defined and substituted into patterns. Here is a simple +example:

+
; CHECK: test5:
+; CHECK:    notw     [[REGISTER:%[a-z]+]]
+; CHECK:    andw     {{.*}}[[REGISTER]]
+
+
+

The first check line matches a regex %[a-z]+ and captures it into the +string variable REGISTER. The second line verifies that whatever is in +REGISTER occurs later in the file after an “andw”. FileCheck +string substitution blocks are always contained in [[ ]] pairs, and string +variable names can be formed with the regex [a-zA-Z_][a-zA-Z0-9_]*. If a +colon follows the name, then it is a definition of the variable; otherwise, it +is a substitution.

+

FileCheck variables can be defined multiple times, and substitutions +always get the latest value. Variables can also be substituted later on the +same line they were defined on. For example:

+
; CHECK: op [[REG:r[0-9]+]], [[REG]]
+
+
+

Can be useful if you want the operands of op to be the same register, +and don’t care exactly which register it is.

+

If --enable-var-scope is in effect, variables with names that +start with $ are considered to be global. All other variables are +local. All local variables get undefined at the beginning of each +CHECK-LABEL block. Global variables are not affected by CHECK-LABEL. +This makes it easier to ensure that individual tests are not affected +by variables set in preceding tests.

+
+
+

FileCheck Numeric Substitution Blocks

+

FileCheck also supports numeric substitution blocks that allow +defining numeric variables and checking for numeric values that satisfy a +numeric expression constraint based on those variables via a numeric +substitution. This allows CHECK: directives to verify a numeric relation +between two numbers, such as the need for consecutive registers to be used.

+

The syntax to capture a numeric value is +[[#%<fmtspec>,<NUMVAR>:]] where:

+
    +
  • %<fmtspec>, is an optional format specifier to indicate what number +format to match and the minimum number of digits to expect.

  • +
  • <NUMVAR>: is an optional definition of variable <NUMVAR> from the +captured value.

  • +
+

The syntax of <fmtspec> is: #.<precision><conversion specifier> where:

+
    +
  • # is an optional flag available for hex values (see +<conversion specifier> below) which requires the value matched to be +prefixed by 0x.

  • +
  • .<precision> is an optional printf-style precision specifier in which +<precision> indicates the minimum number of digits that the value matched +must have, expecting leading zeros if needed.

  • +
  • <conversion specifier> is an optional scanf-style conversion specifier +to indicate what number format to match (e.g. hex number). Currently +accepted format specifiers are %u, %d, %x and %X. If absent, +the format specifier defaults to %u.

  • +
+

For example:

+
; CHECK: mov r[[#REG:]], 0x[[#%.8X,ADDR:]]
+
+
+

would match mov r5, 0x0000FEFE and set REG to the value 5 and +ADDR to the value 0xFEFE. Note that due to the precision it would fail +to match mov r5, 0xFEFE.

+

As a result of the numeric variable definition being optional, it is possible +to only check that a numeric value is present in a given format. This can be +useful when the value itself is not useful, for instance:

+
; CHECK-NOT: mov r0, r[[#]]
+
+
+

to check that a value is synthesized rather than moved around.

+

The syntax of a numeric substitution is +[[#%<fmtspec>, <constraint> <expr>]] where:

+
    +
  • <fmtspec> is the same format specifier as for defining a variable but +in this context indicating how a numeric expression value should be matched +against. If absent, both components of the format specifier are inferred from +the matching format of the numeric variable(s) used by the expression +constraint if any, and defaults to %u if no numeric variable is used, +denoting that the value should be unsigned with no leading zeros. In case of +conflict between format specifiers of several numeric variables, the +conversion specifier becomes mandatory but the precision specifier remains +optional.

  • +
  • <constraint> is the constraint describing how the value to match must +relate to the value of the numeric expression. The only currently accepted +constraint is == for an exact match and is the default if +<constraint> is not provided. No matching constraint must be specified +when the <expr> is empty.

  • +
  • <expr> is an expression. An expression is in turn recursively defined +as:

    +
      +
    • a numeric operand, or

    • +
    • an expression followed by an operator and a numeric operand.

    • +
    +

    A numeric operand is a previously defined numeric variable, an integer +literal, or a function. Spaces are accepted before, after and between any of +these elements. Numeric operands have 64-bit precision. Overflow and underflow +are rejected. There is no support for operator precedence, but parentheses +can be used to change the evaluation order.

    +
  • +
+

The supported operators are:

+
+
    +
  • + - Returns the sum of its two operands.

  • +
  • - - Returns the difference of its two operands.

  • +
+
+

The syntax of a function call is <name>(<arguments>) where:

+
    +
  • name is a predefined string literal. Accepted values are:

    +
      +
    • add - Returns the sum of its two operands.

    • +
    • div - Returns the quotient of its two operands.

    • +
    • max - Returns the largest of its two operands.

    • +
    • min - Returns the smallest of its two operands.

    • +
    • mul - Returns the product of its two operands.

    • +
    • sub - Returns the difference of its two operands.

    • +
    +
  • +
  • <arguments> is a comma separated list of expressions.

  • +
+

For example:

+
; CHECK: load r[[#REG:]], [r0]
+; CHECK: load r[[#REG+1]], [r1]
+; CHECK: Loading from 0x[[#%x,ADDR:]]
+; CHECK-SAME: to 0x[[#ADDR + 7]]
+
+
+

The above example would match the text:

+
load r5, [r0]
+load r6, [r1]
+Loading from 0xa0463440 to 0xa0463447
+
+
+

but would not match the text:

+
load r5, [r0]
+load r7, [r1]
+Loading from 0xa0463440 to 0xa0463443
+
+
+

Due to 7 being unequal to 5 + 1 and a0463443 being unequal to +a0463440 + 7.

+

A numeric variable can also be defined to the result of a numeric expression, +in which case the numeric expression constraint is checked and if verified the +variable is assigned to the value. The unified syntax for both checking a +numeric expression and capturing its value into a numeric variable is thus +[[#%<fmtspec>,<NUMVAR>: <constraint> <expr>]] with each element as +described previously. One can use this syntax to make a testcase more +self-describing by using variables instead of values:

+
; CHECK: mov r[[#REG_OFFSET:]], 0x[[#%X,FIELD_OFFSET:12]]
+; CHECK-NEXT: load r[[#]], [r[[#REG_BASE:]], r[[#REG_OFFSET]]]
+
+
+

which would match:

+
mov r4, 0xC
+load r6, [r5, r4]
+
+
+

The --enable-var-scope option has the same effect on numeric variables as +on string variables.

+

Important note: In its current implementation, an expression cannot use a +numeric variable defined earlier in the same CHECK directive.

+
+
+

FileCheck Pseudo Numeric Variables

+

Sometimes there’s a need to verify output that contains line numbers of the +match file, e.g. when testing compiler diagnostics. This introduces a certain +fragility of the match file structure, as “CHECK:” lines contain absolute +line numbers in the same file, which have to be updated whenever line numbers +change due to text addition or deletion.

+

To support this case, FileCheck expressions understand the @LINE pseudo +numeric variable which evaluates to the line number of the CHECK pattern where +it is found.

+

This way match patterns can be put near the relevant test lines and include +relative line number references, for example:

+
// CHECK: test.cpp:[[# @LINE + 4]]:6: error: expected ';' after top level declarator
+// CHECK-NEXT: {{^int a}}
+// CHECK-NEXT: {{^     \^}}
+// CHECK-NEXT: {{^     ;}}
+int a
+
+
+

To support legacy uses of @LINE as a special string variable, +FileCheck also accepts the following uses of @LINE with string +substitution block syntax: [[@LINE]], [[@LINE+<offset>]] and +[[@LINE-<offset>]] without any spaces inside the brackets and where +offset is an integer.

+
+
+

Matching Newline Characters

+

To match newline characters in regular expressions the character class +[[:space:]] can be used. For example, the following pattern:

+
// CHECK: DW_AT_location [DW_FORM_sec_offset] ([[DLOC:0x[0-9a-f]+]]){{[[:space:]].*}}"intd"
+
+
+

matches output of the form (from llvm-dwarfdump):

+
DW_AT_location [DW_FORM_sec_offset]   (0x00000233)
+DW_AT_name [DW_FORM_strp]  ( .debug_str[0x000000c9] = "intd")
+
+
+

letting us set the FileCheck variable DLOC to the desired value +0x00000233, extracted from the line immediately preceding “intd”.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/index.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,223 @@ + + + + + + + + + LLVM Command Guide — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Command Guide

+

The following documents are command descriptions for all of the LLVM tools. +These pages describe how to use the LLVM commands and what their options are. +Note that these pages do not describe all of the options available for all +tools. To get a complete listing, pass the --help (general options) or +--help-hidden (general and debugging options) arguments to the tool you are +interested in.

+ + + + +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/lit.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/lit.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/lit.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/lit.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,814 @@ + + + + + + + + + lit - LLVM Integrated Tester — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

lit - LLVM Integrated Tester

+
+

SYNOPSIS

+

lit [options] [tests]

+
+
+

DESCRIPTION

+

lit is a portable tool for executing LLVM and Clang style test +suites, summarizing their results, and providing indication of failures. +lit is designed to be a lightweight testing tool with as simple a +user interface as possible.

+

lit should be run with one or more tests to run specified on the +command line. Tests can be either individual test files or directories to +search for tests (see TEST DISCOVERY).

+

Each specified test will be executed (potentially concurrently) and once all +tests have been run lit will print summary information on the number +of tests which passed or failed (see TEST STATUS RESULTS). The +lit program will execute with a non-zero exit code if any tests +fail.

+

By default lit will use a succinct progress display and will only +print summary information for test failures. See OUTPUT OPTIONS for +options controlling the lit progress display and output.

+

lit also includes a number of options for controlling how tests are +executed (specific features may depend on the particular test format). See +EXECUTION OPTIONS for more information.

+

Finally, lit also supports additional options for only running a +subset of the tests specified on the command line; see +SELECTION OPTIONS for more information.

+

lit parses options from the environment variable LIT_OPTS after +parsing options from the command line. LIT_OPTS is primarily useful for +supplementing or overriding the command-line options supplied to lit +by check targets defined by a project’s build system.

+

Users interested in the lit architecture or designing a +lit testing implementation should see LIT INFRASTRUCTURE.

+
+
+

GENERAL OPTIONS

+
+
+-h, --help
+

Show the lit help message.

+
+ +
+
+-j N, --workers=N
+

Run N tests in parallel. By default, this is automatically chosen to +match the number of detected available CPUs.

+
+ +
+
+--config-prefix=NAME
+

Search for NAME.cfg and NAME.site.cfg when searching for +test suites, instead of lit.cfg and lit.site.cfg.

+
+ +
+
+-D NAME[=VALUE], --param NAME[=VALUE]
+

Add a user defined parameter NAME with the given VALUE (or the empty +string if not given). The meaning and use of these parameters is test suite +dependent.

+
+ +
+
+

OUTPUT OPTIONS

+
+
+-q, --quiet
+

Suppress any output except for test failures.

+
+ +
+
+-s, --succinct
+

Show less output, for example don’t show information on tests that pass. +Also show a progress bar, unless --no-progress-bar is specified.

+
+ +
+
+-v, --verbose
+

Show more information on test failures, for example the entire test output +instead of just the test result.

+
+ +
+
+-vv, --echo-all-commands
+

Echo all commands to stdout, as they are being executed. +This can be valuable for debugging test failures, as the last echoed command +will be the one which has failed. +lit normally inserts a no-op command (: in the case of bash) +with argument 'RUN: at line N' before each command pipeline, and this +option also causes those no-op commands to be echoed to stdout to help you +locate the source line of the failed command. +This option implies --verbose.

+
+ +
+
+-a, --show-all
+

Show more information about all tests, for example the entire test +commandline and output.

+
+ +
+
+--no-progress-bar
+

Do not use curses based progress bar.

+
+ +
+
+--show-unsupported
+

Show the names of unsupported tests.

+
+ +
+
+--show-xfail
+

Show the names of tests that were expected to fail.

+
+ +
+
+

EXECUTION OPTIONS

+
+
+--path=PATH
+

Specify an additional PATH to use when searching for executables in tests.

+
+ +
+
+--vg
+

Run individual tests under valgrind (using the memcheck tool). The +--error-exitcode argument for valgrind is used so that valgrind failures +will cause the program to exit with a non-zero status.

+

When this option is enabled, lit will also automatically provide a +“valgrind” feature that can be used to conditionally disable (or expect +failure in) certain tests.

+
+ +
+
+--vg-arg=ARG
+

When --vg is used, specify an additional argument to pass to +valgrind itself.

+
+ +
+
+--vg-leak
+

When --vg is used, enable memory leak checks. When this option is +enabled, lit will also automatically provide a “vg_leak” +feature that can be used to conditionally disable (or expect failure in) +certain tests.

+
+ +
+
+--time-tests
+

Track the wall time individual tests take to execute and include the results +in the summary output. This is useful for determining which tests in a test +suite take the most time to execute.

+
+ +
+
+--ignore-fail
+

Exit with status zero even if some tests fail.

+
+ +
+
+--no-indirectly-run-check
+

Do not error if a test would not be run if the user had specified the +containing directory instead of naming the test directly.

+
+ +
+
+

SELECTION OPTIONS

+

By default, lit will run failing tests first, then run tests in descending +execution time order to optimize concurrency. The execution order can be +changed using the --order option.

+

The timing data is stored in the test_exec_root in a file named +.lit_test_times.txt. If this file does not exist, then lit checks the +test_source_root for the file to optionally accelerate clean builds.

+
+
+--shuffle
+

Run the tests in a random order, not failing/slowest first. Deprecated, +use --order instead.

+
+ +
+
+--max-failures N
+

Stop execution after the given number N of failures. +An integer argument should be passed on the command line +prior to execution.

+
+ +
+
+--max-tests=N
+

Run at most N tests and then terminate.

+
+ +
+
+--max-time=N
+

Spend at most N seconds (approximately) running tests and then terminate. +Note that this is not an alias for --timeout; the two are +different kinds of maximums.

+
+ +
+
+--num-shards=M
+

Divide the set of selected tests into M equal-sized subsets or +“shards”, and run only one of them. Must be used with the +--run-shard=N option, which selects the shard to run. The environment +variable LIT_NUM_SHARDS can also be used in place of this +option. These two options provide a coarse mechanism for partitioning large +testsuites, for parallel execution on separate machines (say in a large +testing farm).

+
+ +
+
+--order={lexical,random,smart}
+

Define the order in which tests are run. The supported values are:

+
    +
  • lexical - tests will be run in lexical order according to the test file +path. This option is useful when predictable test order is desired.

  • +
  • random - tests will be run in random order.

  • +
  • smart - tests that failed previously will be run first, then the remaining +tests, all in descending execution time order. This is the default as it +optimizes concurrency.

  • +
+
+ +
+
+--run-shard=N
+

Select which shard to run, assuming the --num-shards=M option was +provided. The two options must be used together, and the value of N +must be in the range 1..M. The environment variable +LIT_RUN_SHARD can also be used in place of this option.

+
+ +
+
+--timeout=N
+

Spend at most N seconds (approximately) running each individual test. +0 means no time limit, and 0 is the default. Note that this is not an +alias for --max-time; the two are different kinds of maximums.

+
+ +
+
+--filter=REGEXP
+

Run only those tests whose name matches the regular expression specified in +REGEXP. The environment variable LIT_FILTER can also be used in place +of this option, which is especially useful in environments where the call +to lit is issued indirectly.

+
+ +
+
+--filter-out=REGEXP
+

Filter out those tests whose name matches the regular expression specified in +REGEXP. The environment variable LIT_FILTER_OUT can also be used in +place of this option, which is especially useful in environments where the +call to lit is issued indirectly.

+
+ +
+
+--xfail=LIST
+

Treat those tests whose name is in the semicolon separated list LIST as +XFAIL. This can be helpful when one does not want to modify the test +suite. The environment variable LIT_XFAIL can also be used in place of +this option, which is especially useful in environments where the call to +lit is issued indirectly.

+

A test name can be specified as a file name relative to the test suite directory. +For example:

+
LIT_XFAIL="affinity/kmp-hw-subset.c;offloading/memory_manager.cpp"
+
+
+

In this case, all of the following tests are treated as XFAIL:

+
libomp :: affinity/kmp-hw-subset.c
+libomptarget :: nvptx64-nvidia-cuda :: offloading/memory_manager.cpp
+libomptarget :: x86_64-pc-linux-gnu :: offloading/memory_manager.cpp
+
+
+

Alternatively, a test name can be specified as the full test name +reported in LIT output. For example, we can adjust the previous +example not to treat the nvptx64-nvidia-cuda version of +offloading/memory_manager.cpp as XFAIL:

+
LIT_XFAIL="affinity/kmp-hw-subset.c;libomptarget :: x86_64-pc-linux-gnu :: offloading/memory_manager.cpp"
+
+
+
+ +
+
+--xfail-not=LIST
+

Do not treat the specified tests as XFAIL. The environment variable +LIT_XFAIL_NOT can also be used in place of this option. The syntax is the +same as for --xfail and LIT_XFAIL. --xfail-not and +LIT_XFAIL_NOT always override all other XFAIL specifications, +including an --xfail appearing later on the command line. The +primary purpose is to suppress an XPASS result without modifying a test +case that uses the XFAIL directive.

+
+ +
+
+

ADDITIONAL OPTIONS

+
+
+--debug
+

Run lit in debug mode, for debugging configuration issues and +lit itself.

+
+ +
+
+--show-suites
+

List the discovered test suites and exit.

+
+ +
+
+--show-tests
+

List all of the discovered tests and exit.

+
+ +
+
+

EXIT STATUS

+

lit will exit with an exit code of 1 if there are any FAIL or XPASS +results. Otherwise, it will exit with the status 0. Other exit codes are used +for non-test related failures (for example a user error or an internal program +error).

+
+
+

TEST DISCOVERY

+

The inputs passed to lit can be either individual tests, or entire +directories or hierarchies of tests to run. When lit starts up, the +first thing it does is convert the inputs into a complete list of tests to run +as part of test discovery.

+

In the lit model, every test must exist inside some test suite. +lit resolves the inputs specified on the command line to test suites +by searching upwards from the input path until it finds a lit.cfg or +lit.site.cfg file. These files serve as both a marker of test suites +and as configuration files which lit loads in order to understand +how to find and run the tests inside the test suite.

+

Once lit has mapped the inputs into test suites it traverses the +list of inputs adding tests for individual files and recursively searching for +tests in directories.

+

This behavior makes it easy to specify a subset of tests to run, while still +allowing the test suite configuration to control exactly how tests are +interpreted. In addition, lit always identifies tests by the test +suite they are in, and their relative path inside the test suite. For +appropriately configured projects, this allows lit to provide +convenient and flexible support for out-of-tree builds.

+
+
+

TEST STATUS RESULTS

+

Each test ultimately produces one of the following eight results:

+

PASS

+
+

The test succeeded.

+
+

FLAKYPASS

+
+

The test succeeded after being re-run more than once. This only applies to +tests containing an ALLOW_RETRIES: annotation.

+
+

XFAIL

+
+

The test failed, but that is expected. This is used for test formats which allow +specifying that a test does not currently work, but wish to leave it in the test +suite.

+
+

XPASS

+
+

The test succeeded, but it was expected to fail. This is used for tests which +were specified as expected to fail, but are now succeeding (generally because +the feature they test was broken and has been fixed).

+
+

FAIL

+
+

The test failed.

+
+

UNRESOLVED

+
+

The test result could not be determined. For example, this occurs when the test +could not be run, the test itself is invalid, or the test was interrupted.

+
+

UNSUPPORTED

+
+

The test is not supported in this environment. This is used by test formats +which can report unsupported tests.

+
+

TIMEOUT

+
+

The test was run, but it timed out before it was able to complete. This is +considered a failure.

+
+

Depending on the test format tests may produce additional information about +their status (generally only for failures). See the OUTPUT OPTIONS +section for more information.

+
+
+

LIT INFRASTRUCTURE

+

This section describes the lit testing architecture for users interested in +creating a new lit testing implementation, or extending an existing one.

+

lit proper is primarily an infrastructure for discovering and running +arbitrary tests, and to expose a single convenient interface to these +tests. lit itself doesn’t know how to run tests, rather this logic is +defined by test suites.

+
+

TEST SUITES

+

As described in TEST DISCOVERY, tests are always located inside a test +suite. Test suites serve to define the format of the tests they contain, the +logic for finding those tests, and any additional information to run the tests.

+

lit identifies test suites as directories containing lit.cfg or +lit.site.cfg files (see also --config-prefix). Test suites are +initially discovered by recursively searching up the directory hierarchy for +all the input files passed on the command line. You can use +--show-suites to display the discovered test suites at startup.

+

Once a test suite is discovered, its config file is loaded. Config files +themselves are Python modules which will be executed. When the config file is +executed, two important global variables are predefined:

+

lit_config

+
+

The global lit configuration object (a LitConfig instance), which defines +the builtin test formats, global configuration parameters, and other helper +routines for implementing test configurations.

+
+

config

+
+

This is the config object (a TestingConfig instance) for the test suite, +which the config file is expected to populate. The following variables are also +available on the config object, some of which must be set by the config and +others are optional or predefined:

+

name [required] The name of the test suite, for use in reports and +diagnostics.

+

test_format [required] The test format object which will be used to +discover and run tests in the test suite. Generally this will be a builtin test +format available from the lit.formats module.

+

test_source_root The filesystem path to the test suite root. For out-of-dir +builds this is the directory that will be scanned for tests.

+

test_exec_root For out-of-dir builds, the path to the test suite root inside +the object directory. This is where tests will be run and temporary output files +placed.

+

environment A dictionary representing the environment to use when executing +tests in the suite.

+

standalone_tests When true, mark a directory with tests expected to be run +standalone. Test discovery is disabled for that directory and +--no-indirectly-run-check is in effect. lit.suffixes and lit.excludes +must be empty when this variable is true.

+

suffixes For lit test formats which scan directories for tests, this +variable is a list of suffixes to identify test files. Used by: ShTest.

+

substitutions For lit test formats which substitute variables into a test +script, the list of substitutions to perform. Used by: ShTest.

+

unsupported Mark an unsupported directory, all tests within it will be +reported as unsupported. Used by: ShTest.

+

parent The parent configuration, this is the config object for the directory +containing the test suite, or None.

+

root The root configuration. This is the top-most lit configuration in +the project.

+

pipefail Normally a test using a shell pipe fails if any of the commands +on the pipe fail. If this is not desired, setting this variable to false +makes the test fail only if the last command in the pipe fails.

+

available_features A set of features that can be used in XFAIL, +REQUIRES, and UNSUPPORTED directives.

+
+
+
+

TEST DISCOVERY

+

Once test suites are located, lit recursively traverses the source +directory (following test_source_root) looking for tests. When lit +enters a sub-directory, it first checks to see if a nested test suite is +defined in that directory. If so, it loads that test suite recursively, +otherwise it instantiates a local test config for the directory (see +LOCAL CONFIGURATION FILES).

+

Tests are identified by the test suite they are contained within, and the +relative path inside that suite. Note that the relative path may not refer to +an actual file on disk; some test formats (such as GoogleTest) define +“virtual tests” which have a path that contains both the path to the actual +test file and a subpath to identify the virtual test.

+
+
+

LOCAL CONFIGURATION FILES

+

When lit loads a subdirectory in a test suite, it instantiates a +local test configuration by cloning the configuration for the parent directory +— the root of this configuration chain will always be a test suite. Once the +test configuration is cloned lit checks for a lit.local.cfg file +in the subdirectory. If present, this file will be loaded and can be used to +specialize the configuration for each individual directory. This facility can +be used to define subdirectories of optional tests, or to change other +configuration parameters — for example, to change the test format, or the +suffixes which identify test files.

+
+
+

SUBSTITUTIONS

+

lit allows patterns to be substituted inside RUN commands. It also +provides the following base set of substitutions, which are defined in +TestRunner.py:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Macro

Substitution

%s

source path (path to the file currently being run)

%S

source dir (directory of the file currently being run)

%p

same as %S

%{pathsep}

path separator

%t

temporary file name unique to the test

%basename_t

The last path component of %t but without the .tmp extension

%T

parent directory of %t (not unique, deprecated, do not use)

%%

%

%/s

%s but \ is replaced by /

%/S

%S but \ is replaced by /

%/p

%p but \ is replaced by /

%/t

%t but \ is replaced by /

%/T

%T but \ is replaced by /

%{/s:regex_replacement}

%/s but escaped for use in the replacement of a s@@@ command in sed

%{/S:regex_replacement}

%/S but escaped for use in the replacement of a s@@@ command in sed

%{/p:regex_replacement}

%/p but escaped for use in the replacement of a s@@@ command in sed

%{/t:regex_replacement}

%/t but escaped for use in the replacement of a s@@@ command in sed

%{/T:regex_replacement}

%/T but escaped for use in the replacement of a s@@@ command in sed

%:s

On Windows, %/s but a : is removed if it's the second character. +Otherwise, %s but with a single leading / removed.

%:S

On Windows, %/S but a : is removed if it's the second character. +Otherwise, %S but with a single leading / removed.

%:p

On Windows, %/p but a : is removed if it's the second character. +Otherwise, %p but with a single leading / removed.

%:t

On Windows, %/t but a : is removed if it's the second character. +Otherwise, %t but with a single leading / removed.

%:T

On Windows, %/T but a : is removed if it's the second character. +Otherwise, %T but with a single leading / removed.

+
+

Other substitutions are provided that are variations on this base set and +further substitution patterns can be defined by each test module. See the +section LOCAL CONFIGURATION FILES.

+

By default, substitutions are expanded exactly once, so that if e.g. a +substitution %build is defined on top of another substitution %cxx, +%build will expand to %cxx textually, not to what %cxx expands to. +However, if the recursiveExpansionLimit property of the TestingConfig +is set to a non-negative integer, substitutions will be expanded recursively +until that limit is reached. It is an error if the limit is reached and +expanding substitutions again would yield a different result.

+

More detailed information on substitutions can be found in the +LLVM Testing Infrastructure Guide.

+
+
+

TEST RUN OUTPUT FORMAT

+

The lit output for a test run conforms to the following schema, in +both short and verbose modes (although in short mode no PASS lines will be +shown). This schema has been chosen to be relatively easy to reliably parse by +a machine (for example in buildbot log scraping), and for other tools to +generate.

+

Each test result is expected to appear on a line that matches:

+
<result code>: <test name> (<progress info>)
+
+
+

where <result code> is a standard test result such as PASS, FAIL, XFAIL, +XPASS, UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and +REGRESSED are also allowed.

+

The <test name> field can consist of an arbitrary string containing no +newline.

+

The <progress info> field can be used to report progress information such +as (1/300) or can be empty, but even when empty the parentheses are required.

+

Each test result may include additional (multiline) log information in the +following format:

+
<log delineator> TEST '(<test name>)' <trailing delineator>
+... log message ...
+<log delineator>
+
+
+

where <test name> should be the name of a preceding reported test, <log +delineator> is a string of “*” characters at least four characters long +(the recommended length is 20), and <trailing delineator> is an arbitrary +(unparsed) string.

+

The following is an example of a test run output which consists of four tests A, +B, C, and D, and a log message for the failing test C:

+
PASS: A (1 of 4)
+PASS: B (2 of 4)
+FAIL: C (3 of 4)
+******************** TEST 'C' FAILED ********************
+Test 'C' failed as a result of exit code 1.
+********************
+PASS: D (4 of 4)
+
+
+
+
+

LIT EXAMPLE TESTS

+

The lit distribution contains several example implementations of +test suites in the ExampleTests directory.

+
+
+
+

SEE ALSO

+

valgrind(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llc.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llc.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llc.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llc.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,400 @@ + + + + + + + + + llc - LLVM static compiler — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llc - LLVM static compiler

+
+

SYNOPSIS

+

llc [options] [filename]

+
+
+

DESCRIPTION

+

The llc command compiles LLVM source inputs into assembly language +for a specified architecture. The assembly language output can then be passed +through a native assembler and linker to generate a native executable.

+

The choice of architecture for the output assembly code is automatically +determined from the input file, unless the -march option is used to +override the default.

+
+
+

OPTIONS

+

If filename is “-” or omitted, llc reads from standard input. +Otherwise, it will read from filename. Inputs can be in either the LLVM assembly +language format (.ll) or the LLVM bitcode format (.bc).

+

If the -o option is omitted, then llc will send its output +to standard output if the input is from standard input. If the -o +option specifies “-”, then the output will also be sent to standard output.

+

If no -o option is specified and an input file other than “-” is +specified, then llc creates the output filename by taking the input +filename, removing any existing .bc extension, and adding a .s suffix.

+

Other llc options are described below.

+
+

End-user Options

+
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-o <filename>
+

Use <filename> as the output filename. See the summary above for more +details.

+
+ +
+
+-O=uint
+

Generate code at different optimization levels. These correspond to the +-O0, -O1, -O2, and -O3 optimization levels used by +clang.

+
+ +
+
+-mtriple=<target triple>
+

Override the target triple specified in the input file with the specified +string.

+
+ +
+
+-march=<arch>
+

Specify the architecture for which to generate assembly, overriding the target +encoded in the input file. See the output of llc -help for a list of +valid architectures. By default this is inferred from the target triple or +autodetected to the current architecture.

+
+ +
+
+-mcpu=<cpuname>
+

Specify a specific chip in the current architecture to generate code for. +By default this is inferred from the target triple and autodetected to +the current architecture. For a list of available CPUs, use:

+
llvm-as < /dev/null | llc -march=xyz -mcpu=help
+
+
+
+ +
+
+-filetype=<output file type>
+

Specify what kind of output llc should generate. Options are: asm +for textual assembly ('.s'), obj for native object files ('.o') +and null for not emitting anything (for performance testing).

+

Note that not all targets support all options.

+
+ +
+
+-mattr=a1,+a2,-a3,...
+

Override or control specific attributes of the target, such as whether SIMD +operations are enabled or not. The default set of attributes is set by the +current CPU. For a list of available attributes, use:

+
llvm-as < /dev/null | llc -march=xyz -mattr=help
+
+
+
+ +
+
+--frame-pointer
+

Specify effect of frame pointer elimination optimization (all,non-leaf,none).

+
+ +
+
+--disable-excess-fp-precision
+

Disable optimizations that may produce excess precision for floating point. +Note that this option can dramatically slow down code on some systems +(e.g. X86).

+
+ +
+
+--enable-no-infs-fp-math
+

Enable optimizations that assume no Inf values.

+
+ +
+
+--enable-no-nans-fp-math
+

Enable optimizations that assume no NAN values.

+
+ +
+
+--enable-no-signed-zeros-fp-math
+

Enable FP math optimizations that assume the sign of 0 is insignificant.

+
+ +
+
+--enable-no-trapping-fp-math
+

Enable setting the FP exceptions build attribute not to use exceptions.

+
+ +
+
+--enable-unsafe-fp-math
+

Enable optimizations that make unsafe assumptions about IEEE math (e.g. that +addition is associative) or may not work for all input ranges. These +optimizations allow the code generator to make use of some instructions which +would otherwise not be usable (such as fsin on X86).

+
+ +
+
+--stats
+

Print statistics recorded by code-generation passes.

+
+ +
+
+--time-passes
+

Record the amount of time needed for each pass and print a report to standard +error.

+
+ +
+
+--load=<dso_path>
+

Dynamically load dso_path (a path to a dynamically shared object) that +implements an LLVM target. This will permit the target name to be used with +the -march option so that code can be generated for that target.

+
+ +
+
+-meabi=[default|gnu|4|5]
+

Specify which EABI version the generated code should conform to. Valid EABI versions are gnu, +4 and 5. Default value (default) depends on the triple.

+
+ +
+
+-stack-size-section
+

Emit the .stack_sizes section which contains stack size metadata. The section +contains an array of pairs of function symbol values (pointer size) and stack +sizes (unsigned LEB128). The stack size values only include the space allocated +in the function prologue. Functions with dynamic stack allocations are not +included.

+
+ +
+
+-remarks-section
+

Emit the __remarks (MachO) section which contains metadata about remark +diagnostics.

+
+ +
+
+

Tuning/Configuration Options

+
+
+--print-after-isel
+

Print generated machine code after instruction selection (useful for debugging).

+
+ +
+
+--regalloc=<allocator>
+

Specify the register allocator to use. +Valid register allocators are:

+

basic

+
+

Basic register allocator.

+
+

fast

+
+

Fast register allocator. It is the default for unoptimized code.

+
+

greedy

+
+

Greedy register allocator. It is the default for optimized code.

+
+

pbqp

+
+

Register allocator based on ‘Partitioned Boolean Quadratic Programming’.

+
+
+ +
+
+--spiller=<spiller>
+

Specify the spiller to use for register allocators that support it. Currently +this option is used only by the linear scan register allocator. The default +spiller is local. Valid spillers are:

+

simple

+
+

Simple spiller

+
+

local

+
+

Local spiller

+
+
+ +
+
+

Intel IA-32-specific Options

+
+
+--x86-asm-syntax=[att|intel]
+

Specify whether to emit assembly code in AT&T syntax (the default) or Intel +syntax.

+
+ +
+
+
+

EXIT STATUS

+

If llc succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value.

+
+
+

SEE ALSO

+

lli(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/lldb-tblgen.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/lldb-tblgen.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/lldb-tblgen.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/lldb-tblgen.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + lldb-tblgen - Description to C++ Code for LLDB — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

lldb-tblgen - Description to C++ Code for LLDB

+
+

SYNOPSIS

+

lldb-tblgen [options] [filename]

+
+
+

DESCRIPTION

+

lldb-tblgen is a program that translates compiler-related target +description (.td) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler.

+

Please see tblgen - Description to C++ Code +for a description of the filename argument and options, including the +options common to all *-tblgen programs.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/lli.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/lli.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/lli.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/lli.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,397 @@ + + + + + + + + + lli - directly execute programs from LLVM bitcode — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

lli - directly execute programs from LLVM bitcode

+
+

SYNOPSIS

+

lli [options] [filename] [program args]

+
+
+

DESCRIPTION

+

lli directly executes programs in LLVM bitcode format. It takes a program +in LLVM bitcode format and executes it using a just-in-time compiler or an +interpreter.

+

lli is not an emulator. It will not execute IR of different architectures +and it can only interpret (or JIT-compile) for the host architecture.

+

The JIT compiler takes the same arguments as other tools, like llc, +but they don’t necessarily work for the interpreter.

+

If filename is not specified, then lli reads the LLVM bitcode for the +program from standard input.

+

The optional args specified on the command line are passed to the program as +arguments.

+
+
+

GENERAL OPTIONS

+
+
+-fake-argv0=executable
+

Override the argv[0] value passed into the executing program.

+
+ +
+
+-force-interpreter={false,true}
+

If set to true, use the interpreter even if a just-in-time compiler is available +for this architecture. Defaults to false.

+
+ +
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-load=pluginfilename
+

Causes lli to load the plugin (shared object) named pluginfilename and use +it for optimization.

+
+ +
+
+-stats
+

Print statistics from the code-generation passes. This is only meaningful for +the just-in-time compiler, at present.

+
+ +
+
+-time-passes
+

Record the amount of time needed for each code-generation pass and print it to +standard error.

+
+ +
+
+-version
+

Print out the version of lli and exit without doing anything else.

+
+ +
+
+

TARGET OPTIONS

+
+
+-mtriple=target triple
+

Override the target triple specified in the input bitcode file with the +specified string. This may result in a crash if you pick an +architecture which is not compatible with the current system.

+
+ +
+
+-march=arch
+

Specify the architecture for which to generate assembly, overriding the target +encoded in the bitcode file. See the output of llc -help for a list of +valid architectures. By default this is inferred from the target triple or +autodetected to the current architecture.

+
+ +
+
+-mcpu=cpuname
+

Specify a specific chip in the current architecture to generate code for. +By default this is inferred from the target triple and autodetected to +the current architecture. For a list of available CPUs, use: +llvm-as < /dev/null | llc -march=xyz -mcpu=help

+
+ +
+
+-mattr=a1,+a2,-a3,...
+

Override or control specific attributes of the target, such as whether SIMD +operations are enabled or not. The default set of attributes is set by the +current CPU. For a list of available attributes, use: +llvm-as < /dev/null | llc -march=xyz -mattr=help

+
+ +
+
+

FLOATING POINT OPTIONS

+
+
+-disable-excess-fp-precision
+

Disable optimizations that may increase floating point precision.

+
+ +
+
+-enable-no-infs-fp-math
+

Enable optimizations that assume no Inf values.

+
+ +
+
+-enable-no-nans-fp-math
+

Enable optimizations that assume no NAN values.

+
+ +
+
+-enable-unsafe-fp-math
+

Causes lli to enable optimizations that may decrease floating point +precision.

+
+ +
+
+-soft-float
+

Causes lli to generate software floating point library calls instead of +equivalent hardware instructions.

+
+ +
+
+

CODE GENERATION OPTIONS

+
+
+-code-model=model
+

Choose the code model from:

+
default: Target default code model
+tiny: Tiny code model
+small: Small code model
+kernel: Kernel code model
+medium: Medium code model
+large: Large code model
+
+
+
+ +
+
+-disable-post-RA-scheduler
+

Disable scheduling after register allocation.

+
+ +
+
+-disable-spill-fusing
+

Disable fusing of spill code into instructions.

+
+ +
+
+-jit-enable-eh
+

Exception handling should be enabled in the just-in-time compiler.

+
+ +
+
+-join-liveintervals
+

Coalesce copies (default=true).

+
+ +
+
+-nozero-initialized-in-bss
+

Don’t place zero-initialized symbols into the BSS section.

+
+ +
+
+-pre-RA-sched=scheduler
+

Instruction schedulers available (before register allocation):

+
=default: Best scheduler for the target
+=none: No scheduling: breadth first sequencing
+=simple: Simple two pass scheduling: minimize critical path and maximize processor utilization
+=simple-noitin: Simple two pass scheduling: Same as simple except using generic latency
+=list-burr: Bottom-up register reduction list scheduling
+=list-tdrr: Top-down register reduction list scheduling
+=list-td: Top-down list scheduler
+
+
+
+ +
+
+-regalloc=allocator
+

Register allocator to use (default=linearscan)

+
=bigblock: Big-block register allocator
+=linearscan: linear scan register allocator
+=local: local register allocator
+=simple: simple register allocator
+
+
+
+ +
+
+-relocation-model=model
+

Choose relocation model from:

+
=default: Target default relocation model
+=static: Non-relocatable code
+=pic: Fully relocatable, position independent code
+=dynamic-no-pic: Relocatable external references, non-relocatable code
+
+
+
+ +
+
+-spiller
+

Spiller to use (default=local)

+
=simple: simple spiller
+=local: local spiller
+
+
+
+ +
+
+-x86-asm-syntax=syntax
+

Choose style of code to emit from X86 backend:

+
=att: Emit AT&T-style assembly
+=intel: Emit Intel-style assembly
+
+
+
+ +
+
+

EXIT STATUS

+

If lli fails to load the program, it will exit with an exit code of 1. +Otherwise, it will return the exit code of the program it executes.

+
+
+

SEE ALSO

+

llc(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-addr2line.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-addr2line.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-addr2line.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-addr2line.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,174 @@ + + + + + + + + + llvm-addr2line - a drop-in replacement for addr2line — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-addr2line - a drop-in replacement for addr2line

+
+

SYNOPSIS

+

llvm-addr2line [options]

+
+
+

DESCRIPTION

+

llvm-addr2line is an alias for the llvm-symbolizer(1) +tool with different defaults. The goal is to make it a drop-in replacement for +GNU’s addr2line.

+

Here are some of those differences:

+
    +
  • llvm-addr2line interprets all addresses as hexadecimal and ignores an +optional 0x prefix, whereas llvm-symbolizer attempts to determine +the base from the literal’s prefix and defaults to decimal if there is no +prefix.

  • +
  • llvm-addr2line defaults not to print function names. Use -f to enable +that.

  • +
  • llvm-addr2line defaults not to demangle function names. Use -C to +switch the demangling on.

  • +
  • llvm-addr2line defaults not to print inlined frames. Use -i to show +inlined frames for a source code location in an inlined function.

  • +
  • llvm-addr2line uses --output-style=GNU by default.

  • +
  • llvm-addr2line parses options from the environment variable +LLVM_ADDR2LINE_OPTS instead of from LLVM_SYMBOLIZER_OPTS.

  • +
+
+
+

SEE ALSO

+

llvm-symbolizer(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-ar.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-ar.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-ar.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-ar.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,554 @@ + + + + + + + + + llvm-ar - LLVM archiver — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-ar - LLVM archiver

+
+

SYNOPSIS

+

llvm-ar [-]{dmpqrstx}[abcDilLNoOPsSTuUvV] [relpos] [count] archive [files…]

+
+
+

DESCRIPTION

+

The llvm-ar command is similar to the common Unix utility, +ar. It archives several files, such as objects and LLVM bitcode +files into a single archive library that can be linked into a program. However, +the archive can contain any kind of file. By default, llvm-ar +generates a symbol table that makes linking faster because only the symbol +table needs to be consulted, not each individual file member of the archive.

+

The llvm-ar command can be used to read archive files in SVR4, +GNU, BSD and Darwin format, and write in the GNU, BSD, and Darwin style +archive files. If an SVR4 format archive is used with the r +(replace), d (delete), m (move) or q +(quick update) operations, the archive will be reconstructed in the format +defined by --format.

+

Here’s where llvm-ar departs from previous ar +implementations:

+

The following option is not supported

+
+

[f] - truncate inserted filenames

+
+

The following options are ignored for compatibility

+
+

--plugin=<string> - load a plugin which adds support for other file formats

+

[l] - ignored in ar

+
+

Symbol Table

+
+

Since llvm-ar supports bitcode files, the symbol table it creates +includes both native and bitcode symbols.

+
+

Deterministic Archives

+
+

By default, llvm-ar always uses zero for timestamps and UIDs/GIDs +to write archives in a deterministic mode. This is equivalent to the +D modifier being enabled by default. If you wish to maintain +compatibility with other ar implementations, you can pass the +U modifier to write actual timestamps and UIDs/GIDs.

+
+

Windows Paths

+
+

When on Windows llvm-ar treats the names of archived files in the same +case sensitive manner as the operating system. When on a non-Windows machine +llvm-ar does not consider character case.

+
+
+
+

OPTIONS

+

llvm-ar operations are compatible with other ar +implementations. However, there are a few modifiers (L) that are not +found in other ar implementations. The options for +llvm-ar specify a single basic Operation to perform on the archive, +a variety of Modifiers for that Operation, the name of the archive file, and an +optional list of file names. If the files option is not specified, it +generally means either “none” or “all” members, depending on the operation. The +Options, Operations and Modifiers are explained in the sections below.

+

The minimal set of options is at least one operator and the name of the +archive.

+
+

Operations

+
+
+d [NT]
+

Delete files from the archive. The N and T modifiers +apply to this operation. The files options specify which members should be +removed from the archive. It is not an error if a specified file does not +appear in the archive. If no files are specified, the archive is not +modified.

+
+ +
+
+m [abi]
+

Move files from one location in the archive to another. The a, +b, and i modifiers apply to this operation. The files +will all be moved to the location given by the modifiers. If no modifiers are +used, the files will be moved to the end of the archive. If no files are +specified, the archive is not modified.

+
+ +
+
+p [v]
+

Print files to the standard output stream. If no files are specified, the +entire archive is printed. With the v modifier, +llvm-ar also prints out the name of the file being output. Printing +binary files is ill-advised as they might confuse your terminal settings. The +p operation never modifies the archive.

+
+ +
+
+q [LT]
+

Quickly append files to the end of the archive without removing +duplicates. If no files are specified, the archive is not modified. The +behavior when appending one archive to another depends upon whether the +L and T modifiers are used:

+
    +
  • Appending a regular archive to a regular archive will append the archive +file. If the L modifier is specified the members will be appended +instead.

  • +
  • Appending a regular archive to a thin archive requires the T +modifier and will append the archive file. The L modifier is not +supported.

  • +
  • Appending a thin archive to a regular archive will append the archive file. +If the L modifier is specified the members will be appended +instead.

  • +
  • Appending a thin archive to a thin archive will always quick append its +members.

  • +
+
+ +
+
+r [abTu]
+

Replace existing files or insert them at the end of the archive if +they do not exist. The a, b, T and u +modifiers apply to this operation. If no files are specified, the archive +is not modified.

+
+ +

t [vO]

+
+

Print the table of contents. Without any modifiers, this operation just prints +the names of the members to the standard output stream. With the v +modifier, llvm-ar also prints out the file type (B=bitcode, +S=symbol table, blank=regular file), the permission mode, the owner and group, +the size, and the date. The owner and group are ignored when extracting files and set to placeholder values when adding files. With the O modifier, display member offsets. If +any files are specified, the listing is only for those files. If no files +are specified, the table of contents for the whole archive is printed.

+
+
+
+V
+

A synonym for the --version option.

+
+ +
+
+x [oP]
+

Extract archive members back to files. The o modifier applies +to this operation. This operation retrieves the indicated files from the +archive and writes them back to the operating system’s file system. If no +files are specified, the entire archive is extracted.

+
+ +
+
+

Modifiers (operation specific)

+

The modifiers below are specific to certain operations. See the Operations +section to determine which modifiers are applicable to which operations.

+
+
+a
+

When inserting or moving member files, this option specifies the destination +of the new files as being after the relpos member. If relpos is not found, +the files are placed at the end of the archive. relpos cannot be +consumed without either a, b or i.

+
+ +
+
+b
+

When inserting or moving member files, this option specifies the destination +of the new files as being before the relpos member. If relpos is not +found, the files are placed at the end of the archive. relpos cannot +be consumed without either a, b or i. This +modifier is identical to the i modifier.

+
+ +
+
+i
+

A synonym for the b option.

+
+ +
+
+L
+

When quick appending an archive, instead quick append its members. This +is a feature for llvm-ar that is not found in gnu-ar.

+
+ +
+
+N
+

When extracting or deleting a member that shares its name with another member, +the count parameter allows you to supply a positive whole number that +selects the instance of the given name, with “1” indicating the first +instance. If N is not specified the first member of that name will +be selected. If count is not supplied, the operation fails. count cannot be zero or negative.

+
+ +
+
+o
+

When extracting files, use the modification times of any files as they +appear in the archive. By default files extracted from the archive +use the time of extraction.

+
+ +
+
+O
+

Display member offsets inside the archive.

+
+ +
+
+T
+

When creating or modifying an archive, this option specifies that the +archive will be thin. By default, archives are not created as thin +archives and when modifying a thin archive, it will be converted to a regular +archive.

+
+ +
+
+v
+

When printing files or the archive table of contents, this modifier +instructs llvm-ar to include additional information in the output.

+
+ +
+
+

Modifiers (generic)

+

The modifiers below may be applied to any operation.

+
+
+c
+

For the r (replace) and q (quick update) operations, +llvm-ar will always create the archive if it doesn’t exist. +Normally, llvm-ar will print a warning message indicating that the +archive is being created. Using this modifier turns off +that warning.

+
+ +
+
+D
+

Use zero for timestamps and UIDs/GIDs. This is set by default.

+
+ +
+
+P
+

Use full paths when matching member names rather than just the file name. +This can be useful when manipulating an archive generated by another +archiver, as some allow paths as member names. This is the default behavior +for thin archives.

+
+ +
+
+s
+

This modifier requests that an archive index (or symbol table) be added to the +archive, as if using ranlib. The symbol table will contain all the +externally visible functions and global variables defined by all the bitcode +files in the archive. By default llvm-ar generates symbol tables in +archives. This can also be used as an operation.

+
+ +
+
+S
+

This modifier is the opposite of the s modifier. It instructs +llvm-ar to not build the symbol table. If both s and +S are used, the last modifier to occur in the options will prevail.

+
+ +
+
+u
+

Only update archive members with files that have more recent +timestamps.

+
+ +
+
+U
+

Use actual timestamps and UIDs/GIDs.

+
+ +
+
+

Other

+
+
+--format=<type>
+

This option allows for default, gnu, darwin or bsd <type> to be selected. +When creating an archive, <type> will default to that of the host +machine.

+
+ +
+
+-h, --help
+

Print a summary of command-line options and their meanings.

+
+ +
+
+-M
+

This option allows for MRI scripts to be read through the standard input +stream. No other options are compatible with this option.

+
+ +
+
+--rsp-quoting=<type>
+
+This option selects the quoting style ``<type>`` for response files, either
+
+``posix`` or ``windows``. The default when on Windows is ``windows``, otherwise the
+
+default is ``posix``.
+
+ +
+
+--version
+

Display the version of the llvm-ar executable.

+
+ +
+
+@<FILE>
+

Read command-line options and commands from response file <FILE>.

+
+ +
+
+
+

MRI SCRIPTS

+

llvm-ar understands a subset of the MRI scripting interface commonly +supported by archivers following in the ar tradition. An MRI script contains a +sequence of commands to be executed by the archiver. The -M option +allows for an MRI script to be passed to llvm-ar through the +standard input stream.

+

Note that llvm-ar has known limitations regarding the use of MRI +scripts:

+
    +
  • Each script can only create one archive.

  • +
  • Existing archives can not be modified.

  • +
+
+

MRI Script Commands

+

Each command begins with the command’s name and must appear on its own line. +Some commands have arguments, which must be separated from the name by +whitespace. An MRI script should begin with either a CREATE or +CREATETHIN command and will typically end with a SAVE +command. Any text after either ‘*’ or ‘;’ is treated as a comment.

+
+
+CREATE archive
+

Begin creation of a regular archive with the specified name. Subsequent +commands act upon this archive.

+
+ +
+
+CREATETHIN archive
+

Begin creation of a thin archive with the specified name. Subsequent +commands act upon this archive.

+
+ +
+
+ADDLIB archive
+

Append the contents of archive to the current archive.

+
+ +
+
+ADDMOD <file>
+

Append <file> to the current archive.

+
+ +
+
+DELETE <file>
+

Delete the member of the current archive whose file name, excluding directory +components, matches <file>.

+
+ +
+
+SAVE
+

Write the current archive to the path specified in the previous +CREATE/CREATETHIN command.

+
+ +
+
+END
+

Ends the MRI script (optional).

+
+ +
+
+
+

EXIT STATUS

+

If llvm-ar succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non-zero value.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-as.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-as.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-as.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-as.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,190 @@ + + + + + + + + + llvm-as - LLVM assembler — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-as - LLVM assembler

+
+

SYNOPSIS

+

llvm-as [options] [filename]

+
+
+

DESCRIPTION

+

llvm-as is the LLVM assembler. It reads a file containing human-readable +LLVM assembly language, translates it to LLVM bitcode, and writes the result +into a file or to standard output.

+

If filename is omitted or is -, then llvm-as reads its input from +standard input.

+

If an output file is not specified with the -o option, then +llvm-as sends its output to a file or standard output by following +these rules:

+
    +
  • If the input is standard input, then the output is standard output.

  • +
  • If the input is a file that ends with .ll, then the output file is of the +same name, except that the suffix is changed to .bc.

  • +
  • If the input is a file that does not end with the .ll suffix, then the +output file has the same name as the input file, except that the .bc +suffix is appended.

  • +
+
+
+

OPTIONS

+
+
-f

Enable binary output on terminals. Normally, llvm-as will refuse to +write raw bitcode output if the output stream is a terminal. With this option, +llvm-as will write raw bitcode regardless of the output device.

+
+
-help

Print a summary of command line options.

+
+
-o filename

Specify the output file name. If filename is -, then llvm-as +sends its output to standard output.

+
+
+
+
+

EXIT STATUS

+

If llvm-as succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non-zero value.

+
+
+

SEE ALSO

+

llvm-dis(1), as(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-bcanalyzer.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-bcanalyzer.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-bcanalyzer.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-bcanalyzer.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,445 @@ + + + + + + + + + llvm-bcanalyzer - LLVM bitcode analyzer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-bcanalyzer - LLVM bitcode analyzer

+
+

SYNOPSIS

+

llvm-bcanalyzer [options] [filename]

+
+
+

DESCRIPTION

+

The llvm-bcanalyzer command is a small utility for analyzing bitcode +files. The tool reads a bitcode file (such as generated with the +llvm-as tool) and produces a statistical report on the contents of +the bitcode file. The tool can also dump a low level but human readable +version of the bitcode file. This tool is probably not of much interest or +utility except for those working directly with the bitcode file format. Most +LLVM users can just ignore this tool.

+

If filename is omitted or is -, then llvm-bcanalyzer reads its +input from standard input. This is useful for combining the tool into a +pipeline. Output is written to the standard output.

+
+
+

OPTIONS

+
+
+-nodetails
+

Causes llvm-bcanalyzer to abbreviate its output by writing out only +a module level summary. The details for individual functions are not +displayed.

+
+ +
+
+-dump
+

Causes llvm-bcanalyzer to dump the bitcode in a human readable +format. This format is significantly different from LLVM assembly and +provides details about the encoding of the bitcode file.

+
+ +
+
+-verify
+

Causes llvm-bcanalyzer to verify the module produced by reading the +bitcode. This ensures that the statistics generated are based on a consistent +module.

+
+ +
+
+-help
+

Print a summary of command line options.

+
+ +
+
+

EXIT STATUS

+

If llvm-bcanalyzer succeeds, it will exit with 0. Otherwise, if an +error occurs, it will exit with a non-zero value, usually 1.

+
+
+

SUMMARY OUTPUT DEFINITIONS

+

The following items are always printed by llvm-bcanalyzer. They comprise the +summary output.

+

Bitcode Analysis Of Module

+
+

This just provides the name of the module for which bitcode analysis is being +generated.

+
+

Bitcode Version Number

+
+

The bitcode version (not LLVM version) of the file read by the analyzer.

+
+

File Size

+
+

The size, in bytes, of the entire bitcode file.

+
+

Module Bytes

+
+

The size, in bytes, of the module block. Percentage is relative to File Size.

+
+

Function Bytes

+
+

The size, in bytes, of all the function blocks. Percentage is relative to File +Size.

+
+

Global Types Bytes

+
+

The size, in bytes, of the Global Types Pool. Percentage is relative to File +Size. This is the size of the definitions of all types in the bitcode file.

+
+

Constant Pool Bytes

+
+

The size, in bytes, of the Constant Pool Blocks. Percentage is relative to File +Size.

+
+

Module Globals Bytes

+
+

The size, in bytes, of the Global Variable Definitions and their initializers. +Percentage is relative to File Size.

+
+

Instruction List Bytes

+
+

The size, in bytes, of all the instruction lists in all the functions. +Percentage is relative to File Size. Note that this value is also included in +the Function Bytes.

+
+

Compaction Table Bytes

+
+

The size, in bytes, of all the compaction tables in all the functions. +Percentage is relative to File Size. Note that this value is also included in +the Function Bytes.

+
+

Symbol Table Bytes

+
+

The size, in bytes, of all the symbol tables in all the functions. Percentage is +relative to File Size. Note that this value is also included in the Function +Bytes.

+
+

Dependent Libraries Bytes

+
+

The size, in bytes, of the list of dependent libraries in the module. Percentage +is relative to File Size. Note that this value is also included in the Module +Globals Bytes.

+
+

Number Of Bitcode Blocks

+
+

The total number of blocks of any kind in the bitcode file.

+
+

Number Of Functions

+
+

The total number of function definitions in the bitcode file.

+
+

Number Of Types

+
+

The total number of types defined in the Global Types Pool.

+
+

Number Of Constants

+
+

The total number of constants (of any type) defined in the Constant Pool.

+
+

Number Of Basic Blocks

+
+

The total number of basic blocks defined in all functions in the bitcode file.

+
+

Number Of Instructions

+
+

The total number of instructions defined in all functions in the bitcode file.

+
+

Number Of Long Instructions

+
+

The total number of long instructions defined in all functions in the bitcode +file. Long instructions are those taking greater than 4 bytes. Typically long +instructions are GetElementPtr with several indices, PHI nodes, and calls to +functions with large numbers of arguments.

+
+

Number Of Operands

+
+

The total number of operands used in all instructions in the bitcode file.

+
+

Number Of Compaction Tables

+
+

The total number of compaction tables in all functions in the bitcode file.

+
+

Number Of Symbol Tables

+
+

The total number of symbol tables in all functions in the bitcode file.

+
+

Number Of Dependent Libs

+
+

The total number of dependent libraries found in the bitcode file.

+
+

Total Instruction Size

+
+

The total size of the instructions in all functions in the bitcode file.

+
+

Average Instruction Size

+
+

The average number of bytes per instruction across all functions in the bitcode +file. This value is computed by dividing Total Instruction Size by Number Of +Instructions.

+
+

Maximum Type Slot Number

+
+

The maximum value used for a type’s slot number. Larger slot number values take +more bytes to encode.

+
+

Maximum Value Slot Number

+
+

The maximum value used for a value’s slot number. Larger slot number values take +more bytes to encode.

+
+

Bytes Per Value

+
+

The average size of a Value definition (of any type). This is computed by +dividing File Size by the total number of values of any type.

+
+

Bytes Per Global

+
+

The average size of a global definition (constants and global variables).

+
+

Bytes Per Function

+
+

The average number of bytes per function definition. This is computed by +dividing Function Bytes by Number Of Functions.

+
+

# of VBR 32-bit Integers

+
+

The total number of 32-bit integers encoded using the Variable Bit Rate +encoding scheme.

+
+

# of VBR 64-bit Integers

+
+

The total number of 64-bit integers encoded using the Variable Bit Rate encoding +scheme.

+
+

# of VBR Compressed Bytes

+
+

The total number of bytes consumed by the 32-bit and 64-bit integers that use +the Variable Bit Rate encoding scheme.

+
+

# of VBR Expanded Bytes

+
+

The total number of bytes that would have been consumed by the 32-bit and 64-bit +integers had they not been compressed with the Variable Bit Rate encoding +scheme.

+
+

Bytes Saved With VBR

+
+

The total number of bytes saved by using the Variable Bit Rate encoding scheme. +The percentage is relative to # of VBR Expanded Bytes.

+
+
+
+

DETAILED OUTPUT DEFINITIONS

+

The following definitions occur only if the -nodetails option was not given. +The detailed output provides additional information on a per-function basis.

+

Type

+
+

The type signature of the function.

+
+

Byte Size

+
+

The total number of bytes in the function’s block.

+
+

Basic Blocks

+
+

The number of basic blocks defined by the function.

+
+

Instructions

+
+

The number of instructions defined by the function.

+
+

Long Instructions

+
+

The number of instructions using the long instruction format in the function.

+
+

Operands

+
+

The number of operands used by all instructions in the function.

+
+

Instruction Size

+
+

The number of bytes consumed by instructions in the function.

+
+

Average Instruction Size

+
+

The average number of bytes consumed by the instructions in the function. +This value is computed by dividing Instruction Size by Instructions.

+
+

Bytes Per Instruction

+
+

The average number of bytes used by the function per instruction. This value +is computed by dividing Byte Size by Instructions. Note that this is not the +same as Average Instruction Size. It computes a number relative to the total +function size not just the size of the instruction list.

+
+

Number of VBR 32-bit Integers

+
+

The total number of 32-bit integers found in this function (for any use).

+
+

Number of VBR 64-bit Integers

+
+

The total number of 64-bit integers found in this function (for any use).

+
+

Number of VBR Compressed Bytes

+
+

The total number of bytes in this function consumed by the 32-bit and 64-bit +integers that use the Variable Bit Rate encoding scheme.

+
+

Number of VBR Expanded Bytes

+
+

The total number of bytes in this function that would have been consumed by +the 32-bit and 64-bit integers had they not been compressed with the Variable +Bit Rate encoding scheme.

+
+

Bytes Saved With VBR

+
+

The total number of bytes saved in this function by using the Variable Bit +Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes.

+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-config.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-config.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-config.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-config.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,258 @@ + + + + + + + + + llvm-config - Print LLVM compilation options — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-config - Print LLVM compilation options

+
+

SYNOPSIS

+

llvm-config option [components…]

+
+
+

DESCRIPTION

+

llvm-config makes it easier to build applications that use LLVM. It can +print the compiler flags, linker flags and object libraries needed to link +against LLVM.

+
+
+

EXAMPLES

+

To link against the JIT:

+
g++ `llvm-config --cxxflags` -o HowToUseJIT.o -c HowToUseJIT.cpp
+g++ `llvm-config --ldflags` -o HowToUseJIT HowToUseJIT.o \
+    `llvm-config --libs engine bcreader scalaropts`
+
+
+
+
+

OPTIONS

+

--version

+
+

Print the version number of LLVM.

+
+

-help

+
+

Print a summary of llvm-config arguments.

+
+

--prefix

+
+

Print the installation prefix for LLVM.

+
+

--src-root

+
+

Print the source root from which LLVM was built.

+
+

--obj-root

+
+

Print the object root used to build LLVM.

+
+

--bindir

+
+

Print the installation directory for LLVM binaries.

+
+

--includedir

+
+

Print the installation directory for LLVM headers.

+
+

--libdir

+
+

Print the installation directory for LLVM libraries.

+
+

--cxxflags

+
+

Print the C++ compiler flags needed to use LLVM headers.

+
+

--ldflags

+
+

Print the flags needed to link against LLVM libraries.

+
+

--libs

+
+

Print all the libraries needed to link against the specified LLVM +components, including any dependencies.

+
+

--libnames

+
+

Similar to --libs, but prints the bare filenames of the libraries +without -l or pathnames. Useful for linking against a not-yet-installed +copy of LLVM.

+
+

--libfiles

+
+

Similar to --libs, but prints the full path to each library file. This is +useful when creating makefile dependencies, to ensure that a tool is relinked if +any library it uses changes.

+
+

--components

+
+

Print all valid component names.

+
+

--targets-built

+
+

Print the component names for all targets supported by this copy of LLVM.

+
+

--build-mode

+
+

Print the build mode used when LLVM was built (e.g. Debug or Release).

+
+
+
+

COMPONENTS

+

To print a list of all available components, run llvm-config +--components. In most cases, components correspond directly to LLVM +libraries. Useful “virtual” components include:

+

all

+
+

Includes all LLVM libraries. The default if no components are specified.

+
+

backend

+
+

Includes either a native backend or the C backend.

+
+

engine

+
+

Includes either a native JIT or the bitcode interpreter.

+
+
+
+

EXIT STATUS

+

If llvm-config succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-cov.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-cov.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-cov.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-cov.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,696 @@ + + + + + + + + + llvm-cov - emit coverage information — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-cov - emit coverage information

+
+

SYNOPSIS

+

llvm-cov command [args…]

+
+
+

DESCRIPTION

+

The llvm-cov tool shows code coverage information for +programs that are instrumented to emit profile data. It can be used to +work with gcov-style coverage or with clang's instrumentation +based profiling.

+

If the program is invoked with a base name of gcov, it will behave as if +the llvm-cov gcov command were called. Otherwise, a command should +be provided.

+
+
+

COMMANDS

+ +
+
+

GCOV COMMAND

+
+

SYNOPSIS

+

llvm-cov gcov [options] SOURCEFILE

+
+
+

DESCRIPTION

+

The llvm-cov gcov tool reads code coverage data files and displays +the coverage information for a specified source file. It is compatible with the +gcov tool from version 4.2 of GCC and may also be compatible with some +later versions of gcov.

+

To use llvm-cov gcov, you must first build an instrumented version +of your application that collects coverage data as it runs. Compile with the +-fprofile-arcs and -ftest-coverage options to add the +instrumentation. (Alternatively, you can use the --coverage option, which +includes both of those other options.)

+

At the time you compile the instrumented code, a .gcno data file will be +generated for each object file. These .gcno files contain half of the +coverage data. The other half of the data comes from .gcda files that are +generated when you run the instrumented program, with a separate .gcda +file for each object file. Each time you run the program, the execution counts +are summed into any existing .gcda files, so be sure to remove any old +files if you do not want their contents to be included.

+

By default, the .gcda files are written into the same directory as the +object files, but you can override that by setting the GCOV_PREFIX and +GCOV_PREFIX_STRIP environment variables. The GCOV_PREFIX_STRIP +variable specifies a number of directory components to be removed from the +start of the absolute path to the object file directory. After stripping those +directories, the prefix from the GCOV_PREFIX variable is added. These +environment variables allow you to run the instrumented program on a machine +where the original object file directories are not accessible, but you will +then need to copy the .gcda files back to the object file directories +where llvm-cov gcov expects to find them.

+

Once you have generated the coverage data files, run llvm-cov gcov +for each main source file where you want to examine the coverage results. This +should be run from the same directory where you previously ran the +compiler. The results for the specified source file are written to a file named +by appending a .gcov suffix. A separate output file is also created for +each file included by the main source file, also with a .gcov suffix added.

+

The basic content of a .gcov output file is a copy of the source file with +an execution count and line number prepended to every line. The execution +count is shown as - if a line does not contain any executable code. If +a line contains code but that code was never executed, the count is displayed +as #####.

+
+
+

OPTIONS

+
+
+-a, --all-blocks
+

Display all basic blocks. If there are multiple blocks for a single line of +source code, this option causes llvm-cov to show the count for each block +instead of just one count for the entire line.

+
+ +
+
+-b, --branch-probabilities
+

Display conditional branch probabilities and a summary of branch information.

+
+ +
+
+-c, --branch-counts
+

Display branch counts instead of probabilities (requires -b).

+
+ +
+
+-m, --demangled-names
+

Demangle function names.

+
+ +
+
+-f, --function-summaries
+

Show a summary of coverage for each function instead of just one summary for +an entire source file.

+
+ +
+
+--help
+

Display available options (--help-hidden for more).

+
+ +
+
+-l, --long-file-names
+

For coverage output of files included from the main source file, add the +main file name followed by ## as a prefix to the output file names. This +can be combined with the --preserve-paths option to use complete paths for +both the main file and the included file.

+
+ +
+
+-n, --no-output
+

Do not output any .gcov files. Summary information is still +displayed.

+
+ +
+
+-o=<DIR|FILE>, --object-directory=<DIR>, --object-file=<FILE>
+

Find objects in DIR or based on FILE’s path. If you specify a particular +object file, the coverage data files are expected to have the same base name +with .gcno and .gcda extensions. If you specify a directory, the +files are expected in that directory with the same base name as the source +file.

+
+ +
+
+-p, --preserve-paths
+

Preserve path components when naming the coverage output files. In addition +to the source file name, include the directories from the path to that +file. The directories are separated by # characters, with . directories +removed and .. directories replaced by ^ characters. When used with +the --long-file-names option, this applies to both the main file name and the +included file name.

+
+ +
+
+-r
+

Only dump files with relative paths or absolute paths with the prefix specified +by -s.

+
+ +
+
+-s=<string>
+

Source prefix to elide.

+
+ +
+
+-t, --stdout
+

Print to stdout instead of producing .gcov files.

+
+ +
+
+-u, --unconditional-branches
+

Include unconditional branches in the output for the --branch-probabilities +option.

+
+ +
+
+-version
+

Display the version of llvm-cov.

+
+ +
+
+-x, --hash-filenames
+

Use md5 hash of file name when naming the coverage output files. The source +file name will be suffixed by ## followed by MD5 hash calculated for it.

+
+ +
+
+

EXIT STATUS

+

llvm-cov gcov returns 1 if it cannot read input files. Otherwise, +it exits with zero.

+
+
+
+

SHOW COMMAND

+
+

SYNOPSIS

+

llvm-cov show [options] -instr-profile PROFILE BIN [-object BIN,…] [[-object BIN]] [SOURCES]

+
+
+

DESCRIPTION

+

The llvm-cov show command shows line by line coverage of the +binaries BIN,… using the profile data PROFILE. It can optionally be +filtered to only show the coverage for the files listed in SOURCES.

+

BIN may be an executable, object file, dynamic library, or archive (thin or +otherwise).

+

To use llvm-cov show, you need a program that is compiled with +instrumentation to emit profile and coverage data. To build such a program with +clang use the -fprofile-instr-generate and -fcoverage-mapping +flags. If linking with the clang driver, pass -fprofile-instr-generate +to the link stage to make sure the necessary runtime libraries are linked in.

+

The coverage information is stored in the built executable or library itself, +and this is what you should pass to llvm-cov show as a BIN +argument. The profile data is generated by running this instrumented program +normally. When the program exits it will write out a raw profile file, +typically called default.profraw, which can be converted to a format that +is suitable for the PROFILE argument using the llvm-profdata merge +tool.

+
+
+

OPTIONS

+
+
+-show-branches=<VIEW>
+

Show coverage for branch conditions in terms of either count or percentage. +The supported views are: “count”, “percent”.

+
+ +
+
+-show-line-counts
+

Show the execution counts for each line. Defaults to true, unless another +-show option is used.

+
+ +
+
+-show-expansions
+

Expand inclusions, such as preprocessor macros or textual inclusions, inline +in the display of the source file. Defaults to false.

+
+ +
+
+-show-instantiations
+

For source regions that are instantiated multiple times, such as templates in +C++, show each instantiation separately as well as the combined summary. +Defaults to true.

+
+ +
+
+-show-regions
+

Show the execution counts for each region by displaying a caret that points to +the character where the region starts. Defaults to false.

+
+ +
+
+-show-line-counts-or-regions
+

Show the execution counts for each line if there is only one region on the +line, but show the individual regions if there are multiple on the line. +Defaults to false.

+
+ +
+
+-use-color
+

Enable or disable color output. By default this is autodetected.

+
+ +
+
+-arch=[*NAMES*]
+

Specify a list of architectures such that the Nth entry in the list +corresponds to the Nth specified binary. If the covered object is a universal +binary, this specifies the architecture to use. It is an error to specify an +architecture that is not included in the universal binary or to use an +architecture that does not match a non-universal binary.

+
+ +
+
+-name=<NAME>
+

Show code coverage only for functions with the given name.

+
+ +
+
+-name-whitelist=<FILE>
+

Show code coverage only for functions listed in the given file. Each line in +the file should start with whitelist_fun:, immediately followed by the name +of the function to accept. This name can be a wildcard expression.

+
+ +
+
+-name-regex=<PATTERN>
+

Show code coverage only for functions that match the given regular expression.

+
+ +
+
+-ignore-filename-regex=<PATTERN>
+

Skip source code files with file paths that match the given regular expression.

+
+ +
+
+-format=<FORMAT>
+

Use the specified output format. The supported formats are: “text”, “html”.

+
+ +
+
+-tab-size=<TABSIZE>
+

Replace tabs with <TABSIZE> spaces when preparing reports. Currently, this is +only supported for the html format.

+
+ +
+
+-output-dir=PATH
+

Specify a directory to write coverage reports into. If the directory does not +exist, it is created. When used in function view mode (i.e. when -name or +-name-regex are used to select specific functions), the report is written to +PATH/functions.EXTENSION. When used in file view mode, a report for each file +is written to PATH/REL_PATH_TO_FILE.EXTENSION.

+
+ +
+
+-Xdemangler=<TOOL>|<TOOL-OPTION>
+

Specify a symbol demangler. This can be used to make reports more +human-readable. This option can be specified multiple times to supply +arguments to the demangler (e.g. -Xdemangler c++filt -Xdemangler -n for C++). +The demangler is expected to read a newline-separated list of symbols from +stdin and write a newline-separated list of the same length to stdout.

+
+ +
+
+-num-threads=N, -j=N
+

Use N threads to write file reports (only applicable when -output-dir is +specified). When N=0, llvm-cov auto-detects an appropriate number of threads to +use. This is the default.

+
+ +
+
+-compilation-dir=<dir>
+

Directory used as a base for relative coverage mapping paths. Only applicable +when binaries have been compiled with one of -fcoverage-prefix-map, +-fcoverage-compilation-dir, or -ffile-compilation-dir.

+
+ +
+
+-line-coverage-gt=<N>
+

Show code coverage only for functions with line coverage greater than the +given threshold.

+
+ +
+
+-line-coverage-lt=<N>
+

Show code coverage only for functions with line coverage less than the given +threshold.

+
+ +
+
+-region-coverage-gt=<N>
+

Show code coverage only for functions with region coverage greater than the +given threshold.

+
+ +
+
+-region-coverage-lt=<N>
+

Show code coverage only for functions with region coverage less than the given +threshold.

+
+ +
+
+-path-equivalence=<from>,<to>
+

Map the paths in the coverage data to local source file paths. This allows you +to generate the coverage data on one machine, and then use llvm-cov on a +different machine where you have the same files on a different path.

+
+ +
+
+
+

REPORT COMMAND

+
+

SYNOPSIS

+

llvm-cov report [options] -instr-profile PROFILE BIN [-object BIN,…] [[-object BIN]] [SOURCES]

+
+
+

DESCRIPTION

+

The llvm-cov report command displays a summary of the coverage of +the binaries BIN,… using the profile data PROFILE. It can optionally be +filtered to only show the coverage for the files listed in SOURCES.

+

BIN may be an executable, object file, dynamic library, or archive (thin or +otherwise).

+

If no source files are provided, a summary line is printed for each file in the +coverage data. If any files are provided, summaries can be shown for each +function in the listed files if the -show-functions option is enabled.

+

For information on compiling programs for coverage and generating profile data, +see SHOW COMMAND.

+
+
+

OPTIONS

+
+
+-use-color[=VALUE]
+

Enable or disable color output. By default this is autodetected.

+
+ +
+
+-arch=<name>
+

If the covered binary is a universal binary, select the architecture to use. +It is an error to specify an architecture that is not included in the +universal binary or to use an architecture that does not match a +non-universal binary.

+
+ +
+
+-show-region-summary
+

Show statistics for all regions. Defaults to true.

+
+ +
+
+-show-branch-summary
+

Show statistics for all branch conditions. Defaults to true.

+
+ +
+
+-show-functions
+

Show coverage summaries for each function. Defaults to false.

+
+ +
+
+-show-instantiation-summary
+

Show statistics for all function instantiations. Defaults to false.

+
+ +
+
+-ignore-filename-regex=<PATTERN>
+

Skip source code files with file paths that match the given regular expression.

+
+ +
+
+-compilation-dir=<dir>
+

Directory used as a base for relative coverage mapping paths. Only applicable +when binaries have been compiled with one of -fcoverage-prefix-map, +-fcoverage-compilation-dir, or -ffile-compilation-dir.

+
+ +
+
+
+

EXPORT COMMAND

+
+

SYNOPSIS

+

llvm-cov export [options] -instr-profile PROFILE BIN [-object BIN,…] [[-object BIN]] [SOURCES]

+
+
+

DESCRIPTION

+

The llvm-cov export command exports coverage data of the binaries +BIN,… using the profile data PROFILE in either JSON or lcov trace file +format.

+

When exporting JSON, the regions, functions, branches, expansions, and +summaries of the coverage data will be exported. When exporting an lcov trace +file, the line-based coverage, branch coverage, and summaries will be exported.

+

The exported data can optionally be filtered to only export the coverage +for the files listed in SOURCES.

+

For information on compiling programs for coverage and generating profile data, +see SHOW COMMAND.

+
+
+

OPTIONS

+
+
+-arch=<name>
+

If the covered binary is a universal binary, select the architecture to use. +It is an error to specify an architecture that is not included in the +universal binary or to use an architecture that does not match a +non-universal binary.

+
+ +
+
+-format=<FORMAT>
+

Use the specified output format. The supported formats are: “text” (JSON), +“lcov”.

+
+ +
+
+-summary-only
+

Export only summary information for each file in the coverage data. This mode +will not export coverage information for smaller units such as individual +functions or regions. The result will contain the same information as produced +by the llvm-cov report command, but presented in JSON or lcov +format rather than text.

+
+ +
+
+-ignore-filename-regex=<PATTERN>
+

Skip source code files with file paths that match the given regular expression.

+
+
+-skip-expansions
+
+ +

Skip exporting macro expansion coverage data.

+
+
+-skip-functions
+
+ +

Skip exporting per-function coverage data.

+
+
+-num-threads=N, -j=N
+
+ +

Use N threads to export coverage data. When N=0, llvm-cov auto-detects an +appropriate number of threads to use. This is the default.

+
+ +
+
+-compilation-dir=<dir>
+

Directory used as a base for relative coverage mapping paths. Only applicable +when binaries have been compiled with one of -fcoverage-prefix-map, +-fcoverage-compilation-dir, or -ffile-compilation-dir.

+
+ +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-cxxfilt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-cxxfilt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-cxxfilt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-cxxfilt.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,230 @@ + + + + + + + + + llvm-cxxfilt - LLVM symbol name demangler — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-cxxfilt - LLVM symbol name demangler

+
+

SYNOPSIS

+

llvm-cxxfilt [options] [mangled names…]

+
+
+

DESCRIPTION

+

llvm-cxxfilt is a symbol demangler that can be used as a replacement +for the GNU c++filt tool. It takes a series of symbol names and +prints their demangled form on the standard output stream. If a name cannot be +demangled, it is simply printed as is.

+

If no names are specified on the command-line, names are read interactively from +the standard input stream. When reading names from standard input, each input +line is split on characters that are not part of valid Itanium name manglings, +i.e. characters that are not alphanumeric, ‘.’, ‘$’, or ‘_’. Separators between +names are copied to the output as is.

+
+
+

EXAMPLE

+
$ llvm-cxxfilt _Z3foov _Z3bari not_mangled
+foo()
+bar(int)
+not_mangled
+$ cat input.txt
+| _Z3foov *** _Z3bari *** not_mangled |
+$ llvm-cxxfilt < input.txt
+| foo() *** bar(int) *** not_mangled |
+
+
+
+
+

OPTIONS

+
+
+--format=<value>, -s
+

Mangling scheme to assume. Valid values are auto (default, auto-detect the +style) and gnu (assume GNU/Itanium style).

+
+ +
+
+--help, -h
+

Print a summary of command line options.

+
+ +
+
+--no-strip-underscore, -n
+

Do not strip a leading underscore. This is the default for all platforms +except Mach-O based hosts.

+
+ +
+
+--strip-underscore, -_
+

Strip a single leading underscore, if present, from each input name before +demangling. On by default on Mach-O based platforms.

+
+ +
+
+--types, -t
+

Attempt to demangle names as type names as well as function names.

+
+ +
+
+--version
+

Display the version of the llvm-cxxfilt executable.

+
+ +
+
+@<FILE>
+

Read command-line options from response file <FILE>.

+
+ +
+
+

EXIT STATUS

+

llvm-cxxfilt returns 0 unless it encounters a usage error, in which +case a non-zero exit code is returned.

+
+
+

SEE ALSO

+

llvm-nm(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-cxxmap.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-cxxmap.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-cxxmap.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-cxxmap.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,232 @@ + + + + + + + + + llvm-cxxmap - Mangled name remapping tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-cxxmap - Mangled name remapping tool

+
+

SYNOPSIS

+

llvm-cxxmap [options] symbol-file-1 symbol-file-2

+
+
+

DESCRIPTION

+

The llvm-cxxmap tool performs fuzzy matching of C++ mangled names, +based on a file describing name components that should be considered equivalent.

+

The symbol files should contain a list of C++ mangled names (one per line). +Blank lines and lines starting with # are ignored. The output is a list +of pairs of equivalent symbols, one per line, of the form

+
<symbol-1> <symbol-2>
+
+
+

where <symbol-1> is a symbol from symbol-file-1 and <symbol-2> is +a symbol from symbol-file-2. Mappings for which the two symbols are identical +are omitted.

+
+
+

OPTIONS

+
+
+-remapping-file=file, -r=file
+

Specify a file containing a list of equivalence rules that should be used +to determine whether two symbols are equivalent. Required. +See REMAPPING FILE.

+
+ +
+
+-output=file, -o=file
+

Specify a file to write the list of matched names to. If unspecified, the +list will be written to stdout.

+
+ +
+
+-Wambiguous
+

Produce a warning if there are multiple equivalent (but distinct) symbols in +symbol-file-2.

+
+ +
+
+-Wincomplete
+

Produce a warning if symbol-file-1 contains a symbol for which there is no +equivalent symbol in symbol-file-2.

+
+ +
+
+

REMAPPING FILE

+

The remapping file is a text file containing lines of the form

+
fragmentkind fragment1 fragment2
+
+
+

where fragmentkind is one of name, type, or encoding, +indicating whether the following mangled name fragments are +<name>s, +<type>s, or +<encoding>s, +respectively. +Blank lines and lines starting with # are ignored.

+

Unmangled C names can be expressed as an encoding that is a (length-prefixed) +<source-name>:

+
# C function "void foo_bar()" is remapped to C++ function "void foo::bar()".
+encoding 7foo_bar _Z3foo3barv
+
+
+

For convenience, built-in <substitution>s such as St and Ss +are accepted as <name>s (even though they technically are not <name>s).

+

For example, to specify that absl::string_view and std::string_view +should be treated as equivalent, the following remapping file could be used:

+
# absl::string_view is considered equivalent to std::string_view
+type N4absl11string_viewE St17basic_string_viewIcSt11char_traitsIcEE
+
+# std:: might be std::__1:: in libc++ or std::__cxx11:: in libstdc++
+name St St3__1
+name St St7__cxx11
+
+
+
+

Note

+

Symbol remapping is currently only supported for C++ mangled names +following the Itanium C++ ABI mangling scheme. This covers all C++ targets +supported by Clang other than Windows targets.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-diff.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-diff.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-diff.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-diff.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,181 @@ + + + + + + + + + llvm-diff - LLVM structural ‘diff’ — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-diff - LLVM structural ‘diff’

+
+

SYNOPSIS

+

llvm-diff [options] module 1 module 2 [global name …]

+
+
+

DESCRIPTION

+

llvm-diff compares the structure of two LLVM modules, primarily +focusing on differences in function definitions. Insignificant +differences, such as changes in the ordering of globals or in the +names of local values, are ignored.

+

An input module will be interpreted as an assembly file if its name +ends in ‘.ll’; otherwise it will be read in as a bitcode file.

+

If a list of global names is given, just the values with those names +are compared; otherwise, all global values are compared, and +diagnostics are produced for globals which only appear in one module +or the other.

+

llvm-diff compares two functions by comparing their basic blocks, +beginning with the entry blocks. If the terminators seem to match, +then the corresponding successors are compared; otherwise they are +ignored. This algorithm is very sensitive to changes in control flow, +which tend to stop any downstream changes from being detected.

+

llvm-diff is intended as a debugging tool for writers of LLVM +passes and frontends. It does not have a stable output format.

+
+
+

EXIT STATUS

+

If llvm-diff finds no differences between the modules, it will exit +with 0 and produce no output. Otherwise it will exit with a non-zero +value.

+
+
+

BUGS

+

Many important differences, like changes in linkage or function +attributes, are not diagnosed.

+

Changes in memory behavior (for example, coalescing loads) can cause +massive detected differences in blocks.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-dis.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-dis.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-dis.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-dis.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,188 @@ + + + + + + + + + llvm-dis - LLVM disassembler — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-dis - LLVM disassembler

+
+

SYNOPSIS

+

llvm-dis [options] [filename]

+
+
+

DESCRIPTION

+

The llvm-dis command is the LLVM disassembler. It takes an LLVM +bitcode file and converts it into human-readable LLVM assembly language.

+

If filename is omitted or specified as -, llvm-dis reads its +input from standard input.

+

If the input is being read from standard input, then llvm-dis +will send its output to standard output by default. Otherwise, the +output will be written to a file named after the input file, with +a .ll suffix added (any existing .bc suffix will first be +removed). You can override the choice of output file using the +-o option.

+
+
+

OPTIONS

+

-f

+
+

Enable binary output on terminals. Normally, llvm-dis will refuse to +write raw bitcode output if the output stream is a terminal. With this option, +llvm-dis will write raw bitcode regardless of the output device.

+
+

-help

+
+

Print a summary of command line options.

+
+

-o filename

+
+

Specify the output file name. If filename is -, then the output is sent +to standard output.

+
+
+
+

EXIT STATUS

+

If llvm-dis succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value.

+
+
+

SEE ALSO

+

llvm-as(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-dwarfdump.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-dwarfdump.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-dwarfdump.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-dwarfdump.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,389 @@ + + + + + + + + + llvm-dwarfdump - dump and verify DWARF debug information — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-dwarfdump - dump and verify DWARF debug information

+
+

SYNOPSIS

+

llvm-dwarfdump [options] [filename …]

+
+
+

DESCRIPTION

+

llvm-dwarfdump parses DWARF sections in object files, +archives, and .dSYM bundles and prints their contents in +human-readable form. Only the .debug_info section is printed unless one of +the section-specific options or --all is specified.

+

If no input file is specified, a.out is used instead. If - is used as the +input file, llvm-dwarfdump reads the input from its standard input +stream.

+
+
+

OPTIONS

+
+
+-a, --all
+

Dump all supported DWARF sections.

+
+ +
+
+--arch=<arch>
+

Dump DWARF debug information for the specified CPU architecture. +Architectures may be specified by name or by number. This +option can be specified multiple times, once for each desired +architecture. All CPU architectures will be printed by +default.

+
+ +
+
+-c, --show-children
+

Show a debug info entry’s children when selectively printing with +the =<offset> argument of --debug-info, or options such +as --find or --name.

+
+ +
+
+--color
+

Use colors in output.

+
+ +
+
+-f <name>, --find=<name>
+

Search for the exact text <name> in the accelerator tables +and print the matching debug information entries. +When there are no accelerator tables or the name of the DIE +you are looking for is not found in the accelerator tables, +try using the slower but more complete --name option.

+
+ +
+
+-F, --show-form
+

Show DWARF form types after the DWARF attribute types.

+
+ +
+
+-h, --help
+

Show help and usage for this command.

+
+ +
+
+--help-list
+

Show help and usage for this command without grouping the options +into categories.

+
+ +
+
+-i, --ignore-case
+

Ignore case distinctions when using --name.

+
+ +
+
+-n <name>, --name=<name>
+

Find and print all debug info entries whose name +(DW_AT_name attribute) is <name>.

+
+ +
+
+--lookup=<address>
+

Look up <address> in the debug information and print out the file, +function, block, and line table details.

+
+ +
+
+-o <path>
+

Redirect output to a file specified by <path>, where - is the +standard output stream.

+
+ +
+
+-p, --show-parents
+

Show a debug info entry’s parents when selectively printing with +the =<offset> argument of --debug-info, or options such +as --find or --name.

+
+ +
+
+--parent-recurse-depth=<N>
+

When displaying debug info entry parents, only show them to a +maximum depth of <N>.

+
+ +
+
+--quiet
+

Use with --verify to not emit to STDOUT.

+
+ +
+
+-r <N>, --recurse-depth=<N>
+

When displaying debug info entries, only show children to a maximum +depth of <N>.

+
+ +
+
+--show-section-sizes
+

Show the sizes of all debug sections, expressed in bytes.

+
+ +
+
+--statistics
+

Collect debug info quality metrics and print the results +as machine-readable single-line JSON output. The output +format is described in the section below (FORMAT OF STATISTICS OUTPUT).

+
+ +
+
+--summarize-types
+

Abbreviate the description of type unit entries.

+
+ +
+
+-x, --regex
+

Treat any <name> strings as regular expressions when searching +with --name. If --ignore-case is also specified, +the regular expression becomes case-insensitive.

+
+ +
+
+-u, --uuid
+

Show the UUID for each architecture.

+
+ +
+
+--diff
+

Dump the output in a format that is more friendly for comparing +DWARF output from two different files.

+
+ +
+
+-v, --verbose
+

Display verbose information when dumping. This can help to debug +DWARF issues.

+
+ +
+
+--verify
+

Verify the structure of the DWARF information by verifying the +compile unit chains, DIE relationships graph, address +ranges, and more.

+
+ +
+
+--version
+

Display the version of the tool.

+
+ +
+
+--debug-abbrev, --debug-addr, --debug-aranges, --debug-cu-index, --debug-frame[=<offset>], --debug-gnu-pubnames, --debug-gnu-pubtypes, --debug-info [=<offset>], --debug-line [=<offset>], --debug-line-str, --debug-loc [=<offset>], --debug-loclists [=<offset>], --debug-macro, --debug-names, --debug-pubnames, --debug-pubtypes, --debug-ranges, --debug-rnglists, --debug-str, --debug-str-offsets, --debug-tu-index, --debug-types [=<offset>], --eh-frame [=<offset>], --gdb-index, --apple-names, --apple-types, --apple-namespaces, --apple-objc
+

Dump the specified DWARF section by name. Only the +.debug_info section is shown by default. Some entries +support adding an =<offset> as a way to provide an +optional offset of the exact entry to dump within the +respective section. When an offset is provided, only the +entry at that offset will be dumped, else the entire +section will be dumped.

+
+ +
+
+@<FILE>
+

Read command-line options from <FILE>.

+
+ +
+
+

FORMAT OF STATISTICS OUTPUT

+

The --statistics option generates single-line JSON output +representing quality metrics of the processed debug info. These metrics are +useful to compare changes between two compilers, particularly for judging +the effect that a change to the compiler has on the debug info quality.

+

The output is formatted as key-value pairs. The first pair contains a version +number. The following naming scheme is used for the keys:

+
+
    +
  • variables ==> local variables and parameters

  • +
  • local vars ==> local variables

  • +
  • params ==> formal parameters

  • +
+
+

For aggregated values, the following keys are used:

+
+
    +
  • sum_of_all_variables(…) ==> the sum applied to all variables

  • +
  • #bytes ==> the number of bytes

  • +
  • #variables - entry values … ==> the number of variables excluding +the entry values etc.

  • +
+
+
+
+

EXIT STATUS

+

llvm-dwarfdump returns 0 if the input files were parsed and dumped +successfully. Otherwise, it returns 1.

+
+
+

SEE ALSO

+

dsymutil(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-exegesis.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-exegesis.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-exegesis.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-exegesis.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,461 @@ + + + + + + + + + llvm-exegesis - LLVM Machine Instruction Benchmark — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-exegesis - LLVM Machine Instruction Benchmark

+
+

SYNOPSIS

+

llvm-exegesis [options]

+
+
+

DESCRIPTION

+

llvm-exegesis is a benchmarking tool that uses information available +in LLVM to measure host machine instruction characteristics like latency, +throughput, or port decomposition.

+

Given an LLVM opcode name and a benchmarking mode, llvm-exegesis +generates a code snippet that makes execution as serial (resp. as parallel) as +possible so that we can measure the latency (resp. inverse throughput/uop decomposition) +of the instruction. +The code snippet is jitted and executed on the host subtarget. The time taken +(resp. resource usage) is measured using hardware performance counters. The +result is printed out as YAML to the standard output.

+

The main goal of this tool is to automatically (in)validate LLVM’s TableGen +scheduling models. To that end, we also provide analysis of the results.

+

llvm-exegesis can also benchmark arbitrary user-provided code +snippets.

+
+
+

EXAMPLE 1: benchmarking instructions

+

Assume you have an X86-64 machine. To measure the latency of a single +instruction, run:

+
$ llvm-exegesis -mode=latency -opcode-name=ADD64rr
+
+
+

Measuring the uop decomposition or inverse throughput of an instruction works similarly:

+
$ llvm-exegesis -mode=uops -opcode-name=ADD64rr
+$ llvm-exegesis -mode=inverse_throughput -opcode-name=ADD64rr
+
+
+

The output is a YAML document (the default is to write to stdout, but you can +redirect the output to a file using -benchmarks-file):

+
---
+key:
+  opcode_name:     ADD64rr
+  mode:            latency
+  config:          ''
+cpu_name:        haswell
+llvm_triple:     x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+  - { key: latency, value: 1.0058, debug_string: '' }
+error:           ''
+info:            'explicit self cycles, selecting one aliasing configuration.
+Snippet:
+ADD64rr R8, R8, R10
+'
+...
+
+
+

To measure the latency of all instructions for the host architecture, run:

+
$ llvm-exegesis -mode=latency -opcode-index=-1
+
+
+
+
+

EXAMPLE 2: benchmarking a custom code snippet

+

To measure the latency/uops of a custom piece of code, you can specify the +snippets-file option (- reads from standard input).

+
$ echo "vzeroupper" | llvm-exegesis -mode=uops -snippets-file=-
+
+
+

Real-life code snippets typically depend on registers or memory. +llvm-exegesis checks the liveness of registers (i.e. any register +use has a corresponding def or is a “live in”). If your code depends on the +value of some registers, you have two options:

+
    +
  • Mark the register as requiring a definition. llvm-exegesis will +automatically assign a value to the register. This can be done using the +directive LLVM-EXEGESIS-DEFREG <reg name> <hex_value>, where <hex_value> +is a bit pattern used to fill <reg name>. If <hex_value> is smaller than +the register width, it will be sign-extended.

  • +
  • Mark the register as a “live in”. llvm-exegesis will benchmark +using whatever value was in this register on entry. This can be done using +the directive LLVM-EXEGESIS-LIVEIN <reg name>.

  • +
+

For example, the following code snippet depends on the values of XMM1 (which +will be set by the tool) and the memory buffer passed in RDI (live in).

+
# LLVM-EXEGESIS-LIVEIN RDI
+# LLVM-EXEGESIS-DEFREG XMM1 42
+vmulps        (%rdi), %xmm1, %xmm2
+vhaddps       %xmm2, %xmm2, %xmm3
+addq $0x10, %rdi
+
+
+
+
+

EXAMPLE 3: analysis

+

Assuming you have a set of benchmarked instructions (either latency or uops) as +YAML in file /tmp/benchmarks.yaml, you can analyze the results using the +following command:

+
  $ llvm-exegesis -mode=analysis \
+-benchmarks-file=/tmp/benchmarks.yaml \
+-analysis-clusters-output-file=/tmp/clusters.csv \
+-analysis-inconsistencies-output-file=/tmp/inconsistencies.html
+
+
+

This will group the instructions into clusters with the same performance +characteristics. The clusters will be written out to /tmp/clusters.csv in the +following format:

+
cluster_id,opcode_name,config,sched_class
+...
+2,ADD32ri8_DB,,WriteALU,1.00
+2,ADD32ri_DB,,WriteALU,1.01
+2,ADD32rr,,WriteALU,1.01
+2,ADD32rr_DB,,WriteALU,1.00
+2,ADD32rr_REV,,WriteALU,1.00
+2,ADD64i32,,WriteALU,1.01
+2,ADD64ri32,,WriteALU,1.01
+2,MOVSX64rr32,,BSWAP32r_BSWAP64r_MOVSX64rr32,1.00
+2,VPADDQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.02
+2,VPSUBQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.01
+2,ADD64ri8,,WriteALU,1.00
+2,SETBr,,WriteSETCC,1.01
+...
+
+
+

llvm-exegesis will also analyze the clusters to point out +inconsistencies in the scheduling information. The output is an html file. For +example, /tmp/inconsistencies.html will contain messages like the following:

+../_images/llvm-exegesis-analysis.png +

Note that the scheduling class names will be resolved only when +llvm-exegesis is compiled in debug mode, else only the class id will +be shown. This does not invalidate any of the analysis results though.

+
+
+

OPTIONS

+
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-opcode-index=<LLVM opcode index>
+

Specify the opcode to measure, by index. Specifying -1 will result +in measuring every existing opcode. See example 1 for details. +Either opcode-index, opcode-name or snippets-file must be set.

+
+ +
+
+-opcode-name=<opcode name 1>,<opcode name 2>,...
+

Specify the opcode to measure, by name. Several opcodes can be specified as +a comma-separated list. See example 1 for details. +Either opcode-index, opcode-name or snippets-file must be set.

+
+ +
+
+-snippets-file=<filename>
+

Specify the custom code snippet to measure. See example 2 for details. +Either opcode-index, opcode-name or snippets-file must be set.

+
+ +
+
+-mode=[latency|uops|inverse_throughput|analysis]
+

Specify the run mode. Note that some modes have additional requirements and options.

+

latency mode can make use of either RDTSC or LBR. +latency[LBR] is only available on X86 (at least Skylake). +To run in latency[LBR] mode, a positive value must be specified +for x86-lbr-sample-period and --repetition-mode=loop.

+

In analysis mode, you also need to specify at least one of the +-analysis-clusters-output-file= and -analysis-inconsistencies-output-file=.

+
+ +
+
+-x86-lbr-sample-period=<nBranches/sample>
+

Specify the LBR sampling period - how many branches before we take a sample. +When a positive value is specified for this option and when the mode is latency, +we will use LBRs for measuring. +On choosing the “right” sampling period, a small value is preferred, but throttling +could occur if the sampling is too frequent. A prime number should be used to +avoid consistently skipping certain blocks.

+
+ +
+
+-repetition-mode=[duplicate|loop|min]
+

Specify the repetition mode. duplicate will create a large, straight line +basic block with num-repetitions instructions (repeating the snippet +num-repetitions/snippet size times). loop will, optionally, duplicate the +snippet until the loop body contains at least loop-body-size instructions, +and then wrap the result in a loop which will execute num-repetitions +instructions (thus, again, repeating the snippet +num-repetitions/snippet size times). The loop mode, especially with loop +unrolling tends to better hide the effects of the CPU frontend on architectures +that cache decoded instructions, but consumes a register for counting +iterations. If performing an analysis over many opcodes, it may be best to +instead use the min mode, which will run each other mode, +and produce the minimal measured result.

+
+ +
+
+-num-repetitions=<Number of repetitions>
+

Specify the target number of executed instructions. Note that the actual +repetition count of the snippet will be num-repetitions/snippet size. +Higher values lead to more accurate measurements but lengthen the benchmark.

+
+ +
+
+-loop-body-size=<Preferred loop body size>
+

Only effective for -repetition-mode=[loop|min]. +Instead of looping over the snippet directly, first duplicate it so that the +loop body contains at least this many instructions. This potentially results +in the loop body being cached in the CPU Op Cache / Loop Cache, +which may have higher throughput than the CPU decoders.

+
+ +
+
+-max-configs-per-opcode=<value>
+

Specify the maximum configurations that can be generated for each opcode. +By default this is 1, meaning that we assume that a single measurement is +enough to characterize an opcode. This might not be true of all instructions: +for example, the performance characteristics of the LEA instruction on X86 +depends on the value of assigned registers and immediates. Setting a value of +-max-configs-per-opcode larger than 1 allows llvm-exegesis to explore +more configurations to discover if some register or immediate assignments +lead to different performance characteristics.

+
+ +
+
+-benchmarks-file=</path/to/file>
+

File to read (analysis mode) or write (latency/uops/inverse_throughput +modes) benchmark results. “-” uses stdin/stdout.

+
+ +
+
+-analysis-clusters-output-file=</path/to/file>
+

If provided, write the analysis clusters as CSV to this file. “-” prints to +stdout. By default, this analysis is not run.

+
+ +
+
+-analysis-inconsistencies-output-file=</path/to/file>
+

If non-empty, write inconsistencies found during analysis to this file. - +prints to stdout. By default, this analysis is not run.

+
+ +
+
+-analysis-clustering=[dbscan,naive]
+

Specify the clustering algorithm to use. By default DBSCAN will be used. +The naive clustering algorithm is better for doing further work on the +-analysis-inconsistencies-output-file= output: it will create one cluster +per opcode, and check that the cluster is stable (all points are neighbours).

+
+ +
+
+-analysis-numpoints=<dbscan numPoints parameter>
+

Specify the numPoints parameters to be used for DBSCAN clustering +(analysis mode, DBSCAN only).

+
+ +
+
+-analysis-clustering-epsilon=<dbscan epsilon parameter>
+

Specify the epsilon parameter used for clustering of benchmark points +(analysis mode).

+
+ +
+
+-analysis-inconsistency-epsilon=<epsilon>
+

Specify the epsilon parameter used for detection of when the cluster +is different from the LLVM schedule profile values (analysis mode).

+
+ +
+
+-analysis-display-unstable-clusters
+

If there is more than one benchmark for an opcode, said benchmarks may end up +not being clustered into the same cluster if the measured performance +characteristics are different. By default all such opcodes are filtered out. +This flag will instead show only such unstable opcodes.

+
+ +
+
+-ignore-invalid-sched-class=false
+

If set, ignore instructions that do not have a sched class (class idx = 0).

+
+ +
+
+-mcpu=<cpu name>
+

If set, measure the cpu characteristics using the counters for this CPU. This +is useful when creating new sched models (the host CPU is unknown to LLVM).

+
+ +
+
+--dump-object-to-disk=true
+

By default, llvm-exegesis will dump the generated code to a temporary file to +enable code inspection. You may disable it to speed up the execution and save +disk space.

+
+ +
+
+

EXIT STATUS

+

llvm-exegesis returns 0 on success. Otherwise, an error message is +printed to standard error, and the tool returns a non-zero value.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-extract.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-extract.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-extract.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-extract.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,244 @@ + + + + + + + + + llvm-extract - extract a function from an LLVM module — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-extract - extract a function from an LLVM module

+
+

SYNOPSIS

+

llvm-extract [options] --func function-name [filename]

+
+
+

DESCRIPTION

+

The llvm-extract command takes the name of a function and extracts +it from the specified LLVM bitcode file. It is primarily used as a debugging +tool to reduce test cases from larger programs that are triggering a bug.

+

In addition to extracting the bitcode of the specified function, +llvm-extract will also remove unreachable global variables, +prototypes, and unused types.

+

The llvm-extract command reads its input from standard input if +filename is omitted or if filename is -. The output is always written to +standard output, unless the -o option is specified (see below).

+
+
+

OPTIONS

+

--alias alias-name

+
+

Extract the alias named alias-name from the LLVM bitcode. May be +specified multiple times to extract multiple aliases at once.

+
+

--ralias alias-regular-expr

+
+

Extract the alias(es) matching alias-regular-expr from the LLVM bitcode. +All aliases matching the regular expression will be extracted. May be +specified multiple times.

+
+

--bb basic-block-specifier

+
+

Extract basic block(s) specified in basic-block-specifier. May be +specified multiple times. Each <function:bb[;bb]> specifier pair will create +a function. If multiple basic blocks are specified in one pair, the first +block in the sequence should dominate the rest.

+
+

--delete

+
+

Delete specified Globals from Module.

+
+

-f

+
+

Enable binary output on terminals. Normally, llvm-extract will +refuse to write raw bitcode output if the output stream is a terminal. With +this option, llvm-extract will write raw bitcode regardless of the +output device.

+
+

--func function-name

+
+

Extract the function named function-name from the LLVM bitcode. May be +specified multiple times to extract multiple functions at once.

+
+

--rfunc function-regular-expr

+
+

Extract the function(s) matching function-regular-expr from the LLVM bitcode. +All functions matching the regular expression will be extracted. May be +specified multiple times.

+
+

--glob global-name

+
+

Extract the global variable named global-name from the LLVM bitcode. May be +specified multiple times to extract multiple global variables at once.

+
+

--rglob glob-regular-expr

+
+

Extract the global variable(s) matching glob-regular-expr from the LLVM +bitcode. All global variables matching the regular expression will be +extracted. May be specified multiple times.

+
+

--keep-const-init

+
+

Preserve the values of constant globals.

+
+

--recursive

+
+

Recursively extract all called functions.

+
+

-help

+
+

Print a summary of command line options.

+
+

-o filename

+
+

Specify the output filename. If filename is “-” (the default), then +llvm-extract sends its output to standard output.

+
+

-S

+
+

Write output in LLVM intermediate language (instead of bitcode).

+
+
+
+

EXIT STATUS

+

If llvm-extract succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value.

+
+
+

SEE ALSO

+

bugpoint(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-install-name-tool.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-install-name-tool.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-install-name-tool.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-install-name-tool.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,234 @@ + + + + + + + + + llvm-install-name-tool - LLVM tool for manipulating install-names and rpaths — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-install-name-tool - LLVM tool for manipulating install-names and rpaths

+
+

SYNOPSIS

+

llvm-install-name-tool [options] input

+
+
+

DESCRIPTION

+

llvm-install-name-tool is a tool to manipulate dynamic shared library +install names and rpaths listed in a Mach-O binary.

+

For most scenarios, it works as a drop-in replacement for Apple’s +install_name_tool.

+
+
+

OPTIONS

+

At least one of the following options is required, and some options can be +combined with other options. Options -add_rpath, -delete_rpath, +and -rpath can be combined in an invocation only if they do not share +the same <rpath> value.

+
+
+-add_rpath <rpath>
+

Add an rpath named <rpath> to the specified binary. Can be specified multiple +times to add multiple rpaths. Throws an error if <rpath> is already listed in +the binary.

+
+ +
+
+-change <old_install_name> <new_install_name>
+

Change an install name <old_install_name> to <new_install_name> in the +specified binary. Can be specified multiple times to change multiple dependent shared +library install names. Option is ignored if <old_install_name> is not listed +in the specified binary.

+
+ +
+
+-delete_rpath <rpath>
+

Delete an rpath named <rpath> from the specified binary. Can be specified multiple +times to delete multiple rpaths. Throws an error if <rpath> is not listed in +the binary.

+
+ +
+
+-delete_all_rpaths
+

Deletes all rpaths from the binary.

+
+ +
+
+--help, -h
+

Print a summary of command line options.

+
+ +
+
+-id <name>
+

Change shared library’s identification name under LC_ID_DYLIB to <name> in the +specified binary. If specified multiple times, only the last -id option is +selected. Option is ignored if the specified Mach-O binary is not a dynamic shared library.

+
+ +
+
+-rpath <old_rpath> <new_rpath>
+

Change an rpath named <old_rpath> to <new_rpath> in the specified binary. Can be specified +multiple times to change multiple rpaths. Throws an error if <old_rpath> is not listed +in the binary or <new_rpath> is already listed in the binary.

+
+ +
+
+--version, -V
+

Display the version of the llvm-install-name-tool executable.

+
+ +
+
+

EXIT STATUS

+

llvm-install-name-tool exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0.

+
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+
+

SEE ALSO

+

llvm-objcopy(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-lib.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-lib.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-lib.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-lib.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,166 @@ + + + + + + + + + llvm-lib - LLVM lib.exe compatible library tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-lib - LLVM lib.exe compatible library tool

+
+

SYNOPSIS

+

llvm-lib [/libpath:<path>] [/out:<output>] [/llvmlibthin] +[/ignore] [/machine] [/nologo] [files…]

+
+
+

DESCRIPTION

+

The llvm-lib command is intended to be a lib.exe compatible +tool. See https://msdn.microsoft.com/en-us/library/7ykb2k5f for the +general description.

+

llvm-lib has the following extensions:

+
    +
  • Bitcode files in symbol tables. +llvm-lib includes symbols from both bitcode files and regular +object files in the symbol table.

  • +
  • Creating thin archives. +The /llvmlibthin option causes llvm-lib to create thin archives +that contain only the symbol table and the header for the various +members. These files are much smaller, but are not compatible with +link.exe (lld can handle them).

  • +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-libtool-darwin.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-libtool-darwin.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-libtool-darwin.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-libtool-darwin.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,267 @@ + + + + + + + + + llvm-libtool-darwin - LLVM tool for creating libraries for Darwin — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-libtool-darwin - LLVM tool for creating libraries for Darwin

+
+

SYNOPSIS

+

llvm-libtool-darwin [options] <input files>

+
+
+

DESCRIPTION

+

llvm-libtool-darwin is a tool for creating static and dynamic +libraries for Darwin.

+

For most scenarios, it works as a drop-in replacement for cctools’ +libtool.

+
+
+

OPTIONS

+

llvm-libtool-darwin supports the following options:

+
+
+-arch_only <architecture>
+

Build a static library only for the specified <architecture> and ignore all +other architectures in the files.

+
+ +
+
+-color
+

Use colors in output.

+
+ +
+
+-D
+

Use zero for timestamps and UIDs/GIDs. This is set by default.

+
+ +
+
+-filelist <listfile[,dirname]>
+

Read input file names from <listfile>. File names are specified in <listfile> +one per line, separated only by newlines. Whitespace on a line is assumed +to be part of the filename. If the directory name, dirname, is also +specified then it is prepended to each file name in the <listfile>.

+
+ +
+
+-h, -help
+

Show help and usage for this command.

+
+ +
+
+-help-list
+

Show help and usage for this command without grouping the options +into categories.

+
+ +
+
+-l <x>
+

Searches for the library libx.a in the library search path. If the string <x> +ends with ‘.o’, then the library ‘x’ is searched for without prepending ‘lib’ +or appending ‘.a’. If the library is found, it is added to the list of input +files. Otherwise, an error is raised.

+
+ +
+
+-L <dir>
+

Adds <dir> to the list of directories in which to search for libraries. The +directories are searched in the order in which they are specified with +-L and before the default search path. The default search path +includes directories /lib, /usr/lib and /usr/local/lib.

+
+ +
+
+-no_warning_for_no_symbols
+

Do not warn about files that have no symbols.

+
+ +
+
+-o <filename>
+

Specify the output file name. Must be specified exactly once.

+
+ +
+
+-static
+

Produces a static library from the input files.

+
+ +
+
+-U
+

Use actual timestamps and UIDs/GIDs.

+
+ +
+
+-V
+

Display the version of this program and perform any operation specified.

+
+ +
+
+-version
+

Display the version of this program and exit immediately.

+
+ +
+
+

EXIT STATUS

+

llvm-libtool-darwin exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0.

+
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+
+

SEE ALSO

+

llvm-ar(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-link.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-link.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-link.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-link.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,205 @@ + + + + + + + + + llvm-link - LLVM bitcode linker — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ + + + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-lipo.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-lipo.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-lipo.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-lipo.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,229 @@ + + + + + + + + + llvm-lipo - LLVM tool for manipulating universal binaries — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-lipo - LLVM tool for manipulating universal binaries

+
+

SYNOPSIS

+

llvm-lipo [filenames…] [options]

+
+
+

DESCRIPTION

+

llvm-lipo can create universal binaries from Mach-O files, extract regular object files from universal binaries, and display architecture information about both universal and regular files.

+
+
+

COMMANDS

+

llvm-lipo supports the following mutually exclusive commands:

+
+
+-help, -h
+

Display usage information and exit.

+
+ +
+
+-version
+

Display the version of this program.

+
+ +
+
+-verify_arch  <architecture 1> [<architecture 2> ...]
+

Take a single input file and verify the specified architectures are present in the file. +If so then exit with a status of 0 else exit with a status of 1.

+
+ +
+
+-archs
+

Take a single input file and display the architectures present in the file. +Each architecture is separated by a single whitespace. +Unknown architectures are displayed as unknown(CPUtype,CPUsubtype).

+
+ +
+
+-info
+

Take at least one input file and display the descriptions of each file. +The descriptions include the filename and architecture types separated by whitespace. +Universal binaries are grouped together first, followed by thin files. +Architectures in the fat file: <filename> are: <architectures> +Non-fat file: <filename> is architecture: <architecture>

+
+ +
+
+-thin
+

Take a single universal binary input file and the thin flag followed by an architecture type. +Require the output flag to be specified, and output a thin binary of the specified architecture.

+
+ +
+
+-create
+

Take at least one input file and require the output flag to be specified. +Output a universal binary combining the input files.

+
+ +
+
+-replace
+

Take a single universal binary input file and require the output flag to be specified. +The replace flag is followed by an architecture type, and a thin input file. +Output a universal binary with the specified architecture slice in the +universal binary input replaced with the contents of the thin input file.

+
+ +
+
+-segalign
+

Additional flag that can be specified with create and replace. +The segalign flag is followed by an architecture type, and an alignment. +The alignment is a hexadecimal number that is a power of 2. +Output a file in which the slice with the specified architecture has the specified alignment.

+
+ +
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-locstats.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-locstats.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-locstats.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-locstats.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,253 @@ + + + + + + + + + llvm-locstats - calculate statistics on DWARF debug location — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-locstats - calculate statistics on DWARF debug location

+
+

SYNOPSIS

+

llvm-locstats [options] [filename]

+
+
+

DESCRIPTION

+

llvm-locstats works like a wrapper around llvm-dwarfdump. +It parses the llvm-dwarfdump statistics regarding debug locations and +pretty-prints them in a more human-readable way.

+

The line 0% shows the number and the percentage of DIEs with no location +information, while the line 100% shows the information for DIEs where there is +location information in all code section bytes (where the variable or parameter +is in scope). The line [50%,60%) shows the number and the percentage of DIEs +whose location information covers between 50 and 60 percent of their +scope.

+
+
+

OPTIONS

+
+
+--only-variables
+

calculate the location statistics only for local variables

+
+ +
+
+--only-formal-parameters
+

calculate the location statistics only for formal parameters

+
+ +
+
+--ignore-debug-entry-values
+

ignore the location statistics on locations containing the +debug entry values DWARF operation

+
+ +
+
+--draw-plot
+

make histogram of location buckets generated (requires +matplotlib)

+
+ +
+
+--compare
+

compare the debug location coverage on two files provided, and draw +a plot showing the difference (requires matplotlib)

+
+ +
+
+

EXIT STATUS

+

llvm-locstats returns 0 if the input file was parsed +successfully. Otherwise, it returns 1.

+
+
+

EXAMPLE 1

+

Pretty print the location coverage on the standard output.

+
llvm-locstats a.out
+
+  =================================================
+            Debug Location Statistics
+  =================================================
+        cov%          samples       percentage(~)
+  -------------------------------------------------
+     0%                    1              16%
+     (0%,10%)              0               0%
+     [10%,20%)             0               0%
+     [20%,30%)             0               0%
+     [30%,40%)             0               0%
+     [40%,50%)             0               0%
+     [50%,60%)             1              16%
+     [60%,70%)             0               0%
+     [70%,80%)             0               0%
+     [80%,90%)             1              16%
+     [90%,100%)            0               0%
+     100%                  3              50%
+  =================================================
+  -the number of debug variables processed: 6
+  -PC ranges covered: 81%
+  -------------------------------------------------
+  -total availability: 83%
+  =================================================
+
+
+
+
+

EXAMPLE 2

+

Generate a plot as an image file.

+
llvm-locstats --draw-plot file1.out
+
+
+../_images/locstats-draw-plot.png +
+
+

EXAMPLE 3

+

Generate a plot as an image file showing the difference in the debug location +coverage.

+
llvm-locstats --compare file1.out file1.withentryvals.out
+
+
+../_images/locstats-compare.png +
+
+

SEE ALSO

+

llvm-dwarfdump(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-mca.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-mca.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-mca.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-mca.html 2021-09-19 16:16:33.000000000 +0000 @@ -0,0 +1,1136 @@ + + + + + + + + + llvm-mca - LLVM Machine Code Analyzer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-mca - LLVM Machine Code Analyzer

+
+

SYNOPSIS

+

llvm-mca [options] [input]

+
+
+

DESCRIPTION

+

llvm-mca is a performance analysis tool that uses information +available in LLVM (e.g. scheduling models) to statically measure the performance +of machine code on a specific CPU.

+

Performance is measured in terms of throughput as well as processor resource +consumption. The tool currently works for processors with a backend for which +there is a scheduling model available in LLVM.

+

The main goal of this tool is not just to predict the performance of the code +when run on the target, but also to help with diagnosing potential performance +issues.

+

Given an assembly code sequence, llvm-mca estimates the Instructions +Per Cycle (IPC), as well as hardware resource pressure. The analysis and +reporting style were inspired by the IACA tool from Intel.

+

For example, you can compile code with clang, output assembly, and pipe it +directly into llvm-mca for analysis:

+
$ clang foo.c -O2 -target x86_64-unknown-unknown -S -o - | llvm-mca -mcpu=btver2
+
+
+

Or for Intel syntax:

+
$ clang foo.c -O2 -target x86_64-unknown-unknown -mllvm -x86-asm-syntax=intel -S -o - | llvm-mca -mcpu=btver2
+
+
+

(llvm-mca detects Intel syntax by the presence of an .intel_syntax +directive at the beginning of the input. By default its output syntax matches +that of its input.)

+

Scheduling models are not just used to compute instruction latencies and +throughput, but also to understand what processor resources are available +and how to simulate them.

+

By design, the quality of the analysis conducted by llvm-mca is +inevitably affected by the quality of the scheduling models in LLVM.

+

If you see that the performance report is not accurate for a processor, +please file a bug +against the appropriate backend.

+
+
+

OPTIONS

+

If input is “-” or omitted, llvm-mca reads from standard +input. Otherwise, it will read from the specified filename.

+

If the -o option is omitted, then llvm-mca will send its output +to standard output if the input is from standard input. If the -o +option specifies “-”, then the output will also be sent to standard output.

+
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-o <filename>
+

Use <filename> as the output filename. See the summary above for more +details.

+
+ +
+
+-mtriple=<target triple>
+

Specify a target triple string.

+
+ +
+
+-march=<arch>
+

Specify the architecture for which to analyze the code. It defaults to the +host default target.

+
+ +
+
+-mcpu=<cpuname>
+

Specify the processor for which to analyze the code. By default, the cpu name +is autodetected from the host.

+
+ +
+
+-output-asm-variant=<variant id>
+

Specify the output assembly variant for the report generated by the tool. +On x86, possible values are [0, 1]. A value of 0 (resp. 1) for this flag enables +the AT&T (resp. Intel) assembly format for the code printed out by the tool in +the analysis report.

+
+ +
+
+-print-imm-hex
+

Prefer hex format for numeric literals in the output assembly printed as part +of the report.

+
+ +
+
+-dispatch=<width>
+

Specify a different dispatch width for the processor. The dispatch width +defaults to field ‘IssueWidth’ in the processor scheduling model. If width is +zero, then the default dispatch width is used.

+
+ +
+
+-register-file-size=<size>
+

Specify the size of the register file. When specified, this flag limits how +many physical registers are available for register renaming purposes. A value +of zero for this flag means “unlimited number of physical registers”.

+
+ +
+
+-iterations=<number of iterations>
+

Specify the number of iterations to run. If this flag is set to 0, then the +tool sets the number of iterations to a default value (i.e. 100).

+
+ +
+
+-noalias=<bool>
+

If set, the tool assumes that loads and stores don’t alias. This is the +default behavior.

+
+ +
+
+-lqueue=<load queue size>
+

Specify the size of the load queue in the load/store unit emulated by the tool. +By default, the tool assumes an unbounded number of entries in the load queue. +A value of zero for this flag is ignored, and the default load queue size is +used instead.

+
+ +
+
+-squeue=<store queue size>
+

Specify the size of the store queue in the load/store unit emulated by the +tool. By default, the tool assumes an unbounded number of entries in the store +queue. A value of zero for this flag is ignored, and the default store queue +size is used instead.

+
+ +
+
+-timeline
+

Enable the timeline view.

+
+ +
+
+-timeline-max-iterations=<iterations>
+

Limit the number of iterations to print in the timeline view. By default, the +timeline view prints information for up to 10 iterations.

+
+ +
+
+-timeline-max-cycles=<cycles>
+

Limit the number of cycles in the timeline view, or use 0 for no limit. By +default, the number of cycles is set to 80.

+
+ +
+
+-resource-pressure
+

Enable the resource pressure view. This is enabled by default.

+
+ +
+
+-register-file-stats
+

Enable register file usage statistics.

+
+ +
+
+-dispatch-stats
+

Enable extra dispatch statistics. This view collects and analyzes instruction +dispatch events, as well as static/dynamic dispatch stall events. This view +is disabled by default.

+
+ +
+
+-scheduler-stats
+

Enable extra scheduler statistics. This view collects and analyzes instruction +issue events. This view is disabled by default.

+
+ +
+
+-retire-stats
+

Enable extra retire control unit statistics. This view is disabled by default.

+
+ +
+
+-instruction-info
+

Enable the instruction info view. This is enabled by default.

+
+ +
+
+-show-encoding
+

Enable the printing of instruction encodings within the instruction info view.

+
+ +
+
+-all-stats
+

Print all hardware statistics. This enables extra statistics related to the +dispatch logic, the hardware schedulers, the register file(s), and the retire +control unit. This option is disabled by default.

+
+ +
+
+-all-views
+

Enable all the views.

+
+ +
+
+-instruction-tables
+

Prints resource pressure information based on the static information +available from the processor model. This differs from the resource pressure +view because it doesn’t require that the code is simulated. It instead prints +the theoretical uniform distribution of resource pressure for every +instruction in sequence.

+
+ +
+
+-bottleneck-analysis
+

Print information about bottlenecks that affect the throughput. This analysis +can be expensive, and it is disabled by default. Bottlenecks are highlighted +in the summary view. Bottleneck analysis is currently not supported for +processors with an in-order backend.

+
+ +
+
+-json
+

Print the requested views in valid JSON format. The instructions and the +processor resources are printed as members of special top level JSON objects. +The individual views refer to them by index. However, not all views are +currently supported. For example, the report from the bottleneck analysis is +not printed out in JSON. All the default views are currently supported.

+
+ +
+
+-disable-cb
+

Force usage of the generic CustomBehaviour and InstrPostProcess classes rather +than using the target specific implementation. The generic classes never +detect any custom hazards or make any post processing modifications to +instructions.

+
+ +
+
+

EXIT STATUS

+

llvm-mca returns 0 on success. Otherwise, an error message is printed +to standard error, and the tool returns 1.

+
+
+

USING MARKERS TO ANALYZE SPECIFIC CODE BLOCKS

+

llvm-mca allows for the optional usage of special code comments to +mark regions of the assembly code to be analyzed. A comment starting with +substring LLVM-MCA-BEGIN marks the beginning of a code region. A comment +starting with substring LLVM-MCA-END marks the end of a code region. For +example:

+
# LLVM-MCA-BEGIN
+  ...
+# LLVM-MCA-END
+
+
+

If no user-defined region is specified, then llvm-mca assumes a +default region which contains every instruction in the input file. Every region +is analyzed in isolation, and the final performance report is the union of all +the reports generated for every code region.

+

Code regions can have names. For example:

+
# LLVM-MCA-BEGIN A simple example
+  add %eax, %eax
+# LLVM-MCA-END
+
+
+

The code from the example above defines a region named “A simple example” with a +single instruction in it. Note how the region name doesn’t have to be repeated +in the LLVM-MCA-END directive. In the absence of overlapping regions, +an anonymous LLVM-MCA-END directive always ends the currently active user +defined region.

+

Example of nesting regions:

+
# LLVM-MCA-BEGIN foo
+  add %eax, %edx
+# LLVM-MCA-BEGIN bar
+  sub %eax, %edx
+# LLVM-MCA-END bar
+# LLVM-MCA-END foo
+
+
+

Example of overlapping regions:

+
# LLVM-MCA-BEGIN foo
+  add %eax, %edx
+# LLVM-MCA-BEGIN bar
+  sub %eax, %edx
+# LLVM-MCA-END foo
+  add %eax, %edx
+# LLVM-MCA-END bar
+
+
+

Note that multiple anonymous regions cannot overlap. Also, overlapping regions +cannot have the same name.

+

There is no support for marking regions from high-level source code, like C or +C++. As a workaround, inline assembly directives may be used:

+
int foo(int a, int b) {
+  __asm volatile("# LLVM-MCA-BEGIN foo");
+  a += 42;
+  __asm volatile("# LLVM-MCA-END");
+  a *= b;
+  return a;
+}
+
+
+

However, this interferes with optimizations like loop vectorization and may have +an impact on the code generated. This is because the __asm statements are +seen as real code having important side effects, which limits how the code +around them can be transformed. If users want to make use of inline assembly +to emit markers, then the recommendation is to always verify that the output +assembly is equivalent to the assembly generated in the absence of markers. +The Clang options to emit optimization reports +can also help in detecting missed optimizations.

+
+
+

HOW LLVM-MCA WORKS

+

llvm-mca takes assembly code as input. The assembly code is parsed +into a sequence of MCInst with the help of the existing LLVM target assembly +parsers. The parsed sequence of MCInst is then analyzed by a Pipeline module +to generate a performance report.

+

The Pipeline module simulates the execution of the machine code sequence in a +loop of iterations (default is 100). During this process, the pipeline collects +a number of execution related statistics. At the end of this process, the +pipeline generates and prints a report from the collected statistics.

+

Here is an example of a performance report generated by the tool for a +dot-product of two packed float vectors of four elements. The analysis is +conducted for target x86, cpu btver2. The following result can be produced via +the following command using the example located at +test/tools/llvm-mca/X86/BtVer2/dot-product.s:

+
$ llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=300 dot-product.s
+
+
+
Iterations:        300
+Instructions:      900
+Total Cycles:      610
+Total uOps:        900
+
+Dispatch Width:    2
+uOps Per Cycle:    1.48
+IPC:               1.48
+Block RThroughput: 2.0
+
+
+Instruction Info:
+[1]: #uOps
+[2]: Latency
+[3]: RThroughput
+[4]: MayLoad
+[5]: MayStore
+[6]: HasSideEffects (U)
+
+[1]    [2]    [3]    [4]    [5]    [6]    Instructions:
+ 1      2     1.00                        vmulps      %xmm0, %xmm1, %xmm2
+ 1      3     1.00                        vhaddps     %xmm2, %xmm2, %xmm3
+ 1      3     1.00                        vhaddps     %xmm3, %xmm3, %xmm4
+
+
+Resources:
+[0]   - JALU0
+[1]   - JALU1
+[2]   - JDiv
+[3]   - JFPA
+[4]   - JFPM
+[5]   - JFPU0
+[6]   - JFPU1
+[7]   - JLAGU
+[8]   - JMul
+[9]   - JSAGU
+[10]  - JSTC
+[11]  - JVALU0
+[12]  - JVALU1
+[13]  - JVIMUL
+
+
+Resource pressure per iteration:
+[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]
+ -      -      -     2.00   1.00   2.00   1.00    -      -      -      -      -      -      -
+
+Resource pressure by instruction:
+[0]    [1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    [9]    [10]   [11]   [12]   [13]   Instructions:
+ -      -      -      -     1.00    -     1.00    -      -      -      -      -      -      -     vmulps      %xmm0, %xmm1, %xmm2
+ -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     vhaddps     %xmm2, %xmm2, %xmm3
+ -      -      -     1.00    -     1.00    -      -      -      -      -      -      -      -     vhaddps     %xmm3, %xmm3, %xmm4
+
+
+

According to this report, the dot-product kernel has been executed 300 times, +for a total of 900 simulated instructions. The total number of simulated micro +opcodes (uOps) is also 900.

+

The report is structured in three main sections. The first section collects a +few performance numbers; the goal of this section is to give a very quick +overview of the performance throughput. Important performance indicators are +IPC, uOps Per Cycle, and Block RThroughput (Block Reciprocal +Throughput).

+

Field DispatchWidth is the maximum number of micro opcodes that are dispatched +to the out-of-order backend every simulated cycle. For processors with an +in-order backend, DispatchWidth is the maximum number of micro opcodes issued +to the backend every simulated cycle.

+

IPC is computed by dividing the total number of simulated instructions by the total +number of cycles.

+

Field Block RThroughput is the reciprocal of the block throughput. Block +throughput is a theoretical quantity computed as the maximum number of blocks +(i.e. iterations) that can be executed per simulated clock cycle in the absence +of loop-carried dependencies. Block throughput is bounded from above by the +dispatch rate and the availability of hardware resources.

+

In the absence of loop-carried data dependencies, the observed IPC tends to a +theoretical maximum which can be computed by dividing the number of instructions +of a single iteration by the Block RThroughput.

+

Field ‘uOps Per Cycle’ is computed by dividing the total number of simulated micro +opcodes by the total number of cycles. A delta between Dispatch Width and this +field is an indicator of a performance issue. In the absence of loop-carried +data dependencies, the observed ‘uOps Per Cycle’ should tend to a theoretical +maximum throughput which can be computed by dividing the number of uOps of a +single iteration by the Block RThroughput.

+

Field uOps Per Cycle is bounded from above by the dispatch width. That is +because the dispatch width limits the maximum size of a dispatch group. Both IPC +and ‘uOps Per Cycle’ are limited by the amount of hardware parallelism. The +availability of hardware resources affects the resource pressure distribution, +and it limits the number of instructions that can be executed in parallel every +cycle. A delta between Dispatch Width and the theoretical maximum uOps per +Cycle (computed by dividing the number of uOps of a single iteration by the +Block RThroughput) is an indicator of a performance bottleneck caused by the +lack of hardware resources. +In general, the lower the Block RThroughput, the better.

+

In this example, uOps per iteration/Block RThroughput is 1.50. Since there +are no loop-carried dependencies, the observed uOps Per Cycle is expected to +approach 1.50 when the number of iterations tends to infinity. The delta between +the Dispatch Width (2.00), and the theoretical maximum uOp throughput (1.50) is +an indicator of a performance bottleneck caused by the lack of hardware +resources, and the Resource pressure view can help to identify the problematic +resource usage.

+

The second section of the report is the instruction info view. It shows the +latency and reciprocal throughput of every instruction in the sequence. It also +reports extra information related to the number of micro opcodes, and opcode +properties (i.e., ‘MayLoad’, ‘MayStore’, and ‘HasSideEffects’).

+

Field RThroughput is the reciprocal of the instruction throughput. Throughput +is computed as the maximum number of instructions of the same type that can be +executed per clock cycle in the absence of operand dependencies. In this +example, the reciprocal throughput of a vector float multiply is 1 +cycle/instruction. That is because the FP multiplier JFPM is only available +from pipeline JFPU1.

+

Instruction encodings are displayed within the instruction info view when flag +-show-encoding is specified.

+

Below is an example of -show-encoding output for the dot-product kernel:

+
Instruction Info:
+[1]: #uOps
+[2]: Latency
+[3]: RThroughput
+[4]: MayLoad
+[5]: MayStore
+[6]: HasSideEffects (U)
+[7]: Encoding Size
+
+[1]    [2]    [3]    [4]    [5]    [6]    [7]    Encodings:                    Instructions:
+ 1      2     1.00                         4     c5 f0 59 d0                   vmulps %xmm0, %xmm1, %xmm2
+ 1      4     1.00                         4     c5 eb 7c da                   vhaddps        %xmm2, %xmm2, %xmm3
+ 1      4     1.00                         4     c5 e3 7c e3                   vhaddps        %xmm3, %xmm3, %xmm4
+
+
+

The Encoding Size column shows the size in bytes of instructions. The +Encodings column shows the actual instruction encodings (byte sequences in +hex).

+

The third section is the Resource pressure view. This view reports +the average number of resource cycles consumed every iteration by instructions +for every processor resource unit available on the target. Information is +structured in two tables. The first table reports the number of resource cycles +spent on average every iteration. The second table correlates the resource +cycles to the machine instruction in the sequence. For example, every iteration +of the instruction vmulps always executes on resource unit [6] +(JFPU1 - floating point pipeline #1), consuming an average of 1 resource cycle +per iteration. Note that on AMD Jaguar, vector floating-point multiply can +only be issued to pipeline JFPU1, while horizontal floating-point additions can +only be issued to pipeline JFPU0.

+

The resource pressure view helps with identifying bottlenecks caused by high +usage of specific hardware resources. Situations with resource pressure mainly +concentrated on a few resources should, in general, be avoided. Ideally, +pressure should be uniformly distributed between multiple resources.

+
+

Timeline View

+

The timeline view produces a detailed report of each instruction’s state +transitions through an instruction pipeline. This view is enabled by the +command line option -timeline. As instructions transition through the +various stages of the pipeline, their states are depicted in the view report. +These states are represented by the following characters:

+
    +
  • D : Instruction dispatched.

  • +
  • e : Instruction executing.

  • +
  • E : Instruction executed.

  • +
  • R : Instruction retired.

  • +
  • = : Instruction already dispatched, waiting to be executed.

  • +
  • - : Instruction executed, waiting to be retired.

  • +
+

Below is the timeline view for a subset of the dot-product example located in +test/tools/llvm-mca/X86/BtVer2/dot-product.s and processed by +llvm-mca using the following command:

+
$ llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=3 -timeline dot-product.s
+
+
+
Timeline view:
+                    012345
+Index     0123456789
+
+[0,0]     DeeER.    .    .   vmulps   %xmm0, %xmm1, %xmm2
+[0,1]     D==eeeER  .    .   vhaddps  %xmm2, %xmm2, %xmm3
+[0,2]     .D====eeeER    .   vhaddps  %xmm3, %xmm3, %xmm4
+[1,0]     .DeeE-----R    .   vmulps   %xmm0, %xmm1, %xmm2
+[1,1]     . D=eeeE---R   .   vhaddps  %xmm2, %xmm2, %xmm3
+[1,2]     . D====eeeER   .   vhaddps  %xmm3, %xmm3, %xmm4
+[2,0]     .  DeeE-----R  .   vmulps   %xmm0, %xmm1, %xmm2
+[2,1]     .  D====eeeER  .   vhaddps  %xmm2, %xmm2, %xmm3
+[2,2]     .   D======eeeER   vhaddps  %xmm3, %xmm3, %xmm4
+
+
+Average Wait times (based on the timeline view):
+[0]: Executions
+[1]: Average time spent waiting in a scheduler's queue
+[2]: Average time spent waiting in a scheduler's queue while ready
+[3]: Average time elapsed from WB until retire stage
+
+      [0]    [1]    [2]    [3]
+0.     3     1.0    1.0    3.3       vmulps   %xmm0, %xmm1, %xmm2
+1.     3     3.3    0.7    1.0       vhaddps  %xmm2, %xmm2, %xmm3
+2.     3     5.7    0.0    0.0       vhaddps  %xmm3, %xmm3, %xmm4
+       3     3.3    0.5    1.4       <total>
+
+
+

The timeline view is interesting because it shows instruction state changes +during execution. It also gives an idea of how the tool processes instructions +executed on the target, and how their timing information might be calculated.

+

The timeline view is structured in two tables. The first table shows +instructions changing state over time (measured in cycles); the second table +(named Average Wait times) reports useful timing statistics, which should +help diagnose performance bottlenecks caused by long data dependencies and +sub-optimal usage of hardware resources.

+

An instruction in the timeline view is identified by a pair of indices, where +the first index identifies an iteration, and the second index is the +instruction index (i.e., where it appears in the code sequence). Since this +example was generated using 3 iterations (-iterations=3), the iteration +indices range from 0 to 2 inclusive.

+

Excluding the first and last column, the remaining columns are in cycles. +Cycles are numbered sequentially starting from 0.

+

From the example output above, we know the following:

+
    +
  • Instruction [1,0] was dispatched at cycle 1.

  • +
  • Instruction [1,0] started executing at cycle 2.

  • +
  • Instruction [1,0] reached the write back stage at cycle 4.

  • +
  • Instruction [1,0] was retired at cycle 10.

  • +
+

Instruction [1,0] (i.e., vmulps from iteration #1) does not have to wait in the +scheduler’s queue for the operands to become available. By the time vmulps is +dispatched, operands are already available, and pipeline JFPU1 is ready to +serve another instruction. So the instruction can be immediately issued on the +JFPU1 pipeline. That is demonstrated by the fact that the instruction only +spent 1cy in the scheduler’s queue.

+

There is a gap of 5 cycles between the write-back stage and the retire event. +That is because instructions must retire in program order, so [1,0] has to wait +for [0,2] to be retired first (i.e., it has to wait until cycle 10).

+

In the example, all instructions are in a RAW (Read After Write) dependency +chain. Register %xmm2 written by vmulps is immediately used by the first +vhaddps, and register %xmm3 written by the first vhaddps is used by the second +vhaddps. Long data dependencies negatively impact the ILP (Instruction Level +Parallelism).

+

In the dot-product example, there are anti-dependencies introduced by +instructions from different iterations. However, those dependencies can be +removed at register renaming stage (at the cost of allocating register aliases, +and therefore consuming physical registers).

+

Table Average Wait times helps diagnose performance issues that are caused by +the presence of long latency instructions and potentially long data dependencies +which may limit the ILP. Last row, <total>, shows a global average over all +instructions measured. Note that llvm-mca, by default, assumes at +least 1cy between the dispatch event and the issue event.

+

When the performance is limited by data dependencies and/or long latency +instructions, the number of cycles spent while in the ready state is expected +to be very small when compared with the total number of cycles spent in the +scheduler’s queue. The difference between the two counters is a good indicator +of how large of an impact data dependencies had on the execution of the +instructions. When performance is mostly limited by the lack of hardware +resources, the delta between the two counters is small. However, the number of +cycles spent in the queue tends to be larger (i.e., more than 1-3cy), +especially when compared to other low latency instructions.

+
+
+

Bottleneck Analysis

+

The -bottleneck-analysis command line option enables the analysis of +performance bottlenecks.

+

This analysis is potentially expensive. It attempts to correlate increases in +backend pressure (caused by pipeline resource pressure and data dependencies) to +dynamic dispatch stalls.

+

Below is an example of -bottleneck-analysis output generated by +llvm-mca for 500 iterations of the dot-product example on btver2.

+
Cycles with backend pressure increase [ 48.07% ]
+Throughput Bottlenecks:
+  Resource Pressure       [ 47.77% ]
+  - JFPA  [ 47.77% ]
+  - JFPU0  [ 47.77% ]
+  Data Dependencies:      [ 0.30% ]
+  - Register Dependencies [ 0.30% ]
+  - Memory Dependencies   [ 0.00% ]
+
+Critical sequence based on the simulation:
+
+              Instruction                         Dependency Information
+ +----< 2.    vhaddps %xmm3, %xmm3, %xmm4
+ |
+ |    < loop carried >
+ |
+ |      0.    vmulps  %xmm0, %xmm1, %xmm2
+ +----> 1.    vhaddps %xmm2, %xmm2, %xmm3         ## RESOURCE interference:  JFPA [ probability: 74% ]
+ +----> 2.    vhaddps %xmm3, %xmm3, %xmm4         ## REGISTER dependency:  %xmm3
+ |
+ |    < loop carried >
+ |
+ +----> 1.    vhaddps %xmm2, %xmm2, %xmm3         ## RESOURCE interference:  JFPA [ probability: 74% ]
+
+
+

According to the analysis, throughput is limited by resource pressure and not by +data dependencies. The analysis observed increases in backend pressure during +48.07% of the simulated run. Almost all those pressure increase events were +caused by contention on processor resources JFPA/JFPU0.

+

The critical sequence is the most expensive sequence of instructions according +to the simulation. It is annotated to provide extra information about critical +register dependencies and resource interferences between instructions.

+

Instructions from the critical sequence are expected to significantly impact +performance. By construction, the accuracy of this analysis is strongly +dependent on the simulation and (as always) on the quality of the processor +model in LLVM.

+

Bottleneck analysis is currently not supported for processors with an in-order +backend.

+
+
+

Extra Statistics to Further Diagnose Performance Issues

+

The -all-stats command line option enables extra statistics and performance +counters for the dispatch logic, the reorder buffer, the retire control unit, +and the register file.

+

Below is an example of -all-stats output generated by llvm-mca +for 300 iterations of the dot-product example discussed in the previous +sections.

+
Dynamic Dispatch Stall Cycles:
+RAT     - Register unavailable:                      0
+RCU     - Retire tokens unavailable:                 0
+SCHEDQ  - Scheduler full:                            272  (44.6%)
+LQ      - Load queue full:                           0
+SQ      - Store queue full:                          0
+GROUP   - Static restrictions on the dispatch group: 0
+
+
+Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
+[# dispatched], [# cycles]
+ 0,              24  (3.9%)
+ 1,              272  (44.6%)
+ 2,              314  (51.5%)
+
+
+Schedulers - number of cycles where we saw N micro opcodes issued:
+[# issued], [# cycles]
+ 0,          7  (1.1%)
+ 1,          306  (50.2%)
+ 2,          297  (48.7%)
+
+Scheduler's queue usage:
+[1] Resource name.
+[2] Average number of used buffer entries.
+[3] Maximum number of used buffer entries.
+[4] Total number of buffer entries.
+
+ [1]            [2]        [3]        [4]
+JALU01           0          0          20
+JFPU01           17         18         18
+JLSAGU           0          0          12
+
+
+Retire Control Unit - number of cycles where we saw N instructions retired:
+[# retired], [# cycles]
+ 0,           109  (17.9%)
+ 1,           102  (16.7%)
+ 2,           399  (65.4%)
+
+Total ROB Entries:                64
+Max Used ROB Entries:             35  ( 54.7% )
+Average Used ROB Entries per cy:  32  ( 50.0% )
+
+
+Register File statistics:
+Total number of mappings created:    900
+Max number of mappings used:         35
+
+*  Register File #1 -- JFpuPRF:
+   Number of physical registers:     72
+   Total number of mappings created: 900
+   Max number of mappings used:      35
+
+*  Register File #2 -- JIntegerPRF:
+   Number of physical registers:     64
+   Total number of mappings created: 0
+   Max number of mappings used:      0
+
+
+

If we look at the Dynamic Dispatch Stall Cycles table, we see the counter for +SCHEDQ reports 272 cycles. This counter is incremented every time the dispatch +logic is unable to dispatch a full group because the scheduler’s queue is full.

+

Looking at the Dispatch Logic table, we see that the pipeline was only able to +dispatch two micro opcodes 51.5% of the time. The dispatch group was limited to +one micro opcode 44.6% of the cycles, which corresponds to 272 cycles. The +dispatch statistics are displayed by either using the command option +-all-stats or -dispatch-stats.

+

The next table, Schedulers, presents a histogram displaying a count, +representing the number of micro opcodes issued on some number of cycles. In +this case, of the 610 simulated cycles, single opcodes were issued 306 times +(50.2%) and there were 7 cycles where no opcodes were issued.

+

The Scheduler’s queue usage table shows the average and maximum number of +buffer entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01 +reached its maximum (18 of 18 queue entries). Note that AMD Jaguar implements +three schedulers:

+
    +
  • JALU01 - A scheduler for ALU instructions.

  • +
  • JFPU01 - A scheduler for floating point operations.

  • +
  • JLSAGU - A scheduler for address generation.

  • +
+

The dot-product is a kernel of three floating point instructions (a vector +multiply followed by two horizontal adds). That explains why only the floating +point scheduler appears to be used.

+

A full scheduler queue is either caused by data dependency chains or by a +sub-optimal usage of hardware resources. Sometimes, resource pressure can be +mitigated by rewriting the kernel using different instructions that consume +different scheduler resources. Schedulers with a small queue are less resilient +to bottlenecks caused by the presence of long data dependencies. The scheduler +statistics are displayed by using the command option -all-stats or +-scheduler-stats.

+

The next table, Retire Control Unit, presents a histogram displaying a count, +representing the number of instructions retired on some number of cycles. In +this case, of the 610 simulated cycles, two instructions were retired during the +same cycle 399 times (65.4%) and there were 109 cycles where no instructions +were retired. The retire statistics are displayed by using the command option +-all-stats or -retire-stats.

+

The last table presented is Register File statistics. Each physical register +file (PRF) used by the pipeline is presented in this table. In the case of AMD +Jaguar, there are two register files, one for floating-point registers (JFpuPRF) +and one for integer registers (JIntegerPRF). The table shows that of the 900 +instructions processed, there were 900 mappings created. Since this dot-product +example utilized only floating point registers, the JFpuPRF was responsible for +creating the 900 mappings. However, we see that the pipeline only used a +maximum of 35 of 72 available register slots at any given time. We can conclude +that the floating point PRF was the only register file used for the example, and +that it was never resource constrained. The register file statistics are +displayed by using the command option -all-stats or +-register-file-stats.

+

In this example, we can conclude that the IPC is mostly limited by data +dependencies, and not by resource pressure.

+
+
+

Instruction Flow

+

This section describes the instruction flow through the default pipeline of +llvm-mca, as well as the functional units involved in the process.

+

The default pipeline implements the following sequence of stages used to +process instructions.

+
    +
  • Dispatch (Instruction is dispatched to the schedulers).

  • +
  • Issue (Instruction is issued to the processor pipelines).

  • +
  • Write Back (Instruction is executed, and results are written back).

  • +
  • Retire (Instruction is retired; writes are architecturally committed).

  • +
+

The in-order pipeline implements the following sequence of stages:

  • InOrderIssue (Instruction is issued to the processor pipelines).

  • Retire (Instruction is retired; writes are architecturally committed).

+

llvm-mca assumes that instructions have all been decoded and placed +into a queue before the simulation starts. Therefore, the instruction fetch and +decode stages are not modeled. Performance bottlenecks in the frontend are not +diagnosed. Also, llvm-mca does not model branch prediction.

+
+

Instruction Dispatch

+

During the dispatch stage, instructions are picked in program order from a +queue of already decoded instructions, and dispatched in groups to the +simulated hardware schedulers.

+

The size of a dispatch group depends on the availability of the simulated +hardware resources. The processor dispatch width defaults to the value +of the IssueWidth in LLVM’s scheduling model.

+

An instruction can be dispatched if:

+
    +
  • The size of the dispatch group is smaller than the processor’s dispatch width.

  • +
  • There are enough entries in the reorder buffer.

  • +
  • There are enough physical registers to do register renaming.

  • +
  • The schedulers are not full.

  • +
+

Scheduling models can optionally specify which register files are available on +the processor. llvm-mca uses that information to initialize register +file descriptors. Users can limit the number of physical registers that are +globally available for register renaming by using the command option +-register-file-size. A value of zero for this option means unbounded. By +knowing how many registers are available for renaming, the tool can predict +dispatch stalls caused by the lack of physical registers.

+

The number of reorder buffer entries consumed by an instruction depends on the +number of micro-opcodes specified for that instruction by the target scheduling +model. The reorder buffer is responsible for tracking the progress of +instructions that are “in-flight”, and retiring them in program order. The +number of entries in the reorder buffer defaults to the value specified by field +MicroOpBufferSize in the target scheduling model.

+

Instructions that are dispatched to the schedulers consume scheduler buffer +entries. llvm-mca queries the scheduling model to determine the set +of buffered resources consumed by an instruction. Buffered resources are +treated like scheduler resources.

+
+
+

Instruction Issue

+

Each processor scheduler implements a buffer of instructions. An instruction +has to wait in the scheduler’s buffer until input register operands become +available. Only at that point does the instruction become eligible for +execution and may be issued (potentially out-of-order) for execution. +Instruction latencies are computed by llvm-mca with the help of the +scheduling model.

+

llvm-mca’s scheduler is designed to simulate multiple processor +schedulers. The scheduler is responsible for tracking data dependencies, and +dynamically selecting which processor resources are consumed by instructions. +It delegates the management of processor resource units and resource groups to a +resource manager. The resource manager is responsible for selecting resource +units that are consumed by instructions. For example, if an instruction +consumes 1cy of a resource group, the resource manager selects one of the +available units from the group; by default, the resource manager uses a +round-robin selector to guarantee that resource usage is uniformly distributed +between all units of a group.

+

llvm-mca’s scheduler internally groups instructions into three sets:

+
    +
  • WaitSet: a set of instructions whose operands are not ready.

  • +
  • ReadySet: a set of instructions ready to execute.

  • +
  • IssuedSet: a set of instructions executing.

  • +
+

Depending on operand availability, instructions that are dispatched to the +scheduler are either placed into the WaitSet or into the ReadySet.

+

Every cycle, the scheduler checks if instructions can be moved from the WaitSet +to the ReadySet, and if instructions from the ReadySet can be issued to the +underlying pipelines. The algorithm prioritizes older instructions over younger +instructions.

+
+
+

Write-Back and Retire Stage

+

Issued instructions are moved from the ReadySet to the IssuedSet. There, +instructions wait until they reach the write-back stage. At that point, they +get removed from the queue and the retire control unit is notified.

+

When instructions are executed, the retire control unit flags the instruction as +“ready to retire.”

+

Instructions are retired in program order. The register file is notified of the +retirement so that it can free the physical registers that were allocated for +the instruction during the register renaming stage.

+
+
+

Load/Store Unit and Memory Consistency Model

+

To simulate an out-of-order execution of memory operations, llvm-mca +utilizes a simulated load/store unit (LSUnit) to simulate the speculative +execution of loads and stores.

+

Each load (or store) consumes an entry in the load (or store) queue. Users can +specify flags -lqueue and -squeue to limit the number of entries in the +load and store queues respectively. The queues are unbounded by default.

+

The LSUnit implements a relaxed consistency model for memory loads and stores. +The rules are:

+
    +
  1. A younger load is allowed to pass an older load only if there are no +intervening stores or barriers between the two loads.

  2. A younger load is allowed to pass an older store provided that the load does +not alias with the store.

  3. A younger store is not allowed to pass an older store.

  4. A younger store is not allowed to pass an older load.

+

By default, the LSUnit optimistically assumes that loads do not alias with +store operations (-noalias=true). Under this assumption, younger loads are +always allowed to pass older stores. Essentially, the LSUnit does not attempt +to run any alias analysis to predict when loads and stores do not alias with +each other.

+

Note that, in the case of write-combining memory, rule 3 could be relaxed to +allow reordering of non-aliasing store operations. That being said, at the +moment, there is no way to further relax the memory model (-noalias is the +only option). Essentially, there is no option to specify a different memory +type (e.g., write-back, write-combining, write-through, etc.) and consequently +to weaken, or strengthen, the memory model.

+

Other limitations are:

+
    +
  • The LSUnit does not know when store-to-load forwarding may occur.

  • +
  • The LSUnit does not know anything about cache hierarchy and memory types.

  • +
  • The LSUnit does not know how to identify serializing operations and memory +fences.

  • +
+

The LSUnit does not attempt to predict if a load or store hits or misses the L1 +cache. It only knows if an instruction “MayLoad” and/or “MayStore.” For +loads, the scheduling model provides an “optimistic” load-to-use latency (which +usually matches the load-to-use latency for when there is a hit in the L1D).

+

llvm-mca does not know about serializing operations or memory-barrier +like instructions. The LSUnit conservatively assumes that an instruction which +has both “MayLoad” and unmodeled side effects behaves like a “soft” +load-barrier. That means, it serializes loads without forcing a flush of the +load queue. Similarly, instructions that “MayStore” and have unmodeled side +effects are treated like store barriers. A full memory barrier is a “MayLoad” +and “MayStore” instruction with unmodeled side effects. This is inaccurate, but +it is the best that we can do at the moment with the current information +available in LLVM.

+

A load/store barrier consumes one entry of the load/store queue. A load/store +barrier enforces ordering of loads/stores. A younger load cannot pass a load +barrier. Also, a younger store cannot pass a store barrier. A younger load +has to wait for the memory/load barrier to execute. A load/store barrier is +“executed” when it becomes the oldest entry in the load/store queue(s). That +also means, by construction, all of the older loads/stores have been executed.

+

In conclusion, the full set of load/store consistency rules is:

+
    +
  1. A store may not pass a previous store.

  2. A store may not pass a previous load (regardless of -noalias).

  3. A store has to wait until an older store barrier is fully executed.

  4. A load may pass a previous load.

  5. A load may not pass a previous store unless -noalias is set.

  6. A load has to wait until an older load barrier is fully executed.

+
+
+

In-order Issue and Execute

+

In-order processors are modelled as a single InOrderIssueStage stage. It +bypasses Dispatch, Scheduler and Load/Store unit. Instructions are issued as +soon as their operand registers are available and resource requirements are +met. Multiple instructions can be issued in one cycle according to the value of +the IssueWidth parameter in LLVM’s scheduling model.

+

Once issued, an instruction is moved to the IssuedInst set until it is ready to +retire. llvm-mca ensures that writes are committed in-order. However, +an instruction is allowed to commit writes and retire out-of-order if the +RetireOOO property is true for at least one of its writes.

+
+
+

Custom Behaviour

+

Due to certain instructions not being expressed perfectly within their +scheduling model, llvm-mca isn’t always able to simulate them +perfectly. Modifying the scheduling model isn’t always a viable +option though (maybe because the instruction is modeled incorrectly on +purpose or the instruction’s behaviour is quite complex). The +CustomBehaviour class can be used in these cases to enforce proper +instruction modeling (often by customizing data dependencies and detecting +hazards that llvm-mca has no way of knowing about).

+

llvm-mca comes with one generic and multiple target specific +CustomBehaviour classes. The generic class will be used if the -disable-cb +flag is used or if a target specific CustomBehaviour class doesn’t exist for +that target. (The generic class does nothing.) Currently, the CustomBehaviour +class is only a part of the in-order pipeline, but there are plans to add it +to the out-of-order pipeline in the future.

+

CustomBehaviour’s main method is checkCustomHazard() which uses the +current instruction and a list of all instructions still executing within +the pipeline to determine if the current instruction should be dispatched. +As output, the method returns an integer representing the number of cycles +that the current instruction must stall for (this can be an underestimate +if you don’t know the exact number and a value of 0 represents no stall).

+

If you’d like to add a CustomBehaviour class for a target that doesn’t +already have one, refer to an existing implementation to see how to set it +up. The classes are implemented within the target specific backend (for +example /llvm/lib/Target/AMDGPU/MCA/) so that they can access backend symbols.

+
+
+

Custom Views

+

llvm-mca comes with several Views such as the Timeline View and +Summary View. These Views are generic and can work with most (if not all) +targets. If you wish to add a new View to llvm-mca and it does not +require any backend functionality that is not already exposed through MC layer +classes (MCSubtargetInfo, MCInstrInfo, etc.), please add it to the +/tools/llvm-mca/View/ directory. However, if your new View is target specific +AND requires unexposed backend symbols or functionality, you can define it in +the /lib/Target/<TargetName>/MCA/ directory.

+

To enable this target specific View, you will have to use this target’s +CustomBehaviour class to override the CustomBehaviour::getViews() methods. +There are 3 variations of these methods based on where you want your View to +appear in the output: getStartViews(), getPostInstrInfoViews(), and +getEndViews(). These methods return a vector of Views, so you will want to +return a vector containing all of the target specific Views for the target in +question.

+

Because these target specific (and backend dependent) Views require the +CustomBehaviour::getViews() variants, these Views will not be enabled if +the -disable-cb flag is used.

+

Enabling these custom Views does not affect the non-custom (generic) Views. +Continue to use the usual command line arguments to enable / disable those +Views.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-nm.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-nm.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-nm.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-nm.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,485 @@ + + + + + + + + + llvm-nm - list LLVM bitcode and object file’s symbol table — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-nm - list LLVM bitcode and object file’s symbol table

+
+

SYNOPSIS

+

llvm-nm [options] [filenames…]

+
+
+

DESCRIPTION

+

The llvm-nm utility lists the names of symbols from LLVM bitcode +files, object files, and archives. Each symbol is listed along with some simple +information about its provenance. If no filename is specified, a.out is used +as the input. If - is used as a filename, llvm-nm will read a file +from its standard input stream.

+

llvm-nm’s default output format is the traditional BSD nm +output format. Each such output record consists of an (optional) 8-digit +hexadecimal address, followed by a type code character, followed by a name, for +each symbol. One record is printed per line; fields are separated by spaces. +When the address is omitted, it is replaced by 8 spaces.

+

The supported type code characters are as follows. Where both lower and +upper-case characters are listed for the same meaning, a lower-case character +represents a local symbol, whilst an upper-case character represents a global +(external) symbol:

+

a, A

+
+

Absolute symbol.

+
+

b, B

+
+

Uninitialized data (bss) object.

+
+

C

+
+

Common symbol. Multiple definitions link together into one definition.

+
+

d, D

+
+

Writable data object.

+
+

i, I

+
+

COFF: .idata symbol or symbol in a section with IMAGE_SCN_LNK_INFO set.

+
+

n

+
+

ELF: local symbol from non-alloc section.

+

COFF: debug symbol.

+
+

N

+
+

ELF: debug section symbol, or global symbol from non-alloc section.

+
+

s, S

+
+

COFF: section symbol.

+

Mach-O: absolute symbol or symbol from a section other than __TEXT_EXEC __text, +__TEXT __text, __DATA __data, or __DATA __bss.

+
+

r, R

+
+

Read-only data object.

+
+

t, T

+
+

Code (text) object.

+
+

u

+
+

ELF: GNU unique symbol.

+
+

U

+
+

Named object is undefined in this file.

+
+

v

+
+

ELF: Undefined weak object. It is not a link failure if the object is not +defined.

+
+

V

+
+

ELF: Defined weak object symbol. This definition will only be used if no +regular definitions exist in a link. If multiple weak definitions and no +regular definitions exist, one of the weak definitions will be used.

+
+

w

+
+

Undefined weak symbol other than an ELF object symbol. It is not a link failure +if the symbol is not defined.

+
+

W

+
+

Defined weak symbol other than an ELF object symbol. This definition will only +be used if no regular definitions exist in a link. If multiple weak definitions +and no regular definitions exist, one of the weak definitions will be used.

+
+

-

+
+

Mach-O: N_STAB symbol.

+
+

?

+
+

Something unrecognizable.

+
+

Because LLVM bitcode files typically contain objects that are not considered to +have addresses until they are linked into an executable image or dynamically +compiled “just-in-time”, llvm-nm does not print an address for any +symbol in an LLVM bitcode file, even symbols which are defined in the bitcode +file.

+
+
+

OPTIONS

+
+
+-B
+

Use BSD output format. Alias for --format=bsd.

+
+ +
+
+--debug-syms, -a
+

Show all symbols, even those usually suppressed.

+
+ +
+
+--defined-only
+

Print only symbols defined in this file.

+
+ +
+
+--demangle, -C
+

Demangle symbol names.

+
+ +
+
+--dynamic, -D
+

Display dynamic symbols instead of normal symbols.

+
+ +
+
+--extern-only, -g
+

Print only symbols whose definitions are external; that is, accessible from +other files.

+
+ +
+
+--format=<format>, -f
+

Select an output format; format may be sysv, posix, darwin, bsd or +just-symbols. +The default is bsd.

+
+ +
+
+--help, -h
+

Print a summary of command-line options and their meanings.

+
+ +
+
+-j
+

Print just the symbol names. Alias for --format=just-symbols.

+
+ +
+
+-m
+

Use Darwin format. Alias for --format=darwin.

+
+ +
+
+--no-demangle
+

Don’t demangle symbol names. This is the default.

+
+ +
+
+--no-llvm-bc
+

Disable the LLVM bitcode reader.

+
+ +
+
+--no-sort, -p
+

Show symbols in the order encountered.

+
+ +
+
+--no-weak
+

Don’t print weak symbols.

+
+ +
+
+--numeric-sort, -n, -v
+

Sort symbols by address.

+
+ +
+
+--portability, -P
+

Use POSIX.2 output format. Alias for --format=posix.

+
+ +
+
+--print-armap
+

Print the archive symbol table, in addition to the symbols.

+
+ +
+
+--print-file-name, -A, -o
+

Precede each symbol with the file it came from.

+
+ +
+
+--print-size, -S
+

Show symbol size as well as address (not applicable for Mach-O).

+
+ +
+
+--quiet
+

Suppress ‘no symbols’ diagnostic.

+
+ +
+
+--radix=<RADIX>, -t
+

Specify the radix of the symbol address(es). Values accepted are d (decimal), +x (hexadecimal) and o (octal).

+
+ +
+
+--reverse-sort, -r
+

Sort symbols in reverse order.

+
+ +
+
+--size-sort
+

Sort symbols by size.

+
+ +
+
+--special-syms
+

Do not filter special symbols from the output.

+
+ +
+
+--undefined-only, -u
+

Print only undefined symbols.

+
+ +
+
+--version, -V
+

Display the version of the llvm-nm executable, then exit. Does not +stack with other commands.

+
+ +
+
+@<FILE>
+

Read command-line options from response file <FILE>.

+
+ +
+
+

MACH-O SPECIFIC OPTIONS

+
+
+--add-dyldinfo
+

Add symbols from the dyldinfo, if they are not already in the symbol table. +This is the default.

+
+ +
+
+--add-inlinedinfo
+

Add symbols from the inlined libraries, TBD file inputs only.

+
+ +
+
+--arch=<arch1[,arch2,...]>
+

Dump the symbols from the specified architecture(s).

+
+ +
+
+--dyldinfo-only
+

Dump only symbols from the dyldinfo.

+
+ +
+
+--no-dyldinfo
+

Do not add any symbols from the dyldinfo.

+
+ +
+
+-s <segment> <section>
+

Dump only symbols from this segment and section name.

+
+ +
+
+-x
+

Print symbol entry in hex.

+
+ +
+
+

BUGS

+
+
    +
  • llvm-nm does not support the full set of arguments that GNU +nm does.

  • +
+
+
+
+

EXIT STATUS

+

llvm-nm exits with an exit code of zero.

+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-objcopy.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-objcopy.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-objcopy.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-objcopy.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,817 @@ + + + + + + + + + llvm-objcopy - object copying and editing tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-objcopy - object copying and editing tool

+
+

SYNOPSIS

+

llvm-objcopy [options] input [output]

+
+
+

DESCRIPTION

+

llvm-objcopy is a tool to copy and manipulate objects. In basic +usage, it makes a semantic copy of the input to the output. If any options are +specified, the output may be modified along the way, e.g. by removing sections.

+

If no output file is specified, the input file is modified in-place. If “-” is +specified for the input file, the input is read from the program’s standard +input stream. If “-” is specified for the output file, the output is written to +the standard output stream of the program.

+

If the input is an archive, any requested operations will be applied to each +archive member individually.

+

The tool is still in active development, but in most scenarios it works as a +drop-in replacement for GNU’s objcopy.

+
+
+

GENERIC AND CROSS-PLATFORM OPTIONS

+

The following options are either agnostic of the file format, or apply to +multiple file formats.

+
+ +

Add a .gnu_debuglink section for <debug-file> to the output.

+
+ +
+
+--add-section <section=file>
+

Add a section named <section> with the contents of <file> to the +output. For ELF objects the section will be of type SHT_NOTE, if the name +starts with “.note”. Otherwise, it will have type SHT_PROGBITS. Can be +specified multiple times to add multiple sections.

+

For MachO objects, <section> must be formatted as +<segment name>,<section name>.

+
+ +
+
+--binary-architecture <arch>, -B
+

Ignored for compatibility.

+
+ +
+
+--disable-deterministic-archives, -U
+

Use real values for UIDs, GIDs and timestamps when updating archive member +headers.

+
+ +
+
+--discard-all, -x
+

Remove most local symbols from the output. Different file formats may limit +this to a subset of the local symbols. For example, file and section symbols in +ELF objects will not be discarded. Additionally, remove all debug sections.

+
+ +
+
+--dump-section <section>=<file>
+

Dump the contents of section <section> into the file <file>. Can be +specified multiple times to dump multiple sections to different files. +<file> is unrelated to the input and output files provided to +llvm-objcopy and as such the normal copying and editing +operations will still be performed. No operations are performed on the sections +prior to dumping them.

+

For MachO objects, <section> must be formatted as +<segment name>,<section name>.

+
+ +
+
+--enable-deterministic-archives, -D
+

Enable deterministic mode when copying archives, i.e. use 0 for archive member +header UIDs, GIDs and timestamp fields. On by default.

+
+ +
+
+--help, -h
+

Print a summary of command line options.

+
+ +
+
+--only-keep-debug
+

Produce a debug file as the output that only preserves contents of sections +useful for debugging purposes.

+

For ELF objects, this removes the contents of SHF_ALLOC sections that are not +SHT_NOTE by making them SHT_NOBITS and shrinking the program headers where +possible.

+
+ +
+
+--only-section <section>, -j
+

Remove all sections from the output, except for sections named <section>. +Can be specified multiple times to keep multiple sections.

+

For MachO objects, <section> must be formatted as +<segment name>,<section name>.

+
+ +
+
+--redefine-sym <old>=<new>
+

Rename symbols called <old> to <new> in the output. Can be specified +multiple times to rename multiple symbols.

+
+ +
+
+--redefine-syms <filename>
+

Rename symbols in the output as described in the file <filename>. In the +file, each line represents a single symbol to rename, with the old name and new +name separated by whitespace. Leading and trailing whitespace is ignored, as is +anything following a ‘#’. Can be specified multiple times to read names from +multiple files.

+
+ +
+
+--regex
+

If specified, symbol and section names specified by other switches are treated +as extended POSIX regular expression patterns.

+
+ +
+
+--remove-section <section>, -R
+

Remove the specified section from the output. Can be specified multiple times +to remove multiple sections simultaneously.

+

For MachO objects, <section> must be formatted as +<segment name>,<section name>.

+
+ +
+
+--set-section-alignment <section>=<align>
+

Set the alignment of section <section> to <align>. Can be specified +multiple times to update multiple sections.

+
+ +
+
+--set-section-flags <section>=<flag>[,<flag>,...]
+

Set section properties in the output of section <section> based on the +specified <flag> values. Can be specified multiple times to update multiple +sections.

+

Supported flag names are alloc, load, noload, readonly, exclude, +debug, code, data, rom, share, contents, merge and strings. Not +all flags are meaningful for all object file formats.

+

For ELF objects, the flags have the following effects:

+
    +
  • alloc = add the SHF_ALLOC flag.

  • +
  • load = if the section has SHT_NOBITS type, mark it as a SHT_PROGBITS +section.

  • +
  • readonly = if this flag is not specified, add the SHF_WRITE flag.

  • +
  • exclude = add the SHF_EXCLUDE flag.

  • +
  • code = add the SHF_EXECINSTR flag.

  • +
  • merge = add the SHF_MERGE flag.

  • +
  • strings = add the SHF_STRINGS flag.

  • +
  • contents = if the section has SHT_NOBITS type, mark it as a SHT_PROGBITS +section.

  • +
+

For COFF objects, the flags have the following effects:

+
    +
  • alloc = add the IMAGE_SCN_CNT_UNINITIALIZED_DATA and IMAGE_SCN_MEM_READ +flags, unless the load flag is specified.

  • +
  • noload = add the IMAGE_SCN_LNK_REMOVE and IMAGE_SCN_MEM_READ flags.

  • +
  • readonly = if this flag is not specified, add the IMAGE_SCN_MEM_WRITE +flag.

  • +
  • exclude = add the IMAGE_SCN_LNK_REMOVE and IMAGE_SCN_MEM_READ flags.

  • +
  • debug = add the IMAGE_SCN_CNT_INITIALIZED_DATA, +IMAGE_SCN_MEM_DISCARDABLE and IMAGE_SCN_MEM_READ flags.

  • +
  • code = add the IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE and +IMAGE_SCN_MEM_READ flags.

  • +
  • data = add the IMAGE_SCN_CNT_INITIALIZED_DATA and IMAGE_SCN_MEM_READ +flags.

  • +
  • share = add the IMAGE_SCN_MEM_SHARED and IMAGE_SCN_MEM_READ flags.

  • +
+
+ +
+
+--strip-all-gnu
+

Remove all symbols, debug sections and relocations from the output. This option +is equivalent to GNU objcopy’s --strip-all switch.

+
+ +
+
+--strip-all, -S
+

For ELF objects, remove from the output all symbols and non-alloc sections not +within segments, except for .gnu.warning, .ARM.attribute sections and the +section name table.

+

For COFF and Mach-O objects, remove all symbols, debug sections, and +relocations from the output.

+
+ +
+
+--strip-debug, -g
+

Remove all debug sections from the output.

+
+ +
+
+--strip-symbol <symbol>, -N
+

Remove all symbols named <symbol> from the output. Can be specified +multiple times to remove multiple symbols.

+
+ +
+
+--strip-symbols <filename>
+

Remove all symbols whose names appear in the file <filename>, from the +output. In the file, each line represents a single symbol name, with leading +and trailing whitespace ignored, as is anything following a ‘#’. Can be +specified multiple times to read names from multiple files.

+
+ +
+
+--strip-unneeded-symbol <symbol>
+

Remove from the output all symbols named <symbol> that are local or +undefined and are not required by any relocation.

+
+ +
+
+--strip-unneeded-symbols <filename>
+

Remove all symbols whose names appear in the file <filename>, from the +output, if they are local or undefined and are not required by any relocation. +In the file, each line represents a single symbol name, with leading and +trailing whitespace ignored, as is anything following a ‘#’. Can be specified +multiple times to read names from multiple files.

+
+ +
+
+--strip-unneeded
+

Remove from the output all local or undefined symbols that are not required by +relocations. Also remove all debug sections.

+
+ +
+
+--version, -V
+

Display the version of the llvm-objcopy executable.

+
+ +
+
+--wildcard, -w
+

Allow wildcard syntax for symbol-related flags. On by default for +section-related flags. Incompatible with --regex.

+

Wildcard syntax allows the following special symbols:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Character

Meaning

Equivalent

*

Any number of characters

.*

?

Any single character

.

\

Escape the next character

\

[a-z]

Character class

[a-z]

[!a-z], [^a-z]

Negated character class

[^a-z]

+

Additionally, starting a wildcard with ‘!’ will prevent a match, even if +another flag matches. For example -w -N '*' -N '!x' will strip all symbols +except for x.

+

The order of wildcards does not matter. For example, -w -N '*' -N '!x' is +the same as -w -N '!x' -N '*'.

+
+ +
+
+@<FILE>
+

Read command-line options and commands from response file <FILE>.

+
+ +
+
+

ELF-SPECIFIC OPTIONS

+

The following options are implemented only for ELF objects. If used with other +objects, llvm-objcopy will either emit an error or silently ignore +them.

+
+
+--add-symbol <name>=[<section>:]<value>[,<flags>]
+

Add a new symbol called <name> to the output symbol table, in the section +named <section>, with value <value>. If <section> is not specified, +the symbol is added as an absolute symbol. The <flags> affect the symbol +properties. Accepted values are:

+
    +
  • global = the symbol will have global binding.

  • +
  • local = the symbol will have local binding.

  • +
  • weak = the symbol will have weak binding.

  • +
  • default = the symbol will have default visibility.

  • +
  • hidden = the symbol will have hidden visibility.

  • +
  • protected = the symbol will have protected visibility.

  • +
  • file = the symbol will be an STT_FILE symbol.

  • +
  • section = the symbol will be an STT_SECTION symbol.

  • +
  • object = the symbol will be an STT_OBJECT symbol.

  • +
  • function = the symbol will be an STT_FUNC symbol.

  • +
  • indirect-function = the symbol will be an STT_GNU_IFUNC symbol.

  • +
+

Additionally, the following flags are accepted but ignored: debug, +constructor, warning, indirect, synthetic, unique-object, before.

+

Can be specified multiple times to add multiple symbols.

+
+ +
+ +

Allow llvm-objcopy to remove sections even if it would leave invalid +section references. Any invalid sh_link fields will be set to zero.

+
+ +
+
+--change-start <incr>, --adjust-start
+

Add <incr> to the program’s start address. Can be specified multiple +times, in which case the values will be applied cumulatively.

+
+ +
+
+--compress-debug-sections [<style>]
+

Compress DWARF debug sections in the output, using the specified style. +Supported styles are zlib-gnu and zlib. Defaults to zlib if no style is +specified.

+
+ +
+
+--decompress-debug-sections
+

Decompress any compressed DWARF debug sections in the output.

+
+ +
+
+--discard-locals, -X
+

Remove local symbols starting with “.L” from the output.

+
+ +
+
+--extract-dwo
+

Remove all sections that are not DWARF .dwo sections from the output.

+
+ +
+
+--extract-main-partition
+

Extract the main partition from the output.

+
+ +
+
+--extract-partition <name>
+

Extract the named partition from the output.

+
+ +
+
+--globalize-symbol <symbol>
+

Mark any defined symbols named <symbol> as global symbols in the output. +Can be specified multiple times to mark multiple symbols.

+
+ +
+
+--globalize-symbols <filename>
+

Read a list of names from the file <filename> and mark defined symbols with +those names as global in the output. In the file, each line represents a single +symbol, with leading and trailing whitespace ignored, as is anything following +a ‘#’. Can be specified multiple times to read names from multiple files.

+
+ +
+
+--input-target <format>, -I
+

Read the input as the specified format. See SUPPORTED FORMATS for a list of +valid <format> values. If unspecified, llvm-objcopy will attempt +to determine the format automatically.

+
+ +
+
+--keep-file-symbols
+

Keep symbols of type STT_FILE, even if they would otherwise be stripped.

+
+ +
+
+--keep-global-symbol <symbol>
+

Make all symbols local in the output, except for symbols with the name +<symbol>. Can be specified multiple times to ignore multiple symbols.

+
+ +
+
+--keep-global-symbols <filename>
+

Make all symbols local in the output, except for symbols named in the file +<filename>. In the file, each line represents a single symbol, with leading +and trailing whitespace ignored, as is anything following a ‘#’. Can be +specified multiple times to read names from multiple files.

+
+ +
+
+--keep-section <section>
+

When removing sections from the output, do not remove sections named +<section>. Can be specified multiple times to keep multiple sections.

+
+ +
+
+--keep-symbol <symbol>, -K
+

When removing symbols from the output, do not remove symbols named +<symbol>. Can be specified multiple times to keep multiple symbols.

+
+ +
+
+--keep-symbols <filename>
+

When removing symbols from the output do not remove symbols named in the file +<filename>. In the file, each line represents a single symbol, with leading +and trailing whitespace ignored, as is anything following a ‘#’. Can be +specified multiple times to read names from multiple files.

+
+ +
+
+--localize-hidden
+

Make all symbols with hidden or internal visibility local in the output.

+
+ +
+
+--localize-symbol <symbol>, -L
+

Mark any defined non-common symbol named <symbol> as a local symbol in the +output. Can be specified multiple times to mark multiple symbols as local.

+
+ +
+
+--localize-symbols <filename>
+

Read a list of names from the file <filename> and mark defined non-common +symbols with those names as local in the output. In the file, each line +represents a single symbol, with leading and trailing whitespace ignored, as is +anything following a ‘#’. Can be specified multiple times to read names from +multiple files.

+
+ +
+
+--new-symbol-visibility <visibility>
+

Specify the visibility of the symbols automatically created when using binary +input or --add-symbol. Valid options are:

+
    +
  • default

  • +
  • hidden

  • +
  • internal

  • +
  • protected

  • +
+

The default is default.

+
+ +
+
+--output-target <format>, -O
+

Write the output as the specified format. See SUPPORTED FORMATS for a list +of valid <format> values. If unspecified, the output format is assumed to +be the same as the value specified for --input-target or the input +file’s format if that option is also unspecified.

+
+ +
+
+--prefix-alloc-sections <prefix>
+

Add <prefix> to the front of the names of all allocatable sections in the +output.

+
+ +
+
+--prefix-symbols <prefix>
+

Add <prefix> to the front of every symbol name in the output.

+
+ +
+
+--preserve-dates, -p
+

Preserve access and modification timestamps in the output.

+
+ +
+
+--rename-section <old>=<new>[,<flag>,...]
+

Rename sections called <old> to <new> in the output, and apply any +specified <flag> values. See --set-section-flags for a list of +supported flags. Can be specified multiple times to rename multiple sections.

+
+ +
+
+--set-start-addr <addr>
+

Set the start address of the output to <addr>. Overrides any previously +specified --change-start or --adjust-start options.

+
+ +
+
+--split-dwo <dwo-file>
+

Equivalent to running llvm-objcopy with --extract-dwo and +<dwo-file> as the output file and no other options, and then with +--strip-dwo on the input file.

+
+ +
+
+--strip-dwo
+

Remove all DWARF .dwo sections from the output.

+
+ +
+
+--strip-non-alloc
+

Remove from the output all non-allocatable sections that are not within +segments.

+
+ +
+
+--strip-sections
+

Remove from the output all section headers and all section data not within +segments. Note that many tools will not be able to use an object without +section headers.

+
+ +
+
+--target <format>, -F
+

Equivalent to --input-target and --output-target for the +specified format. See SUPPORTED FORMATS for a list of valid <format> +values.

+
+ +
+
+--weaken-symbol <symbol>, -W
+

Mark any global symbol named <symbol> as a weak symbol in the output. Can +be specified multiple times to mark multiple symbols as weak.

+
+ +
+
+--weaken-symbols <filename>
+

Read a list of names from the file <filename> and mark global symbols with +those names as weak in the output. In the file, each line represents a single +symbol, with leading and trailing whitespace ignored, as is anything following +a ‘#’. Can be specified multiple times to read names from multiple files.

+
+ +
+
+--weaken
+

Mark all defined global symbols as weak in the output.

+
+ +
+
+

MACH-O-SPECIFIC OPTIONS

+
+
+--keep-undefined
+

Keep undefined symbols, even if they would otherwise be stripped.

+
+ +
+
+

SUPPORTED FORMATS

+

The following values are currently supported by llvm-objcopy for the +--input-target, --output-target, and --target +options. For GNU objcopy compatibility, the values are all bfdnames.

+
    +
  • binary

  • +
  • ihex

  • +
  • elf32-i386

  • +
  • elf32-x86-64

  • +
  • elf64-x86-64

  • +
  • elf32-iamcu

  • +
  • elf32-littlearm

  • +
  • elf64-aarch64

  • +
  • elf64-littleaarch64

  • +
  • elf32-littleriscv

  • +
  • elf64-littleriscv

  • +
  • elf32-powerpc

  • +
  • elf32-powerpcle

  • +
  • elf64-powerpc

  • +
  • elf64-powerpcle

  • +
  • elf32-bigmips

  • +
  • elf32-ntradbigmips

  • +
  • elf32-ntradlittlemips

  • +
  • elf32-tradbigmips

  • +
  • elf32-tradlittlemips

  • +
  • elf64-tradbigmips

  • +
  • elf64-tradlittlemips

  • +
  • elf32-sparc

  • +
  • elf32-sparcel

  • +
+

Additionally, all targets except binary and ihex can have -freebsd as a +suffix.

+
+
+

BINARY INPUT AND OUTPUT

+

If binary is used as the value for --input-target, the input file +will be embedded as a data section in an ELF relocatable object, with symbols +_binary_<file_name>_start, _binary_<file_name>_end, and +_binary_<file_name>_size representing the start, end and size of the data, +where <file_name> is the path of the input file as specified on the command +line with non-alphanumeric characters converted to _.

+

If binary is used as the value for --output-target, the output file +will be a raw binary file, containing the memory image of the input file. +Symbols and relocation information will be discarded. The image will start at +the address of the first loadable section in the output.

+
+
+

EXIT STATUS

+

llvm-objcopy exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0.

+
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+

There is a known issue with --input-target and --target +causing only binary and ihex formats to have any effect. Other values +will be ignored and llvm-objcopy will attempt to guess the input +format.

+
+
+

SEE ALSO

+

llvm-strip(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-objdump.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-objdump.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-objdump.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-objdump.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,681 @@ + + + + + + + + + llvm-objdump - LLVM’s object file dumper — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-objdump - LLVM’s object file dumper

+
+

SYNOPSIS

+

llvm-objdump [commands] [options] [filenames…]

+
+
+

DESCRIPTION

+

The llvm-objdump utility prints the contents of object files and +final linked images named on the command line. If no file name is specified, +llvm-objdump will attempt to read from a.out. If - is used as a +file name, llvm-objdump will process a file on its standard input +stream.

+
+
+

COMMANDS

+

At least one of the following commands is required, and some commands can be +combined with other commands:

+
+
+-a, --archive-headers
+

Display the information contained within an archive’s headers.

+
+ +
+
+-d, --disassemble
+

Disassemble all text sections found in the input files.

+
+ +
+
+-D, --disassemble-all
+

Disassemble all sections found in the input files.

+
+ +
+
+--disassemble-symbols=<symbol1[,symbol2,...]>
+

Disassemble only the specified symbols. Takes demangled symbol names when +--demangle is specified, otherwise takes mangled symbol names. +Implies --disassemble.

+
+ +
+
+--dwarf=<value>
+

Dump the specified DWARF debug sections. The supported values are:

+

frames - .debug_frame

+
+ +
+
+-f, --file-headers
+

Display the contents of the overall file header.

+
+ +
+
+--fault-map-section
+

Display the content of the fault map section.

+
+ +
+
+-h, --headers, --section-headers
+

Display summaries of the headers for each section.

+
+ +
+
+--help
+

Display usage information and exit. Does not stack with other commands.

+
+ +
+
+-p, --private-headers
+

Display format-specific file headers.

+
+ +
+
+-r, --reloc
+

Display the relocation entries in the file.

+
+ +
+
+-R, --dynamic-reloc
+

Display the dynamic relocation entries in the file.

+
+ +
+
+--raw-clang-ast
+

Dump the raw binary contents of the clang AST section.

+
+ +
+
+-s, --full-contents
+

Display the contents of each section.

+
+ +
+
+-t, --syms
+

Display the symbol table.

+
+ +
+
+-T, --dynamic-syms
+

Display the contents of the dynamic symbol table.

+
+ +
+
+-u, --unwind-info
+

Display the unwind info of the input(s).

+
+ +
+
+-v, --version
+

Display the version of the llvm-objdump executable. Does not stack +with other commands.

+
+ +
+
+-x, --all-headers
+

Display all available header information. Equivalent to specifying +--archive-headers, --file-headers, +--private-headers, --reloc, --section-headers, +and --syms.

+
+ +
+
+

OPTIONS

+

llvm-objdump supports the following options:

+
+
+--adjust-vma=<offset>
+

Increase the displayed address in disassembly or section header printing by +the specified offset.

+
+ +
+
+--arch-name=<string>
+

Specify the target architecture when disassembling. Use --version +for a list of available targets.

+
+ +
+
+-C, --demangle
+

Demangle symbol names in the output.

+
+ +
+
+--debug-vars=<format>
+

Print the locations (in registers or memory) of source-level variables +alongside disassembly. format may be unicode or ascii, defaulting +to unicode if omitted.

+
+ +
+
+--debug-vars-indent=<width>
+

Distance to indent the source-level variable display, relative to the start +of the disassembly. Defaults to 52 characters.

+
+ +
+
+-j, --section=<section1[,section2,...]>
+

Perform commands on the specified sections only. For Mach-O use +segment,section to specify the section name.

+
+ +
+
+-l, --line-numbers
+

When disassembling, display source line numbers. Implies +--disassemble.

+
+ +
+
+-M, --disassembler-options=<opt1[,opt2,...]>
+

Pass target-specific disassembler options. Available options:

+
    +
  • reg-names-std: ARM only (default). Print register names as used in ARM’s instruction set documentation, with r13/r14/r15 replaced by sp/lr/pc.

  • +
  • reg-names-raw: ARM only. Use r followed by the register number.

  • +
  • no-aliases: AArch64 and RISC-V only. Print raw instruction mnemonic instead of pseudo instruction mnemonic.

  • +
  • numeric: RISC-V only. Print raw register names instead of ABI mnemonic. (e.g. print x1 instead of ra)

  • +
  • att: x86 only (default). Print in the AT&T syntax.

  • +
  • intel: x86 only. Print in the Intel syntax.

  • +
+
+ +
+
+--mcpu=<cpu-name>
+

Target a specific CPU type for disassembly. Specify --mcpu=help to display +available CPUs.

+
+ +
+
+--mattr=<a1,+a2,-a3,...>
+

Enable/disable target-specific attributes. Specify --mattr=help to display +the available attributes.

+
+ +
+
+--no-leading-addr
+

When disassembling, do not print leading addresses.

+
+ +
+
+--no-print-imm-hex
+

Do not use hex format for immediate values in disassembly output (default).

+
+ +
+
+--no-show-raw-insn
+

When disassembling, do not print the raw bytes of each instruction.

+
+ +
+
+--prefix=<prefix>
+

When disassembling with the --source option, prepend prefix to +absolute paths.

+
+ +
+
+--prefix-strip=<level>
+

When disassembling with the --source option, strip out level +initial directories from absolute paths. This option has no effect without +--prefix.

+
+ +
+
+--print-imm-hex
+

Use hex format when printing immediate values in disassembly output.

+
+ +
+
+-S, --source
+

When disassembling, display source interleaved with the disassembly. Implies +--disassemble.

+
+ +
+
+--show-lma
+

Display the LMA column when dumping ELF section headers. Defaults to off +unless any section has different VMA and LMA.

+
+ +
+
+--start-address=<address>
+

When disassembling, only disassemble from the specified address.

+

When printing relocations, only print the relocations patching offsets from at least address.

+

When printing symbols, only print symbols with a value of at least address.

+
+ +
+
+--stop-address=<address>
+

When disassembling, only disassemble up to, but not including the specified address.

+

When printing relocations, only print the relocations patching offsets up to address.

+

When printing symbols, only print symbols with a value up to address.

+
+ +
+
+--symbolize-operands
+

When disassembling, symbolize a branch target operand to print a label instead of a real address.

+

When printing a PC-relative global symbol reference, print it as an offset from the leading symbol.

+

Only works with an X86 linked image.

+
+
Example:

A non-symbolized branch instruction with a local target and pc-relative memory access like

+
+
+
cmp eax, dword ptr [rip + 4112]
+jge 0x20117e <_start+0x25>
+
+
+

might become

+
<L0>:
+  cmp eax, dword ptr <g>
+  jge      <L0>
+
+
+
+ +
+
+--triple=<string>
+

Target triple to disassemble for, see --version for available targets.

+
+ +
+
+-w, --wide
+

Ignored for compatibility with GNU objdump.

+
+ +
+
+--x86-asm-syntax=<style>
+

Deprecated. +When used with --disassemble, choose style of code to emit from +X86 backend. Supported values are:

+
+
+
+att
+

AT&T-style assembly

+
+ +
+
+intel
+

Intel-style assembly

+
+ +
+

The default disassembly style is att.

+
+ +
+
+-z, --disassemble-zeroes
+

Do not skip blocks of zeroes when disassembling.

+
+ +
+
+@<FILE>
+

Read command-line options and commands from response file <FILE>.

+
+ +
+
+

MACH-O ONLY OPTIONS AND COMMANDS

+
+
+--arch=<architecture>
+

Specify the architecture to disassemble. See --version for available +architectures.

+
+ +
+
+--archive-member-offsets
+

Print the offset to each archive member for Mach-O archives (requires +--archive-headers).

+
+ +
+
+--bind
+

Display binding info

+
+ +
+
+--data-in-code
+

Display the data in code table.

+
+ +
+
+--dis-symname=<name>
+

Disassemble just the specified symbol’s instructions.

+
+ +
+
+--dylibs-used
+

Display the shared libraries used for linked files.

+
+ +
+
+--dsym=<string>
+

Use .dSYM file for debug info.

+
+ +
+
+--dylib-id
+

Display the shared library’s ID for dylib files.

+
+ +
+
+--exports-trie
+

Display exported symbols.

+
+ +
+
+--function-starts
+

Print the function starts table for Mach-O objects.

+
+ +
+
+-g
+

Print line information from debug info if available.

+
+ +
+
+--full-leading-addr
+

Print the full leading address when disassembling.

+
+ +
+
+--indirect-symbols
+

Display the indirect symbol table.

+
+ +
+
+--info-plist
+

Display the info plist section as strings.

+
+ +
+
+--lazy-bind
+

Display lazy binding info.

+
+ +
+ +

Display the linker optimization hints.

+
+ +
+
+-m, --macho
+

Use Mach-O specific object file parser. Commands and other options may behave +differently when used with --macho.

+
+ +
+
+--no-leading-headers
+

Do not print any leading headers.

+
+ +
+
+--no-symbolic-operands
+

Do not print symbolic operands when disassembling.

+
+ +
+
+--non-verbose
+

Display the information for Mach-O objects in non-verbose or numeric form.

+
+ +
+
+--objc-meta-data
+

Display the Objective-C runtime meta data.

+
+ +
+
+--private-header
+

Display only the first format specific file header.

+
+ +
+
+--rebase
+

Display rebasing information.

+
+ +
+
+--rpaths
+

Display runtime search paths for the binary.

+
+ +
+
+--universal-headers
+

Display universal headers.

+
+ +
+
+--weak-bind
+

Display weak binding information.

+
+ +
+
+

XCOFF ONLY OPTIONS AND COMMANDS

+
+
+--symbol-description
+

Add symbol description to disassembly output.

+
+ +
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-otool.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-otool.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-otool.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-otool.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,326 @@ + + + + + + + + + llvm-otool - Mach-O dumping tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-otool - Mach-O dumping tool

+
+

SYNOPSIS

+

llvm-otool [option…] [file…]

+
+
+

DESCRIPTION

+

llvm-otool is a tool for dumping Mach-O files.

+

It attempts to be command-line-compatible and output-compatible with macOS’s +otool.

+
+
+

OPTIONS

+
+
+-arch <value>
+

Select slice of universal Mach-O file.

+
+ +
+
+-C
+

Print linker optimization hints.

+
+ +
+
+-D
+

Print shared library id.

+
+ +
+
+-d
+

Print data section.

+
+ +
+
+-f
+

Print universal headers.

+
+ +
+
+-G
+

Print data-in-code table.

+
+ +
+
+--help-hidden
+

Print help for hidden flags.

+
+ +
+
+--help
+

Print help.

+
+ +
+
+-h
+

Print mach header.

+
+ +
+
+-I
+

Print indirect symbol table.

+
+ +
+
+-j
+

Print opcode bytes.

+
+ +
+
+-L
+

Print used shared libraries.

+
+ +
+
+-l
+

Print load commands.

+
+ +
+
+-mcpu=<value>
+

Select cpu for disassembly.

+
+ +
+
+-o
+

Print Objective-C segment.

+
+ +
+
+-P
+

Print __TEXT,__info_plist section as strings.

+
+ +
+
+-p <function name>
+

Start disassembly at <function name>.

+
+ +
+
+-r
+

Print relocation entries.

+
+ +
+
+-s <segname> <sectname>
+

Print contents of section.

+
+ +
+
+-t
+

Print text section.

+
+ +
+
+--version
+

Print version.

+
+ +
+
+-V
+

Symbolize disassembled operands (implies -v).

+
+ +
+
+-v
+

Verbose output / disassemble when printing text sections.

+
+ +
+
+-X
+

Omit leading addresses or headers.

+
+ +
+
+-x
+

Print all text sections.

+
+ +
+
+@<FILE>
+

Read command-line options and commands from response file <FILE>.

+
+ +
+
+

EXIT STATUS

+

llvm-otool exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0.

+
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-pdbutil.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-pdbutil.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-pdbutil.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-pdbutil.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,913 @@ + + + + + + + + + llvm-pdbutil - PDB File forensics and diagnostics — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-pdbutil - PDB File forensics and diagnostics

+ +
+

Synopsis

+

llvm-pdbutil [subcommand] [options]

+
+
+

Description

+

Display types, symbols, CodeView records, and other information from a +PDB file, as well as manipulate and create PDB files. llvm-pdbutil +is normally used by FileCheck-based tests to test LLVM’s PDB reading and +writing functionality, but can also be used for general PDB file investigation +and forensics, or as a replacement for cvdump.

+
+
+

Subcommands

+

llvm-pdbutil is separated into several subcommands each tailored to +a different purpose. A brief summary of each command follows, with more detail +in the sections that follow.

+
+
    +
  • pretty - Dump symbol and type information in a format that +tries to look as much like the original source code as possible.

  • +
  • dump - Dump low level types and structures from the PDB +file, including CodeView records, hash tables, PDB streams, etc.

  • +
  • bytes - Dump data from the PDB file’s streams, records, +types, symbols, etc as raw bytes.

  • +
  • yaml2pdb - Given a yaml description of a PDB file, produce +a valid PDB file that matches that description.

  • +
  • pdb2yaml - For a given PDB file, produce a YAML +description of some or all of the file in a way that the PDB can be +reconstructed.

  • +
  • merge - Given two PDBs, produce a third PDB that is the +result of merging the two input PDBs.

  • +
+
+
+

pretty

+
+

Important

+

The pretty subcommand is built on the Windows DIA SDK, and as such is not +supported on non-Windows platforms.

+
+

USAGE: llvm-pdbutil pretty [options] <input PDB file>

+
+

Summary

+

The pretty subcommand displays a very high level representation of your +program’s debug info. Since it is built on the Windows DIA SDK, which is the +standard API that Windows tools and debuggers use to query debug information, it +presents a more authoritative view of how a debugger is going to interpret your +debug information than a mode which displays low-level CodeView records.

+
+
+

Options

+
+
Filtering and Sorting Options
+
+

Note

+

exclude filters take priority over include filters. So if a filter +matches both an include and an exclude rule, then it is excluded.

+
+
+
+-exclude-compilands=<string>
+

When dumping compilands, compiland source-file contributions, or per-compiland +symbols, this option instructs llvm-pdbutil to omit any compilands that +match the specified regular expression.

+
+ +
+
+-exclude-symbols=<string>
+

When dumping global, public, or per-compiland symbols, this option instructs +llvm-pdbutil to omit any symbols that match the specified regular +expression.

+
+ +
+
+-exclude-types=<string>
+

When dumping types, this option instructs llvm-pdbutil to omit any types +that match the specified regular expression.

+
+ +
+
+-include-compilands=<string>
+

When dumping compilands, compiland source-file contributions, or per-compiland +symbols, limit the initial search to only those compilands that match the +specified regular expression.

+
+ +
+
+-include-symbols=<string>
+

When dumping global, public, or per-compiland symbols, limit the initial +search to only those symbols that match the specified regular expression.

+
+ +
+
+-include-types=<string>
+

When dumping types, limit the initial search to only those types that match +the specified regular expression.

+
+ +
+
+-min-class-padding=<uint>
+

Only display types that have at least the specified amount of alignment +padding, accounting for padding in base classes and aggregate field members.

+
+ +
+
+-min-class-padding-imm=<uint>
+

Only display types that have at least the specified amount of alignment +padding, ignoring padding in base classes and aggregate field members.

+
+ +
+
+-min-type-size=<uint>
+

Only display types T where sizeof(T) is greater than or equal to the specified +amount.

+
+ +
+
+-no-compiler-generated
+

Don’t show compiler generated types and symbols

+
+ +
+
+-no-enum-definitions
+

When dumping an enum, don’t show the full enum (e.g. the individual enumerator +values).

+
+ +
+
+-no-system-libs
+

Don’t show symbols from system libraries

+
+ +
+
+
Symbol Type Options
+
+
+-all
+

Implies all other options in this category.

+
+ +
+
+-class-definitions=<format>
+

Displays class definitions in the specified format.

+
=all      - Display all class members including data, constants, typedefs, functions, etc (default)
+=layout   - Only display members that contribute to class size.
+=none     - Don't display class definitions (e.g. only display the name and base list)
+
+
+
+ +
+
+-class-order
+

Displays classes in the specified order.

+
=none            - Undefined / no particular sort order (default)
+=name            - Sort classes by name
+=size            - Sort classes by size
+=padding         - Sort classes by amount of padding
+=padding-pct     - Sort classes by percentage of space consumed by padding
+=padding-imm     - Sort classes by amount of immediate padding
+=padding-pct-imm - Sort classes by percentage of space consumed by immediate padding
+
+
+
+ +
+
+-class-recurse-depth=<uint>
+

When dumping class definitions, stop after recursing the specified number of times. The +default is 0, which is no limit.

+
+ +
+
+-classes
+

Display classes

+
+ +
+
+-compilands
+

Display compilands (e.g. object files)

+
+ +
+
+-enums
+

Display enums

+
+ +
+
+-externals
+

Dump external (e.g. exported) symbols

+
+ +
+
+-globals
+

Dump global symbols

+
+ +
+
+-lines
+

Dump the mappings between source lines and code addresses.

+
+ +
+
+-module-syms
+

Display symbols (variables, functions, etc) for each compiland

+
+ +
+
+-sym-types=<types>
+

Type of symbols to dump when -globals, -externals, or -module-syms is +specified. (default all)

+
=thunks - Display thunk symbols
+=data   - Display data symbols
+=funcs  - Display function symbols
+=all    - Display all symbols (default)
+
+
+
+ +
+
+-symbol-order=<order>
+

For symbols dumped via the -module-syms, -globals, or -externals options, sort +the results in specified order.

+
=none - Undefined / no particular sort order
+=name - Sort symbols by name
+=size - Sort symbols by size
+
+
+
+ +
+
+-typedefs
+

Display typedef types

+
+ +
+
+-types
+

Display all types (implies -classes, -enums, -typedefs)

+
+ +
+
+
Other Options
+
+
+-color-output
+

Force color output on or off. By default, color is used if outputting to a +terminal.

+
+ +
+
+-load-address=<uint>
+

When displaying relative virtual addresses, assume the process is loaded at the +given address and display what would be the absolute address.

+
+ +
+
+
+
+

dump

+

USAGE: llvm-pdbutil dump [options] <input PDB file>

+
+

Summary

+

The dump subcommand displays low level information about the structure of a +PDB file. It is used heavily by LLVM’s testing infrastructure, but can also be +used for PDB forensics. It serves a role similar to that of Microsoft’s +cvdump tool.

+
+

Note

+

The dump subcommand exposes internal details of the file format. As +such, the reader should be familiar with The PDB File Format before using this +command.

+
+
+
+

Options

+
+
MSF Container Options
+
+
+-streams
+

dump a summary of all of the streams in the PDB file.

+
+ +
+
+-stream-blocks
+

In conjunction with -streams, add information to the output about +what blocks the specified stream occupies.

+
+ +
+
+-summary
+

Dump MSF and PDB header information.

+
+ +
+
+
Module & File Options
+
+
+-modi=<uint>
+

For all options that dump information from each module/compiland, limit to +the specified module.

+
+ +
+
+-files
+

Dump the source files that contribute to each displayed module.

+
+ +
+
+-il
+

Dump inlinee line information (DEBUG_S_INLINEELINES CodeView subsection)

+
+ +
+
+-l
+

Dump line information (DEBUG_S_LINES CodeView subsection)

+
+ +
+
+-modules
+

Dump compiland information

+
+ +
+
+-xme
+

Dump cross module exports (DEBUG_S_CROSSSCOPEEXPORTS CodeView subsection)

+
+ +
+
+-xmi
+

Dump cross module imports (DEBUG_S_CROSSSCOPEIMPORTS CodeView subsection)

+
+ +
+
+
Symbol Options
+
+
+-globals
+

dump global symbol records

+
+ +
+
+-global-extras
+

dump additional information about the globals, such as hash buckets and hash +values.

+
+ +
+
+-publics
+

dump public symbol records

+
+ +
+
+-public-extras
+

dump additional information about the publics, such as hash buckets and hash +values.

+
+ +
+
+-symbols
+

dump symbols (functions, variables, etc) for each module dumped.

+
+ +
+
+-sym-data
+

For each symbol record dumped as a result of the -symbols option, +display the full bytes of the record in binary as well.

+
+ +
+
+
Type Record Options
+
+
+-types
+

Dump CodeView type records from TPI stream

+
+ +
+
+-type-extras
+

Dump additional information from the TPI stream, such as hashes and the type +index offsets array.

+
+ +
+
+-type-data
+

For each type record dumped, display the full bytes of the record in binary as +well.

+
+ +
+
+-type-index=<uint>
+

Only dump types with the specified type index.

+
+ +
+
+-ids
+

Dump CodeView type records from IPI stream.

+
+ +
+
+-id-extras
+

Dump additional information from the IPI stream, such as hashes and the type +index offsets array.

+
+ +
+
+-id-data
+

For each ID record dumped, display the full bytes of the record in binary as +well.

+
+ +
+
+-id-index=<uint>
+

only dump ID records with the specified hexadecimal type index.

+
+ +
+
+-dependents
+

When used in conjunction with -type-index or -id-index, +dumps the entire dependency graph for the specified index instead of just the +single record with the specified index. For example, if type index 0x4000 is +a function whose return type has index 0x3000, and you specify +-dependents=0x4000, then this would dump both records (as well as any other +dependents in the tree).

+
+ +
+
+
Miscellaneous Options
+
+
+-all
+

Implies most other options.

+
+ +
+
+-section-contribs
+

Dump section contributions.

+
+ +
+
+-section-headers
+

Dump image section headers.

+
+ +
+
+-section-map
+

Dump section map.

+
+ +
+
+-string-table
+

Dump PDB string table.

+
+ +
+
+
+
+

bytes

+

USAGE: llvm-pdbutil bytes [options] <input PDB file>

+
+

Summary

+

Like the dump subcommand, the bytes subcommand displays low level +information about the structure of a PDB file, but it is used for even deeper +forensics. The bytes subcommand finds various structures in a PDB file +based on the command line options specified, and dumps them in hex. Someone +working on support for emitting PDBs would use this heavily, for example, to +compare one PDB against another PDB to ensure byte-for-byte compatibility. It +is not enough to simply compare the bytes of an entire file, or an entire stream +because it’s perfectly fine for the same structure to exist at different +locations in two different PDBs, and “finding” the structure is half the battle.

+
+
+

Options

+
+
MSF File Options
+
+
+-block-range=<start[-end]>
+

Dump binary data from specified range of MSF file blocks.

+
+ +
+
+-byte-range=<start[-end]>
+

Dump binary data from specified range of bytes in the file.

+
+ +
+
+-fpm
+

Dump the MSF free page map.

+
+ +
+
+-stream-data=<string>
+

Dump binary data from the specified streams. Format is SN[:Start][@Size]. +For example, -stream-data=7:3@12 dumps 12 bytes from stream 7, starting +at offset 3 in the stream.

+
+ +
+
+
PDB Stream Options
+
+
+-name-map
+

Dump bytes of PDB Name Map

+
+ +
+
+
DBI Stream Options
+
+
+-ec
+

Dump the edit and continue map substream of the DBI stream.

+
+ +
+
+-files
+

Dump the file info substream of the DBI stream.

+
+ +
+
+-modi
+

Dump the modi substream of the DBI stream.

+
+ +
+
+-sc
+

Dump section contributions substream of the DBI stream.

+
+ +
+
+-sm
+

Dump the section map from the DBI stream.

+
+ +
+
+-type-server
+

Dump the type server map from the DBI stream.

+
+ +
+
+
Module Options
+
+
+-mod=<uint>
+

Limit all options in this category to the specified module index. By default, +options in this category will dump bytes from all modules.

+
+ +
+
+-chunks
+

Dump the bytes of each module’s C13 debug subsection.

+
+ +
+
+-split-chunks
+

When specified with -chunks, split the C13 debug subsection into a +separate chunk for each subsection type, and dump them separately.

+
+ +
+
+-syms
+

Dump the symbol record substream from each module.

+
+ +
+
+
Type Record Options
+
+
+-id=<uint>
+

Dump the record from the IPI stream with the given type index.

+
+ +
+
+-type=<uint>
+

Dump the record from the TPI stream with the given type index.

+
+ +
+
+
+
+

pdb2yaml

+

USAGE: llvm-pdbutil pdb2yaml [options] <input PDB file>

+ + +
+
+

yaml2pdb

+

USAGE: llvm-pdbutil yaml2pdb [options] <input YAML file>

+
+

Summary

+

Generate a PDB file from a YAML description. The YAML syntax is not described +here. Instead, use llvm-pdbutil pdb2yaml and +examine the output for an example starting point.

+
+
+

Options

+
+
+-pdb=<file-name>
+
+ +

Write the resulting PDB to the specified file.

+
+
+
+

merge

+

USAGE: llvm-pdbutil merge [options] <input PDB file 1> <input PDB file 2>

+
+

Summary

+

Merge two PDB files into a single file.

+
+
+

Options

+
+
+-pdb=<file-name>
+
+ +

Write the resulting PDB to the specified file.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-profdata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-profdata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-profdata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-profdata.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,569 @@ + + + + + + + + + llvm-profdata - Profile data tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-profdata - Profile data tool

+
+

SYNOPSIS

+

llvm-profdata command [args…]

+
+
+

DESCRIPTION

+

The llvm-profdata tool is a small utility for working with profile +data files.

+
+
+

COMMANDS

+ +
+
+

MERGE

+
+

SYNOPSIS

+

llvm-profdata merge [options] [filename…]

+
+
+

DESCRIPTION

+

llvm-profdata merge takes several profile data files +generated by PGO instrumentation and merges them together into a single +indexed profile data file.

+

By default profile data is merged without modification. This means that the +relative importance of each input file is proportional to the number of samples +or counts it contains. In general, the input from a longer training run will be +interpreted as relatively more important than a shorter run. Depending on the +nature of the training runs it may be useful to adjust the weight given to each +input file by using the -weighted-input option.

+

Profiles passed in via -weighted-input, -input-files, or via positional +arguments are processed once for each time they are seen.

+
+
+

OPTIONS

+
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-output=output, -o=output
+

Specify the output file name. Output cannot be - as the resulting +indexed profile data can’t be written to standard output.

+
+ +
+
+-weighted-input=weight,filename
+

Specify an input file name along with a weight. The profile counts of the +supplied filename will be scaled (multiplied) by the supplied +weight, where weight is a decimal integer >= 1. +Input files specified without using this option are assigned a default +weight of 1. Examples are shown below.

+
+ +
+
+-input-files=path, -f=path
+

Specify a file which contains a list of files to merge. The entries in this +file are newline-separated. Lines starting with ‘#’ are skipped. Entries may +be of the form <filename> or <weight>,<filename>.

+
+ +
+
+-remapping-file=path, -r=path
+

Specify a file which contains a remapping from symbol names in the input +profile to the symbol names that should be used in the output profile. The +file should consist of lines of the form <input-symbol> <output-symbol>. +Blank lines and lines starting with # are skipped.

+

The llvm-cxxmap tool can be used to generate the symbol +remapping file.

+
+ +
+
+-instr (default)
+

Specify that the input profile is an instrumentation-based profile.

+
+ +
+
+-sample
+

Specify that the input profile is a sample-based profile.

+

The generated file can be emitted in one of the following formats:

+
+
+-binary (default)
+
+ +

Emit the profile using a binary encoding. For instrumentation-based profile +the output format is the indexed binary format.

+
+
+-extbinary
+
+ +

Emit the profile using an extensible binary encoding. This option can only +be used with sample-based profile. The extensible binary encoding can be +more compact with compression enabled and can be loaded faster than the +default binary encoding.

+
+
+-text
+
+ +

Emit the profile in text mode. This option can also be used with both +sample-based and instrumentation-based profile. When this option is used +the profile will be dumped in the text format that is parsable by the profile +reader.

+
+
+-gcc
+
+ +

Emit the profile using GCC’s gcov format (Not yet supported).

+
+ +
+
+-sparse[=true|false]
+

Do not emit function records with 0 execution count. Can only be used in +conjunction with -instr. Defaults to false, since it can inhibit compiler +optimization during PGO.

+
+ +
+
+-num-threads=N, -j=N
+

Use N threads to perform profile merging. When N=0, llvm-profdata auto-detects +an appropriate number of threads to use. This is the default.

+
+ +
+
+-failure-mode=[any|all]
+

Set the failure mode. There are two options: ‘any’ causes the merge command to +fail if any profiles are invalid, and ‘all’ causes the merge command to fail +only if all profiles are invalid. If ‘all’ is set, information from any +invalid profiles is excluded from the final merged product. The default +failure mode is ‘any’.

+
+ +
+
+-prof-sym-list=path
+

Specify a file which contains a list of symbols to generate profile symbol +list in the profile. This option can only be used with sample-based profile +in extbinary format. The entries in this file are newline-separated.

+
+ +
+
+-compress-all-sections=[true|false]
+

Compress all sections when writing the profile. This option can only be used +with sample-based profile in extbinary format.

+
+ +
+
+-use-md5=[true|false]
+

Use MD5 to represent string in name table when writing the profile. +This option can only be used with sample-based profile in extbinary format.

+
+ +
+
+-gen-partial-profile=[true|false]
+

Mark the profile to be a partial profile which only provides partial profile +coverage for the optimized target. This option can only be used with +sample-based profile in extbinary format.

+
+ +
+
+-supplement-instr-with-sample=path_to_sample_profile
+

Supplement an instrumentation profile with sample profile. The sample profile +is the input of the flag. Output will be in instrumentation format (only works +with -instr).

+
+ +
+
+-zero-counter-threshold=threshold_float_number
+

For the function which is cold in instr profile but hot in sample profile, if +the ratio of the number of zero counters divided by the total number of +counters is above the threshold, the profile of the function will be regarded +as being harmful for performance and will be dropped.

+
+ +
+
+-instr-prof-cold-threshold=threshold_int_number
+

User specified cold threshold for instr profile which will override the cold +threshold got from profile summary.

+
+ +
+
+-suppl-min-size-threshold=threshold_int_number
+

If the size of a function is smaller than the threshold, assume it can be +inlined by PGO early inliner and it will not be adjusted based on sample +profile.

+
+ +
+
+

EXAMPLES

+
+

Basic Usage

+

Merge three profiles:

+
llvm-profdata merge foo.profdata bar.profdata baz.profdata -output merged.profdata
+
+
+
+
+

Weighted Input

+

The input file foo.profdata is especially important, multiply its counts by 10:

+
llvm-profdata merge -weighted-input=10,foo.profdata bar.profdata baz.profdata -output merged.profdata
+
+
+

Exactly equivalent to the previous invocation (explicit form; useful for programmatic invocation):

+
llvm-profdata merge -weighted-input=10,foo.profdata -weighted-input=1,bar.profdata -weighted-input=1,baz.profdata -output merged.profdata
+
+
+
+
+
+
+

SHOW

+
+

SYNOPSIS

+

llvm-profdata show [options] [filename]

+
+
+

DESCRIPTION

+

llvm-profdata show takes a profile data file and displays the +information about the profile counters for this file and +for any of the specified function(s).

+

If filename is omitted or is -, then llvm-profdata show reads its +input from standard input.

+
+
+

OPTIONS

+
+
+-all-functions
+

Print details for every function.

+
+ +
+
+-counts
+

Print the counter values for the displayed functions.

+
+ +
+
+-function=string
+

Print details for a function if the function’s name contains the given string.

+
+ +
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-output=output, -o=output
+

Specify the output file name. If output is - or it isn’t specified, +then the output is sent to standard output.

+
+ +
+
+-instr (default)
+

Specify that the input profile is an instrumentation-based profile.

+
+ +
+
+-text
+

Instruct the profile dumper to show profile counts in the text format of the +instrumentation-based profile data representation. By default, the profile +information is dumped in a more human readable form (also in text) with +annotations.

+
+ +
+
+-topn=n
+

Instruct the profile dumper to show the top n functions with the +hottest basic blocks in the summary section. By default, the topn functions +are not dumped.

+
+ +
+
+-sample
+

Specify that the input profile is a sample-based profile.

+
+ +
+
+-memop-sizes
+

Show the profiled sizes of the memory intrinsic calls for shown functions.

+
+ +
+
+-value-cutoff=n
+

Show only those functions whose max count values are greater than or equal to n. +By default, the value-cutoff is set to 0.

+
+ +
+
+-list-below-cutoff
+

Only output names of functions whose max count values are below the cutoff +value.

+
+ +
+
+-showcs
+

Only show context sensitive profile counts. The default is to filter all +context sensitive profile counts.

+
+ +
+
+-show-prof-sym-list=[true|false]
+

Show profile symbol list if it exists in the profile. This option is only +meaningful for sample-based profile in extbinary format.

+
+ +
+
+-show-sec-info-only=[true|false]
+

Show basic information about each section in the profile. This option is +only meaningful for sample-based profile in extbinary format.

+
+ +
+
+
+

OVERLAP

+
+

SYNOPSIS

+

llvm-profdata overlap [options] [base profile file] [test profile file]

+
+
+

DESCRIPTION

+

llvm-profdata overlap takes two profile data files and displays the +overlap of counter distribution between the whole files and between any of the +specified functions.

+

In this command, overlap is defined as follows: +Suppose base profile file has the following counts: +{c1_1, c1_2, …, c1_n, c1_u_1, c1_u_2, …, c1_u_s}, +and test profile file has +{c2_1, c2_2, …, c2_n, c2_v_1, c2_v_2, …, c2_v_t}. +Here c{1|2}_i (i = 1 .. n) are matched counters and c1_u_i (i = 1 .. s) and +c2_v_i (i = 1 .. t) are unmatched counters (or counters only existing in) +base profile file and test profile file, respectively. +Let sum_1 = c1_1 + c1_2 + … + c1_n + c1_u_1 + c1_u_2 + … + c1_u_s, and +sum_2 = c2_1 + c2_2 + … + c2_n + c2_v_1 + c2_v_2 + … + c2_v_t. +overlap = min(c1_1/sum_1, c2_1/sum_2) + min(c1_2/sum_1, c2_2/sum_2) + … ++ min(c1_n/sum_1, c2_n/sum_2).

+

The result overlap distribution is a percentage number, ranging from 0.0% to +100.0%, where 0.0% means there is no overlap and 100.0% means a perfect +overlap.

+

For example, if base profile file has counts of {400, 600}, and +test profile file has matched counts of {60000, 40000}, the overlap is 80%.

+
+
+

OPTIONS

+
+
+-function=string
+

Print details for a function if the function’s name contains the given string.

+
+ +
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-o=output or -o output
+

Specify the output file name. If output is - or it isn’t specified, +then the output is sent to standard output.

+
+ +
+
+-value-cutoff=n
+

Show only those functions whose max count values are greater than or equal to n. +By default, the value-cutoff is set to max of unsigned long long.

+
+ +
+
+-cs
+

Only show overlap for the context sensitive profile counts. The default is to show +non-context sensitive profile counts.

+
+ +
+
+
+

EXIT STATUS

+

llvm-profdata returns 1 if the command is omitted or is invalid, +if it cannot read input files, or if there is a mismatch between their data.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-profgen.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-profgen.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-profgen.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-profgen.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,208 @@ + + + + + + + + + llvm-profgen - LLVM SPGO profile generation tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-profgen - LLVM SPGO profile generation tool

+
+

SYNOPSIS

+

llvm-profgen [commands] [options]

+
+
+

DESCRIPTION

+

The llvm-profgen utility generates a profile data file +from given perf script data files for sample-based profile guided +optimization (SPGO).

+
+
+

COMMANDS

+

At least one of the following commands is required:

+
+
+--perfscript=<string[,string,...]>
+

Path of perf-script trace created by Linux perf tool with script +command (the raw perf.data should be profiled with -b).

+
+ +
+
+--binary=<string[,string,...]>
+

Path of the input profiled binary files.

+
+ +
+
+--output=<string>
+

Path of the output profile file.

+
+ +
+
+

OPTIONS

+

llvm-profgen supports the following options:

+
+
+--format=[text|binary|extbinary|compbinary|gcc]
+

Specify the format of the generated profile. Supported <format> are text, +binary, extbinary, compbinary, gcc, see llvm-profdata for more +descriptions of the format.

+
+ +
+
+--show-mmap-events
+

Print mmap events.

+
+ +
+
+--show-disassembly
+

Print disassembled code.

+
+ +
+
+--x86-asm-syntax=[att|intel]
+

Specify whether to print assembly code in AT&T syntax (the default) or Intel +syntax.

+
+ +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-ranlib.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-ranlib.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-ranlib.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-ranlib.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,159 @@ + + + + + + + + + llvm-ranlib - generates an archive index — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-ranlib - generates an archive index

+
+

SYNOPSIS

+

llvm-ranlib [options]

+
+
+

DESCRIPTION

+

llvm-ranlib is an alias for the llvm-ar tool that +generates an index for an archive. It can be used as a replacement for GNU’s +ranlib tool.

+

Running llvm-ranlib is equivalent to running llvm-ar s.

+
+
+

SEE ALSO

+

llvm-ar(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-readelf.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-readelf.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-readelf.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-readelf.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,420 @@ + + + + + + + + + llvm-readelf - GNU-style LLVM Object Reader — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-readelf - GNU-style LLVM Object Reader

+
+

SYNOPSIS

+

llvm-readelf [options] [input…]

+
+
+

DESCRIPTION

+

The llvm-readelf tool displays low-level format-specific information +about one or more object files.

+

If input is “-”, llvm-readelf reads from standard +input. Otherwise, it will read from the specified filenames.

+
+
+

OPTIONS

+
+
+--all
+

Equivalent to specifying all the main display options relevant to the file +format.

+
+ +
+
+--addrsig
+

Display the address-significance table.

+
+ +
+
+--arch-specific, -A
+

Display architecture-specific information, e.g. the ARM attributes section on ARM.

+
+ +
+
+--bb-addr-map
+

Display the contents of the basic block address map section(s), which contain the +address of each function, along with the relative offset of each basic block.

+
+ +
+
+--demangle, -C
+

Display demangled symbol names in the output.

+
+ +
+
+--dyn-relocations
+

Display the dynamic relocation entries.

+
+ +
+
+--dyn-symbols, --dyn-syms
+

Display the dynamic symbol table.

+
+ +
+
+--dynamic-table, --dynamic, -d
+

Display the dynamic table.

+
+ +
+
+--cg-profile
+

Display the callgraph profile section.

+
+ +
+
+--histogram, -I
+

Display a bucket list histogram for dynamic symbol hash tables.

+
+ +
+
+--elf-linker-options
+

Display the linker options section.

+
+ +
+
+--elf-output-style=<value>
+

Format ELF information in the specified style. Valid options are LLVM and +GNU. LLVM output is an expanded and structured format, whilst GNU +(the default) output mimics the equivalent GNU readelf output.

+
+ +
+
+--section-groups, -g
+

Display section groups.

+
+ +
+
+--expand-relocs
+

When used with --relocations, display each relocation in an expanded +multi-line format.

+
+ +
+
+--file-header, -h
+

Display file headers.

+
+ +
+
+--gnu-hash-table
+

Display the GNU hash table for dynamic symbols.

+
+ +
+
+--hash-symbols
+

Display the expanded hash table with dynamic symbol data.

+
+ +
+
+--hash-table
+

Display the hash table for dynamic symbols.

+
+ +
+
+--headers, -e
+

Equivalent to setting: --file-header, --program-headers, +and --sections.

+
+ +
+
+--help
+

Display a summary of command line options.

+
+ +
+
+--hex-dump=<section[,section,...]>, -x
+

Display the specified section(s) as hexadecimal bytes. section may be a +section index or section name.

+
+ +
+
+--needed-libs
+

Display the needed libraries.

+
+ +
+
+--notes, -n
+

Display all notes.

+
+ +
+
+--program-headers, --segments, -l
+

Display the program headers.

+
+ +
+
+--raw-relr
+

Do not decode relocations in RELR relocation sections when displaying them.

+
+ +
+
+--relocations, --relocs, -r
+

Display the relocation entries in the file.

+
+ +
+
+--sections, --section-headers, -S
+

Display all sections.

+
+ +
+
+--section-data
+

When used with --sections, display section data for each section +shown. This option has no effect for GNU style output.

+
+ +
+
+--section-details, -t
+

Display all section details. Used as an alternative to --sections.

+
+ +
+
+--section-mapping
+

Display the section to segment mapping.

+
+ +
+
+--section-relocations
+

When used with --sections, display relocations for each section +shown. This option has no effect for GNU style output.

+
+ +
+
+--section-symbols
+

When used with --sections, display symbols for each section shown. +This option has no effect for GNU style output.

+
+ +
+
+--stackmap
+

Display contents of the stackmap section.

+
+ +
+
+--stack-sizes
+

Display the contents of the stack sizes section(s), i.e. pairs of function +names and the size of their stack frames. Currently only implemented for GNU +style output.

+
+ +
+
+--string-dump=<section[,section,...]>, -p
+

Display the specified section(s) as a list of strings. section may be a +section index or section name.

+
+ +
+
+--symbols, --syms, -s
+

Display the symbol table.

+
+ +
+
+--unwind, -u
+

Display unwind information.

+
+ +
+
+--version
+

Display the version of the llvm-readelf executable.

+
+ +
+
+--version-info, -V
+

Display version sections.

+
+ +
+
+@<FILE>
+

Read command-line options from response file <FILE>.

+
+ +
+
+

EXIT STATUS

+

llvm-readelf returns 0 under normal operation. It returns a non-zero +exit code if there were any errors.

+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-readobj.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-readobj.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-readobj.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-readobj.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,572 @@ + + + + + + + + + llvm-readobj - LLVM Object Reader — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-readobj - LLVM Object Reader

+
+

SYNOPSIS

+

llvm-readobj [options] [input…]

+
+
+

DESCRIPTION

+

The llvm-readobj tool displays low-level format-specific information +about one or more object files.

+

If input is “-”, llvm-readobj reads from standard +input. Otherwise, it will read from the specified filenames.

+
+
+

DIFFERENCES TO LLVM-READELF

+

llvm-readelf is an alias for the llvm-readobj tool with a +slightly different command-line interface and output that is GNU compatible. +Following is a list of differences between llvm-readelf and +llvm-readobj:

+
    +
  • llvm-readelf uses GNU for the --elf-output-style option +by default. llvm-readobj uses LLVM.

  • +
  • llvm-readelf allows single-letter grouped flags (e.g. +llvm-readelf -SW is the same as llvm-readelf -S -W). +llvm-readobj does not allow grouping.

  • +
  • llvm-readelf provides -s as an alias for +--symbols, for GNU readelf compatibility, whereas it is +an alias for --section-headers in llvm-readobj.

  • +
  • llvm-readobj provides -t as an alias for --symbols. +llvm-readelf does not.

  • +
  • llvm-readobj provides --sr, --sd, --st and --dt as +aliases for --section-relocations, --section-data, +--section-symbols and --dyn-symbols respectively. +llvm-readelf does not provide these aliases, to avoid conflicting +with grouped flags.

  • +
+
+
+

GENERAL AND MULTI-FORMAT OPTIONS

+

These options are applicable to more than one file format, or are unrelated to +file formats.

+
+
+--all
+

Equivalent to specifying all the main display options relevant to the file +format.

+
+ +
+
+--addrsig
+

Display the address-significance table.

+
+ +
+
+--expand-relocs
+

When used with --relocs, display each relocation in an expanded +multi-line format.

+
+ +
+
+--file-header, -h
+

Display file headers.

+
+ +
+
+--headers, -e
+

Equivalent to setting: --file-header, --program-headers, +and --sections.

+
+ +
+
+--help
+

Display a summary of command line options.

+
+ +
+
+--hex-dump=<section[,section,...]>, -x
+

Display the specified section(s) as hexadecimal bytes. section may be a +section index or section name.

+
+ +
+
+--needed-libs
+

Display the needed libraries.

+
+ +
+
+--relocations, --relocs, -r
+

Display the relocation entries in the file.

+
+ +
+
+--sections, --section-headers, -S
+

Display all sections.

+
+ +
+
+--section-data, --sd
+

When used with --sections, display section data for each section +shown. This option has no effect for GNU style output.

+
+ +
+
+--section-relocations, --sr
+

When used with --sections, display relocations for each section +shown. This option has no effect for GNU style output.

+
+ +
+
+--section-symbols, --st
+

When used with --sections, display symbols for each section shown. +This option has no effect for GNU style output.

+
+ +
+
+--stackmap
+

Display contents of the stackmap section.

+
+ +
+
+--string-dump=<section[,section,...]>, -p
+

Display the specified section(s) as a list of strings. section may be a +section index or section name.

+
+ +
+
+--string-table
+

Display contents of the string table.

+
+ +
+
+--symbols, --syms, -s
+

Display the symbol table.

+
+ +
+
+--unwind, -u
+

Display unwind information.

+
+ +
+
+--version
+

Display the version of the llvm-readobj executable.

+
+ +
+
+@<FILE>
+

Read command-line options from response file <FILE>.

+
+ +
+
+

ELF SPECIFIC OPTIONS

+

The following options are implemented only for the ELF file format.

+
+
+--arch-specific, -A
+

Display architecture-specific information, e.g. the ARM attributes section on ARM.

+
+ +
+
+--bb-addr-map
+

Display the contents of the basic block address map section(s), which contain the +address of each function, along with the relative offset of each basic block.

+
+ +
+
+--demangle, -C
+

Display demangled symbol names in the output.

+
+ +
+
+--dependent-libraries
+

Display the dependent libraries section.

+
+ +
+
+--dyn-relocations
+

Display the dynamic relocation entries.

+
+ +
+
+--dyn-symbols, --dyn-syms, --dt
+

Display the dynamic symbol table.

+
+ +
+
+--dynamic-table, --dynamic, -d
+

Display the dynamic table.

+
+ +
+
+--cg-profile
+

Display the callgraph profile section.

+
+ +
+
+--histogram, -I
+

Display a bucket list histogram for dynamic symbol hash tables.

+
+ +
+
+--elf-linker-options
+

Display the linker options section.

+
+ +
+
+--elf-output-style=<value>
+

Format ELF information in the specified style. Valid options are LLVM and +GNU. LLVM output (the default) is an expanded and structured format, +whilst GNU output mimics the equivalent GNU readelf output.

+
+ +
+
+--section-groups, -g
+

Display section groups.

+
+ +
+
+--gnu-hash-table
+

Display the GNU hash table for dynamic symbols.

+
+ +
+
+--hash-symbols
+

Display the expanded hash table with dynamic symbol data.

+
+ +
+
+--hash-table
+

Display the hash table for dynamic symbols.

+
+ +
+
+--notes, -n
+

Display all notes.

+
+ +
+
+--program-headers, --segments, -l
+

Display the program headers.

+
+ +
+
+--raw-relr
+

Do not decode relocations in RELR relocation sections when displaying them.

+
+ +
+
+--section-mapping
+

Display the section to segment mapping.

+
+ +
+
+--stack-sizes
+

Display the contents of the stack sizes section(s), i.e. pairs of function +names and the size of their stack frames. Currently only implemented for GNU +style output.

+
+ +
+
+--version-info, -V
+

Display version sections.

+
+ +
+
+

MACH-O SPECIFIC OPTIONS

+

The following options are implemented only for the Mach-O file format.

+
+
+--macho-data-in-code
+

Display the Data in Code command.

+
+ +
+
+--macho-dsymtab
+

Display the Dsymtab command.

+
+ +
+
+--macho-indirect-symbols
+

Display indirect symbols.

+
+ +
+
+--macho-linker-options
+

Display the Mach-O-specific linker options.

+
+ +
+
+--macho-segment
+

Display the Segment command.

+
+ +
+
+--macho-version-min
+

Display the version min command.

+
+ +
+
+

PE/COFF SPECIFIC OPTIONS

+

The following options are implemented only for the PE/COFF file format.

+
+
+--codeview
+

Display CodeView debug information.

+
+ +
+
+--codeview-ghash
+

Enable global hashing for CodeView type stream de-duplication.

+
+ +
+
+--codeview-merged-types
+

Display the merged CodeView type stream.

+
+ +
+
+--codeview-subsection-bytes
+

Dump raw contents of CodeView debug sections and records.

+
+ +
+
+--coff-basereloc
+

Display the .reloc section.

+
+ +
+
+--coff-debug-directory
+

Display the debug directory.

+
+ +
+
+--coff-tls-directory
+

Display the TLS directory.

+
+ +
+
+--coff-directives
+

Display the .drectve section.

+
+ +
+
+--coff-exports
+

Display the export table.

+
+ +
+
+--coff-imports
+

Display the import table.

+
+ +
+
+--coff-load-config
+

Display the load config.

+
+ +
+
+--coff-resources
+

Display the .rsrc section.

+
+ +
+
+

EXIT STATUS

+

llvm-readobj returns 0 under normal operation. It returns a non-zero +exit code if there were any errors.

+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-size.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-size.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-size.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-size.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,347 @@ + + + + + + + + + llvm-size - print size information — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-size - print size information

+
+

SYNOPSIS

+

llvm-size [options] [input…]

+
+
+

DESCRIPTION

+

llvm-size is a tool that prints size information for binary files. +It is intended to be a drop-in replacement for GNU’s size.

+

The tool prints size information for each input specified. If no input is +specified, the program prints size information for a.out. If “-” is +specified as an input file, llvm-size reads a file from the standard +input stream. If an input is an archive, size information will be displayed for +all its members.

+
+
+

OPTIONS

+
+
+-A
+

Equivalent to --format with a value of sysv.

+
+ +
+
+--arch=<arch>
+

Architecture(s) from Mach-O universal binaries to display information for.

+
+ +
+
+-B
+

Equivalent to --format with a value of berkeley.

+
+ +
+
+--common
+

Include ELF common symbol sizes in bss size for berkeley output format, or +as a separate section entry for sysv output. If not specified, these +symbols are ignored.

+
+ +
+
+-d
+

Equivalent to --radix with a value of 10.

+
+ +
+
+-l
+

Display verbose address and offset information for segments and sections in +Mach-O files in darwin format.

+
+ +
+
+--format=<format>
+

Set the output format to the <format> specified. Available <format> +options are berkeley (the default), sysv and darwin.

+

Berkeley output summarises text, data and bss sizes in each file, as shown +below for a typical pair of ELF files:

+
$ llvm-size --format=berkeley test.elf test2.o
+   text    data     bss     dec     hex filename
+    182      16       5     203      cb test.elf
+     82       8       1      91      5b test2.o
+
+
+

For Mach-O files, the output format is slightly different:

+
$ llvm-size --format=berkeley macho.obj macho2.obj
+__TEXT  __DATA  __OBJC  others  dec     hex
+4       8       0       0       12      c       macho.obj
+16      32      0       0       48      30      macho2.obj
+
+
+

Sysv output displays size and address information for most sections, with each +file being listed separately:

+
$ llvm-size --format=sysv test.elf test2.o
+   test.elf  :
+   section       size      addr
+   .eh_frame       92   2097496
+   .text           90   2101248
+   .data           16   2105344
+   .bss             5   2105360
+   .comment       209         0
+   Total          412
+
+   test2.o  :
+   section             size   addr
+   .text                 26      0
+   .data                  8      0
+   .bss                   1      0
+   .comment             106      0
+   .note.GNU-stack        0      0
+   .eh_frame             56      0
+   .llvm_addrsig          2      0
+   Total                199
+
+
+

darwin format only affects Mach-O input files. If an input of a different +file format is specified, llvm-size falls back to berkeley +format. When producing darwin format, the tool displays information about +segments and sections:

+
$ llvm-size --format=darwin macho.obj macho2.obj
+   macho.obj:
+   Segment : 12
+           Section (__TEXT, __text): 4
+           Section (__DATA, __data): 8
+           total 12
+   total 12
+   macho2.obj:
+   Segment : 48
+           Section (__TEXT, __text): 16
+           Section (__DATA, __data): 32
+           total 48
+   total 48
+
+
+
+ +
+
+--help, -h
+

Display a summary of command line options.

+
+ +
+
+-m
+

Equivalent to --format with a value of darwin.

+
+ +
+
+-o
+

Equivalent to --radix with a value of 8.

+
+ +
+
+--radix=<value>
+

Display size information in the specified radix. Permitted values are 8, +10 (the default) and 16 for octal, decimal and hexadecimal output +respectively.

+

Example:

+
$ llvm-size --radix=8 test.o
+   text    data     bss     oct     hex filename
+   0152      04      04     162      72 test.o
+
+$ llvm-size --radix=10 test.o
+   text    data     bss     dec     hex filename
+    106       4       4     114      72 test.o
+
+$ llvm-size --radix=16 test.o
+   text    data     bss     dec     hex filename
+   0x6a     0x4     0x4     114      72 test.o
+
+
+
+ +
+
+--totals, -t
+

Applies only to berkeley output format. Display the totals for all listed +fields, in addition to the individual file listings.

+

Example:

+
$ llvm-size --totals test.elf test2.o
+   text    data     bss     dec     hex filename
+    182      16       5     203      cb test.elf
+     82       8       1      91      5b test2.o
+    264      24       6     294     126 (TOTALS)
+
+
+
+ +
+
+--version
+

Display the version of the llvm-size executable.

+
+ +
+
+-x
+

Equivalent to --radix with a value of 16.

+
+ +
+
+@<FILE>
+

Read command-line options from response file <FILE>.

+
+ +
+
+

EXIT STATUS

+

llvm-size exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0.

+
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-stress.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-stress.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-stress.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-stress.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,178 @@ + + + + + + + + + llvm-stress - generate random .ll files — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-stress - generate random .ll files

+
+

SYNOPSIS

+

llvm-stress [-size=filesize] [-seed=initialseed] [-o=outfile]

+
+
+

DESCRIPTION

+

The llvm-stress tool is used to generate random .ll files that +can be used to test different components of LLVM.

+
+
+

OPTIONS

+
+
+-o filename
+

Specify the output filename.

+
+ +
+
+-size size
+

Specify the size of the generated .ll file.

+
+ +
+
+-seed seed
+

Specify the seed to be used for the randomly generated instructions.

+
+ +
+
+

EXIT STATUS

+

llvm-stress returns 0.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-strings.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-strings.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-strings.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-strings.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,270 @@ + + + + + + + + + llvm-strings - print strings — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-strings - print strings

+
+

SYNOPSIS

+

llvm-strings [options] [input…]

+
+
+

DESCRIPTION

+

llvm-strings is a tool intended as a drop-in replacement for GNU’s +strings, which looks for printable strings in files and writes them +to the standard output stream. A printable string is any sequence of four (by +default) or more printable ASCII characters. The end of the file, or any other +byte, terminates the current sequence.

+

llvm-strings looks for strings in each input file specified. +Unlike GNU strings it looks in the entire input file, regardless of +file format, rather than restricting the search to certain sections of object +files. If “-” is specified as an input, or no input is specified, +the program reads from the standard input stream.

+
+
+

EXAMPLE

+
$ cat input.txt
+bars
+foo
+wibble blob
+$ llvm-strings input.txt
+bars
+wibble blob
+
+
+
+
+

OPTIONS

+
+
+--all, -a
+

Silently ignored. Present for GNU strings compatibility.

+
+ +
+
+--bytes=<length>, -n
+

Set the minimum number of printable ASCII characters required for a sequence of +bytes to be considered a string. The default value is 4.

+
+ +
+
+--help, -h
+

Display a summary of command line options.

+
+ +
+
+--print-file-name, -f
+

Display the name of the containing file before each string.

+

Example:

+
$ llvm-strings --print-file-name test.o test.elf
+test.o: _Z5hellov
+test.o: some_bss
+test.o: test.cpp
+test.o: main
+test.elf: test.cpp
+test.elf: test2.cpp
+test.elf: _Z5hellov
+test.elf: main
+test.elf: some_bss
+
+
+
+ +
+
+--radix=<radix>, -t
+

Display the offset within the file of each string, before the string and using +the specified radix. Valid <radix> values are o, d and x for +octal, decimal and hexadecimal respectively.

+

Example:

+
$ llvm-strings --radix=o test.o
+    1054 _Z5hellov
+    1066 .rela.text
+    1101 .comment
+    1112 some_bss
+    1123 .bss
+    1130 test.cpp
+    1141 main
+$ llvm-strings --radix=d test.o
+    556 _Z5hellov
+    566 .rela.text
+    577 .comment
+    586 some_bss
+    595 .bss
+    600 test.cpp
+    609 main
+$ llvm-strings -t x test.o
+    22c _Z5hellov
+    236 .rela.text
+    241 .comment
+    24a some_bss
+    253 .bss
+    258 test.cpp
+    261 main
+
+
+
+ +
+
+--version
+

Display the version of the llvm-strings executable.

+
+ +
+
+@<FILE>
+

Read command-line options from response file <FILE>.

+
+ +
+
+

EXIT STATUS

+

llvm-strings exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0.

+
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-strip.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-strip.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-strip.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-strip.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,402 @@ + + + + + + + + + llvm-strip - object stripping tool — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-strip - object stripping tool

+
+

SYNOPSIS

+

llvm-strip [options] inputs…

+
+
+

DESCRIPTION

+

llvm-strip is a tool to strip sections and symbols from object files. +If no other stripping or remove options are specified, --strip-all +will be enabled.

+

By default, the input files are modified in-place. If “-” is specified for the +input file, the input is read from the program’s standard input stream.

+

If the input is an archive, any requested operations will be applied to each +archive member individually.

+

The tool is still in active development, but in most scenarios it works as a +drop-in replacement for GNU’s strip.

+
+
+

GENERIC AND CROSS-PLATFORM OPTIONS

+

The following options are either agnostic of the file format, or apply to +multiple file formats.

+
+
+--disable-deterministic-archives, -U
+

Use real values for UIDs, GIDs and timestamps when updating archive member +headers.

+
+ +
+
+--discard-all, -x
+

Remove most local symbols from the output. Different file formats may limit +this to a subset of the local symbols. For example, file and section symbols in +ELF objects will not be discarded. Additionally, remove all debug sections.

+
+ +
+
+--enable-deterministic-archives, -D
+

Enable deterministic mode when stripping archives, i.e. use 0 for archive member +header UIDs, GIDs and timestamp fields. On by default.

+
+ +
+
+--help, -h
+

Print a summary of command line options.

+
+ +
+
+--no-strip-all
+

Disable --strip-all.

+
+ +
+
+-o <file>
+

Write output to <file>. Multiple input files cannot be used in combination +with -o.

+
+ +
+
+--regex
+

If specified, symbol and section names specified by other switches are treated +as extended POSIX regular expression patterns.

+
+ +
+
+--remove-section <section>, -R
+

Remove the specified section from the output. Can be specified multiple times +to remove multiple sections simultaneously.

+
+ +
+
+--strip-all-gnu
+

Remove all symbols, debug sections and relocations from the output. This option +is equivalent to GNU strip’s --strip-all switch.

+
+ +
+
+--strip-all, -s
+

For ELF objects, remove from the output all symbols and non-alloc sections not +within segments, except for .gnu.warning, .ARM.attribute sections and the +section name table.

+

For COFF objects, remove all symbols, debug sections, and relocations from the +output.

+
+ +
+
+--strip-debug, -d, -g, -S
+

Remove all debug sections from the output.

+
+ +
+
+--strip-symbol <symbol>, -N
+

Remove all symbols named <symbol> from the output. Can be specified +multiple times to remove multiple symbols.

+
+ +
+
+--strip-unneeded
+

Remove from the output all local or undefined symbols that are not required by +relocations. Also remove all debug sections.

+
+ +
+
+--version, -V
+

Display the version of the llvm-strip executable.

+
+ +
+
+--wildcard, -w
+

Allow wildcard syntax for symbol-related flags. On by default for +section-related flags. Incompatible with --regex.

+

Wildcard syntax allows the following special symbols:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Character

Meaning

Equivalent

*

Any number of characters

.*

?

Any single character

.

\

Escape the next character

\

[a-z]

Character class

[a-z]

[!a-z], [^a-z]

Negated character class

[^a-z]

+

Additionally, starting a wildcard with ‘!’ will prevent a match, even if +another flag matches. For example -w -N '*' -N '!x' will strip all symbols +except for x.

+

The order of wildcards does not matter. For example, -w -N '*' -N '!x' is +the same as -w -N '!x' -N '*'.

+
+ +
+
+@<FILE>
+

Read command-line options and commands from response file <FILE>.

+
+ +
+
+

COFF-SPECIFIC OPTIONS

+

The following options are implemented only for COFF objects. If used with other +objects, llvm-strip will either emit an error or silently ignore +them.

+
+
+--only-keep-debug
+

Remove the contents of non-debug sections from the output, but keep the section +headers.

+
+ +
+
+

ELF-SPECIFIC OPTIONS

+

The following options are implemented only for ELF objects. If used with other +objects, llvm-strip will either emit an error or silently ignore +them.

+
+ +
+--allow-broken-links
+

Allow llvm-strip to remove sections even if it would leave invalid +section references. Any invalid sh_link fields will be set to zero.

+
+ +
+
+--discard-locals, -X
+

Remove local symbols starting with “.L” from the output.

+
+ +
+
+--keep-file-symbols
+

Keep symbols of type STT_FILE, even if they would otherwise be stripped.

+
+ +
+
+--keep-section <section>
+

When removing sections from the output, do not remove sections named +<section>. Can be specified multiple times to keep multiple sections.

+
+ +
+
+--keep-symbol <symbol>, -K
+

When removing symbols from the output, do not remove symbols named +<symbol>. Can be specified multiple times to keep multiple symbols.

+
+ +
+
+--preserve-dates, -p
+

Preserve access and modification timestamps in the output.

+
+ +
+
+--strip-sections
+

Remove from the output all section headers and all section data not within +segments. Note that many tools will not be able to use an object without +section headers.

+
+ +
+
+-T
+

Remove Swift symbols.

+
+ +
+
+

EXIT STATUS

+

llvm-strip exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0.

+
+
+

BUGS

+

To report bugs, please visit <https://bugs.llvm.org/>.

+
+
+

SEE ALSO

+

llvm-objcopy(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-symbolizer.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-symbolizer.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-symbolizer.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-symbolizer.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,597 @@ + + + + + + + + + llvm-symbolizer - convert addresses into source code locations — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-symbolizer - convert addresses into source code locations

+
+

SYNOPSIS

+

llvm-symbolizer [options] [addresses…]

+
+
+

DESCRIPTION

+

llvm-symbolizer reads object file names and addresses from the +command-line and prints corresponding source code locations to standard output.

+

If no address is specified on the command-line, it reads the addresses from +standard input. If no object file is specified on the command-line, but +addresses are, or if at any time an input value is not recognized, the input is +simply echoed to the output.

+

A positional argument or standard input value can be preceded by “DATA” or +“CODE” to indicate that the address should be symbolized as data or executable +code respectively. If neither is specified, “CODE” is assumed. DATA is +symbolized as address and symbol size rather than line number.

+

Object files can be specified together with the addresses either on standard +input or as positional arguments on the command-line, following any “DATA” or +“CODE” prefix.

+

llvm-symbolizer parses options from the environment variable +LLVM_SYMBOLIZER_OPTS after parsing options from the command line. +LLVM_SYMBOLIZER_OPTS is primarily useful for supplementing the command-line +options when llvm-symbolizer is invoked by another program or +runtime.

+
+
+

EXAMPLES

+

All of the following examples use the following two source files as input. They +use a mixture of C-style and C++-style linkage to illustrate how these names are +printed differently (see --demangle).

+
// test.h
+extern "C" inline int foz() {
+  return 1234;
+}
+
+
+
// test.cpp
+#include "test.h"
+int bar=42;
+
+int foo() {
+  return bar;
+}
+
+int baz() {
+  volatile int k = 42;
+  return foz() + k;
+}
+
+int main() {
+  return foo() + baz();
+}
+
+
+

These files are built as follows:

+
$ clang -g test.cpp -o test.elf
+$ clang -g -O2 test.cpp -o inlined.elf
+
+
+

Example 1 - addresses and object on command-line:

+
$ llvm-symbolizer --obj=test.elf 0x4004d0 0x400490
+foz
+/tmp/test.h:1:0
+
+baz()
+/tmp/test.cpp:11:0
+
+
+

Example 2 - addresses on standard input:

+
$ cat addr.txt
+0x4004a0
+0x400490
+0x4004d0
+$ llvm-symbolizer --obj=test.elf < addr.txt
+main
+/tmp/test.cpp:15:0
+
+baz()
+/tmp/test.cpp:11:0
+
+foz
+/tmp/./test.h:1:0
+
+
+

Example 3 - object specified with address:

+
$ llvm-symbolizer "test.elf 0x400490" "inlined.elf 0x400480"
+baz()
+/tmp/test.cpp:11:0
+
+foo()
+/tmp/test.cpp:8:10
+
+$ cat addr2.txt
+test.elf 0x4004a0
+inlined.elf 0x400480
+
+$ llvm-symbolizer < addr2.txt
+main
+/tmp/test.cpp:15:0
+
+foo()
+/tmp/test.cpp:8:10
+
+
+

Example 4 - CODE and DATA prefixes:

+
$ llvm-symbolizer --obj=test.elf "CODE 0x400490" "DATA 0x601028"
+baz()
+/tmp/test.cpp:11:0
+
+bar
+6295592 4
+
+$ cat addr3.txt
+CODE test.elf 0x4004a0
+DATA inlined.elf 0x601028
+
+$ llvm-symbolizer < addr3.txt
+main
+/tmp/test.cpp:15:0
+
+bar
+6295592 4
+
+
+

Example 5 - path-style options:

+

This example uses the same source file as above, but the source file’s +full path is /tmp/foo/test.cpp and is compiled as follows. The first case +shows the default absolute path, the second --basenames, and the third +shows --relativenames.

+
$ pwd
+/tmp
+$ clang -g foo/test.cpp -o test.elf
+$ llvm-symbolizer --obj=test.elf 0x4004a0
+main
+/tmp/foo/test.cpp:15:0
+$ llvm-symbolizer --obj=test.elf 0x4004a0 --basenames
+main
+test.cpp:15:0
+$ llvm-symbolizer --obj=test.elf 0x4004a0 --relativenames
+main
+foo/test.cpp:15:0
+
+
+
+
+

OPTIONS

+
+
+--adjust-vma <offset>
+

Add the specified offset to object file addresses when performing lookups. +This can be used to perform lookups as if the object were relocated by the +offset.

+
+ +
+
+--basenames, -s
+

Print just the file’s name without any directories, instead of the +absolute path.

+
+ +
+
+--demangle, -C
+

Print demangled function names, if the names are mangled (e.g. the mangled +name _Z3bazv becomes baz(), whilst the non-mangled name foz is printed +as is). Defaults to true.

+
+ +
+
+--dwp <path>
+

Use the specified DWP file at <path> for any CUs that have split DWARF +debug data.

+
+ +
+
+--fallback-debug-path <path>
+

When a separate file contains debug data, and is referenced by a GNU debug +link section, use the specified path as a basis for locating the debug data if +it cannot be found relative to the object.

+
+ +
+
+--functions [=<none|short|linkage>], -f
+

Specify the way function names are printed (omit function name, print short +function name, or print full linkage name, respectively). Defaults to +linkage.

+
+ +
+
+--help, -h
+

Show help and usage for this command.

+
+ +
+
+--inlining, --inlines, -i
+

If a source code location is in an inlined function, prints all the inlined +frames. This is the default.

+
+ +
+
+--no-inlines
+

Don’t print inlined frames.

+
+ +
+
+--no-demangle
+

Don’t print demangled function names.

+
+ +
+
+--obj <path>, --exe, -e
+

Path to object file to be symbolized. If - is specified, read the object +directly from the standard input stream.

+
+ +
+
+--output-style <LLVM|GNU|JSON>
+

Specify the preferred output style. Defaults to LLVM. When the output +style is set to GNU, the tool follows the style of GNU’s addr2line. +The differences from the LLVM style are:

+
    +
  • Does not print the column of a source code location.

  • +
  • Does not add an empty line after the report for an address.

  • +
  • Does not replace the name of an inlined function with the name of the +topmost caller when inlined frames are not shown.

  • +
  • Prints an address’s debug-data discriminator when it is non-zero. One way to +produce discriminators is to compile with clang’s -fdebug-info-for-profiling.

  • +
+
+
JSON style provides a machine readable output in JSON. If addresses are

supplied via stdin, the output JSON will be a series of individual objects. +Otherwise, all results will be contained in a single array.

+
+
+
$ llvm-symbolizer --obj=inlined.elf 0x4004be 0x400486 -p
+baz() at /tmp/test.cpp:11:18
+ (inlined by) main at /tmp/test.cpp:15:0
+
+foo() at /tmp/test.cpp:6:3
+
+$ llvm-symbolizer --output-style=LLVM --obj=inlined.elf 0x4004be 0x400486 -p --no-inlines
+main at /tmp/test.cpp:11:18
+
+foo() at /tmp/test.cpp:6:3
+
+$ llvm-symbolizer --output-style=GNU --obj=inlined.elf 0x4004be 0x400486 -p --no-inlines
+baz() at /tmp/test.cpp:11
+foo() at /tmp/test.cpp:6
+
+$ clang -g -fdebug-info-for-profiling test.cpp -o profiling.elf
+$ llvm-symbolizer --output-style=GNU --obj=profiling.elf 0x401167 -p --no-inlines
+main at /tmp/test.cpp:15 (discriminator 2)
+
+$ llvm-symbolizer --output-style=JSON --obj=inlined.elf 0x4004be 0x400486 -p
+[
+  {
+    "Address": "0x4004be",
+    "ModuleName": "inlined.elf",
+    "Symbol": [
+      {
+        "Column": 18,
+        "Discriminator": 0,
+        "FileName": "/tmp/test.cpp",
+        "FunctionName": "baz()",
+        "Line": 11,
+        "Source": "",
+        "StartFileName": "/tmp/test.cpp",
+        "StartLine": 9
+      },
+      {
+        "Column": 0,
+        "Discriminator": 0,
+        "FileName": "/tmp/test.cpp",
+        "FunctionName": "main",
+        "Line": 15,
+        "Source": "",
+        "StartFileName": "/tmp/test.cpp",
+        "StartLine": 14
+      }
+    ]
+  },
+  {
+    "Address": "0x400486",
+    "ModuleName": "inlined.elf",
+    "Symbol": [
+      {
+        "Column": 3,
+        "Discriminator": 0,
+        "FileName": "/tmp/test.cpp",
+        "FunctionName": "foo()",
+        "Line": 6,
+        "Source": "",
+        "StartFileName": "/tmp/test.cpp",
+        "StartLine": 5
+      }
+    ]
+  }
+]
+
+
+
+ +
+
+--pretty-print, -p
+

Print human readable output. If --inlining is specified, the +enclosing scope is prefixed by (inlined by). +For JSON output, the option will cause JSON to be indented and split over +new lines. Otherwise, the JSON output will be printed in a compact form.

+
$ llvm-symbolizer --obj=inlined.elf 0x4004be --inlining --pretty-print
+baz() at /tmp/test.cpp:11:18
+ (inlined by) main at /tmp/test.cpp:15:0
+
+
+
+ +
+
+--print-address, --addresses, -a
+

Print address before the source code location. Defaults to false.

+
$ llvm-symbolizer --obj=inlined.elf --print-address 0x4004be
+0x4004be
+baz()
+/tmp/test.cpp:11:18
+main
+/tmp/test.cpp:15:0
+
+$ llvm-symbolizer --obj=inlined.elf 0x4004be --pretty-print --print-address
+0x4004be: baz() at /tmp/test.cpp:11:18
+ (inlined by) main at /tmp/test.cpp:15:0
+
+
+
+ +
+
+--print-source-context-lines <N>
+

Print N lines of source context for each symbolized address.

+
$ llvm-symbolizer --obj=test.elf 0x400490 --print-source-context-lines=3
+baz()
+/tmp/test.cpp:11:0
+10  :   volatile int k = 42;
+11 >:   return foz() + k;
+12  : }
+
+
+
+ +
+
+--relativenames
+

Print the file’s path relative to the compilation directory, instead +of the absolute path. If the command-line to the compiler included +the full path, this will be the same as the default.

+
+ +
+
+--verbose
+

Print verbose address, line and column information.

+
$ llvm-symbolizer --obj=inlined.elf --verbose 0x4004be
+baz()
+  Filename: /tmp/test.cpp
+  Function start filename: /tmp/test.cpp
+  Function start line: 9
+  Function start address: 0x4004b6
+  Line: 11
+  Column: 18
+main
+  Filename: /tmp/test.cpp
+  Function start filename: /tmp/test.cpp
+  Function start line: 14
+  Function start address: 0x4004b0
+  Line: 15
+  Column: 18
+
+
+
+ +
+
+--version, -v
+

Print version information for the tool.

+
+ +
+
+@<FILE>
+

Read command-line options from response file <FILE>.

+
+ +
+
+

WINDOWS/PDB SPECIFIC OPTIONS

+
+
+--dia
+

Use the Windows DIA SDK for symbolization. If the DIA SDK is not found, +llvm-symbolizer will fall back to the native implementation.

+
+ +
+
+

MACH-O SPECIFIC OPTIONS

+
+
+--default-arch <arch>
+

If a binary contains object files for multiple architectures (e.g. it is a +Mach-O universal binary), symbolize the object file for a given architecture. +You can also specify the architecture by writing binary_name:arch_name in +the input (see example below). If the architecture is not specified in either +way, the address will not be symbolized. Defaults to empty string.

+
$ cat addr.txt
+/tmp/mach_universal_binary:i386 0x1f84
+/tmp/mach_universal_binary:x86_64 0x100000f24
+
+$ llvm-symbolizer < addr.txt
+_main
+/tmp/source_i386.cc:8
+
+_main
+/tmp/source_x86_64.cc:8
+
+
+
+ +
+
+--dsym-hint <path/to/file.dSYM>
+

If the debug info for a binary isn’t present in the default location, look for +the debug info at the .dSYM path provided via this option. This flag can be +used multiple times.

+
+ +
+
+

EXIT STATUS

+

llvm-symbolizer returns 0. Other exit codes imply an internal program +error.

+
+
+

SEE ALSO

+

llvm-addr2line(1)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-tblgen.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-tblgen.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/llvm-tblgen.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/llvm-tblgen.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + llvm-tblgen - Target Description to C++ Code for LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

llvm-tblgen - Target Description to C++ Code for LLVM

+
+

SYNOPSIS

+

llvm-tblgen [options] [filename]

+
+
+

DESCRIPTION

+

llvm-tblgen is a program that translates compiler-related target +description (.td) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler.

+

Please see tblgen - Description to C++ Code +for a description of the filename argument and options, including the +options common to all *-tblgen programs.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/mlir-tblgen.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/mlir-tblgen.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/mlir-tblgen.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/mlir-tblgen.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,158 @@ + + + + + + + + + mlir-tblgen - Description to C++ Code for MLIR — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

mlir-tblgen - Description to C++ Code for MLIR

+
+

SYNOPSIS

+

mlir-tblgen [options] [filename]

+
+
+

DESCRIPTION

+

mlir-tblgen is a program that translates compiler-related target +description (.td) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler.

+

Please see tblgen - Description to C++ Code +for a description of the filename argument and options, including the +options common to all *-tblgen programs.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/opt.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/opt.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/opt.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/opt.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,264 @@ + + + + + + + + + opt - LLVM optimizer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

opt - LLVM optimizer

+
+

SYNOPSIS

+

opt [options] [filename]

+
+
+

DESCRIPTION

+

The opt command is the modular LLVM optimizer and analyzer. It +takes LLVM source files as input, runs the specified optimizations or analyses +on them, and then outputs the optimized file or the analysis results. The +function of opt depends on whether the -analyze option is +given.

+

When -analyze is specified, opt performs various analyses +of the input source. It will usually print the results on standard output, but +in a few cases, it will print output to standard error or generate a file with +the analysis output, which is usually done when the output is meant for another +program.

+

While -analyze is not given, opt attempts to produce an +optimized output file. The optimizations available via opt depend +upon what libraries were linked into it as well as any additional libraries +that have been loaded with the -load option. Use the -help +option to determine what optimizations you can use.

+

If filename is omitted from the command line or is “-”, opt +reads its input from standard input. Inputs can be in either the LLVM assembly +language format (.ll) or the LLVM bitcode format (.bc).

+

If an output filename is not specified with the -o option, +opt writes its output to the standard output.

+
+
+

OPTIONS

+
+
+-f
+

Enable binary output on terminals. Normally, opt will refuse to +write raw bitcode output if the output stream is a terminal. With this option, +opt will write raw bitcode regardless of the output device.

+
+ +
+
+-help
+

Print a summary of command line options.

+
+ +
+
+-o <filename>
+

Specify the output filename.

+
+ +
+
+-S
+

Write output in LLVM intermediate language (instead of bitcode).

+
+ +
+
+-{passname}
+

opt provides the ability to run any of LLVM’s optimization or +analysis passes in any order. The -help option lists all the passes +available. The order in which the options occur on the command line is the +order in which they are executed (within pass constraints).

+
+ +
+
+-strip-debug
+

This option causes opt to strip debug information from the module before +applying other optimizations. It is essentially the same as -strip +but it ensures that stripping of debug information is done first.

+
+ +
+
+-verify-each
+

This option causes opt to add a verify pass after every pass otherwise +specified on the command line (including -verify). This is useful +for cases where it is suspected that a pass is creating an invalid module but +it is not clear which pass is doing it.

+
+ +
+
+-stats
+

Print statistics.

+
+ +
+
+-time-passes
+

Record the amount of time needed for each pass and print it to standard +error.

+
+ +
+
+-debug
+

If this is a debug build, this option will enable debug printouts from passes +which use the LLVM_DEBUG() macro. See the LLVM Programmer’s Manual, section #DEBUG for more information.

+
+ +
+
+-load=<plugin>
+

Load the dynamic object plugin. This object should register new +optimization or analysis passes. Once loaded, the object will add new command +line options to enable various optimizations or analyses. To see the new +complete list of optimizations, use the -help and -load +options together. For example:

+
opt -load=plugin.so -help
+
+
+
+ +
+
+

EXIT STATUS

+

If opt succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/tblgen.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/tblgen.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandGuide/tblgen.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandGuide/tblgen.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,1209 @@ + + + + + + + + + tblgen - Description to C++ Code — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

tblgen - Description to C++ Code

+
+

SYNOPSIS

+

clang-tblgen [options] [filename]

+

lldb-tblgen [options] [filename]

+

llvm-tblgen [options] [filename]

+

mlir-tblgen [options] [filename]

+
+
+

DESCRIPTION

+

*-tblgen is a family of programs that translates target +description (.td) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for +writing parts of the compiler, debugger, and LLVM target backends.

+

The details of the input and output of the *-tblgen programs are +beyond the scope of this short introduction; please see the TableGen +Overview for an introduction and for references to +additional TableGen documents.

+

The filename argument specifies the name of the Target Description (.td) +file that TableGen processes.

+
+
+

OPTIONS

+
+

General Options

+
+
+-help
+

Print a description of the command line options.

+
+ +
+
+-help-list
+

Print a description of the command line options in a simple list format.

+
+ +
+
+-D=macroname
+

Specify the name of a macro to be defined. The name is defined, but it +has no particular value.

+
+ +
+
+-d=filename
+

Specify the name of the dependency filename.

+
+ +
+
+-debug
+

Enable debug output.

+
+ +
+
+-dump-json
+

Print a JSON representation of all records, suitable for further +automated processing.

+
+ +
+
+-I directory
+

Specify where to find other target description files for inclusion. The +directory value should be a full or partial path to a directory that +contains target description files.

+
+ +
+
+-null-backend
+

Parse the source files and build the records, but do not run any +backend. This is useful for timing the frontend.

+
+ +
+
+-o filename
+

Specify the output file name. If filename is -, then +*-tblgen sends its output to standard output.

+
+ +
+
+-print-records
+

Print all classes and records to standard output (default backend option).

+
+ +
+
+-print-detailed-records
+

Print a detailed report of all global variables, classes, and records +to standard output.

+
+ +
+
+-stats
+

Print a report with any statistics collected by the backend.

+
+ +
+
+-time-phases
+

Time the parser and backend phases and print a report.

+
+ +
+
+-version
+

Show the version number of the program.

+
+ +
+
+-write-if-changed
+

Write the output file only if it is new or has changed.

+
+ +
+
+

clang-tblgen Options

+
+
+-gen-clang-attr-classes
+

Generate Clang attribute classes.

+
+ +
+
+-gen-clang-attr-parser-string-switches
+

Generate all parser-related attribute string switches.

+
+ +
+
+-gen-clang-attr-subject-match-rules-parser-string-switches
+

Generate all parser-related attribute subject match rule string switches.

+
+ +
+
+-gen-clang-attr-impl
+

Generate Clang attribute implementations.

+
+ +
+
+-gen-clang-attr-list
+

Generate a Clang attribute list.

+
+ +
+
+-gen-clang-attr-subject-match-rule-list
+

Generate a Clang attribute subject match rule list.

+
+ +
+
+-gen-clang-attr-pch-read
+

Generate Clang PCH attribute reader.

+
+ +
+
+-gen-clang-attr-pch-write
+

Generate Clang PCH attribute writer.

+
+ +
+
+-gen-clang-attr-has-attribute-impl
+

Generate a Clang attribute spelling list.

+
+ +
+
+-gen-clang-attr-spelling-index
+

Generate a Clang attribute spelling index.

+
+ +
+
+-gen-clang-attr-ast-visitor
+

Generate a recursive AST visitor for Clang attributes.

+
+ +
+
+-gen-clang-attr-template-instantiate
+

Generate Clang template instantiation code.

+
+ +
+
+-gen-clang-attr-parsed-attr-list
+

Generate a Clang parsed attribute list.

+
+ +
+
+-gen-clang-attr-parsed-attr-impl
+

Generate the Clang parsed attribute helpers.

+
+ +
+
+-gen-clang-attr-parsed-attr-kinds
+

Generate Clang parsed attribute kinds.

+
+ +
+
+-gen-clang-attr-text-node-dump
+

Generate Clang attribute text node dumper.

+
+ +
+
+-gen-clang-attr-node-traverse
+

Generate Clang attribute traverser.

+
+ +
+
+-gen-clang-diags-defs
+

Generate Clang diagnostics definitions.

+
+ +
+
+-clang-component component
+

Only use warnings from specified component.

+
+ +
+
+-gen-clang-diag-groups
+

Generate Clang diagnostic groups.

+
+ +
+
+-gen-clang-diags-index-name
+

Generate Clang diagnostic name index.

+
+ +
+
+-gen-clang-basic-reader
+

Generate Clang BasicReader classes.

+
+ +
+
+-gen-clang-basic-writer
+

Generate Clang BasicWriter classes.

+
+ +
+
+-gen-clang-comment-nodes
+

Generate Clang AST comment nodes.

+
+ +
+
+-gen-clang-decl-nodes
+

Generate Clang AST declaration nodes.

+
+ +
+
+-gen-clang-stmt-nodes
+

Generate Clang AST statement nodes.

+
+ +
+
+-gen-clang-type-nodes
+

Generate Clang AST type nodes.

+
+ +
+
+-gen-clang-type-reader
+

Generate Clang AbstractTypeReader class.

+
+ +
+
+-gen-clang-type-writer
+

Generate Clang AbstractTypeWriter class.

+
+ +
+
+-gen-clang-opcodes
+

Generate Clang constexpr interpreter opcodes.

+
+ +
+
+-gen-clang-sa-checkers
+

Generate Clang static analyzer checkers.

+
+ +
+
+-gen-clang-comment-html-tags
+

Generate efficient matchers for HTML tag names that are used in +documentation comments.

+
+ +
+
+-gen-clang-comment-html-tags-properties
+

Generate efficient matchers for HTML tag properties.

+
+ +
+
+-gen-clang-comment-html-named-character-references
+

Generate function to translate named character references to UTF-8 sequences.

+
+ +
+
+-gen-clang-comment-command-info
+

Generate command properties for commands that are used in documentation comments.

+
+ +
+
+-gen-clang-comment-command-list
+

Generate list of commands that are used in documentation comments.

+
+ +
+
+-gen-clang-opencl-builtins
+

Generate OpenCL builtin declaration handlers.

+
+ +
+
+-gen-arm-neon
+

Generate arm_neon.h for Clang.

+
+ +
+
+-gen-arm-fp16
+

Generate arm_fp16.h for Clang.

+
+ +
+
+-gen-arm-bf16
+

Generate arm_bf16.h for Clang.

+
+ +
+
+-gen-arm-neon-sema
+

Generate ARM NEON sema support for Clang.

+
+ +
+
+-gen-arm-neon-test
+

Generate ARM NEON tests for Clang.

+
+ +
+
+-gen-arm-sve-header
+

Generate arm_sve.h for Clang.

+
+ +
+
+-gen-arm-sve-builtins
+

Generate arm_sve_builtins.inc for Clang.

+
+ +
+
+-gen-arm-sve-builtin-codegen
+

Generate arm_sve_builtin_cg_map.inc for Clang.

+
+ +
+
+-gen-arm-sve-typeflags
+

Generate arm_sve_typeflags.inc for Clang.

+
+ +
+
+-gen-arm-sve-sema-rangechecks
+

Generate arm_sve_sema_rangechecks.inc for Clang.

+
+ +
+
+-gen-arm-mve-header
+

Generate arm_mve.h for Clang.

+
+ +
+
+-gen-arm-mve-builtin-def
+

Generate ARM MVE builtin definitions for Clang.

+
+ +
+
+-gen-arm-mve-builtin-sema
+

Generate ARM MVE builtin sema checks for Clang.

+
+ +
+
+-gen-arm-mve-builtin-codegen
+

Generate ARM MVE builtin code-generator for Clang.

+
+ +
+
+-gen-arm-mve-builtin-aliases
+

Generate list of valid ARM MVE builtin aliases for Clang.

+
+ +
+
+-gen-arm-cde-header
+

Generate arm_cde.h for Clang.

+
+ +
+
+-gen-arm-cde-builtin-def
+

Generate ARM CDE builtin definitions for Clang.

+
+ +
+
+-gen-arm-cde-builtin-sema
+

Generate ARM CDE builtin sema checks for Clang.

+
+ +
+
+-gen-arm-cde-builtin-codegen
+

Generate ARM CDE builtin code-generator for Clang.

+
+ +
+
+-gen-arm-cde-builtin-aliases
+

Generate list of valid ARM CDE builtin aliases for Clang.

+
+ +
+
+-gen-riscv-vector-header
+

Generate riscv_vector.h for Clang.

+
+ +
+
+-gen-riscv-vector-builtins
+

Generate riscv_vector_builtins.inc for Clang.

+
+ +
+
+-gen-riscv-vector-builtin-codegen
+

Generate riscv_vector_builtin_cg.inc for Clang.

+
+ +
+
+-gen-attr-docs
+

Generate attribute documentation.

+
+ +
+
+-gen-diag-docs
+

Generate diagnostic documentation.

+
+ +
+
+-gen-opt-docs
+

Generate option documentation.

+
+ +
+
+-gen-clang-data-collectors
+

Generate data collectors for AST nodes.

+
+ +
+
+-gen-clang-test-pragma-attribute-supported-attributes
+

Generate a list of attributes supported by #pragma clang attribute for +testing purposes.

+
+ +
+
+

lldb-tblgen Options

+
+
+-gen-lldb-option-defs
+

Generate lldb OptionDefinition values.

+
+ +
+
+-gen-lldb-property-defs
+

Generate lldb PropertyDefinition values.

+
+ +
+
+-gen-lldb-property-enum-defs
+

Generate lldb PropertyDefinition enum values.

+
+ +
+
+

llvm-tblgen Options

+
+
+-gen-asm-matcher
+

Generate assembly instruction matcher.

+
+ +
+
+-match-prefix=prefix
+

Make -gen-asm-matcher match only instructions with the given prefix.

+
+ +
+
+-gen-asm-parser
+

Generate assembly instruction parser.

+
+ +
+
+-asmparsernum=n
+

Make -gen-asm-parser emit assembly parser number n.

+
+ +
+
+-gen-asm-writer
+

Generate assembly writer.

+
+ +
+
+-asmwriternum=n
+

Make -gen-asm-writer emit assembly writer number n.

+
+ +
+
+-gen-attrs
+

Generate attributes.

+
+ +
+
+-gen-automata
+

Generate generic automata.

+
+ +
+
+-gen-callingconv
+

Generate calling convention descriptions.

+
+ +
+
+-gen-compress-inst-emitter
+

Generate RISC-V compressed instructions.

+
+ +
+
+-gen-ctags
+

Generate ctags-compatible index.

+
+ +
+
+-gen-dag-isel
+

Generate a DAG (directed acyclic graph) instruction selector.

+
+ +
+
+-instrument-coverage
+

Make -gen-dag-isel generate tables to help identify the patterns matched.

+
+ +
+
+-omit-comments
+

Make -gen-dag-isel omit comments. The default is false.

+
+ +
+
+-gen-dfa-packetizer
+

Generate DFA Packetizer for VLIW targets.

+
+ +
+
+-gen-directive-decl
+

Generate directive related declaration code (header file).

+
+ +
+
+-gen-directive-gen
+

Generate directive related implementation code part.

+
+ +
+
+-gen-directive-impl
+

Generate directive related implementation code.

+
+ +
+
+-gen-disassembler
+

Generate disassembler.

+
+ +
+
+-gen-emitter
+

Generate machine code emitter.

+
+ +
+
+-gen-exegesis
+

Generate llvm-exegesis tables.

+
+ +
+
+-gen-fast-isel
+

Generate a “fast” instruction selector.

+
+ +
+
+-gen-global-isel
+

Generate GlobalISel selector.

+
+ +
+
+-gisel-coverage-file=filename
+

Specify the file from which to retrieve coverage information.

+
+ +
+
+-instrument-gisel-coverage
+

Make -gen-global-isel generate coverage instrumentation.

+
+ +
+
+-optimize-match-table
+

Make -gen-global-isel generate an optimized version of the match table.

+
+ +
+
+-warn-on-skipped-patterns
+

Make -gen-global-isel explain why a pattern was skipped for inclusion.

+
+ +
+
+-gen-global-isel-combiner
+

Generate GlobalISel combiner.

+
+ +
+
+-combiners=list
+

Make -gen-global-isel-combiner emit the specified combiners.

+
+ +
+
+-gicombiner-show-expansions
+

Make -gen-global-isel-combiner use C++ comments to indicate occurrences +of code expansion.

+
+ +
+
+-gicombiner-stop-after-build
+

Make -gen-global-isel-combiner stop processing after building the match tree.

+
+ +
+
+-gicombiner-stop-after-parse
+

Make -gen-global-isel-combiner stop processing after parsing rules +and dump state.

+
+ +
+
+-gen-instr-info
+

Generate instruction descriptions.

+
+ +
+
+-gen-instr-docs
+

Generate instruction documentation.

+
+ +
+
+-gen-intrinsic-enums
+

Generate intrinsic enums.

+
+ +
+
+-intrinsic-prefix=prefix
+

Make -gen-intrinsic-enums generate intrinsics with this target prefix.

+
+ +
+
+-gen-intrinsic-impl
+

Generate intrinsic information.

+
+ +
+
+-gen-opt-parser-defs
+

Generate options definitions.

+
+ +
+
+-gen-opt-rst
+

Generate option RST.

+
+ +
+
+-gen-pseudo-lowering
+

Generate pseudo instruction lowering.

+
+ +
+
+-gen-register-bank
+

Generate register bank descriptions.

+
+ +
+
+-gen-register-info
+

Generate registers and register classes info.

+
+ +
+
+-register-info-debug
+

Make -gen-register-info dump register information for debugging.

+
+ +
+
+-gen-searchable-tables
+

Generate generic searchable tables. See TableGen BackEnds +for a detailed description.

+
+ +
+
+-gen-subtarget
+

Generate subtarget enumerations.

+
+ +
+
+-gen-x86-EVEX2VEX-tables
+

Generate X86 EVEX to VEX compress tables.

+
+ +
+
+-gen-x86-fold-tables
+

Generate X86 fold tables.

+
+ +
+
+-long-string-literals
+

When emitting large string tables, prefer string literals over +comma-separated char literals. This can be a readability and +compile-time performance win, but upsets some compilers.

+
+ +
+
+-print-enums
+

Print enumeration values for a class.

+
+ +
+
+-class=classname
+

Make -print-enums print the enumeration list for the specified class.

+
+ +
+
+-print-sets
+

Print expanded sets for testing DAG exprs.

+
+ +
+
+

mlir-tblgen Options

+
+
+-gen-avail-interface-decls
+

Generate availability interface declarations.

+
+ +
+
+-gen-avail-interface-defs
+

Generate availability interface definitions.

+
+ +
+
+-gen-dialect-doc
+

Generate dialect documentation.

+
+ +
+
+-dialect
+

The dialect to generate.

+
+ +
+
+-gen-directive-decl
+

Generate declarations for directives (OpenMP, etc.).

+
+ +
+
+-gen-enum-decls
+

Generate enum utility declarations.

+
+ +
+
+-gen-enum-defs
+

Generate enum utility definitions.

+
+ +
+
+-gen-enum-from-llvmir-conversions
+

Generate conversions of EnumAttrs from LLVM IR.

+
+ +
+
+-gen-enum-to-llvmir-conversions
+

Generate conversions of EnumAttrs to LLVM IR.

+
+ +
+
+-gen-llvmir-conversions
+

Generate LLVM IR conversions.

+
+ +
+
+-gen-llvmir-intrinsics
+

Generate LLVM IR intrinsics.

+
+ +
+
+-llvmir-intrinsics-filter
+

Only keep the intrinsics with the specified substring in their record name.

+
+ +
+
+-dialect-opclass-base
+

The base class for the ops in the dialect we are to emit.

+
+ +
+
+-gen-op-decls
+

Generate operation declarations.

+
+ +
+
+-gen-op-defs
+

Generate operation definitions.

+
+ +
+
+-asmformat-error-is-fatal
+

Emit a fatal error if format parsing fails.

+
+ +
+
+-op-exclude-regex
+

Regular expression of name of ops to exclude (no filter if empty).

+
+ +
+
+-op-include-regex
+

Regular expression of name of ops to include (no filter if empty).

+
+ +
+
+-gen-op-doc
+

Generate operation documentation.

+
+ +
+
+-gen-pass-decls
+

Generate pass declarations.

+
+ +
+
+-name namestring
+

The name of this group of passes.

+
+ +
+
+-gen-pass-doc
+

Generate pass documentation.

+
+ +
+
+-gen-rewriters
+

Generate pattern rewriters.

+
+ +
+
+-gen-spirv-avail-impls
+

Generate SPIR-V operation availability implementations.

+
+ +
+
+-gen-spirv-capability-implication
+

Generate utility function to return implied capabilities for a given capability.

+
+ +
+
+-gen-spirv-enum-avail-decls
+

Generate SPIR-V enum availability declarations.

+
+ +
+
+-gen-spirv-enum-avail-defs
+

Generate SPIR-V enum availability definitions.

+
+ +
+
+-gen-spirv-op-utils
+

Generate SPIR-V operation utility definitions.

+
+ +
+
+-gen-spirv-serialization
+

Generate SPIR-V (de)serialization utilities and functions.

+
+ +
+
+-gen-struct-attr-decls
+

Generate struct utility declarations.

+
+ +
+
+-gen-struct-attr-defs
+

Generate struct utility definitions.

+
+ +
+
+-gen-typedef-decls
+

Generate TypeDef declarations.

+
+ +
+
+-gen-typedef-defs
+

Generate TypeDef definitions.

+
+ +
+
+-typedefs-dialect name
+

Generate types for this dialect.

+
+ +
+
+
+

EXIT STATUS

+

If *-tblgen succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandLine.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandLine.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CommandLine.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CommandLine.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,1640 @@ + + + + + + + + + CommandLine 2.0 Library Manual — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

CommandLine 2.0 Library Manual

+ +
+

Introduction

+

This document describes the CommandLine argument processing library. It will +show you how to use it, and what it can do. The CommandLine library uses a +declarative approach to specifying the command line options that your program +takes. By default, these options declarations implicitly hold the value parsed +for the option declared (of course this can be changed).

+

Although there are a lot of command line argument parsing libraries out +there in many different languages, none of them fit well with what I needed. By +looking at the features and problems of other libraries, I designed the +CommandLine library to have the following features:

+
    +
  1. Speed: The CommandLine library is very quick and uses few resources. The +parsing time of the library is directly proportional to the number of +arguments parsed, not the number of options recognized. Additionally, +command line argument values are captured transparently into user-defined +global variables, which can be accessed like any other variable (and with the +same performance).

  2. +
  3. Type Safe: As a user of CommandLine, you don’t have to worry about +remembering the type of arguments that you want (is it an int? a string? a +bool? an enum?) and keep casting it around. Not only does this help prevent +error prone constructs, it also leads to dramatically cleaner source code.

  4. +
  5. No subclasses required: To use CommandLine, you instantiate variables that +correspond to the arguments that you would like to capture, you don’t +subclass a parser. This means that you don’t have to write any +boilerplate code.

  6. +
  7. Globally accessible: Libraries can specify command line arguments that are +automatically enabled in any tool that links to the library. This is +possible because the application doesn’t have to keep a list of arguments to +pass to the parser. This also makes supporting dynamically loaded options +trivial.

  8. +
  9. Cleaner: CommandLine supports enum and other types directly, meaning that +there is less error and more security built into the library. You don’t have +to worry about whether your integral command line argument accidentally got +assigned a value that is not valid for your enum type.

  10. +
  11. Powerful: The CommandLine library supports many different types of arguments, +from simple boolean flags to scalar arguments (strings, +integers, enums, doubles), to lists of arguments. This is +possible because CommandLine is…

  12. +
  13. Extensible: It is very simple to add a new argument type to CommandLine. +Simply specify the parser that you want to use with the command line option +when you declare it. Custom parsers are no problem.

  14. +
  15. Labor Saving: The CommandLine library cuts down on the amount of grunt work +that you, the user, have to do. For example, it automatically provides a +-help option that shows the available command line options for your tool. +Additionally, it does most of the basic correctness checking for you.

  16. +
  17. Capable: The CommandLine library can handle lots of different forms of +options often found in real programs. For example, positional arguments, +ls style grouping options (to allow processing ‘ls -lad’ +naturally), ld style prefix options (to parse ‘-lmalloc +-L/usr/lib’), and interpreter style options.

  18. +
+

This document will hopefully let you jump in and start using CommandLine in your +utility quickly and painlessly. Additionally it should be a simple reference +manual to figure out how stuff works.

+
+
+

Quick Start Guide

+

This section of the manual runs through a simple CommandLine’ification of a +basic compiler tool. This is intended to show you how to jump into using the +CommandLine library in your own program, and show you some of the cool things it +can do.

+

To start out, you need to include the CommandLine header file into your program:

+
#include "llvm/Support/CommandLine.h"
+
+
+

Additionally, you need to add this as the first line of your main program:

+
int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv);
+  ...
+}
+
+
+

… which actually parses the arguments and fills in the variable declarations.

+

Now that you are ready to support command line arguments, we need to tell the +system which ones we want, and what type of arguments they are. The CommandLine +library uses a declarative syntax to model command line arguments with the +global variable declarations that capture the parsed values. This means that +for every command line option that you would like to support, there should be a +global variable declaration to capture the result. For example, in a compiler, +we would like to support the Unix-standard ‘-o <filename>’ option to specify +where to put the output. With the CommandLine library, this is represented like +this:

+
cl::opt<string> OutputFilename("o", cl::desc("Specify output filename"), cl::value_desc("filename"));
+
+
+

This declares a global variable “OutputFilename” that is used to capture the +result of the “o” argument (first parameter). We specify that this is a +simple scalar option by using the “cl::opt” template (as opposed to the +“cl::list” template), and tell the CommandLine library that the data +type that we are parsing is a string.

+

The second and third parameters (which are optional) are used to specify what to +output for the “-help” option. In this case, we get a line that looks like +this:

+
USAGE: compiler [options]
+
+OPTIONS:
+  -h                - Alias for -help
+  -help             - display available options (-help-hidden for more)
+  -o <filename>     - Specify output filename
+
+
+

Because we specified that the command line option should parse using the +string data type, the variable declared is automatically usable as a real +string in all contexts that a normal C++ string object may be used. For +example:

+
...
+std::ofstream Output(OutputFilename.c_str());
+if (Output.good()) ...
+...
+
+
+

There are many different options that you can use to customize the command line +option handling library, but the above example shows the general interface to +these options. The options can be specified in any order, and are specified +with helper functions like cl::desc(…), so there are no positional +dependencies to remember. The available options are discussed in detail in the +Reference Guide.

+

Continuing the example, we would like to have our compiler take an input +filename as well as an output filename, but we do not want the input filename to +be specified with a hyphen (i.e., not -filename.c). To support this style of +argument, the CommandLine library allows for positional arguments to be +specified for the program. These positional arguments are filled with command +line parameters that are not in option form. We use this feature like this:

+
cl::opt<string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+
+

This declaration indicates that the first positional argument should be treated +as the input filename. Here we use the cl::init option to specify an initial +value for the command line option, which is used if the option is not specified +(if you do not specify a cl::init modifier for an option, then the default +constructor for the data type is used to initialize the value). Command line +options default to being optional, so if we would like to require that the user +always specify an input filename, we would add the cl::Required flag, and we +could eliminate the cl::init modifier, like this:

+
cl::opt<string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::Required);
+
+
+

Again, the CommandLine library does not require the options to be specified in +any particular order, so the above declaration is equivalent to:

+
cl::opt<string> InputFilename(cl::Positional, cl::Required, cl::desc("<input file>"));
+
+
+

By simply adding the cl::Required flag, the CommandLine library will +automatically issue an error if the argument is not specified, which shifts all +of the command line option verification code out of your application into the +library. This is just one example of how using flags can alter the default +behaviour of the library, on a per-option basis. By adding one of the +declarations above, the -help option synopsis is now extended to:

+
USAGE: compiler [options] <input file>
+
+OPTIONS:
+  -h                - Alias for -help
+  -help             - display available options (-help-hidden for more)
+  -o <filename>     - Specify output filename
+
+
+

… indicating that an input filename is expected.

+
+

Boolean Arguments

+

In addition to input and output filenames, we would like the compiler example to +support three boolean flags: “-f” to force writing binary output to a +terminal, “--quiet” to enable quiet mode, and “-q” for backwards +compatibility with some of our users. We can support these by declaring options +of boolean type like this:

+
cl::opt<bool> Force ("f", cl::desc("Enable binary output on terminals"));
+cl::opt<bool> Quiet ("quiet", cl::desc("Don't print informational messages"));
+cl::opt<bool> Quiet2("q", cl::desc("Don't print informational messages"), cl::Hidden);
+
+
+

This does what you would expect: it declares three boolean variables +(“Force”, “Quiet”, and “Quiet2”) to recognize these options. Note +that the “-q” option is specified with the “cl::Hidden” flag. This +modifier prevents it from being shown by the standard “-help” output (note +that it is still shown in the “-help-hidden” output).

+

The CommandLine library uses a different parser for different data types. +For example, in the string case, the argument passed to the option is copied +literally into the content of the string variable… we obviously cannot do that +in the boolean case, however, so we must use a smarter parser. In the case of +the boolean parser, it allows no options (in which case it assigns the value of +true to the variable), or it allows the values “true” or “false” to be +specified, allowing any of the following inputs:

+
compiler -f          # No value, 'Force' == true
+compiler -f=true     # Value specified, 'Force' == true
+compiler -f=TRUE     # Value specified, 'Force' == true
+compiler -f=FALSE    # Value specified, 'Force' == false
+
+
+

… you get the idea. The bool parser just turns the string values into +boolean values, and rejects things like ‘compiler -f=foo’. Similarly, the +float, double, and int parsers work like you would expect, using the +‘strtol’ and ‘strtod’ C library calls to parse the string value into the +specified data type.

+

With the declarations above, “compiler -help” emits this:

+
USAGE: compiler [options] <input file>
+
+OPTIONS:
+  -f     - Enable binary output on terminals
+  -o     - Override output filename
+  -quiet - Don't print informational messages
+  -help  - display available options (-help-hidden for more)
+
+
+

and “compiler -help-hidden” prints this:

+
USAGE: compiler [options] <input file>
+
+OPTIONS:
+  -f     - Enable binary output on terminals
+  -o     - Override output filename
+  -q     - Don't print informational messages
+  -quiet - Don't print informational messages
+  -help  - display available options (-help-hidden for more)
+
+
+

This brief example has shown you how to use the ‘cl::opt’ class to parse +simple scalar command line arguments. In addition to simple scalar arguments, +the CommandLine library also provides primitives to support CommandLine option +aliases, and lists of options.

+
+
+

Argument Aliases

+

So far, the example works well, except for the fact that we need to check the +quiet condition like this now:

+
...
+  if (!Quiet && !Quiet2) printInformationalMessage(...);
+...
+
+
+

… which is a real pain! Instead of defining two values for the same +condition, we can use the “cl::alias” class to make the “-q” option an +alias for the “-quiet” option, instead of providing a value itself:

+
cl::opt<bool> Force ("f", cl::desc("Overwrite output files"));
+cl::opt<bool> Quiet ("quiet", cl::desc("Don't print informational messages"));
+cl::alias     QuietA("q", cl::desc("Alias for -quiet"), cl::aliasopt(Quiet));
+
+
+

The third line (which is the only one we modified from above) defines a “-q” +alias that updates the “Quiet” variable (as specified by the cl::aliasopt +modifier) whenever it is specified. Because aliases do not hold state, the only +thing the program has to query is the Quiet variable now. Another nice +feature of aliases is that they automatically hide themselves from the -help +output (although, again, they are still visible in the -help-hidden output).

+

Now the application code can simply use:

+
...
+  if (!Quiet) printInformationalMessage(...);
+...
+
+
+

… which is much nicer! The “cl::alias” can be used to specify an +alternative name for any variable type, and has many uses.

+
+
+

Selecting an alternative from a set of possibilities

+

So far we have seen how the CommandLine library handles builtin types like +std::string, bool and int, but how does it handle things it doesn’t +know about, like enums or ‘int*’s?

+

The answer is that it uses a table-driven generic parser (unless you specify +your own parser, as described in the Extension Guide). This parser maps +literal strings to whatever type is required, and requires you to tell it what +this mapping should be.

+

Let’s say that we would like to add four optimization levels to our optimizer, +using the standard flags “-g”, “-O1”, “-O2”, and “-O3”. We +could easily implement this with boolean options like above, but there are +several problems with this strategy:

+
    +
  1. A user could specify more than one of the options at a time, for example, +“compiler -O3 -O2”. The CommandLine library would not be able to catch +this erroneous input for us.

  2. +
  3. We would have to test 4 different variables to see which ones are set.

  4. +
  5. This doesn’t map to the numeric levels that we want… so we cannot easily +see if some level >= “-O1” is enabled.

  6. +
+

To cope with these problems, we can use an enum value, and have the CommandLine +library fill it in with the appropriate level directly, which is used like this:

+
enum OptLevel {
+  g, O1, O2, O3
+};
+
+cl::opt<OptLevel> OptimizationLevel(cl::desc("Choose optimization level:"),
+  cl::values(
+    clEnumVal(g , "No optimizations, enable debugging"),
+    clEnumVal(O1, "Enable trivial optimizations"),
+    clEnumVal(O2, "Enable default optimizations"),
+    clEnumVal(O3, "Enable expensive optimizations")));
+
+...
+  if (OptimizationLevel >= O2) doPartialRedundancyElimination(...);
+...
+
+
+

This declaration defines a variable “OptimizationLevel” of the +“OptLevel” enum type. This variable can be assigned any of the values that +are listed in the declaration. The CommandLine library enforces that +the user can only specify one of the options, and it ensures that only valid enum +values can be specified. The “clEnumVal” macros ensure that the command +line arguments match the enum values. With this option added, our help output +now is:

+
USAGE: compiler [options] <input file>
+
+OPTIONS:
+  Choose optimization level:
+    -g          - No optimizations, enable debugging
+    -O1         - Enable trivial optimizations
+    -O2         - Enable default optimizations
+    -O3         - Enable expensive optimizations
+  -f            - Enable binary output on terminals
+  -help         - display available options (-help-hidden for more)
+  -o <filename> - Specify output filename
+  -quiet        - Don't print informational messages
+
+
+

In this case, it is sort of awkward that flag names correspond directly to enum +names, because we probably don’t want an enum definition named “g” in our +program. Because of this, we can alternatively write this example like this:

+
enum OptLevel {
+  Debug, O1, O2, O3
+};
+
+cl::opt<OptLevel> OptimizationLevel(cl::desc("Choose optimization level:"),
+  cl::values(
+   clEnumValN(Debug, "g", "No optimizations, enable debugging"),
+    clEnumVal(O1        , "Enable trivial optimizations"),
+    clEnumVal(O2        , "Enable default optimizations"),
+    clEnumVal(O3        , "Enable expensive optimizations")));
+
+...
+  if (OptimizationLevel == Debug) outputDebugInfo(...);
+...
+
+
+

By using the “clEnumValN” macro instead of “clEnumVal”, we can directly +specify the name that the flag should get. In general a direct mapping is nice, +but sometimes you can’t or don’t want to preserve the mapping, which is when you +would use it.

+
+
+

Named Alternatives

+

Another useful argument form is a named alternative style. We shall use this +style in our compiler to specify different debug levels that can be used. +Instead of each debug level being its own switch, we want to support the +following options, of which only one can be specified at a time: +“--debug_level=none”, “--debug_level=quick”, +“--debug_level=detailed”. To do this, we use the exact same format as our +optimization level flags, but we also specify an option name. For this case, +the code looks like this:

+
enum DebugLev {
+  nodebuginfo, quick, detailed
+};
+
+// Enable Debug Options to be specified on the command line
+cl::opt<DebugLev> DebugLevel("debug_level", cl::desc("Set the debugging level:"),
+  cl::values(
+    clEnumValN(nodebuginfo, "none", "disable debug information"),
+     clEnumVal(quick,               "enable quick debug information"),
+     clEnumVal(detailed,            "enable detailed debug information")));
+
+
+

This definition defines an enumerated command line variable of type “enum +DebugLev”, which works exactly the same way as before. The difference here is +just the interface exposed to the user of your program and the help output by +the “-help” option:

+
USAGE: compiler [options] <input file>
+
+OPTIONS:
+  Choose optimization level:
+    -g          - No optimizations, enable debugging
+    -O1         - Enable trivial optimizations
+    -O2         - Enable default optimizations
+    -O3         - Enable expensive optimizations
+  -debug_level  - Set the debugging level:
+    =none       - disable debug information
+    =quick      - enable quick debug information
+    =detailed   - enable detailed debug information
+  -f            - Enable binary output on terminals
+  -help         - display available options (-help-hidden for more)
+  -o <filename> - Specify output filename
+  -quiet        - Don't print informational messages
+
+
+

Again, the only structural difference between the debug level declaration and +the optimization level declaration is that the debug level declaration includes +an option name ("debug_level"), which automatically changes how the library +processes the argument. The CommandLine library supports both forms so that you +can choose the form most appropriate for your application.

+
+
+

Parsing a list of options

+

Now that we have the standard run-of-the-mill argument types out of the way, +let’s get a little wild and crazy. Let’s say that we want our optimizer to accept +a list of optimizations to perform, allowing duplicates. For example, we +might want to run: “compiler -dce -instsimplify -inline -dce -strip”. In this +case, the order of the arguments and the number of appearances is very +important. This is what the “cl::list” template is for. First, start by +defining an enum of the optimizations that you would like to perform:

+
enum Opts {
+  // 'inline' is a C++ keyword, so name it 'inlining'
+  dce, instsimplify, inlining, strip
+};
+
+
+

Then define your “cl::list” variable:

+
cl::list<Opts> OptimizationList(cl::desc("Available Optimizations:"),
+  cl::values(
+    clEnumVal(dce               , "Dead Code Elimination"),
+    clEnumVal(instsimplify      , "Instruction Simplification"),
+   clEnumValN(inlining, "inline", "Procedure Integration"),
+    clEnumVal(strip             , "Strip Symbols")));
+
+
+

This defines a variable that is conceptually of the type +“std::vector<enum Opts>”. Thus, you can access it with standard vector +methods:

+
for (unsigned i = 0; i != OptimizationList.size(); ++i)
+  switch (OptimizationList[i])
+     ...
+
+
+

… to iterate through the list of options specified.

+

Note that the “cl::list” template is completely general and may be used with +any data types or other arguments that you can use with the “cl::opt” +template. One especially useful way to use a list is to capture all of the +positional arguments together if there may be more than one specified. In the +case of a linker, for example, the linker takes several ‘.o’ files, and +needs to capture them into a list. This is naturally specified as:

+
...
+cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<Input files>"), cl::OneOrMore);
+...
+
+
+

This variable works just like a “vector<string>” object. As such, accessing +the list is simple, just like above. In this example, we used the +cl::OneOrMore modifier to inform the CommandLine library that it is an error +if the user does not specify any .o files on our command line. Again, this +just reduces the amount of checking we have to do.

+
+
+

Collecting options as a set of flags

+

Instead of collecting sets of options in a list, it is also possible to gather +information for enum values in a bit vector. The representation used by the +cl::bits class is an unsigned integer. An enum value is represented by a +0/1 in the enum’s ordinal value bit position: 1 indicates that the enum was +specified, 0 otherwise. As each specified value is parsed, the resulting enum’s +bit is set in the option’s bit vector:

+
bits |= 1 << (unsigned)enum;
+
+
+

Options that are specified multiple times are redundant. Any instances after +the first are discarded.

+

Reworking the above list example, we could replace cl::list with cl::bits:

+
cl::bits<Opts> OptimizationBits(cl::desc("Available Optimizations:"),
+  cl::values(
+    clEnumVal(dce               , "Dead Code Elimination"),
+    clEnumVal(instsimplify      , "Instruction Simplification"),
+   clEnumValN(inlining, "inline", "Procedure Integration"),
+    clEnumVal(strip             , "Strip Symbols")));
+
+
+

To test to see if instsimplify was specified, we can use the cl::bits::isSet +function:

+
if (OptimizationBits.isSet(instsimplify)) {
+  ...
+}
+
+
+

It’s also possible to get the raw bit vector using the cl::bits::getBits +function:

+
unsigned bits = OptimizationBits.getBits();
+
+
+

Finally, if external storage is used, then the location specified must be of +type unsigned. In all other ways a cl::bits option is equivalent to a +cl::list option.

+
+
+

Adding freeform text to help output

+

As our program grows and becomes more mature, we may decide to put summary +information about what it does into the help output. The help output is styled +to look similar to a Unix man page, providing concise information about a +program. Unix man pages, however, often have a description about what the +program does. To add this to your CommandLine program, simply pass a third +argument to the cl::ParseCommandLineOptions call in main. This additional +argument is then printed as the overview information for your program, allowing +you to include any additional information that you want. For example:

+
int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv, " CommandLine compiler example\n\n"
+                              "  This program blah blah blah...\n");
+  ...
+}
+
+
+

would yield the help output:

+
OVERVIEW: CommandLine compiler example
+
+  This program blah blah blah...
+
+USAGE: compiler [options] <input file>
+
+OPTIONS:
+  ...
+  -help             - display available options (-help-hidden for more)
+  -o <filename>     - Specify output filename
+
+
+
+
+

Grouping options into categories

+

If our program has a large number of options it may become difficult for users +of our tool to navigate the output of -help. To alleviate this problem we +can put our options into categories. This can be done by declaring option +categories (cl::OptionCategory objects) and then placing our options into +these categories using the cl::cat option attribute. For example:

+
cl::OptionCategory StageSelectionCat("Stage Selection Options",
+                                     "These control which stages are run.");
+
+cl::opt<bool> Preprocessor("E",cl::desc("Run preprocessor stage."),
+                           cl::cat(StageSelectionCat));
+
+cl::opt<bool> NoLink("c",cl::desc("Run all stages except linking."),
+                     cl::cat(StageSelectionCat));
+
+
+

The output of -help will become categorized if an option category is +declared. The output looks something like

+
OVERVIEW: This is a small program to demo the LLVM CommandLine API
+USAGE: Sample [options]
+
+OPTIONS:
+
+  General options:
+
+    -help              - Display available options (-help-hidden for more)
+    -help-list         - Display list of available options (-help-list-hidden for more)
+
+
+  Stage Selection Options:
+  These control which stages are run.
+
+    -E                 - Run preprocessor stage.
+    -c                 - Run all stages except linking.
+
+
+

In addition to the behaviour of -help changing when an option category is +declared, the command line option -help-list becomes visible, which will +print the command line options as an uncategorized list.

+

Note that options that are not explicitly categorized will be placed in the +cl::getGeneralCategory() category.

+
+
+
+

Reference Guide

+

Now that you know the basics of how to use the CommandLine library, this section +will give you the detailed information you need to tune how command line options +work, as well as information on more “advanced” command line option processing +capabilities.

+
+

Positional Arguments

+

Positional arguments are those arguments that are not named, and are not +specified with a hyphen. Positional arguments should be used when an option is +specified by its position alone. For example, the standard Unix grep tool +takes a regular expression argument, and an optional filename to search through +(which defaults to standard input if a filename is not specified). Using the +CommandLine library, this would be specified as:

+
cl::opt<string> Regex   (cl::Positional, cl::desc("<regular expression>"), cl::Required);
+cl::opt<string> Filename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+
+

Given these two option declarations, the -help output for our grep +replacement would look like this:

+
USAGE: spiffygrep [options] <regular expression> <input file>
+
+OPTIONS:
+  -help - display available options (-help-hidden for more)
+
+
+

… and the resultant program could be used just like the standard grep +tool.

+

Positional arguments are sorted by their order of construction. This means that +command line options will be ordered according to how they are listed in a .cpp +file, but will not have an ordering defined if the positional arguments are +defined in multiple .cpp files. The fix for this problem is simply to define +all of your positional arguments in one .cpp file.

+
+

Specifying positional options with hyphens

+

Sometimes you may want to specify a value to your positional argument that +starts with a hyphen (for example, searching for ‘-foo’ in a file). At +first, you will have trouble doing this, because it will try to find an argument +named ‘-foo’, and will fail (and single quotes will not save you). Note +that the system grep has the same problem:

+
$ spiffygrep '-foo' test.txt
+Unknown command line argument '-foo'.  Try: 'spiffygrep -help'
+
+$ grep '-foo' test.txt
+grep: illegal option -- f
+grep: illegal option -- o
+grep: illegal option -- o
+Usage: grep -hblcnsviw pattern file . . .
+
+
+

The solution for this problem is the same for both your tool and the system +version: use the ‘--’ marker. When the user specifies ‘--’ on the +command line, it is telling the program that all options after the ‘--’ +should be treated as positional arguments, not options. Thus, we can use it +like this:

+
$ spiffygrep -- -foo test.txt
+  ...output...
+
+
+
+
+

Determining absolute position with getPosition()

+

Sometimes an option can affect or modify the meaning of another option. For +example, consider gcc’s -x LANG option. This tells gcc to ignore the +suffix of subsequent positional arguments and force the file to be interpreted +as if it contained source code in language LANG. In order to handle this +properly, you need to know the absolute position of each argument, especially +those in lists, so their interaction(s) can be applied correctly. This is also +useful for options like -llibname which is actually a positional argument +that starts with a dash.

+

So, generally, the problem is that you have two cl::list variables that +interact in some way. To ensure the correct interaction, you can use the +cl::list::getPosition(optnum) method. This method returns the absolute +position (as found on the command line) of the optnum item in the +cl::list.

+

The idiom for usage is like this:

+
static cl::list<std::string> Files(cl::Positional, cl::OneOrMore);
+static cl::list<std::string> Libraries("l", cl::ZeroOrMore);
+
+int main(int argc, char**argv) {
+  // ...
+  std::vector<std::string>::iterator fileIt = Files.begin();
+  std::vector<std::string>::iterator libIt  = Libraries.begin();
+  unsigned libPos = 0, filePos = 0;
+  while ( 1 ) {
+    if ( libIt != Libraries.end() )
+      libPos = Libraries.getPosition( libIt - Libraries.begin() );
+    else
+      libPos = 0;
+    if ( fileIt != Files.end() )
+      filePos = Files.getPosition( fileIt - Files.begin() );
+    else
+      filePos = 0;
+
+    if ( filePos != 0 && (libPos == 0 || filePos < libPos) ) {
+      // Source File Is next
+      ++fileIt;
+    }
+    else if ( libPos != 0 && (filePos == 0 || libPos < filePos) ) {
+      // Library is next
+      ++libIt;
+    }
+    else
+      break; // we're done with the list
+  }
+}
+
+
+

Note that, for compatibility reasons, the cl::opt also supports an +unsigned getPosition() option that will provide the absolute position of +that option. You can apply the same approach as above with a cl::opt and a +cl::list option as you can with two lists.

+
+
+

The cl::ConsumeAfter modifier

+

The cl::ConsumeAfter formatting option is used to construct programs that +use “interpreter style” option processing. With this style of option +processing, all arguments specified after the last positional argument are +treated as special interpreter arguments that are not interpreted by the command +line argument processor.

+

As a concrete example, let’s say we are developing a replacement for the standard +Unix Bourne shell (/bin/sh). To run /bin/sh, first you specify options +to the shell itself (like -x which turns on trace output), then you specify +the name of the script to run, then you specify arguments to the script. These +arguments to the script are parsed by the Bourne shell command line option +processor, but are not interpreted as options to the shell itself. Using the +CommandLine library, we would specify this as:

+
cl::opt<string> Script(cl::Positional, cl::desc("<input script>"), cl::init("-"));
+cl::list<string>  Argv(cl::ConsumeAfter, cl::desc("<program arguments>..."));
+cl::opt<bool>    Trace("x", cl::desc("Enable trace output"));
+
+
+

which automatically provides the help output:

+
USAGE: spiffysh [options] <input script> <program arguments>...
+
+OPTIONS:
+  -help - display available options (-help-hidden for more)
+  -x    - Enable trace output
+
+
+

At runtime, if we run our new shell replacement as ‘spiffysh -x test.sh -a -x +-y bar’, the Trace variable will be set to true, the Script variable +will be set to “test.sh”, and the Argv list will contain ["-a", "-x", +"-y", "bar"], because they were specified after the last positional argument +(which is the script name).

+

There are several limitations to when cl::ConsumeAfter options can be +specified. For example, only one cl::ConsumeAfter can be specified per +program, there must be at least one positional argument specified, there must +not be any cl::list positional arguments, and the cl::ConsumeAfter option +should be a cl::list option.

+
+
+
+

Internal vs External Storage

+

By default, all command line options automatically hold the value that they +parse from the command line. This is very convenient in the common case, +especially when combined with the ability to define command line options in the +files that use them. This is called the internal storage model.

+

Sometimes, however, it is nice to separate the command line option processing +code from the storage of the value parsed. For example, let’s say that we have a +‘-debug’ option that we would like to use to enable debug information across +the entire body of our program. In this case, the boolean value controlling the +debug code should be globally accessible (in a header file, for example) yet the +command line option processing code should not be exposed to all of these +clients (requiring lots of .cpp files to #include CommandLine.h).

+

To do this, set up your .h file with your option, like this for example:

+
// DebugFlag.h - Get access to the '-debug' command line option
+//
+
+// DebugFlag - This boolean is set to true if the '-debug' command line option
+// is specified.  This should probably not be referenced directly, instead, use
+// the DEBUG macro below.
+//
+extern bool DebugFlag;
+
+// DEBUG macro - This macro should be used by code to emit debug information.
+// If the '-debug' option is specified on the command line, and if this is a
+// debug build, then the code specified as the option to the macro will be
+// executed.  Otherwise it will not be.
+#ifdef NDEBUG
+#define LLVM_DEBUG(X)
+#else
+#define LLVM_DEBUG(X) do { if (DebugFlag) { X; } } while (0)
+#endif
+
+
+

This allows clients to blissfully use the LLVM_DEBUG() macro, or the +DebugFlag explicitly if they want to. Now we just need to be able to set +the DebugFlag boolean when the option is set. To do this, we pass an +additional argument to our command line argument processor, and we specify where +to fill in with the cl::location attribute:

+
bool DebugFlag;                  // the actual value
+static cl::opt<bool, true>       // The parser
+Debug("debug", cl::desc("Enable debug output"), cl::Hidden, cl::location(DebugFlag));
+
+
+

In the above example, we specify “true” as the second argument to the +cl::opt template, indicating that the template should not maintain a copy of +the value itself. In addition to this, we specify the cl::location +attribute, so that DebugFlag is automatically set.

+
+
+

Option Attributes

+

This section describes the basic attributes that you can specify on options.

+
    +
  • The option name attribute (which is required for all options, except +positional options) specifies what the option name is. This option is +specified in simple double quotes:

    +
    cl::opt<bool> Quiet("quiet");
    +
    +
    +
  • +
+
    +
  • The cl::desc attribute specifies a description for the option to be +shown in the -help output for the program. This attribute supports +multi-line descriptions with lines separated by ‘\n’.

  • +
+
    +
  • The cl::value_desc attribute specifies a string that can be used to +fine tune the -help output for a command line option. Look here for an +example.

  • +
+
    +
  • The cl::init attribute specifies an initial value for a scalar +option. If this attribute is not specified then the command line option value +defaults to the value created by the default constructor for the +type.

    +
    +

    Warning

    +

    If you specify both cl::init and cl::location for an option, you +must specify cl::location first, so that when the command-line parser +sees cl::init, it knows where to put the initial value. (You will get an +error at runtime if you don’t put them in the right order.)

    +
    +
  • +
+
    +
  • The cl::location attribute specifies where to store the value for a parsed command +line option if using external storage. See the section on Internal vs +External Storage for more information.

  • +
+
    +
  • The cl::aliasopt attribute specifies which option a cl::alias option is +an alias for.

  • +
+
    +
  • The cl::values attribute specifies the string-to-value mapping to be used +by the generic parser. It takes a list of (option, value, description) +triplets that specify the option name, the value mapped to, and the +description shown in the -help for the tool. Because the generic parser +is used most frequently with enum values, two macros are often useful:

    +
      +
    1. The clEnumVal macro is used as a nice simple way to specify a triplet +for an enum. This macro automatically makes the option name be the same as +the enum name. The first option to the macro is the enum, the second is +the description for the command line option.

    2. +
    3. The clEnumValN macro is used to specify macro options where the option +name doesn’t equal the enum name. For this macro, the first argument is +the enum value, the second is the flag name, and the third is the +description.

    4. +
    +

    You will get a compile time error if you try to use cl::values with a parser +that does not support it.

    +
  • +
+
    +
  • The cl::multi_val attribute specifies that this option takes multiple +values (example: -sectalign segname sectname sectvalue). This attribute +takes one unsigned argument - the number of values for the option. This +attribute is valid only on cl::list options (and will fail with compile +error if you try to use it with other option types). It is allowed to use all +of the usual modifiers on multi-valued options (besides +cl::ValueDisallowed, obviously).

  • +
+
    +
  • The cl::cat attribute specifies the option category that the option +belongs to. The category should be a cl::OptionCategory object.

  • +
+
    +
  • The cl::callback attribute specifies a callback function that is +called when an option is seen, and can be used to set other options, +as in option B implies option A. If the option is a cl::list, +and cl::CommaSeparated is also specified, the callback will fire +once for each value. This could be used to validate combinations or +selectively set other options.

    +
    cl::opt<bool> OptA("a", cl::desc("option a"));
    +cl::opt<bool> OptB(
    +    "b", cl::desc("option b -- This option turns on option a"),
    +    cl::callback([&](const bool &) { OptA = true; }));
    +cl::list<std::string, cl::list<std::string>> List(
    +  "list",
    +  cl::desc("option list -- This option turns on options a when "
    +           "'foo' is included in list"),
    +  cl::CommaSeparated,
    +  cl::callback([&](const std::string &Str) {
    +    if (Str == "foo")
    +      OptA = true;
    +  }));
    +
    +
    +
  • +
+
+
+

Option Modifiers

+

Option modifiers are the flags and expressions that you pass into the +constructors for cl::opt and cl::list. These modifiers give you the +ability to tweak how options are parsed and how -help output is generated to +fit your application well.

+

These options fall into five main categories:

+
    +
  1. Hiding an option from -help output

  2. +
  3. Controlling the number of occurrences required and allowed

  4. +
  5. Controlling whether or not a value must be specified

  6. +
  7. Controlling other formatting options

  8. +
  9. Miscellaneous option modifiers

  10. +
+

It is not possible to specify two options from the same category (you’ll get a +runtime error) to a single option, except for options in the miscellaneous +category. The CommandLine library specifies defaults for all of these settings +that are the most useful in practice and the most common, which means that you +usually shouldn’t have to worry about these.

+
+

Hiding an option from -help output

+

The cl::NotHidden, cl::Hidden, and cl::ReallyHidden modifiers are +used to control whether or not an option appears in the -help and +-help-hidden output for the compiled program:

+
    +
  • The cl::NotHidden modifier (which is the default for cl::opt and +cl::list options) indicates the option is to appear in both help +listings.

  • +
+
    +
  • The cl::Hidden modifier (which is the default for cl::alias options) +indicates that the option should not appear in the -help output, but +should appear in the -help-hidden output.

  • +
+
    +
  • The cl::ReallyHidden modifier indicates that the option should not appear +in any help output.

  • +
+
+
+

Controlling the number of occurrences required and allowed

+

This group of options is used to control how many times an option is allowed (or +required) to be specified on the command line of your program. Specifying a +value for this setting allows the CommandLine library to do error checking for +you.

+

The allowed values for this option group are:

+
    +
  • The cl::Optional modifier (which is the default for the cl::opt and +cl::alias classes) indicates that your program will allow either zero or +one occurrence of the option to be specified.

  • +
+
    +
  • The cl::ZeroOrMore modifier (which is the default for the cl::list +class) indicates that your program will allow the option to be specified zero +or more times.

  • +
+
    +
  • The cl::Required modifier indicates that the specified option must be +specified exactly one time.

  • +
+
    +
  • The cl::OneOrMore modifier indicates that the option must be specified at +least one time.

  • +
  • The cl::ConsumeAfter modifier is described in the Positional arguments +section.

  • +
+

If an option is not specified, then the value of the option is equal to the +value specified by the cl::init attribute. If the cl::init attribute is +not specified, the option value is initialized with the default constructor for +the data type.

+

If an option is specified multiple times for an option of the cl::opt class, +only the last value will be retained.

+
+
+

Controlling whether or not a value must be specified

+

This group of options is used to control whether or not the option allows a +value to be present. In the case of the CommandLine library, a value is either +specified with an equal sign (e.g. ‘-index-depth=17’) or as a trailing +string (e.g. ‘-o a.out’).

+

The allowed values for this option group are:

+
    +
  • The cl::ValueOptional modifier (which is the default for bool typed +options) specifies that it is acceptable to have a value, or not. A boolean +argument can be enabled just by appearing on the command line, or it can have +an explicit ‘-foo=true’. If an option is specified with this mode, it is +illegal for the value to be provided without the equal sign. Therefore +‘-foo true’ is illegal. To get this behavior, you must use +the cl::ValueRequired modifier.

  • +
+
    +
  • The cl::ValueRequired modifier (which is the default for all other types +except for unnamed alternatives using the generic parser) specifies that a +value must be provided. This mode informs the command line library that if an +option is not provided with an equal sign, the next argument provided +must be the value. This allows things like ‘-o a.out’ to work.

  • +
+
    +
  • The cl::ValueDisallowed modifier (which is the default for unnamed +alternatives using the generic parser) indicates that it is a runtime error +for the user to specify a value. This can be provided to disallow users from +providing options to boolean options (like ‘-foo=true’).

  • +
+

In general, the default values for this option group work just like you would +want them to. As mentioned above, you can specify the cl::ValueDisallowed +modifier to a boolean argument to restrict your command line parser. These +options are mostly useful when extending the library.

+
+
+

Controlling other formatting options

+

The formatting option group is used to specify that the command line option has +special abilities and is otherwise different from other command line arguments. +As usual, you can only specify one of these arguments at most.

+
    +
  • The cl::NormalFormatting modifier (which is the default for all options) +specifies that this option is “normal”.

  • +
+
    +
  • The cl::Positional modifier specifies that this is a positional argument +that does not have a command line option associated with it. See the +Positional Arguments section for more information.

  • +
  • The cl::ConsumeAfter modifier specifies that this option is used to +capture “interpreter style” arguments. See this section for more +information.

  • +
+
    +
  • The cl::Prefix modifier specifies that this option prefixes its value. +With ‘Prefix’ options, the equal sign does not separate the value from the +option name specified. Instead, the value is everything after the prefix, +including any equal sign if present. This is useful for processing odd +arguments like -lmalloc and -L/usr/lib in a linker tool or +-DNAME=value in a compiler tool. Here, the ‘l’, ‘D’ and ‘L’ +options are normal string (or list) options, that have the cl::Prefix +modifier added to allow the CommandLine library to recognize them. Note that +cl::Prefix options must not have the cl::ValueDisallowed modifier +specified.

  • +
+
+
+

Controlling options grouping

+

The cl::Grouping modifier can be combined with any formatting types except +for cl::Positional. It is used to implement Unix-style tools (like ls) +that have lots of single letter arguments, but only require a single dash. +For example, the ‘ls -labF’ command actually enables four different options, +all of which are single letters.

+

Note that cl::Grouping options can have values only if they are used +separately or at the end of the groups. For cl::ValueRequired, it is +a runtime error if such an option is used elsewhere in the group.

+

The CommandLine library does not restrict how you use the cl::Prefix or +cl::Grouping modifiers, but it is possible to specify ambiguous argument +settings. Thus, it is possible to have multiple letter options that are prefix +or grouping options, and they will still work as designed.

+

To do this, the CommandLine library uses a greedy algorithm to parse the input +option into (potentially multiple) prefix and grouping options. The strategy +basically looks like this:

+
parse(string OrigInput) {
+
+1. string Input = OrigInput;
+2. if (isOption(Input)) return getOption(Input).parse();  // Normal option
+3. while (!Input.empty() && !isOption(Input)) Input.pop_back();  // Remove the last letter
+4. while (!Input.empty()) {
+     string MaybeValue = OrigInput.substr(Input.length())
+     if (getOption(Input).isPrefix())
+       return getOption(Input).parse(MaybeValue)
+     if (!MaybeValue.empty() && MaybeValue[0] == '=')
+       return getOption(Input).parse(MaybeValue.substr(1))
+     if (!getOption(Input).isGrouping())
+       return error()
+     getOption(Input).parse()
+     Input = OrigInput = MaybeValue
+     while (!Input.empty() && !isOption(Input)) Input.pop_back();
+     if (!Input.empty() && !getOption(Input).isGrouping())
+       return error()
+   }
+5. if (!OrigInput.empty()) error();
+
+}
+
+
+
+
+

Miscellaneous option modifiers

+

The miscellaneous option modifiers are the only flags where you can specify more +than one flag from the set: they are not mutually exclusive. These flags +specify boolean properties that modify the option.

+
    +
  • The cl::CommaSeparated modifier indicates that any commas specified for an +option’s value should be used to split the value up into multiple values for +the option. For example, these two options are equivalent when +cl::CommaSeparated is specified: “-foo=a -foo=b -foo=c” and +“-foo=a,b,c”. This option only makes sense to be used in a case where the +option is allowed to accept one or more values (i.e. it is a cl::list +option).

  • +
+
    +
  • The cl::DefaultOption modifier is used to specify that the option is a +default that can be overridden by application specific parsers. For example, +the -help alias, -h, is registered this way, so it can be overridden +by applications that need to use the -h option for another purpose, +either as a regular option or an alias for another option.

  • +
+
    +
  • The cl::PositionalEatsArgs modifier (which only applies to positional +arguments, and only makes sense for lists) indicates that the positional argument +should consume any strings after it (including strings that start with a “-”) +up until another recognized positional argument. For example, if you have two +“eating” positional arguments, “pos1” and “pos2”, the string “-pos1 +-foo -bar baz -pos2 -bork” would cause the “-foo -bar baz” strings to +be applied to the “-pos1” option and the “-bork” string to be applied +to the “-pos2” option.

  • +
+
    +
  • The cl::Sink modifier is used to handle unknown options. If there is at +least one option with cl::Sink modifier specified, the parser passes +unrecognized option strings to it as values instead of signaling an error. As +with cl::CommaSeparated, this modifier only makes sense with a cl::list +option.

  • +
+
+
+

Response files

+

Some systems, such as certain variants of Microsoft Windows and some older +Unices have a relatively low limit on command-line length. It is therefore +customary to use the so-called ‘response files’ to circumvent this +restriction. These files are mentioned on the command-line (using the “@file”) +syntax. The program reads these files and inserts the contents into argv, +thereby working around the command-line length limits.

+
+
+
+

Top-Level Classes and Functions

+

Despite all of the built-in flexibility, the CommandLine option library really +only consists of one function cl::ParseCommandLineOptions and three main +classes: cl::opt, cl::list, and cl::alias. This section describes +these three classes in detail.

+
+

The cl::getRegisteredOptions function

+

The cl::getRegisteredOptions function is designed to give a programmer +access to declared non-positional command line options so that how they appear +in -help can be modified prior to calling cl::ParseCommandLineOptions. +Note this method should not be called during any static initialisation because +it cannot be guaranteed that all options will have been initialised. Hence it +should be called from main.

+

This function can be used to gain access to options declared in libraries that +the tool writer may not have direct access to.

+

The function retrieves a StringMap that maps the option +string (e.g. -help) to an Option*.

+

Here is an example of how the function could be used:

+
using namespace llvm;
+int main(int argc, char **argv) {
+  cl::OptionCategory AnotherCategory("Some options");
+
+  StringMap<cl::Option*> &Map = cl::getRegisteredOptions();
+
+  //Unhide useful option and put it in a different category
+  assert(Map.count("print-all-options") > 0);
+  Map["print-all-options"]->setHiddenFlag(cl::NotHidden);
+  Map["print-all-options"]->setCategory(AnotherCategory);
+
+  //Hide an option we don't want to see
+  assert(Map.count("enable-no-infs-fp-math") > 0);
+  Map["enable-no-infs-fp-math"]->setHiddenFlag(cl::Hidden);
+
+  //Change --version to --show-version
+  assert(Map.count("version") > 0);
+  Map["version"]->setArgStr("show-version");
+
+  //Change --help description
+  assert(Map.count("help") > 0);
+  Map["help"]->setDescription("Shows help");
+
+  cl::ParseCommandLineOptions(argc, argv, "This is a small program to demo the LLVM CommandLine API");
+  ...
+}
+
+
+
+
+

The cl::ParseCommandLineOptions function

+

The cl::ParseCommandLineOptions function is designed to be called directly +from main, and is used to fill in the values of all of the command line +option variables once argc and argv are available.

+

The cl::ParseCommandLineOptions function requires two parameters (argc +and argv), but may also take an optional third parameter which holds +additional extra text to emit when the -help option is invoked.

+
+
+

The cl::SetVersionPrinter function

+

The cl::SetVersionPrinter function is designed to be called directly from +main and before cl::ParseCommandLineOptions. Its use is optional. It +simply arranges for a function to be called in response to the --version +option instead of having the CommandLine library print out the usual version +string for LLVM. This is useful for programs that are not part of LLVM but wish +to use the CommandLine facilities. Such programs should just define a small +function that takes no arguments and returns void and that prints out +whatever version information is appropriate for the program. Pass the address of +that function to cl::SetVersionPrinter to arrange for it to be called when +the --version option is given by the user.

+
+
+

The cl::opt class

+

The cl::opt class is the class used to represent scalar command line +options, and is the one used most of the time. It is a templated class which +can take up to three arguments (all except for the first have default values +though):

+
namespace cl {
+  template <class DataType, bool ExternalStorage = false,
+            class ParserClass = parser<DataType> >
+  class opt;
+}
+
+
+

The first template argument specifies what underlying data type the command line +argument is, and is used to select a default parser implementation. The second +template argument is used to specify whether the option should contain the +storage for the option (the default) or whether external storage should be used +to contain the value parsed for the option (see Internal vs External Storage +for more information).

+

The third template argument specifies which parser to use. The default value +selects an instantiation of the parser class based on the underlying data +type of the option. In general, this default works well for most applications, +so this option is only used when using a custom parser.

+
+
+

The cl::list class

+

The cl::list class is the class used to represent a list of command line +options. It too is a templated class which can take up to three arguments:

+
namespace cl {
+  template <class DataType, class Storage = bool,
+            class ParserClass = parser<DataType> >
+  class list;
+}
+
+
+

This class works the exact same as the cl::opt class, except that the second +argument is the type of the external storage, not a boolean value. For this +class, the marker type ‘bool’ is used to indicate that internal storage +should be used.

+
+
+

The cl::bits class

+

The cl::bits class is the class used to represent a list of command line +options in the form of a bit vector. It is also a templated class which can +take up to three arguments:

+
namespace cl {
+  template <class DataType, class Storage = bool,
+            class ParserClass = parser<DataType> >
+  class bits;
+}
+
+
+

This class works the exact same as the cl::list class, except that the second +argument must be of type unsigned if external storage is used.

+
+
+

The cl::alias class

+

The cl::alias class is a nontemplated class that is used to form aliases for +other arguments.

+
namespace cl {
+  class alias;
+}
+
+
+

The cl::aliasopt attribute should be used to specify which option this is an +alias for. Alias arguments default to being cl::Hidden, and use the aliased +option’s parser to do the conversion from string to data.

+
+
+

The cl::extrahelp class

+

The cl::extrahelp class is a nontemplated class that allows extra help text +to be printed out for the -help option.

+
namespace cl {
+  struct extrahelp;
+}
+
+
+

To use the extrahelp, simply construct one with a const char* parameter to +the constructor. The text passed to the constructor will be printed at the +bottom of the help message, verbatim. Note that multiple cl::extrahelp +can be used, but this practice is discouraged. If your tool needs to print +additional help information, put all that help into a single cl::extrahelp +instance.

+

For example:

+
cl::extrahelp("\nADDITIONAL HELP:\n\n  This is the extra help\n");
+
+
+
+
+

The cl::OptionCategory class

+

The cl::OptionCategory class is a simple class for declaring +option categories.

+
namespace cl {
+  class OptionCategory;
+}
+
+
+

An option category must have a name and optionally a description which are +passed to the constructor as const char*.

+

Note that declaring an option category and associating it with an option before +parsing options (e.g. statically) will change the output of -help from +uncategorized to categorized. If an option category is declared but not +associated with an option then it will be hidden from the output of -help +but will be shown in the output of -help-hidden.

+
+
+
+

Builtin parsers

+

Parsers control how the string value taken from the command line is translated +into a typed value, suitable for use in a C++ program. By default, the +CommandLine library uses an instance of parser<type> if the command line +option specifies that it uses values of type ‘type’. Because of this, +custom option processing is specified with specializations of the ‘parser’ +class.

+

The CommandLine library provides the following builtin parser specializations, +which are sufficient for most applications. It can, however, also be extended to +work with new data types and new ways of interpreting the same data. See the +Writing a Custom Parser section for more details on this type of library extension.

+
    +
  • The generic parser<t> parser can be used to map string values to any data +type, through the use of the cl::values property, which specifies the +mapping information. The most common use of this parser is for parsing enum +values, which allows you to use the CommandLine library for all of the error +checking to make sure that only valid enum values are specified (as opposed to +accepting arbitrary strings). Despite this, however, the generic parser class +can be used for any data type.

  • +
+
    +
  • The parser<bool> specialization is used to convert boolean strings to a +boolean value. Currently accepted strings are “true”, “TRUE”, +“True”, “1”, “false”, “FALSE”, “False”, and “0”.

  • +
  • The parser<boolOrDefault> specialization is used for cases where the value +is boolean, but we also need to know whether the option was specified at all. +boolOrDefault is an enum with 3 values, BOU_UNSET, BOU_TRUE and BOU_FALSE. +This parser accepts the same strings as parser<bool>.

  • +
+
    +
  • The parser<string> specialization simply stores the parsed string into the +string value specified. No conversion or modification of the data is +performed.

  • +
+
    +
  • The parser<int> specialization uses the C strtol function to parse the +string input. As such, it will accept a decimal number (with an optional ‘+’ +or ‘-’ prefix) which must start with a non-zero digit. It accepts octal +numbers, which are identified with a ‘0’ prefix digit, and hexadecimal +numbers with a prefix of ‘0x’ or ‘0X’.

  • +
+
    +
  • The parser<double> and parser<float> specializations use the standard +C strtod function to convert floating point strings into floating point +values. As such, a broad range of string formats is supported, including +exponential notation (ex: 1.7e15) and properly supports locales.

  • +
+
+
+
+

Extension Guide

+

Although the CommandLine library has a lot of functionality built into it +already (as discussed previously), one of its true strengths lies in its +extensibility. This section discusses how the CommandLine library works under +the covers and illustrates how to do some simple, common, extensions.

+
+

Writing a custom parser

+

One of the simplest and most common extensions is the use of a custom parser. +As discussed previously, parsers are the portion of the CommandLine library +that turns string input from the user into a particular parsed data type, +validating the input in the process.

+

There are two ways to use a new parser:

+
    +
  1. Specialize the cl::parser template for your custom data type.

    +

    This approach has the advantage that users of your custom data type will +automatically use your custom parser whenever they define an option with a +value type of your data type. The disadvantage of this approach is that it +doesn’t work if your fundamental data type is something that is already +supported.

    +
  2. +
  3. Write an independent class, using it explicitly from options that need it.

    +

    This approach works well in situations where you would like to parse an +option using special syntax for a not-very-special data-type. The drawback +of this approach is that users of your parser have to be aware that they are +using your parser instead of the builtin ones.

    +
  4. +
+

To guide the discussion, we will discuss a custom parser that accepts file +sizes, specified with an optional unit after the numeric size. For example, we +would like to parse “102kb”, “41M”, “1G” into the appropriate integer value. In +this case, the underlying data type we want to parse into is ‘unsigned’. We +choose approach #2 above because we don’t want to make this the default for all +unsigned options.

+

To start out, we declare our new FileSizeParser class:

+
struct FileSizeParser : public cl::parser<unsigned> {
+  // parse - Return true on error.
+  bool parse(cl::Option &O, StringRef ArgName, const std::string &ArgValue,
+             unsigned &Val);
+};
+
+
+

Our new class inherits from the cl::parser template class to fill in +the default, boilerplate code for us. We give it the data type that we parse +into, the last argument to the parse method, so that clients of our custom +parser know what object type to pass in to the parse method. (Here we declare +that we parse into ‘unsigned’ variables.)

+

For most purposes, the only method that must be implemented in a custom parser +is the parse method. The parse method is called whenever the option is +invoked, passing in the option itself, the option name, the string to parse, and +a reference to a return value. If the string to parse is not well-formed, the +parser should output an error message and return true. Otherwise it should +return false and set ‘Val’ to the parsed value. In our example, we +implement parse as:

+
bool FileSizeParser::parse(cl::Option &O, StringRef ArgName,
+                           const std::string &Arg, unsigned &Val) {
+  const char *ArgStart = Arg.c_str();
+  char *End;
+
+  // Parse integer part, leaving 'End' pointing to the first non-integer char
+  Val = (unsigned)strtol(ArgStart, &End, 0);
+
+  while (1) {
+    switch (*End++) {
+    case 0: return false;   // No error
+    case 'i':               // Ignore the 'i' in KiB if people use that
+    case 'b': case 'B':     // Ignore B suffix
+      break;
+
+    case 'g': case 'G': Val *= 1024*1024*1024; break;
+    case 'm': case 'M': Val *= 1024*1024;      break;
+    case 'k': case 'K': Val *= 1024;           break;
+
+    default:
+      // Print an error message if unrecognized character!
+      return O.error("'" + Arg + "' value invalid for file size argument!");
+    }
+  }
+}
+
+
+

This function implements a very simple parser for the kinds of strings we are +interested in. Although it has some holes (it allows “123KKK” for example), +it is good enough for this example. Note that we use the option itself to print +out the error message (the error method always returns true) in order to get +a nice error message (shown below). Now that we have our parser class, we can +use it like this:

+
static cl::opt<unsigned, false, FileSizeParser>
+MFS("max-file-size", cl::desc("Maximum file size to accept"),
+    cl::value_desc("size"));
+
+
+

Which adds this to the output of our program:

+
OPTIONS:
+  -help                 - display available options (-help-hidden for more)
+  ...
+  -max-file-size=<size> - Maximum file size to accept
+
+
+

And we can test that our parser works correctly now (the test program just prints +out the max-file-size argument value):

+
$ ./test
+MFS: 0
+$ ./test -max-file-size=123MB
+MFS: 128974848
+$ ./test -max-file-size=3G
+MFS: 3221225472
+$ ./test -max-file-size=dog
+-max-file-size option: 'dog' value invalid for file size argument!
+
+
+

It looks like it works. The error message that we get is nice and helpful, and +we seem to accept reasonable file sizes. This wraps up the “custom parser” +tutorial.

+
+
+

Exploiting external storage

+

Several of the LLVM libraries define static cl::opt instances that will +automatically be included in any program that links with that library. This is +a feature. However, sometimes it is necessary to know the value of the command +line option outside of the library. In these cases the library does or should +provide an external storage location that is accessible to users of the +library. Examples of this include the llvm::DebugFlag exported by the +lib/Support/Debug.cpp file and the llvm::TimePassesIsEnabled flag +exported by the lib/IR/PassManager.cpp file.

+
+ +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CompileCudaWithLLVM.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CompileCudaWithLLVM.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CompileCudaWithLLVM.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CompileCudaWithLLVM.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,653 @@ + + + + + + + + + Compiling CUDA with clang — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Compiling CUDA with clang

+ +
+

Introduction

+

This document describes how to compile CUDA code with clang, and gives some +details about LLVM and clang’s CUDA implementations.

+

This document assumes a basic familiarity with CUDA. Information about CUDA +programming can be found in the +CUDA programming guide.

+
+
+

Compiling CUDA Code

+
+

Prerequisites

+

CUDA is supported since llvm 3.9. Clang currently supports CUDA 7.0 through +10.1. If clang detects a newer CUDA version, it will issue a warning and will +attempt to use the detected CUDA SDK as if it were CUDA-10.1.

+

Before you build CUDA code, you’ll need to have installed the CUDA SDK. See +NVIDIA’s CUDA installation guide for +details. Note that clang may not support the CUDA toolkit as installed by +some Linux package managers. Clang does attempt to deal with specific details of +CUDA installation on a handful of common Linux distributions, but in general the +most reliable way to make it work is to install CUDA in a single directory from +NVIDIA’s .run package and specify its location via the --cuda-path=... argument.

+

CUDA compilation is supported on Linux. Compilation on MacOS and Windows may or +may not work and currently has no maintainers.

+
+
+

Invoking clang

+

Invoking clang for CUDA compilation works similarly to compiling regular C++. +You just need to be aware of a few additional flags.

+

You can use this +program as a toy example. Save it as axpy.cu. (Clang detects that you’re +compiling CUDA code by noticing that your filename ends with .cu. +Alternatively, you can pass -x cuda.)

+

To build and run, run the following commands, filling in the parts in angle +brackets as described below:

+
$ clang++ axpy.cu -o axpy --cuda-gpu-arch=<GPU arch> \
+    -L<CUDA install path>/<lib64 or lib>             \
+    -lcudart_static -ldl -lrt -pthread
+$ ./axpy
+y[0] = 2
+y[1] = 4
+y[2] = 6
+y[3] = 8
+
+
+

On MacOS, replace -lcudart_static with -lcudart; otherwise, you may get +“CUDA driver version is insufficient for CUDA runtime version” errors when you +run your program.

+
    +
  • <CUDA install path> – the directory where you installed CUDA SDK. +Typically, /usr/local/cuda.

    +

    Pass e.g. -L/usr/local/cuda/lib64 if compiling in 64-bit mode; otherwise, +pass e.g. -L/usr/local/cuda/lib. (In CUDA, the device code and host code +always have the same pointer widths, so if you’re compiling 64-bit code for +the host, you’re also compiling 64-bit code for the device.) Note that as of +v10.0 CUDA SDK no longer supports compilation of 32-bit +applications.

    +
  • +
  • <GPU arch> – the compute capability of your GPU. For example, if you +want to run your program on a GPU with compute capability of 3.5, specify +--cuda-gpu-arch=sm_35.

    +

    Note: You cannot pass compute_XX as an argument to --cuda-gpu-arch; +only sm_XX is currently supported. However, clang always includes PTX in +its binaries, so e.g. a binary compiled with --cuda-gpu-arch=sm_30 would be +forwards-compatible with e.g. sm_35 GPUs.

    +

    You can pass --cuda-gpu-arch multiple times to compile for multiple archs.

    +
  • +
+

The -L and -l flags only need to be passed when linking. When compiling, +you may also need to pass --cuda-path=/path/to/cuda if you didn’t install +the CUDA SDK into /usr/local/cuda or /usr/local/cuda-X.Y.

+
+
+

Flags that control numerical code

+

If you’re using GPUs, you probably care about making numerical code run fast. +GPU hardware allows for more control over numerical operations than most CPUs, +but this results in more compiler options for you to juggle.

+

Flags you may wish to tweak include:

+
    +
  • -ffp-contract={on,off,fast} (defaults to fast on host and device when +compiling CUDA) Controls whether the compiler emits fused multiply-add +operations.

    +
      +
    • off: never emit fma operations, and prevent ptxas from fusing multiply +and add instructions.

    • +
    • on: fuse multiplies and adds within a single statement, but never +across statements (C11 semantics). Prevent ptxas from fusing other +multiplies and adds.

    • +
    • fast: fuse multiplies and adds wherever profitable, even across +statements. Doesn’t prevent ptxas from fusing additional multiplies and +adds.

    • +
    +

    Fused multiply-add instructions can be much faster than the unfused +equivalents, but because the intermediate result in an fma is not rounded, +this flag can affect numerical code.

    +
  • +
  • -fcuda-flush-denormals-to-zero (default: off) When this is enabled, +floating point operations may flush denormal inputs and/or outputs to 0. +Operations on denormal numbers are often much slower than the same operations +on normal numbers.

  • +
  • -fcuda-approx-transcendentals (default: off) When this is enabled, the +compiler may emit calls to faster, approximate versions of transcendental +functions, instead of using the slower, fully IEEE-compliant versions. For +example, this flag allows clang to emit the ptx sin.approx.f32 +instruction.

    +

    This is implied by -ffast-math.

    +
  • +
+
+
+
+

Standard library support

+

In clang and nvcc, most of the C++ standard library is not supported on the +device side.

+
+

<math.h> and <cmath>

+

In clang, math.h and cmath are available and pass +tests +adapted from libc++’s test suite.

+

In nvcc math.h and cmath are mostly available. Versions of ::foof +in namespace std (e.g. std::sinf) are not available, and where the standard +calls for overloads that take integral arguments, these are usually not +available.

+
#include <math.h>
+#include <cmath>
+
+// clang is OK with everything in this function.
+__device__ void test() {
+  std::sin(0.); // nvcc - ok
+  std::sin(0);  // nvcc - error, because no std::sin(int) overload is available.
+  sin(0);       // nvcc - same as above.
+
+  sinf(0.);       // nvcc - ok
+  std::sinf(0.);  // nvcc - no such function
+}
+
+
+
+
+

<std::complex>

+

nvcc does not officially support std::complex. It’s an error to use +std::complex in __device__ code, but it often works in __host__ +__device__ code due to nvcc’s interpretation of the “wrong-side rule” (see +below). However, we have heard from implementers that it’s possible to get +into situations where nvcc will omit a call to an std::complex function, +especially when compiling without optimizations.

+

As of 2016-11-16, clang supports std::complex without these caveats. It is +tested with libstdc++ 4.8.5 and newer, but is known to work only with libc++ +newer than 2016-11-16.

+
+
+

<algorithm>

+

In C++14, many useful functions from <algorithm> (notably, std::min and +std::max) become constexpr. You can therefore use these in device code, +when compiling with clang.

+
+
+
+

Detecting clang vs NVCC from code

+

Although clang’s CUDA implementation is largely compatible with NVCC’s, you may +still want to detect when you’re compiling CUDA code specifically with clang.

+

This is tricky, because NVCC may invoke clang as part of its own compilation +process! For example, NVCC uses the host compiler’s preprocessor when +compiling for device code, and that host compiler may in fact be clang.

+

When clang is actually compiling CUDA code – rather than being used as a +subtool of NVCC’s – it defines the __CUDA__ macro. __CUDA_ARCH__ is +defined only in device mode (but will be defined if NVCC is using clang as a +preprocessor). So you can use the following incantations to detect clang CUDA +compilation, in host and device modes:

+
#if defined(__clang__) && defined(__CUDA__) && !defined(__CUDA_ARCH__)
+// clang compiling CUDA code, host mode.
+#endif
+
+#if defined(__clang__) && defined(__CUDA__) && defined(__CUDA_ARCH__)
+// clang compiling CUDA code, device mode.
+#endif
+
+
+

Both clang and nvcc define __CUDACC__ during CUDA compilation. You can +detect NVCC specifically by looking for __NVCC__.

+
+
+

Dialect Differences Between clang and nvcc

+

There is no formal CUDA spec, and clang and nvcc speak slightly different +dialects of the language. Below, we describe some of the differences.

+

This section is painful; hopefully you can skip this section and live your life +blissfully unaware.

+
+

Compilation Models

+

Most of the differences between clang and nvcc stem from the different +compilation models used by clang and nvcc. nvcc uses split compilation, +which works roughly as follows:

+
+
    +
  • Run a preprocessor over the input .cu file to split it into two source +files: H, containing source code for the host, and D, containing +source code for the device.

  • +
  • For each GPU architecture arch that we’re compiling for, do:

    +
      +
    • Compile D using nvcc proper. The result of this is a ptx file, +P_arch.

    • +
    • Optionally, invoke ptxas, the PTX assembler, to generate a file, +S_arch, containing GPU machine code (SASS) for arch.

    • +
    +
  • +
  • Invoke fatbin to combine all P_arch and S_arch files into a +single “fat binary” file, F.

  • +
  • Compile H using an external host compiler (gcc, clang, or whatever you +like). F is packaged up into a header file which is force-included into +H; nvcc generates code that calls into this header to e.g. launch +kernels.

  • +
+
+

clang uses merged parsing. This is similar to split compilation, except all +of the host and device code is present and must be semantically-correct in both +compilation steps.

+
+
    +
  • For each GPU architecture arch that we’re compiling for, do:

    +
      +
    • Compile the input .cu file for device, using clang. __host__ code +is parsed and must be semantically correct, even though we’re not +generating code for the host at this time.

      +

      The output of this step is a ptx file P_arch.

      +
    • +
    • Invoke ptxas to generate a SASS file, S_arch. Note that, unlike +nvcc, clang always generates SASS code.

    • +
    +
  • +
  • Invoke fatbin to combine all P_arch and S_arch files into a +single fat binary file, F.

  • +
  • Compile H using clang. __device__ code is parsed and must be +semantically correct, even though we’re not generating code for the device +at this time.

    +

    F is passed to this compilation, and clang includes it in a special ELF +section, where it can be found by tools like cuobjdump.

    +
  • +
+
+

(You may ask at this point, why does clang need to parse the input file +multiple times? Why not parse it just once, and then use the AST to generate +code for the host and each device architecture?

+

Unfortunately this can’t work because we have to define different macros during +host compilation and during device compilation for each GPU architecture.)

+

clang’s approach allows it to be highly robust to C++ edge cases, as it doesn’t +need to decide at an early stage which declarations to keep and which to throw +away. But it has some consequences you should be aware of.

+
+
+

Overloading Based on __host__ and __device__ Attributes

+

Let “H”, “D”, and “HD” stand for “__host__ functions”, “__device__ +functions”, and “__host__ __device__ functions”, respectively. Functions +with no attributes behave the same as H.

+

nvcc does not allow you to create H and D functions with the same signature:

+
// nvcc: error - function "foo" has already been defined
+__host__ void foo() {}
+__device__ void foo() {}
+
+
+

However, nvcc allows you to “overload” H and D functions with different +signatures:

+
// nvcc: no error
+__host__ void foo(int) {}
+__device__ void foo() {}
+
+
+

In clang, the __host__ and __device__ attributes are part of a +function’s signature, and so it’s legal to have H and D functions with +(otherwise) the same signature:

+
// clang: no error
+__host__ void foo() {}
+__device__ void foo() {}
+
+
+

HD functions cannot be overloaded by H or D functions with the same signature:

+
// nvcc: error - function "foo" has already been defined
+// clang: error - redefinition of 'foo'
+__host__ __device__ void foo() {}
+__device__ void foo() {}
+
+// nvcc: no error
+// clang: no error
+__host__ __device__ void bar(int) {}
+__device__ void bar() {}
+
+
+

When resolving an overloaded function, clang considers the host/device +attributes of the caller and callee. These are used as a tiebreaker during +overload resolution. See IdentifyCUDAPreference for the full set of rules, +but at a high level they are:

+
+
    +
  • D functions prefer to call other Ds. HDs are given lower priority.

  • +
  • Similarly, H functions prefer to call other Hs, or __global__ functions +(with equal priority). HDs are given lower priority.

  • +
  • HD functions prefer to call other HDs.

    +

    When compiling for device, HDs will call Ds with lower priority than HD, and +will call Hs with still lower priority. If it’s forced to call an H, the +program is malformed if we emit code for this HD function. We call this the +“wrong-side rule”, see example below.

    +

    The rules are symmetrical when compiling for host.

    +
  • +
+
+

Some examples:

+
__host__ void foo();
+__device__ void foo();
+
+__host__ void bar();
+__host__ __device__ void bar();
+
+__host__ void test_host() {
+  foo();  // calls H overload
+  bar();  // calls H overload
+}
+
+__device__ void test_device() {
+  foo();  // calls D overload
+  bar();  // calls HD overload
+}
+
+__host__ __device__ void test_hd() {
+  foo();  // calls H overload when compiling for host, otherwise D overload
+  bar();  // always calls HD overload
+}
+
+
+

Wrong-side rule example:

+
__host__ void host_only();
+
+// We don't codegen inline functions unless they're referenced by a
+// non-inline function.  inline_hd1() is called only from the host side, so
+// does not generate an error.  inline_hd2() is called from the device side,
+// so it generates an error.
+inline __host__ __device__ void inline_hd1() { host_only(); }  // no error
+inline __host__ __device__ void inline_hd2() { host_only(); }  // error
+
+__host__ void host_fn() { inline_hd1(); }
+__device__ void device_fn() { inline_hd2(); }
+
+// This function is not inline, so it's always codegen'ed on both the host
+// and the device.  Therefore, it generates an error.
+__host__ __device__ void not_inline_hd() { host_only(); }
+
+
+

For the purposes of the wrong-side rule, templated functions also behave like +inline functions: They aren’t codegen’ed unless they’re instantiated +(usually as part of the process of invoking them).

+

clang’s behavior with respect to the wrong-side rule matches nvcc’s, except +nvcc only emits a warning for not_inline_hd; device code is allowed to call +not_inline_hd. In its generated code, nvcc may omit not_inline_hd’s +call to host_only entirely, or it may try to generate code for +host_only on the device. What you get seems to depend on whether or not +the compiler chooses to inline host_only.

+

Member functions, including constructors, may be overloaded using H and D +attributes. However, destructors cannot be overloaded.

+
+
+

Using a Different Class on Host/Device

+

Occasionally you may want to have a class with different host/device versions.

+

If all of the class’s members are the same on the host and device, you can just +provide overloads for the class’s member functions.

+

However, if you want your class to have different members on host/device, you +won’t be able to provide working H and D overloads in both classes. In this +case, clang is likely to be unhappy with you.

+
#ifdef __CUDA_ARCH__
+struct S {
+  __device__ void foo() { /* use device_only */ }
+  int device_only;
+};
+#else
+struct S {
+  __host__ void foo() { /* use host_only */ }
+  double host_only;
+};
+
+__device__ void test() {
+  S s;
+  // clang generates an error here, because during host compilation, we
+  // have ifdef'ed away the __device__ overload of S::foo().  The __device__
+  // overload must be present *even during host compilation*.
+  s.foo();
+}
+#endif
+
+
+

We posit that you don’t really want to have classes with different members on H +and D. For example, if you were to pass one of these as a parameter to a +kernel, it would have a different layout on H and D, so would not work +properly.

+

To make code like this compatible with clang, we recommend you separate it out +into two classes. If you need to write code that works on both host and +device, consider writing an overloaded wrapper function that returns different +types on host and device.

+
struct HostS { ... };
+struct DeviceS { ... };
+
+__host__ HostS MakeStruct() { return HostS(); }
+__device__ DeviceS MakeStruct() { return DeviceS(); }
+
+// Now host and device code can call MakeStruct().
+
+
+

Unfortunately, this idiom isn’t compatible with nvcc, because it doesn’t allow +you to overload based on the H/D attributes. Here’s an idiom that works with +both clang and nvcc:

+
struct HostS { ... };
+struct DeviceS { ... };
+
+#ifdef __NVCC__
+  #ifndef __CUDA_ARCH__
+    __host__ HostS MakeStruct() { return HostS(); }
+  #else
+    __device__ DeviceS MakeStruct() { return DeviceS(); }
+  #endif
+#else
+  __host__ HostS MakeStruct() { return HostS(); }
+  __device__ DeviceS MakeStruct() { return DeviceS(); }
+#endif
+
+// Now host and device code can call MakeStruct().
+
+
+

Hopefully you don’t have to do this sort of thing often.

+
+
+
+

Optimizations

+

Modern CPUs and GPUs are architecturally quite different, so code that’s fast +on a CPU isn’t necessarily fast on a GPU. We’ve made a number of changes to +LLVM to make it generate good GPU code. Among these changes are:

+
    +
  • Straight-line scalar optimizations – These +reduce redundancy within straight-line code.

  • +
  • Aggressive speculative execution +– This is mainly for promoting straight-line scalar optimizations, which are +most effective on code along dominator paths.

  • +
  • Memory space inference – +In PTX, we can operate on pointers that are in a particular “address space” +(global, shared, constant, or local), or we can operate on pointers in the +“generic” address space, which can point to anything. Operations in a +non-generic address space are faster, but pointers in CUDA are not explicitly +annotated with their address space, so it’s up to LLVM to infer it where +possible.

  • +
  • Bypassing 64-bit divides – +This was an existing optimization that we enabled for the PTX backend.

    +

    64-bit integer divides are much slower than 32-bit ones on NVIDIA GPUs. +Many of the 64-bit divides in our benchmarks have a divisor and dividend +which fit in 32-bits at runtime. This optimization provides a fast path for +this common case.

    +
  • +
  • Aggressive loop unrolling and function inlining – Loop unrolling and +function inlining need to be more aggressive for GPUs than for CPUs because +control flow transfer in GPU is more expensive. More aggressive unrolling and +inlining also promote other optimizations, such as constant propagation and +SROA, which sometimes speed up code by over 10x.

    +

    (Programmers can force unrolling and inlining using clang’s loop unrolling pragmas +and __attribute__((always_inline)).)

    +
  • +
+
+
+

Publication

+

The team at Google published a paper in CGO 2016 detailing the optimizations +they’d made to clang/LLVM. Note that “gpucc” is no longer a meaningful name: +The relevant tools are now just vanilla clang/LLVM.

+
+ +
Jingyue Wu, Artem Belevich, Eli Bendersky, Mark Heffernan, Chris Leary, Jacques Pienaar, Bjarke Roune, Rob Springer, Xuetian Weng, Robert Hundt
+
Proceedings of the 2016 International Symposium on Code Generation and Optimization (CGO 2016)
+

+ +

+ +
+
+
+

Obtaining Help

+

To obtain help on LLVM in general and its CUDA support, see the LLVM +community.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CompilerWriterInfo.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CompilerWriterInfo.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CompilerWriterInfo.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CompilerWriterInfo.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,371 @@ + + + + + + + + + Architecture & Platform Information for Compiler Writers — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Architecture & Platform Information for Compiler Writers

+ +
+

Note

+

This document is a work-in-progress. Additions and clarifications are +welcome.

+
+
+

Hardware

+
+

AArch64 & ARM

+ +
+ + + + +
+

AMDGPU

+

Refer to User Guide for AMDGPU Backend for additional documentation.

+
+ + + + + + + + +
+ +
+

NVPTX

+ +
+
+

Miscellaneous Resources

+ +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Contributing.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Contributing.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Contributing.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Contributing.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,274 @@ + + + + + + + + + Contributing to LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Contributing to LLVM

+

Thank you for your interest in contributing to LLVM! There are multiple ways to +contribute, and we appreciate all contributions. In case you +have questions, you can either use the Developer’s List (llvm-dev) +or the #llvm channel on irc.oftc.net.

+

If you want to contribute code, please familiarize yourself with the LLVM Developer Policy.

+ +
+

Ways to Contribute

+
+

Bug Reports

+

If you are working with LLVM and run into a bug, we definitely want to know +about it. Please let us know and follow the instructions in +How to submit an LLVM bug report to create a bug report.

+
+
+

Bug Fixes

+

If you are interested in contributing code to LLVM, bugs labeled with the +beginner keyword in the bug tracker are a good way to get familiar with +the code base. If you are interested in fixing a bug, please create an account +for the bug tracker and assign it to yourself, to let people know you are working on +it.

+

Then try to reproduce and fix the bug with upstream LLVM. Start by building +LLVM from source as described in Getting Started with the LLVM System +and use the built binaries to reproduce the failure described in the bug. Use +a debug build (-DCMAKE_BUILD_TYPE=Debug) or a build with assertions +(-DLLVM_ENABLE_ASSERTIONS=On, enabled for Debug builds).

+
+
+

Reporting a Security Issue

+

There is a separate process to submit security-related bugs, see How to report a security issue?.

+
+
+

Bigger Pieces of Work

+

In case you are interested in taking on a bigger piece of work, a list of +interesting projects is maintained at LLVM’s Open Projects page. In case +you are interested in working on any of these projects, please send a mail to +the LLVM Developer’s mailing list, so that we know the project is being +worked on.

+
+
+
+

How to Submit a Patch

+

Once you have a patch ready, it is time to submit it. The patch should:

+
    +
  • include a small unit test

  • +
  • conform to the LLVM Coding Standards. You can use the clang-format-diff.py or git-clang-format tools to automatically format your patch properly.

  • +
  • not contain any unrelated changes

  • +
  • be an isolated change. Independent changes should be submitted as separate patches as this makes reviewing easier.

  • +
+

Before sending a patch for review, please also try to ensure it is +formatted properly. We use clang-format for this, which has git integration +through the git-clang-format script. On some systems, it may already be +installed (or be installable via your package manager). If so, you can simply +run it – the following command will format only the code changed in the most +recent commit:

+
% git clang-format HEAD~1
+
+
+

Note that this modifies the files, but doesn’t commit them – you’ll likely want +to run

+
% git commit --amend -a
+
+
+

in order to update the last commit with all pending changes.

+
+

Note

+

If you don’t already have clang-format or git clang-format installed +on your system, the clang-format binary will be built alongside clang, and +the git integration can be run from +clang/tools/clang-format/git-clang-format.

+
+

To get a patch accepted, it has to be reviewed by the LLVM community. This can +be done using LLVM’s Phabricator or the llvm-commits mailing list. +Please follow Phabricator#phabricator-reviews +to request a review using Phabricator.

+

To make sure the right people see your patch, please select suitable reviewers +and add them to your patch when requesting a review. Suitable reviewers are the +code owner (see CODE_OWNERS.txt) and other people doing work in the area your +patch touches. If you are using Phabricator, add them to the Reviewers field +when creating a review and if you are using llvm-commits, add them to the CC of +your email.

+

A reviewer may request changes or ask questions during the review. If you are +uncertain on how to provide test cases, documentation, etc., feel free to ask +for guidance during the review. Please address the feedback and re-post an +updated version of your patch. This cycle continues until all requests and comments +have been addressed and a reviewer accepts the patch with a Looks good to me or LGTM. +Once that is done the change can be committed. If you do not have commit +access, please let people know during the review and someone should commit it +on your behalf.

+

If you have received no comments on your patch for a week, you can request a +review by ‘ping’ing a patch by responding to the email thread containing the +patch, or the Phabricator review with “Ping.” The common courtesy ‘ping’ rate +is once a week. Please remember that you are asking for valuable time from other +professional developers.

+

For more information on LLVM’s code-review process, please see LLVM Code-Review Policy and Practices.

+
+
+

Helpful Information About LLVM

+

LLVM’s documentation provides a wealth of information about LLVM’s internals as +well as various user guides. The pages listed below should provide a good overview +of LLVM’s high-level design, as well as its internals:

+
+
Getting Started with the LLVM System

Discusses how to get up and running quickly with the LLVM infrastructure. +Everything from unpacking and compilation of the distribution to execution +of some tools.

+
+
LLVM Language Reference Manual

Defines the LLVM intermediate representation.

+
+
LLVM Programmer’s Manual

Introduction to the general layout of the LLVM sourcebase, important classes +and APIs, and some tips & tricks.

+
+
LLVM for Grad Students

This is an introduction to the LLVM infrastructure by Adrian Sampson. While it +has been written for grad students, it provides a good, compact overview of +LLVM’s architecture, LLVM’s IR and how to write a new pass.

+
+
Intro to LLVM

Book chapter providing a compiler hacker’s introduction to LLVM.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Coroutines.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Coroutines.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Coroutines.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Coroutines.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,1729 @@ + + + + + + + + + Coroutines in LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Coroutines in LLVM

+ +
+

Warning

+

This is a work in progress. Compatibility across LLVM releases is not +guaranteed.

+
+
+

Introduction

+

LLVM coroutines are functions that have one or more suspend points. +When a suspend point is reached, the execution of a coroutine is suspended and +control is returned back to its caller. A suspended coroutine can be resumed +to continue execution from the last suspend point or it can be destroyed.

+

In the following example, we call function f (which may or may not be a +coroutine itself) that returns a handle to a suspended coroutine +(coroutine handle) that is used by main to resume the coroutine twice and +then destroy it:

+
define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.resume(i8* %hdl)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+}
+
+
+

In addition to the function stack frame which exists when a coroutine is +executing, there is an additional region of storage that contains objects that +keep the coroutine state when a coroutine is suspended. This region of storage +is called the coroutine frame. It is created when a coroutine is called +and destroyed when a coroutine either runs to completion or is destroyed +while suspended.

+

LLVM currently supports two styles of coroutine lowering. These styles +support substantially different sets of features, have substantially +different ABIs, and expect substantially different patterns of frontend +code generation. However, the styles also have a great deal in common.

+

In all cases, an LLVM coroutine is initially represented as an ordinary LLVM +function that has calls to coroutine intrinsics defining the structure of +the coroutine. The coroutine function is then, in the most general case, +rewritten by the coroutine lowering passes to become the “ramp function”, +the initial entrypoint of the coroutine, which executes until a suspend point +is first reached. The remainder of the original coroutine function is split +out into some number of “resume functions”. Any state which must persist +across suspensions is stored in the coroutine frame. The resume functions +must somehow be able to handle either a “normal” resumption, which continues +the normal execution of the coroutine, or an “abnormal” resumption, which +must unwind the coroutine without attempting to suspend it.

+
+

Switched-Resume Lowering

+

In LLVM’s standard switched-resume lowering, signaled by the use of +llvm.coro.id, the coroutine frame is stored as part of a “coroutine +object” which represents a handle to a particular invocation of the +coroutine. All coroutine objects support a common ABI allowing certain +features to be used without knowing anything about the coroutine’s +implementation:

+
    +
  • A coroutine object can be queried to see if it has reached completion +with llvm.coro.done.

  • +
  • A coroutine object can be resumed normally if it has not already reached +completion with llvm.coro.resume.

  • +
  • A coroutine object can be destroyed, invalidating the coroutine object, +with llvm.coro.destroy. This must be done separately even if the +coroutine has reached completion normally.

  • +
  • “Promise” storage, which is known to have a certain size and alignment, +can be projected out of the coroutine object with llvm.coro.promise. +The coroutine implementation must have been compiled to define a promise +of the same size and alignment.

  • +
+

In general, interacting with a coroutine object in any of these ways while +it is running has undefined behavior.

+

The coroutine function is split into three functions, representing three +different ways that control can enter the coroutine:

+
    +
  1. the ramp function that is initially invoked, which takes arbitrary +arguments and returns a pointer to the coroutine object;

  2. +
  3. a coroutine resume function that is invoked when the coroutine is resumed, +which takes a pointer to the coroutine object and returns void;

  4. +
  5. a coroutine destroy function that is invoked when the coroutine is +destroyed, which takes a pointer to the coroutine object and returns +void.

  6. +
+

Because the resume and destroy functions are shared across all suspend +points, suspend points must store the index of the active suspend in +the coroutine object, and the resume/destroy functions must switch over +that index to get back to the correct point. Hence the name of this +lowering.

+

Pointers to the resume and destroy functions are stored in the coroutine +object at known offsets which are fixed for all coroutines. A completed +coroutine is represented with a null resume function.

+

There is a somewhat complex protocol of intrinsics for allocating and +deallocating the coroutine object. It is complex in order to allow the +allocation to be elided due to inlining. This protocol is discussed +in further detail below.

+

The frontend may generate code to call the coroutine function directly; +this will become a call to the ramp function and will return a pointer +to the coroutine object. The frontend should always resume or destroy +the coroutine using the corresponding intrinsics.

+
+
+

Returned-Continuation Lowering

+

In returned-continuation lowering, signaled by the use of +llvm.coro.id.retcon or llvm.coro.id.retcon.once, some aspects of +the ABI must be handled more explicitly by the frontend.

+

In this lowering, every suspend point takes a list of “yielded values” +which are returned back to the caller along with a function pointer, +called the continuation function. The coroutine is resumed by simply +calling this continuation function pointer. The original coroutine +is divided into the ramp function and then an arbitrary number of +these continuation functions, one for each suspend point.

+

LLVM actually supports two closely-related returned-continuation +lowerings:

+
    +
  • In normal returned-continuation lowering, the coroutine may suspend +itself multiple times. This means that a continuation function +itself returns another continuation pointer, as well as a list of +yielded values.

    +

    The coroutine indicates that it has run to completion by returning +a null continuation pointer. Any yielded values will be undef and +should be ignored.

    +
  • +
  • In yield-once returned-continuation lowering, the coroutine must +suspend itself exactly once (or throw an exception). The ramp +function returns a continuation function pointer and yielded +values, but the continuation function simply returns void +when the coroutine has run to completion.

  • +
+

The coroutine frame is maintained in a fixed-size buffer that is +passed to the coro.id intrinsic, which guarantees a certain size +and alignment statically. The same buffer must be passed to the +continuation function(s). The coroutine will allocate memory if the +buffer is insufficient, in which case it will need to store at +least that pointer in the buffer; therefore the buffer must always +be at least pointer-sized. How the coroutine uses the buffer may +vary between suspend points.

+

In addition to the buffer pointer, continuation functions take an +argument indicating whether the coroutine is being resumed normally +(zero) or abnormally (non-zero).

+

LLVM is currently ineffective at statically eliminating allocations +after fully inlining returned-continuation coroutines into a caller. +This may be acceptable if LLVM’s coroutine support is primarily being +used for low-level lowering and inlining is expected to be applied +earlier in the pipeline.

+
+
+

Async Lowering

+

In async-continuation lowering, signaled by the use of llvm.coro.id.async, +control flow must be handled explicitly by the frontend.

+

In this lowering, a coroutine is assumed to take the current async context as +one of its arguments (the argument position is determined by +llvm.coro.id.async). It is used to marshal arguments and return values of the +coroutine. Therefore an async coroutine returns void.

+
define swiftcc void @async_coroutine(i8* %async.ctxt, i8*, i8*) {
+}
+
+
+

Values live across a suspend point need to be stored in the coroutine frame to +be available in the continuation function. This frame is stored as a tail to the +async context.

+

Every suspend point takes a context projection function argument which +describes how to obtain the continuation’s async context, and every suspend +point has an associated resume function denoted by the +llvm.coro.async.resume intrinsic. The coroutine is resumed by calling this +resume function, passing the async context as one of its +arguments. The resume function can restore its (the caller’s) async context +by applying a context projection function that is provided by the frontend as +a parameter to the llvm.coro.suspend.async intrinsic.

+
// For example:
+struct async_context {
+  struct async_context *caller_context;
+  ...
+}
+
+char *context_projection_function(struct async_context *callee_ctxt) {
+   return callee_ctxt->caller_context;
+}
+
+
+
%resume_func_ptr = call i8* @llvm.coro.async.resume()
+call {i8*, i8*, i8*} (i8*, i8*, ...) @llvm.coro.suspend.async(
+                                            i8* %resume_func_ptr,
+                                            i8* %context_projection_function
+
+
+

The frontend should provide an async function pointer struct associated with +each async coroutine by llvm.coro.id.async’s argument. The initial size and +alignment of the async context must be provided as arguments to the +llvm.coro.id.async intrinsic. Lowering will update the size entry with the +coroutine frame requirements. The frontend is responsible for allocating the +memory for the async context but can use the async function pointer struct +to obtain the required size.

+
struct async_function_pointer {
+  uint32_t relative_function_pointer_to_async_impl;
+  uint32_t context_size;
+}
+
+
+

Lowering will split an async coroutine into a ramp function and one resume +function per suspend point.

+

How control-flow is passed between caller, suspension point, and back to +resume function is left up to the frontend.

+

The suspend point takes a function and its arguments. The function is intended +to model the transfer to the callee function. It will be tail called by +lowering and therefore must have the same signature and calling convention as +the async coroutine.

+
call {i8*, i8*, i8*} (i8*, i8*, ...) @llvm.coro.suspend.async(
+                 i8* %resume_func_ptr,
+                 i8* %context_projection_function,
+                 i8* (bitcast void (i8*, i8*, i8*)* to i8*) %suspend_function,
+                 i8* %arg1, i8* %arg2, i8 %arg3)
+
+
+
+
+
+

Coroutines by Example

+

The examples below are all of switched-resume coroutines.

+
+

Coroutine Representation

+

Let’s look at an example of an LLVM coroutine with the behavior sketched +by the following pseudo-code.

+
void *f(int n) {
+   for(;;) {
+     print(n++);
+     <suspend> // returns a coroutine handle on first suspend
+   }
+}
+
+
+

This coroutine calls some function print with value n as an argument and +suspends execution. Every time this coroutine resumes, it calls print again with an argument one bigger than the last time. This coroutine never completes by itself and must be destroyed explicitly. If we use this coroutine with +the main shown in the previous section, it will call print with values 4, 5 +and 6, after which the coroutine will be destroyed.

+

The LLVM IR for this coroutine looks like this:

+
define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  br label %loop
+loop:
+  %n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]
+  %inc = add nsw i32 %n.val, 1
+  call void @print(i32 %n.val)
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  %unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+
+

The entry block establishes the coroutine frame. The coro.size intrinsic is +lowered to a constant representing the size required for the coroutine frame. +The coro.begin intrinsic initializes the coroutine frame and returns the +coroutine handle. The second parameter of coro.begin is given a block of memory +to be used if the coroutine frame needs to be allocated dynamically. +The coro.id intrinsic serves as a coroutine identity, useful in cases when the +coro.begin intrinsic gets duplicated by optimization passes such as +jump-threading.

+

The cleanup block destroys the coroutine frame. The coro.free intrinsic, +given the coroutine handle, returns a pointer to the memory block to be freed or +null if the coroutine frame was not allocated dynamically. The cleanup +block is entered when the coroutine runs to completion by itself or is destroyed via +a call to the coro.destroy intrinsic.

+

The suspend block contains code to be executed when the coroutine runs to +completion or is suspended. The coro.end intrinsic marks the point where +a coroutine needs to return control back to the caller if it is not an initial +invocation of the coroutine.

+

The loop block represents the body of the coroutine. The coro.suspend +intrinsic in combination with the following switch indicates what happens to +control flow when a coroutine is suspended (default case), resumed (case 0) or +destroyed (case 1).

+
+
+

Coroutine Transformation

+

One of the steps of coroutine lowering is building the coroutine frame. The +def-use chains are analyzed to determine which objects need to be kept alive across +suspend points. In the coroutine shown in the previous section, use of virtual register +%inc is separated from the definition by a suspend point, therefore, it +cannot reside on the stack frame since the latter goes away once the coroutine +is suspended and control is returned back to the caller. An i32 slot is +allocated in the coroutine frame and %inc is spilled and reloaded from that +slot as needed.

+

We also store addresses of the resume and destroy functions so that the +coro.resume and coro.destroy intrinsics can resume and destroy the coroutine +when its identity cannot be determined statically at compile time. For our +example, the coroutine frame will be:

+
%f.frame = type { void (%f.frame*)*, void (%f.frame*)*, i32 }
+
+
+

After resume and destroy parts are outlined, function f will contain only the +code responsible for creation and initialization of the coroutine frame and +execution of the coroutine until a suspend point is reached:

+
define i8* @f(i32 %n) {
+entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %alloc = call noalias i8* @malloc(i32 24)
+  %0 = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %frame = bitcast i8* %0 to %f.frame*
+  %1 = getelementptr %f.frame, %f.frame* %frame, i32 0, i32 0
+  store void (%f.frame*)* @f.resume, void (%f.frame*)** %1
+  %2 = getelementptr %f.frame, %f.frame* %frame, i32 0, i32 1
+  store void (%f.frame*)* @f.destroy, void (%f.frame*)** %2
+
+  %inc = add nsw i32 %n, 1
+  %inc.spill.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2
+  store i32 %inc, i32* %inc.spill.addr
+  call void @print(i32 %n)
+
+  ret i8* %frame
+}
+
+
+

Outlined resume part of the coroutine will reside in function f.resume:

+
define internal fastcc void @f.resume(%f.frame* %frame.ptr.resume) {
+entry:
+  %inc.spill.addr = getelementptr %f.frame, %f.frame* %frame.ptr.resume, i64 0, i32 2
+  %inc.spill = load i32, i32* %inc.spill.addr, align 4
+  %inc = add i32 %n.val, 1
+  store i32 %inc, i32* %inc.spill.addr, align 4
+  tail call void @print(i32 %inc)
+  ret void
+}
+
+
+

Whereas function f.destroy will contain the cleanup code for the coroutine:

+
define internal fastcc void @f.destroy(%f.frame* %frame.ptr.destroy) {
+entry:
+  %0 = bitcast %f.frame* %frame.ptr.destroy to i8*
+  tail call void @free(i8* %0)
+  ret void
+}
+
+
+
+
+

Avoiding Heap Allocations

+

A particular coroutine usage pattern, which is illustrated by the main +function in the overview section, where a coroutine is created, manipulated and +destroyed by the same calling function, is common for coroutines implementing +the RAII idiom and is suitable for the allocation elision optimization, which avoids +dynamic allocation by storing the coroutine frame as a static alloca in its +caller.

+

In the entry block, we will call coro.alloc intrinsic that will return true +when dynamic allocation is required, and false if dynamic allocation is +elided.

+
entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @CustomAlloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+
+
+

In the cleanup block, we will make freeing the coroutine frame conditional on +coro.free intrinsic. If allocation is elided, coro.free returns null +thus skipping the deallocation code:

+
cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  %need.dyn.free = icmp ne i8* %mem, null
+  br i1 %need.dyn.free, label %dyn.free, label %if.end
+dyn.free:
+  call void @CustomFree(i8* %mem)
+  br label %if.end
+if.end:
+  ...
+
+
+

With allocations and deallocations represented as described above, after +coroutine heap allocation elision optimization, the resulting main will be:

+
define i32 @main() {
+entry:
+  call void @print(i32 4)
+  call void @print(i32 5)
+  call void @print(i32 6)
+  ret i32 0
+}
+
+
+
+
+

Multiple Suspend Points

+

Let’s consider the coroutine that has more than one suspend point:

+
void *f(int n) {
+   for(;;) {
+     print(n++);
+     <suspend>
+     print(-n);
+     <suspend>
+   }
+}
+
+
+

Matching LLVM code would look like (with the rest of the code remaining the same +as the code in the previous section):

+
loop:
+  %n.addr = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
+  call void @print(i32 %n.addr) #4
+  %2 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %2, label %suspend [i8 0, label %loop.resume
+                                i8 1, label %cleanup]
+loop.resume:
+  %inc = add nsw i32 %n.addr, 1
+  %sub = xor i32 %n.addr, -1
+  call void @print(i32 %sub)
+  %3 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %3, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+
+
+

In this case, the coroutine frame would include a suspend index that will +indicate at which suspend point the coroutine needs to resume. The resume +function will use an index to jump to an appropriate basic block and will look +as follows:

+
define internal fastcc void @f.Resume(%f.Frame* %FramePtr) {
+entry.Resume:
+  %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i64 0, i32 2
+  %index = load i8, i8* %index.addr, align 1
+  %switch = icmp eq i8 %index, 0
+  %n.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i64 0, i32 3
+  %n = load i32, i32* %n.addr, align 4
+  br i1 %switch, label %loop.resume, label %loop
+
+loop.resume:
+  %sub = xor i32 %n, -1
+  call void @print(i32 %sub)
+  br label %suspend
+loop:
+  %inc = add nsw i32 %n, 1
+  store i32 %inc, i32* %n.addr, align 4
+  tail call void @print(i32 %inc)
+  br label %suspend
+
+suspend:
+  %storemerge = phi i8 [ 0, %loop ], [ 1, %loop.resume ]
+  store i8 %storemerge, i8* %index.addr, align 1
+  ret void
+}
+
+
+

If different cleanup code needs to get executed for different suspend points, +a similar switch will be in the f.destroy function.

+
+

Note

+

Using a suspend index in the coroutine state and having a switch in f.resume and +f.destroy is one of the possible implementation strategies. We explored +another option where a distinct f.resume1, f.resume2, etc. are created for +every suspend point, and instead of storing an index, the resume and destroy +function pointers are updated at every suspend. Early testing showed that the +current approach is easier on the optimizer than the latter, so it is the +lowering strategy implemented at the moment.

+
+
+
+

Distinct Save and Suspend

+

In the previous example, setting a resume index (or some other state change that +needs to happen to prepare a coroutine for resumption) happens at the same time as +a suspension of a coroutine. However, in certain cases, it is necessary to control +when coroutine is prepared for resumption and when it is suspended.

+

In the following example, a coroutine represents some activity that is driven +by completions of asynchronous operations async_op1 and async_op2 which get +a coroutine handle as a parameter and resume the coroutine once async +operation is finished.

+
void g() {
+   for (;;)
+     if (cond()) {
+        async_op1(<coroutine-handle>); // will resume once async_op1 completes
+        <suspend>
+        do_one();
+     }
+     else {
+        async_op2(<coroutine-handle>); // will resume once async_op2 completes
+        <suspend>
+        do_two();
+     }
+   }
+}
+
+
+

In this case, the coroutine should be ready for resumption prior to a call to +async_op1 and async_op2. The coro.save intrinsic is used to indicate a +point when the coroutine should be ready for resumption (namely, when a resume index +should be stored in the coroutine frame, so that it can be resumed at the +correct resume point):

+
if.true:
+  %save1 = call token @llvm.coro.save(i8* %hdl)
+  call void @async_op1(i8* %hdl)
+  %suspend1 = call i1 @llvm.coro.suspend(token %save1, i1 false)
+  switch i8 %suspend1, label %suspend [i8 0, label %resume1
+                                       i8 1, label %cleanup]
+if.false:
+  %save2 = call token @llvm.coro.save(i8* %hdl)
+  call void @async_op2(i8* %hdl)
+  %suspend2 = call i1 @llvm.coro.suspend(token %save2, i1 false)
+  switch i8 %suspend2, label %suspend [i8 0, label %resume2
+                                       i8 1, label %cleanup]
+
+
+
+
+

Coroutine Promise

+

A coroutine author or a frontend may designate a distinguished alloca that can +be used to communicate with the coroutine. This distinguished alloca is called +coroutine promise and is provided as the second parameter to the +coro.id intrinsic.

+

The following coroutine designates a 32 bit integer promise and uses it to +store the current value produced by a coroutine.

+
define i8* @f(i32 %n) {
+entry:
+  %promise = alloca i32
+  %pv = bitcast i32* %promise to i8*
+  %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
+  %need.dyn.alloc = call i1 @llvm.coro.alloc(token %id)
+  br i1 %need.dyn.alloc, label %dyn.alloc, label %coro.begin
+dyn.alloc:
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @malloc(i32 %size)
+  br label %coro.begin
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %dyn.alloc ]
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %phi)
+  br label %loop
+loop:
+  %n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
+  %inc = add nsw i32 %n.val, 1
+  store i32 %n.val, i32* %promise
+  %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %0, label %suspend [i8 0, label %loop
+                                i8 1, label %cleanup]
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+suspend:
+  %unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
+  ret i8* %hdl
+}
+
+
+

A coroutine consumer can rely on the coro.promise intrinsic to access the +coroutine promise.

+
define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  %promise.addr.raw = call i8* @llvm.coro.promise(i8* %hdl, i32 4, i1 false)
+  %promise.addr = bitcast i8* %promise.addr.raw to i32*
+  %val0 = load i32, i32* %promise.addr
+  call void @print(i32 %val0)
+  call void @llvm.coro.resume(i8* %hdl)
+  %val1 = load i32, i32* %promise.addr
+  call void @print(i32 %val1)
+  call void @llvm.coro.resume(i8* %hdl)
+  %val2 = load i32, i32* %promise.addr
+  call void @print(i32 %val2)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+}
+
+
+

After the example in this section is compiled, the result of the compilation will be:

+
define i32 @main() {
+entry:
+  tail call void @print(i32 4)
+  tail call void @print(i32 5)
+  tail call void @print(i32 6)
+  ret i32 0
+}
+
+
+
+
+

Final Suspend

+

A coroutine author or a frontend may designate a particular suspend to be final, +by setting the second argument of the coro.suspend intrinsic to true. +Such a suspend point has two properties:

+
    +
  • it is possible to check whether a suspended coroutine is at the final suspend +point via coro.done intrinsic;

  • +
  • a resumption of a coroutine stopped at the final suspend point leads to +undefined behavior. The only possible action for a coroutine at a final +suspend point is destroying it via coro.destroy intrinsic.

  • +
+

From the user perspective, the final suspend point represents an idea of a +coroutine reaching the end. From the compiler perspective, it is an optimization +opportunity for reducing number of resume points (and therefore switch cases) in +the resume function.

+

The following is an example of a function that keeps resuming the coroutine +until the final suspend point is reached after which point the coroutine is +destroyed:

+
define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4)
+  br label %while
+while:
+  call void @llvm.coro.resume(i8* %hdl)
+  %done = call i1 @llvm.coro.done(i8* %hdl)
+  br i1 %done, label %end, label %while
+end:
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+}
+
+
+

Usually, final suspend point is a frontend injected suspend point that does not +correspond to any explicitly authored suspend point of the high level language. +For example, for a Python generator that has only one suspend point:

+
def coroutine(n):
+  for i in range(n):
+    yield i
+
+
+

Python frontend would inject two more suspend points, so that the actual code +looks like this:

+
void* coroutine(int n) {
+  int current_value;
+  <designate current_value to be coroutine promise>
+  <SUSPEND> // injected suspend point, so that the coroutine starts suspended
+  for (int i = 0; i < n; ++i) {
+    current_value = i; <SUSPEND>; // corresponds to "yield i"
+  }
+  <SUSPEND final=true> // injected final suspend point
+}
+
+
+

and python iterator __next__ would look like:

+
int __next__(void* hdl) {
+  coro.resume(hdl);
+  if (coro.done(hdl)) throw StopIteration();
+  return *(int*)coro.promise(hdl, 4, false);
+}
+
+
+
+
+
+

Intrinsics

+
+

Coroutine Manipulation Intrinsics

+

Intrinsics described in this section are used to manipulate an existing +coroutine. They can be used in any function which happens to have a pointer +to a coroutine frame or a pointer to a coroutine promise.

+
+

‘llvm.coro.destroy’ Intrinsic

+
+
Syntax:
+
declare void @llvm.coro.destroy(i8* <handle>)
+
+
+
+
+
Overview:
+

The ‘llvm.coro.destroy’ intrinsic destroys a suspended +switched-resume coroutine.

+
+
+
Arguments:
+

The argument is a coroutine handle to a suspended coroutine.

+
+
+
Semantics:
+

When possible, the coro.destroy intrinsic is replaced with a direct call to +the coroutine destroy function. Otherwise it is replaced with an indirect call +based on the function pointer for the destroy function stored in the coroutine +frame. Destroying a coroutine that is not suspended leads to undefined behavior.

+
+
+
+

‘llvm.coro.resume’ Intrinsic

+
declare void @llvm.coro.resume(i8* <handle>)
+
+
+
+
Overview:
+

The ‘llvm.coro.resume’ intrinsic resumes a suspended switched-resume coroutine.

+
+
+
Arguments:
+

The argument is a handle to a suspended coroutine.

+
+
+
Semantics:
+

When possible, the coro.resume intrinsic is replaced with a direct call to the +coroutine resume function. Otherwise it is replaced with an indirect call based +on the function pointer for the resume function stored in the coroutine frame. +Resuming a coroutine that is not suspended leads to undefined behavior.

+
+
+
+

‘llvm.coro.done’ Intrinsic

+
declare i1 @llvm.coro.done(i8* <handle>)
+
+
+
+
Overview:
+

The ‘llvm.coro.done’ intrinsic checks whether a suspended +switched-resume coroutine is at the final suspend point or not.

+
+
+
Arguments:
+

The argument is a handle to a suspended coroutine.

+
+
+
Semantics:
+

Using this intrinsic on a coroutine that does not have a final suspend point +or on a coroutine that is not suspended leads to undefined behavior.

+
+
+
+

‘llvm.coro.promise’ Intrinsic

+
declare i8* @llvm.coro.promise(i8* <ptr>, i32 <alignment>, i1 <from>)
+
+
+
+
Overview:
+

The ‘llvm.coro.promise’ intrinsic obtains a pointer to a +coroutine promise given a switched-resume coroutine handle and vice versa.

+
+
+
Arguments:
+

The first argument is a handle to a coroutine if from is false. Otherwise, +it is a pointer to a coroutine promise.

+

The second argument is the alignment requirement of the promise. +If a frontend designated %promise = alloca i32 as a promise, the alignment +argument to coro.promise should be the alignment of i32 on the target +platform. If a frontend designated %promise = alloca i32, align 16 as a +promise, the alignment argument should be 16. +This argument only accepts constants.

+

The third argument is a boolean indicating the direction of the transformation. +If from is true, the intrinsic returns a coroutine handle given a pointer +to a promise. If from is false, the intrinsic returns a pointer to a promise +from a coroutine handle. This argument only accepts constants.

+
+
+
Semantics:
+

Using this intrinsic on a coroutine that does not have a coroutine promise +leads to undefined behavior. It is possible to read and modify the coroutine +promise of the coroutine which is currently executing. The coroutine author and +a coroutine user are responsible for making sure there are no data races.

+
+
+
Example:
+
define i8* @f(i32 %n) {
+entry:
+  %promise = alloca i32
+  %pv = bitcast i32* %promise to i8*
+  ; the second argument to coro.id points to the coroutine promise.
+  %id = call token @llvm.coro.id(i32 0, i8* %pv, i8* null, i8* null)
+  ...
+  %hdl = call noalias i8* @llvm.coro.begin(token %id, i8* %alloc)
+  ...
+  store i32 42, i32* %promise ; store something into the promise
+  ...
+  ret i8* %hdl
+}
+
+define i32 @main() {
+entry:
+  %hdl = call i8* @f(i32 4) ; starts the coroutine and returns its handle
+  %promise.addr.raw = call i8* @llvm.coro.promise(i8* %hdl, i32 4, i1 false)
+  %promise.addr = bitcast i8* %promise.addr.raw to i32*
+  %val = load i32, i32* %promise.addr ; load a value from the promise
+  call void @print(i32 %val)
+  call void @llvm.coro.destroy(i8* %hdl)
+  ret i32 0
+}
+
+
+
+
+
+
+

Coroutine Structure Intrinsics

+

Intrinsics described in this section are used within a coroutine to describe +the coroutine structure. They should not be used outside of a coroutine.

+
+

‘llvm.coro.size’ Intrinsic

+
declare i32 @llvm.coro.size.i32()
+declare i64 @llvm.coro.size.i64()
+
+
+
+
Overview:
+

The ‘llvm.coro.size’ intrinsic returns the number of bytes +required to store a coroutine frame. This is only supported for +switched-resume coroutines.

+
+
+
Arguments:
+

None

+
+
+
Semantics:
+

The coro.size intrinsic is lowered to a constant representing the size of +the coroutine frame.

+
+
+
+

‘llvm.coro.begin’ Intrinsic

+
declare i8* @llvm.coro.begin(token <id>, i8* <mem>)
+
+
+
+
Overview:
+

The ‘llvm.coro.begin’ intrinsic returns an address of the coroutine frame.

+
+
+
Arguments:
+

The first argument is a token returned by a call to ‘llvm.coro.id’ +identifying the coroutine.

+

The second argument is a pointer to a block of memory where coroutine frame +will be stored if it is allocated dynamically. This pointer is ignored +for returned-continuation coroutines.

+
+
+
Semantics:
+

Depending on the alignment requirements of the objects in the coroutine frame +and/or on the codegen compactness reasons the pointer returned from coro.begin +may be at offset to the %mem argument. (This could be beneficial if +instructions that express relative access to data can be more compactly encoded +with small positive and negative offsets).

+

A frontend should emit exactly one coro.begin intrinsic per coroutine.

+
+
+
+

‘llvm.coro.free’ Intrinsic

+
declare i8* @llvm.coro.free(token %id, i8* <frame>)
+
+
+
+
Overview:
+

The ‘llvm.coro.free’ intrinsic returns a pointer to a block of memory where +coroutine frame is stored or null if this instance of a coroutine did not use +dynamically allocated memory for its coroutine frame. This intrinsic is not +supported for returned-continuation coroutines.

+
+
+
Arguments:
+

The first argument is a token returned by a call to ‘llvm.coro.id’ +identifying the coroutine.

+

The second argument is a pointer to the coroutine frame. This should be the same +pointer that was returned by prior coro.begin call.

+
+
+
Example (custom deallocation function):
+
cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %frame)
+  %mem_not_null = icmp ne i8* %mem, null
+  br i1 %mem_not_null, label %if.then, label %if.end
+if.then:
+  call void @CustomFree(i8* %mem)
+  br label %if.end
+if.end:
+  ret void
+
+
+
+
+
Example (standard deallocation functions):
+
cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %frame)
+  call void @free(i8* %mem)
+  ret void
+
+
+
+
+
+

‘llvm.coro.alloc’ Intrinsic

+
declare i1 @llvm.coro.alloc(token <id>)
+
+
+
+
Overview:
+

The ‘llvm.coro.alloc’ intrinsic returns true if dynamic allocation is +required to obtain a memory for the coroutine frame and false otherwise. +This is not supported for returned-continuation coroutines.

+
+
+
Arguments:
+

The first argument is a token returned by a call to ‘llvm.coro.id’ +identifying the coroutine.

+
+
+
Semantics:
+

A frontend should emit at most one coro.alloc intrinsic per coroutine. +The intrinsic is used to suppress dynamic allocation of the coroutine frame +when possible.

+
+
+
Example:
+
entry:
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %dyn.alloc.required = call i1 @llvm.coro.alloc(token %id)
+  br i1 %dyn.alloc.required, label %coro.alloc, label %coro.begin
+
+coro.alloc:
+  %frame.size = call i32 @llvm.coro.size.i32()
+  %alloc = call i8* @MyAlloc(i32 %frame.size)
+  br label %coro.begin
+
+coro.begin:
+  %phi = phi i8* [ null, %entry ], [ %alloc, %coro.alloc ]
+  %frame = call i8* @llvm.coro.begin(token %id, i8* %phi)
+
+
+
+
+
+

‘llvm.coro.noop’ Intrinsic

+
declare i8* @llvm.coro.noop()
+
+
+
+
Overview:
+

The ‘llvm.coro.noop’ intrinsic returns an address of the coroutine frame of +a coroutine that does nothing when resumed or destroyed.

+
+
+
Arguments:
+

None

+
+
+
Semantics:
+

This intrinsic is lowered to refer to a private constant coroutine frame. The +resume and destroy handlers for this frame are empty functions that do nothing. +Note that in different translation units llvm.coro.noop may return different pointers.

+
+
+
+

‘llvm.coro.frame’ Intrinsic

+
declare i8* @llvm.coro.frame()
+
+
+
+
Overview:
+

The ‘llvm.coro.frame’ intrinsic returns an address of the coroutine frame of +the enclosing coroutine.

+
+
+
Arguments:
+

None

+
+
+
Semantics:
+

This intrinsic is lowered to refer to the coro.begin instruction. This is +a frontend convenience intrinsic that makes it easier to refer to the +coroutine frame.

+
+
+
+

‘llvm.coro.id’ Intrinsic

+
declare token @llvm.coro.id(i32 <align>, i8* <promise>, i8* <coroaddr>,
+                                                        i8* <fnaddrs>)
+
+
+
+
Overview:
+

The ‘llvm.coro.id’ intrinsic returns a token identifying a +switched-resume coroutine.

+
+
+
Arguments:
+

The first argument provides information on the alignment of the memory returned +by the allocation function and given to coro.begin as its second argument. If +this argument is 0, the memory is assumed to be aligned to 2 * sizeof(i8*). +This argument only accepts constants.

+

The second argument, if not null, designates a particular alloca instruction +to be a coroutine promise.

+

The third argument is null coming out of the frontend. The CoroEarly pass sets +this argument to point to the function this coro.id belongs to.

+

The fourth argument is null before coroutine is split, and later is replaced +to point to a private global constant array containing function pointers to +outlined resume and destroy parts of the coroutine.

+
+
+
Semantics:
+

The purpose of this intrinsic is to tie together coro.id, coro.alloc and +coro.begin belonging to the same coroutine to prevent optimization passes from +duplicating any of these instructions unless entire body of the coroutine is +duplicated.

+

A frontend should emit exactly one coro.id intrinsic per coroutine.

+
+
+
+

‘llvm.coro.id.async’ Intrinsic

+
declare token @llvm.coro.id.async(i32 <context size>, i32 <align>,
+                                  i8* <context arg>,
+                                  i8* <async function pointer>)
+
+
+
+
Overview:
+

The ‘llvm.coro.id.async’ intrinsic returns a token identifying an async coroutine.

+
+
+
Arguments:
+

The first argument provides the initial size of the async context as required +from the frontend. Lowering will add to this size the size required by the frame +storage and store that value to the async function pointer.

+

The second argument is the alignment guarantee of the memory of the +async context. The frontend guarantees that the memory will be aligned to this +value.

+

The third argument is the async context argument in the current coroutine.

+

The fourth argument is the address of the async function pointer struct. +Lowering will update the context size requirement in this struct by adding the +coroutine frame size requirement to the initial size requirement as specified by +the first argument of this intrinsic.

+
+
+
Semantics:
+

A frontend should emit exactly one coro.id.async intrinsic per coroutine.

+
+
+
+

‘llvm.coro.id.retcon’ Intrinsic

+
declare token @llvm.coro.id.retcon(i32 <size>, i32 <align>, i8* <buffer>,
+                                   i8* <continuation prototype>,
+                                   i8* <alloc>, i8* <dealloc>)
+
+
+
+
Overview:
+

The ‘llvm.coro.id.retcon’ intrinsic returns a token identifying a +multiple-suspend returned-continuation coroutine.

+

The ‘result-type sequence’ of the coroutine is defined as follows:

+
    +
  • if the return type of the coroutine function is void, it is the +empty sequence;

  • +
  • if the return type of the coroutine function is a struct, it is the +element types of that struct in order;

  • +
  • otherwise, it is just the return type of the coroutine function.

  • +
+

The first element of the result-type sequence must be a pointer type; +continuation functions will be coerced to this type. The rest of +the sequence are the ‘yield types’, and any suspends in the coroutine +must take arguments of these types.

+
+
+
Arguments:
+

The first and second arguments are the expected size and alignment of +the buffer provided as the third argument. They must be constant.

+

The fourth argument must be a reference to a global function, called +the ‘continuation prototype function’. The type, calling convention, +and attributes of any continuation functions will be taken from this +declaration. The return type of the prototype function must match the +return type of the current function. The first parameter type must be +a pointer type. The second parameter type must be an integer type; +it will be used only as a boolean flag.

+

The fifth argument must be a reference to a global function that will +be used to allocate memory. It may not fail, either by returning null +or throwing an exception. It must take an integer and return a pointer.

+

The sixth argument must be a reference to a global function that will +be used to deallocate memory. It must take a pointer and return void.

+
+
+
+

‘llvm.coro.id.retcon.once’ Intrinsic

+
declare token @llvm.coro.id.retcon.once(i32 <size>, i32 <align>, i8* <buffer>,
+                                        i8* <prototype>,
+                                        i8* <alloc>, i8* <dealloc>)
+
+
+
+
Overview:
+

The ‘llvm.coro.id.retcon.once’ intrinsic returns a token identifying a +unique-suspend returned-continuation coroutine.

+
+
+
Arguments:
+

As for llvm.coro.id.retcon, except that the return type of the +continuation prototype must be void instead of matching the +coroutine’s return type.

+
+
+
+

‘llvm.coro.end’ Intrinsic

+
declare i1 @llvm.coro.end(i8* <handle>, i1 <unwind>)
+
+
+
+
Overview:
+

The ‘llvm.coro.end’ marks the point where execution of the resume part of +the coroutine should end and control should return to the caller.

+
+
+
Arguments:
+

The first argument should refer to the coroutine handle of the enclosing +coroutine. A frontend is allowed to supply null as the first parameter, in this +case coro-early pass will replace the null with an appropriate coroutine +handle value.

+

The second argument should be true if this coro.end is in the block that is +part of the unwind sequence leaving the coroutine body due to an exception and +false otherwise.

+
+
+
Semantics:
+

The purpose of this intrinsic is to allow frontends to mark the cleanup and +other code that is only relevant during the initial invocation of the coroutine +and should not be present in resume and destroy parts.

+

In returned-continuation lowering, llvm.coro.end fully destroys the +coroutine frame. If the second argument is false, it also returns from +the coroutine with a null continuation pointer, and the next instruction +will be unreachable. If the second argument is true, it falls through +so that the following logic can resume unwinding. In a yield-once +coroutine, reaching a non-unwind llvm.coro.end without having first +reached a llvm.coro.suspend.retcon has undefined behavior.

+

The remainder of this section describes the behavior under switched-resume +lowering.

+

This intrinsic is lowered when a coroutine is split into +the start, resume and destroy parts. In the start part, it is a no-op, +in resume and destroy parts, it is replaced with ret void instruction and +the rest of the block containing coro.end instruction is discarded. +In landing pads it is replaced with an appropriate instruction to unwind to +caller. The handling of coro.end differs depending on whether the target is +using landingpad or WinEH exception model.

+

For landingpad based exception model, it is expected that frontend uses the +coro.end intrinsic as follows:

+
ehcleanup:
+  %InResumePart = call i1 @llvm.coro.end(i8* null, i1 true)
+  br i1 %InResumePart, label %eh.resume, label %cleanup.cont
+
+cleanup.cont:
+  ; rest of the cleanup
+
+eh.resume:
+  %exn = load i8*, i8** %exn.slot, align 8
+  %sel = load i32, i32* %ehselector.slot, align 4
+  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+  %lpad.val29 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+  resume { i8*, i32 } %lpad.val29
+
+
+

The CoroSplit pass replaces coro.end with True in the resume functions, +thus leading to immediate unwind to the caller, whereas in the start function it +is replaced with False, thus allowing to proceed to the rest of the cleanup +code that is only needed during initial invocation of the coroutine.

+

For Windows Exception handling model, a frontend should attach a funclet bundle +referring to an enclosing cleanuppad as follows:

+
ehcleanup:
+  %tok = cleanuppad within none []
+  %unused = call i1 @llvm.coro.end(i8* null, i1 true) [ "funclet"(token %tok) ]
+  cleanupret from %tok unwind label %RestOfTheCleanup
+
+
+

The CoroSplit pass, if the funclet bundle is present, will insert +cleanupret from %tok unwind to caller before +the coro.end intrinsic and will remove the rest of the block.

+

The following table summarizes the handling of coro.end intrinsic.

+ ++++++ + + + + + + + + + + + + + + + + + + + +

In Start Function

In Resume/Destroy Functions

unwind=false

nothing

ret void

unwind=true

WinEH

nothing

cleanupret unwind to caller

Landingpad

nothing

nothing

+
+
+
+

‘llvm.coro.end.async’ Intrinsic

+
declare i1 @llvm.coro.end.async(i8* <handle>, i1 <unwind>, ...)
+
+
+
+
Overview:
+

The ‘llvm.coro.end.async’ marks the point where execution of the resume part +of the coroutine should end and control should return to the caller. As part of +its variable tail arguments this instruction allows specifying a function and +the function’s arguments that are to be tail called as the last action before +returning.

+
+
+
Arguments:
+

The first argument should refer to the coroutine handle of the enclosing +coroutine. A frontend is allowed to supply null as the first parameter, in this +case coro-early pass will replace the null with an appropriate coroutine +handle value.

+

The second argument should be true if this coro.end is in the block that is +part of the unwind sequence leaving the coroutine body due to an exception and +false otherwise.

+

The third argument if present should specify a function to be called.

+

If the third argument is present, the remaining arguments are the arguments to +the function call.

+
call i1 (i8*, i1, ...) @llvm.coro.end.async(
+                         i8* %hdl, i1 0,
+                         void (i8*, %async.task*, %async.actor*)* @must_tail_call_return,
+                         i8* %ctxt, %async.task* %task, %async.actor* %actor)
+unreachable
+
+
+
+
+
+

‘llvm.coro.suspend’ Intrinsic

+
declare i8 @llvm.coro.suspend(token <save>, i1 <final>)
+
+
+
+
Overview:
+

The ‘llvm.coro.suspend’ marks the point where execution of a +switched-resume coroutine is suspended and control is returned back +to the caller. Conditional branches consuming the result of this +intrinsic lead to basic blocks where coroutine should proceed when +suspended (-1), resumed (0) or destroyed (1).

+
+
+
Arguments:
+

The first argument refers to a token of coro.save intrinsic that marks the +point when coroutine state is prepared for suspension. If none token is passed, +the intrinsic behaves as if there were a coro.save immediately preceding +the coro.suspend intrinsic.

+

The second argument indicates whether this suspension point is final. +The second argument only accepts constants. If more than one suspend point is +designated as final, the resume and destroy branches should lead to the same +basic blocks.

+
+
+
Example (normal suspend point):
+
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
+switch i8 %0, label %suspend [i8 0, label %resume
+                              i8 1, label %cleanup]
+
+
+
+
+
Example (final suspend point):
+
while.end:
+  %s.final = call i8 @llvm.coro.suspend(token none, i1 true)
+  switch i8 %s.final, label %suspend [i8 0, label %trap
+                                      i8 1, label %cleanup]
+trap:
+  call void @llvm.trap()
+  unreachable
+
+
+
+
+
Semantics:
+

If a coroutine that was suspended at the suspend point marked by this intrinsic +is resumed via coro.resume the control will transfer to the basic block +of the 0-case. If it is resumed via coro.destroy, it will proceed to the +basic block indicated by the 1-case. To suspend, the coroutine proceeds to the +default label.

+

If suspend intrinsic is marked as final, it can consider the true branch +unreachable and can perform optimizations that can take advantage of that fact.

+
+
+
+

‘llvm.coro.save’ Intrinsic

+
declare token @llvm.coro.save(i8* <handle>)
+
+
+
+
Overview:
+

The ‘llvm.coro.save’ marks the point where a coroutine needs to update its +state to prepare for resumption to be considered suspended (and thus eligible +for resumption).

+
+
+
Arguments:
+

The first argument points to a coroutine handle of the enclosing coroutine.

+
+
+
Semantics:
+

Whatever coroutine state changes are required to enable resumption of +the coroutine from the corresponding suspend point should be done at the point +of coro.save intrinsic.

+
+
+
Example:
+

Separate save and suspend points are necessary when a coroutine is used to +represent an asynchronous control flow driven by callbacks representing +completions of asynchronous operations.

+

In such a case, a coroutine should be ready for resumption prior to a call to +async_op function that may trigger resumption of a coroutine from the same or +a different thread possibly prior to async_op call returning control back +to the coroutine:

+
%save1 = call token @llvm.coro.save(i8* %hdl)
+call void @async_op1(i8* %hdl)
+%suspend1 = call i8 @llvm.coro.suspend(token %save1, i1 false)
+switch i8 %suspend1, label %suspend [i8 0, label %resume1
+                                     i8 1, label %cleanup]
+
+
+
+
+
+

‘llvm.coro.suspend.async’ Intrinsic

+
declare {i8*, i8*, i8*} @llvm.coro.suspend.async(
+                           i8* <resume function>,
+                           i8* <context projection function>,
+                           ... <function to call>
+                           ... <arguments to function>)
+
+
+
+
Overview:
+

The ‘llvm.coro.suspend.async’ intrinsic marks the point where +execution of an async coroutine is suspended and control is passed to a callee.

+
+
+
Arguments:
+

The first argument should be the result of the llvm.coro.async.resume intrinsic. +Lowering will replace this intrinsic with the resume function for this suspend +point.

+

The second argument is the context projection function. It should describe +how to restore the async context in the continuation function from the first +argument of the continuation function. Its type is i8* (i8*).

+

The third argument is the function that models transfer to the callee at the +suspend point. It should take 3 arguments. Lowering will musttail call this +function.

+

The fourth to sixth arguments are the arguments for the function passed as the third argument.

+
+
+
Semantics:
+

The results of the intrinsic are mapped to the arguments of the resume function. +Execution is suspended at this intrinsic and resumed when the resume function is +called.

+
+
+
+

‘llvm.coro.prepare.async’ Intrinsic

+
declare i8* @llvm.coro.prepare.async(i8* <coroutine function>)
+
+
+
+
Overview:
+

The ‘llvm.coro.prepare.async’ intrinsic is used to block inlining of the +async coroutine until after coroutine splitting.

+
+
+
Arguments:
+

The first argument should be an async coroutine of type void (i8*, i8*, i8*). +Lowering will replace this intrinsic with its coroutine function argument.

+
+
+
+

‘llvm.coro.suspend.retcon’ Intrinsic

+
declare i1 @llvm.coro.suspend.retcon(...)
+
+
+
+
Overview:
+

The ‘llvm.coro.suspend.retcon’ intrinsic marks the point where +execution of a returned-continuation coroutine is suspended and control +is returned back to the caller.

+

llvm.coro.suspend.retcon does not support separate save points; +they are not useful when the continuation function is not locally +accessible. That would be a more appropriate feature for a passcon +lowering that is not yet implemented.

+
+
+
Arguments:
+

The types of the arguments must exactly match the yielded-types sequence +of the coroutine. They will be turned into return values from the ramp +and continuation functions, along with the next continuation function.

+
+
+
Semantics:
+

The result of the intrinsic indicates whether the coroutine should resume +abnormally (non-zero).

+

In a normal coroutine, it is undefined behavior if the coroutine executes +a call to llvm.coro.suspend.retcon after resuming abnormally.

+

In a yield-once coroutine, it is undefined behavior if the coroutine +executes a call to llvm.coro.suspend.retcon after resuming in any way.

+
+
+
+

‘llvm.coro.param’ Intrinsic

+
declare i1 @llvm.coro.param(i8* <original>, i8* <copy>)
+
+
+
+
Overview:
+

The ‘llvm.coro.param’ is used by a frontend to mark up the code used to +construct and destruct copies of the parameters. If the optimizer discovers that +a particular parameter copy is not used after any suspends, it can remove the +construction and destruction of the copy by replacing corresponding coro.param +with i1 false and replacing any use of the copy with the original.

+
+
+
Arguments:
+

The first argument points to an alloca storing the value of a parameter to a +coroutine.

+

The second argument points to an alloca storing the value of the copy of that +parameter.

+
+
+
Semantics:
+

The optimizer is free to always replace this intrinsic with i1 true.

+

The optimizer is also allowed to replace it with i1 false provided that the +parameter copy is only used prior to control flow reaching any of the suspend +points. The code that would be DCE’d if the coro.param is replaced with +i1 false is not considered to be a use of the parameter copy.

+

The frontend can emit this intrinsic if its language rules allow for this +optimization.

+
+
+
Example:
+

Consider the following example. A coroutine takes two parameters a and b +that have a destructor and a move constructor.

+
struct A { ~A(); A(A&&); bool foo(); void bar(); };
+
+task<int> f(A a, A b) {
+  if (a.foo())
+    return 42;
+
+  a.bar();
+  co_await read_async(); // introduces suspend point
+  b.bar();
+}
+
+
+

Note that b is used after a suspend point and thus must be copied +into a coroutine frame, whereas a does not have to be, since it is never used +after suspend.

+

A frontend can create parameter copies for a and b as follows:

+
task<int> f(A a', A b') {
+  a = alloca A;
+  b = alloca A;
+  // move parameters to its copies
+  if (coro.param(a', a)) A::A(a, A&& a');
+  if (coro.param(b', b)) A::A(b, A&& b');
+  ...
+  // destroy parameters copies
+  if (coro.param(a', a)) A::~A(a);
+  if (coro.param(b', b)) A::~A(b);
+}
+
+
+

The optimizer can replace coro.param(a’,a) with i1 false and replace all uses +of a with a’, since it is not used after suspend.

+

The optimizer must replace coro.param(b’, b) with i1 true, since b is used +after suspend and therefore, it has to reside in the coroutine frame.

+
+
+
+
+
+

Coroutine Transformation Passes

+
+

CoroEarly

+

The pass CoroEarly lowers coroutine intrinsics that hide the details of the +structure of the coroutine frame, but otherwise do not need to be preserved to +help later coroutine passes. This pass lowers coro.frame, coro.done, +and coro.promise intrinsics.

+
+
+

CoroSplit

+

The pass CoroSplit builds the coroutine frame and outlines resume and destroy parts +into separate functions.

+
+
+

CoroElide

+

The pass CoroElide examines if the inlined coroutine is eligible for heap +allocation elision optimization. If so, it replaces +coro.begin intrinsic with an address of a coroutine frame placed on its caller +and replaces coro.alloc and coro.free intrinsics with false and null +respectively to remove the deallocation code. +This pass also replaces coro.resume and coro.destroy intrinsics with direct +calls to resume and destroy functions for a particular coroutine where possible.

+
+
+

CoroCleanup

+

This pass runs late to lower all coroutine related intrinsics not replaced by +earlier passes.

+
+
+
+

Areas Requiring Attention

+
    +
  1. When coro.suspend returns -1, the coroutine is suspended, and it’s possible +that the coroutine has already been destroyed (hence the frame has been freed). +We cannot access anything on the frame on the suspend path. +However there is nothing that prevents the compiler from moving instructions +along that path (e.g. LICM), which can lead to use-after-free. At the moment +we disabled LICM for loops that have coro.suspend, but the general problem still +exists and requires a general solution.

  2. +
  3. Take advantage of the lifetime intrinsics for the data that goes into the +coroutine frame. Leave lifetime intrinsics as is for the data that stays in +allocas.

  4. +
  5. The CoroElide optimization pass relies on coroutine ramp function to be +inlined. It would be beneficial to split the ramp function further to +increase the chance that it will get inlined into its caller.

  6. +
  7. Design a convention that would make it possible to apply coroutine heap +elision optimization across ABI boundaries.

  8. +
  9. Cannot handle coroutines with inalloca parameters (used in x86 on Windows).

  10. +
  11. Alignment is ignored by coro.begin and coro.free intrinsics.

  12. +
  13. Make required changes to make sure that coroutine optimizations work with +LTO.

  14. +
  15. More tests, more tests, more tests

  16. +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CoverageMappingFormat.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CoverageMappingFormat.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/CoverageMappingFormat.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/CoverageMappingFormat.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,711 @@ + + + + + + + + + LLVM Code Coverage Mapping Format — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Code Coverage Mapping Format

+ +
+

Introduction

+

LLVM’s code coverage mapping format is used to provide code coverage +analysis using LLVM’s and Clang’s instrumentation based profiling +(Clang’s -fprofile-instr-generate option).

+

This document is aimed at those who would like to know how LLVM’s code coverage +mapping works under the hood. A prior knowledge of how Clang’s profile guided +optimization works is useful, but not required. For those interested in using +LLVM to provide code coverage analysis for their own programs, see the Clang +documentation <https://clang.llvm.org/docs/SourceBasedCodeCoverage.html>.

+

We start by briefly describing LLVM’s code coverage mapping format and the +way that Clang and LLVM’s code coverage tool work with this format. After +the basics are down, more advanced features of the coverage mapping format +are discussed - such as the data structures, LLVM IR representation and +the binary encoding.

+
+
+

High Level Overview

+

LLVM’s code coverage mapping format is designed to be a self contained +data format that can be embedded into the LLVM IR and into object files. +It’s described in this document as a mapping format because its goal is +to store the data that is required for a code coverage tool to map between +the specific source ranges in a file and the execution counts obtained +after running the instrumented version of the program.

+

The mapping data is used in two places in the code coverage process:

+
    +
  1. When clang compiles a source file with -fcoverage-mapping, it +generates the mapping information that describes the mapping between the +source ranges and the profiling instrumentation counters. +This information gets embedded into the LLVM IR and conveniently +ends up in the final executable file when the program is linked.

  2. +
  3. It is also used by llvm-cov - the mapping information is extracted from an +object file and is used to associate the execution counts (the values of the +profile instrumentation counters), and the source ranges in a file. +After that, the tool is able to generate various code coverage reports +for the program.

  4. +
+

The coverage mapping format aims to be a “universal format” that would be +suitable for usage by any frontend, and not just by Clang. It also aims to +provide the frontend the possibility of generating the minimal coverage mapping +data in order to reduce the size of the IR and object files - for example, +instead of emitting mapping information for each statement in a function, the +frontend is allowed to group the statements with the same execution count into +regions of code, and emit the mapping information only for those regions.

+
+
+

Advanced Concepts

+

The remainder of this guide is meant to give you insight into the way the +coverage mapping format works.

+

The coverage mapping format operates on a per-function level as the +profile instrumentation counters are associated with a specific function. +For each function that requires code coverage, the frontend has to create +coverage mapping data that can map between the source code ranges and +the profile instrumentation counters for that function.

+
+

Mapping Region

+

The function’s coverage mapping data contains an array of mapping regions. +A mapping region stores the source code range that is covered by this region, +the file id, the coverage mapping counter and +the region’s kind. +There are several kinds of mapping regions:

+
    +
  • Code regions associate portions of source code and coverage mapping +counters. They make up the majority of the mapping regions. They are used +by the code coverage tool to compute the execution counts for lines, +highlight the regions of code that were never executed, and to obtain +the various code coverage statistics for a function. +For example:

    +

    int main(int argc, const char *argv[]) {     // Code Region from 1:40 to 9:2
    +                                            
    +  if (argc > 1) {                            // Code Region from 3:17 to 5:4
    +    printf("%s\n", argv[1]);              
    +  } else {                                   // Code Region from 5:10 to 7:4
    +    printf("\n");                         
    +  }                                         
    +  return 0;                                 
    +}
    +

    +
  • +
  • Skipped regions are used to represent source ranges that were skipped +by Clang’s preprocessor. They don’t associate with +coverage mapping counters, as the frontend knows that they are never +executed. They are used by the code coverage tool to mark the skipped lines +inside a function as non-code lines that don’t have execution counts. +For example:

    +

    int main() {                // Code Region from 1:12 to 6:2
    +#ifdef DEBUG                // Skipped Region from 2:1 to 4:2
    +  printf("Hello world"); 
    +#endif                     
    +  return 0;                
    +}
    +

    +
  • +
  • Expansion regions are used to represent Clang’s macro expansions. They +have an additional property - expanded file id. This property can be +used by the code coverage tool to find the mapping regions that are created +as a result of this macro expansion, by checking if their file id matches the +expanded file id. They don’t associate with coverage mapping counters, +as the code coverage tool can determine the execution count for this region +by looking up the execution count of the first region with a corresponding +file id. +For example:

    +

    int func(int x) {                             
    +  #define MAX(x,y) ((x) > (y)? (x) : (y))     
    +  return MAX(x, 42);                           // Expansion Region from 3:10 to 3:13
    +}
    +

    +
  • +
  • Branch regions associate instrumentable branch conditions in the source code +with a coverage mapping counter to track how many times an individual +condition evaluated to ‘true’ and another coverage mapping counter to +track how many times that condition evaluated to false. Instrumentable +branch conditions may comprise larger boolean expressions using boolean +logical operators. The ‘true’ and ‘false’ cases reflect unique branch paths +that can be traced back to the source code. +For example:

    +

    int func(int x, int y) {
    +  if ((x > 1) || (y > 3)) {  // Branch Region from 3:6 to 3:12
    +                             // Branch Region from 3:17 to 3:23
    +    printf("%d\n", x);              
    +  } else {                                
    +    printf("\n");                         
    +  }
    +  return 0;                                 
    +}
    +

    +
  • +
+
+

Source Range:

+

The source range record contains the starting and ending location of a certain +mapping region. Both locations include the line and the column numbers.

+
+
+

File ID:

+

The file id is an integer value that tells us +in which source file or macro expansion this region is located. +It enables Clang to produce mapping information for the code +defined inside macros, like this example demonstrates:

+

void func(const char *str) {         // Code Region from 1:28 to 6:2 with file id 0
+  #define PUT printf("%s\n", str)    // 2 Code Regions from 2:15 to 2:34 with file ids 1 and 2
+  if(*str)                          
+    PUT;                             // Expansion Region from 4:5 to 4:8 with file id 0 that expands a macro with file id 1
+  PUT;                               // Expansion Region from 5:3 to 5:6 with file id 0 that expands a macro with file id 2
+}
+

+
+
+

Counter:

+

A coverage mapping counter can represent a reference to the profile +instrumentation counter. The execution count for a region with such a counter +is determined by looking up the value of the corresponding profile +instrumentation counter.

+

It can also represent a binary arithmetical expression that operates on +coverage mapping counters or other expressions. +The execution count for a region with an expression counter is determined by +evaluating the expression’s arguments and then adding them together or +subtracting them from one another. +In the example below, a subtraction expression is used to compute the execution +count for the compound statement that follows the else keyword:

+

int main(int argc, const char *argv[]) {    // Region's counter is a reference to the profile counter #0
+                                           
+  if (argc > 1) {                           // Region's counter is a reference to the profile counter #1
+    printf("%s\n", argv[1]);                
+  } else {                                  // Region's counter is an expression (reference to the profile counter #0 - reference to the profile counter #1)
+    printf("\n");                        
+  }                                        
+  return 0;                                
+}
+

+

Finally, a coverage mapping counter can also represent an execution count +of zero. The zero counter is used to provide coverage mapping for +unreachable statements and expressions, like in the example below:

+

int main() {                  
+  return 0;                   
+  printf("Hello world!\n");    // Unreachable region's counter is zero
+}
+

+

The zero counters allow the code coverage tool to display proper line execution +counts for the unreachable lines and highlight the unreachable code. +Without them, the tool would think that those lines and regions were still +executed, as it doesn’t possess the frontend’s knowledge.

+

Note that branch regions are created to track branch conditions in the source +code and refer to two coverage mapping counters, one to track the number of +times the branch condition evaluated to “true”, and one to track the number of +times the branch condition evaluated to “false”.

+
+
+
+
+

LLVM IR Representation

+

The coverage mapping data is stored in the LLVM IR using a global constant +structure variable called __llvm_coverage_mapping with the IPSK_covmap +section specifier (i.e. “.lcovmap$M” on Windows and “__llvm_covmap” elsewhere).

+

For example, let’s consider a C file and how it gets compiled to LLVM:

+
int foo() {
+  return 42;
+}
+int bar() {
+  return 13;
+}
+
+
+

The coverage mapping variable generated by Clang has 2 fields:

+
    +
  • Coverage mapping header.

  • +
  • An optionally compressed list of filenames present in the translation unit.

  • +
+

The variable has 8-byte alignment because ld64 cannot always pack symbols from +different object files tightly (the word-level alignment assumption is baked in +too deeply).

+
@__llvm_coverage_mapping = internal constant { { i32, i32, i32, i32 }, [32 x i8] }
+{
+  { i32, i32, i32, i32 } ; Coverage map header
+  {
+    i32 0,  ; Always 0. In prior versions, the number of affixed function records
+    i32 32, ; The length of the string that contains the encoded translation unit filenames
+    i32 0,  ; Always 0. In prior versions, the length of the affixed string that contains the encoded coverage mapping data
+    i32 3,  ; Coverage mapping format version
+  },
+ [32 x i8] c"..." ; Encoded data (dissected later)
+}, section "__llvm_covmap", align 8
+
+
+

The current version of the format is version 6.

+

There is one difference between versions 6 and 5:

+
    +
  • The first entry in the filename list is the compilation directory. When the +filename is relative, the compilation directory is combined with the relative +path to get an absolute path. This can reduce size by omitting the duplicate +prefix in filenames.

  • +
+

There is one difference between versions 5 and 4:

+
    +
  • The notion of branch region has been introduced along with a corresponding +region kind. Branch regions encode two counters, one to track how many +times a “true” branch condition is taken, and one to track how many times a +“false” branch condition is taken.

  • +
+

There are two differences between versions 4 and 3:

+
    +
  • Function records are now named symbols, and are marked linkonce_odr. This +allows linkers to merge duplicate function records. Merging of duplicate +dummy records (emitted for functions included-but-not-used in a translation +unit) reduces size bloat in the coverage mapping data. As part of this +change, region mapping information for a function is now included within the +function record, instead of being affixed to the coverage header.

  • +
  • The filename list for a translation unit may optionally be zlib-compressed.

  • +
+

The only difference between versions 3 and 2 is that a special encoding for +column end locations was introduced to indicate gap regions.

+

In version 1, the function record for foo was defined as follows:

+
{ i8*, i32, i32, i64 } { i8* getelementptr inbounds ([3 x i8]* @__profn_foo, i32 0, i32 0), ; Function's name
+  i32 3, ; Function's name length
+  i32 9, ; Function's encoded coverage mapping data string length
+  i64 0  ; Function's structural hash
+}
+
+
+

In version 2, the function record for foo was defined as follows:

+
{ i64, i32, i64 } {
+  i64 0x5cf8c24cdb18bdac, ; Function's name MD5
+  i32 9, ; Function's encoded coverage mapping data string length
+  i64 0  ; Function's structural hash
+}
+
+
+
+

Coverage Mapping Header:

+

The coverage mapping header has the following fields:

+
    +
  • The number of function records affixed to the coverage header. Always 0, but present for backwards compatibility.

  • +
  • The length of the string in the second field of __llvm_coverage_mapping that contains the encoded translation unit filenames.

  • +
  • The length of the string in the third field of __llvm_coverage_mapping that contains any encoded coverage mapping data affixed to the coverage header. Always 0, but present for backwards compatibility.

  • +
  • The format version. The current version is 6 (encoded as a 5).

  • +
+
+
+

Function record:

+

A function record is a structure of the following type:

+
{ i64, i32, i64, i64, [? x i8] }
+
+
+

It contains the function name’s MD5, the length of the encoded mapping data for +that function, the function’s structural hash value, the hash of the filenames +in the function’s translation unit, and the encoded mapping data.

+
+

Dissecting the sample:

+

Here’s an overview of the encoded data that was stored in the +IR for the coverage mapping sample that was shown earlier:

+
    +
  • The IR contains the following string constant that represents the encoded +coverage mapping data for the sample translation unit:

    +
    c"\01\15\1Dx\DA\13\D1\0F-N-*\D6/+\CE\D6/\C9-\D0O\CB\CF\D7K\06\00N+\07]"
    +
    +
    +
  • +
  • The string contains values that are encoded in the LEB128 format, which is +used throughout for storing integers. It also contains a compressed payload.

  • +
  • The first three LEB128-encoded numbers in the sample specify the number of +filenames, the length of the uncompressed filenames, and the length of the +compressed payload (or 0 if compression is disabled). In this sample, there +is 1 filename that is 21 bytes in length (uncompressed), and stored in 29 +bytes (compressed).

  • +
  • The coverage mapping from the first function record is encoded in this string:

    +
    c"\01\00\00\01\01\01\0C\02\02"
    +
    +
    +

    This string consists of the following bytes:

    + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

    0x01

    The number of file ids used by this function. There is only one file id used by the mapping data in this function.

    0x00

    An index into the filenames array which corresponds to the file “/Users/alex/test.c”.

    0x00

    The number of counter expressions used by this function. This function doesn’t use any expressions.

    0x01

    The number of mapping regions that are stored in an array for the function’s file id #0.

    0x01

    The coverage mapping counter for the first region in this function. The value of 1 tells us that it’s a coverage +mapping counter that is a reference to the profile instrumentation counter with an index of 0.

    0x01

    The starting line of the first mapping region in this function.

    0x0C

    The starting column of the first mapping region in this function.

    0x02

    The ending line of the first mapping region in this function.

    0x02

    The ending column of the first mapping region in this function.

    +
  • +
  • The length of the substring that contains the encoded coverage mapping data +for the second function record is also 9. It’s structured like the mapping data +for the first function record.

  • +
  • The two trailing bytes are zeroes and are used to pad the coverage mapping +data to give it 8-byte alignment.

  • +
+
+
+
+
+

Encoding

+

The per-function coverage mapping data is encoded as a stream of bytes, +with a simple structure. The structure consists of encoding +types, such as variable-length unsigned integers, that +are used to encode the File ID Mapping, the Counter Expressions and +the Mapping Regions.

+

The format of the structure follows:

+
+

[file id mapping, counter expressions, mapping regions]

+
+

The translation unit filenames are encoded using the same encoding +types as the per-function coverage mapping data, with the +following structure:

+
+

[numFilenames : LEB128, filename0 : string, filename1 : string, ...]

+
+
+

Types

+

This section describes the basic types that are used by the encoding format +and can appear after : in the [foo : type] description.

+
+

LEB128

+

LEB128 is an unsigned integer value that is encoded using DWARF’s LEB128 +encoding, optimizing for the case where values are small +(1 byte for values less than 128).

+
+
+

Strings

+

[length : LEB128, characters...]

+

String values are encoded with a LEB value for the length +of the string and a sequence of bytes for its characters.

+
+
+
+

File ID Mapping

+

[numIndices : LEB128, filenameIndex0 : LEB128, filenameIndex1 : LEB128, ...]

+

File id mapping in a function’s coverage mapping stream +contains the indices into the translation unit’s filenames array.

+
+
+

Counter

+

[value : LEB128]

+

A coverage mapping counter is stored in a single LEB value. +It is composed of two things — the tag +which is stored in the lowest 2 bits, and the counter data which is stored +in the remaining bits.

+
+

Tag:

+

The counter’s tag encodes the counter’s kind +and, if the counter is an expression, the expression’s kind. +The possible tag values are:

+
    +
  • 0 - The counter is zero.

  • +
  • 1 - The counter is a reference to the profile instrumentation counter.

  • +
  • 2 - The counter is a subtraction expression.

  • +
  • 3 - The counter is an addition expression.

  • +
+
+
+

Data:

+

The counter’s data is interpreted in the following manner:

+
    +
  • When the counter is a reference to the profile instrumentation counter, +then the counter’s data is the id of the profile counter.

  • +
  • When the counter is an expression, then the counter’s data +is the index into the array of counter expressions.

  • +
+
+
+
+

Counter Expressions

+

[numExpressions : LEB128, expr0LHS : LEB128, expr0RHS : LEB128, expr1LHS : LEB128, expr1RHS : LEB128, ...]

+

Counter expressions consist of two counters as they +represent binary arithmetic operations. +The expression’s kind is determined from the tag of the +counter that references this expression.

+
+
+

Mapping Regions

+

[numRegionArrays : LEB128, regionsForFile0, regionsForFile1, ...]

+

The mapping regions are stored in an array of sub-arrays where every +region in a particular sub-array has the same file id.

+

The file id for a sub-array of regions is the index of that +sub-array in the main array; e.g., the first sub-array has the file id +of 0.

+
+

Sub-Array of Regions

+

[numRegions : LEB128, region0, region1, ...]

+

The mapping regions for a specific file id are stored in an array that is +sorted in an ascending order by the region’s starting location.

+
+
+

Mapping Region

+

[header, source range]

+

The mapping region record contains two sub-records — +the header, which stores the counter and/or the region’s kind, +and the source range that contains the starting and ending +location of this region.

+
+ +
+

Source Range

+

[deltaLineStart : LEB128, columnStart : LEB128, numLines : LEB128, columnEnd : LEB128]

+

The source range record contains the following fields:

+
    +
  • deltaLineStart: The difference between the starting line of the +current mapping region and the starting line of the previous mapping region.

    +

    If the current mapping region is the first region in the current +sub-array, then it stores the starting line of that region.

    +
  • +
  • columnStart: The starting column of the mapping region.

  • +
  • numLines: The difference between the ending line and the starting line +of the current mapping region.

  • +
  • columnEnd: The ending column of the mapping region. If the high bit is set, +the current mapping region is a gap area. A count for a gap area is only used +as the line execution count if there are no other regions on a line.

  • +
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/DebuggingJITedCode.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/DebuggingJITedCode.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/DebuggingJITedCode.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/DebuggingJITedCode.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,300 @@ + + + + + + + + + Debugging JIT-ed Code — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Debugging JIT-ed Code

+
+

Background

+

Without special runtime support, debugging dynamically generated code can be +quite painful. Debuggers generally read debug information from object files on +disk, but for JITed code there is no such file to look for.

+

In order to hand over the necessary debug info, GDB established an +interface +for registering JITed code with debuggers. LLDB implements it in the +JITLoaderGDB plugin. On the JIT side, LLVM MCJIT does implement the interface +for ELF object files.

+

At a high level, whenever MCJIT generates new machine code, it does so in an +in-memory object file that contains the debug information in DWARF format. +MCJIT then adds this in-memory object file to a global list of dynamically +generated object files and calls a special function +__jit_debug_register_code that the debugger knows about. When the debugger +attaches to a process, it puts a breakpoint in this function and associates a +special handler with it. Once MCJIT calls the registration function, the +debugger catches the breakpoint signal, loads the new object file from the +inferior’s memory and resumes execution. This way it can obtain debug +information for pure in-memory object files.

+
+
+

GDB Version

+

In order to debug code JIT-ed by LLVM, you need GDB 7.0 or newer, which is +available on most modern distributions of Linux. The version of GDB that +Apple ships with Xcode has been frozen at 6.3 for a while.

+
+
+

LLDB Version

+

Due to a regression in release 6.0, LLDB didn’t support JITed code debugging for +a while. The bug was fixed in mainline recently, so that debugging JITed ELF +objects should be possible again from the upcoming release 12.0 on. On macOS the +feature must be enabled explicitly using the plugin.jit-loader.gdb.enable +setting.

+
+
+

Debugging MCJIT-ed code

+

The emerging MCJIT component of LLVM allows full debugging of JIT-ed code with +GDB. This is due to MCJIT’s ability to use the MC emitter to provide full +DWARF debugging information to GDB.

+

Note that lli has to be passed the --jit-kind=mcjit flag to JIT the code +with MCJIT instead of the newer ORC JIT.

+
+

Example

+

Consider the following C code (with line numbers added to make the example +easier to follow):

+
1   int compute_factorial(int n)
+2   {
+3       if (n <= 1)
+4           return 1;
+5
+6       int f = n;
+7       while (--n > 1)
+8           f *= n;
+9       return f;
+10  }
+11
+12
+13  int main(int argc, char** argv)
+14  {
+15      if (argc < 2)
+16          return -1;
+17      char firstletter = argv[1][0];
+18      int result = compute_factorial(firstletter - '0');
+19
+20      // Returned result is clipped at 255...
+21      return result;
+22  }
+
+
+

Here is a sample command line session that shows how to build and run this +code via lli inside LLDB:

+
> export BINPATH=/workspaces/llvm-project/build/bin
+> $BINPATH/clang -g -S -emit-llvm --target=x86_64-unknown-unknown-elf showdebug.c
+> lldb $BINPATH/lli
+(lldb) target create "/workspaces/llvm-project/build/bin/lli"
+Current executable set to '/workspaces/llvm-project/build/bin/lli' (x86_64).
+(lldb) settings set plugin.jit-loader.gdb.enable on
+(lldb) b compute_factorial
+Breakpoint 1: no locations (pending).
+WARNING:  Unable to resolve breakpoint to any actual locations.
+(lldb) run --jit-kind=mcjit showdebug.ll 5
+1 location added to breakpoint 1
+Process 21340 stopped
+* thread #1, name = 'lli', stop reason = breakpoint 1.1
+   frame #0: 0x00007ffff7fd0007 JIT(0x45c2cb0)`compute_factorial(n=5) at showdebug.c:3:11
+   1    int compute_factorial(int n)
+   2    {
+-> 3        if (n <= 1)
+   4            return 1;
+   5        int f = n;
+   6        while (--n > 1)
+   7            f *= n;
+(lldb) p n
+(int) $0 = 5
+(lldb) b showdebug.c:9
+Breakpoint 2: where = JIT(0x45c2cb0)`compute_factorial + 60 at showdebug.c:9:1, address = 0x00007ffff7fd003c
+(lldb) c
+Process 21340 resuming
+Process 21340 stopped
+* thread #1, name = 'lli', stop reason = breakpoint 2.1
+   frame #0: 0x00007ffff7fd003c JIT(0x45c2cb0)`compute_factorial(n=1) at showdebug.c:9:1
+   6        while (--n > 1)
+   7            f *= n;
+   8        return f;
+-> 9    }
+   10
+   11   int main(int argc, char** argv)
+   12   {
+(lldb) p f
+(int) $1 = 120
+(lldb) bt
+* thread #1, name = 'lli', stop reason = breakpoint 2.1
+* frame #0: 0x00007ffff7fd003c JIT(0x45c2cb0)`compute_factorial(n=1) at showdebug.c:9:1
+   frame #1: 0x00007ffff7fd0095 JIT(0x45c2cb0)`main(argc=2, argv=0x00000000046122f0) at showdebug.c:16:18
+   frame #2: 0x0000000002a8306e lli`llvm::MCJIT::runFunction(this=0x000000000458ed10, F=0x0000000004589ff8, ArgValues=ArrayRef<llvm::GenericValue> @ 0x00007fffffffc798) at MCJIT.cpp:554:31
+   frame #3: 0x00000000029bdb45 lli`llvm::ExecutionEngine::runFunctionAsMain(this=0x000000000458ed10, Fn=0x0000000004589ff8, argv=size=0, envp=0x00007fffffffe140) at ExecutionEngine.cpp:467:10
+   frame #4: 0x0000000001f2fc2f lli`main(argc=4, argv=0x00007fffffffe118, envp=0x00007fffffffe140) at lli.cpp:643:18
+   frame #5: 0x00007ffff788c09b libc.so.6`__libc_start_main(main=(lli`main at lli.cpp:387), argc=4, argv=0x00007fffffffe118, init=<unavailable>, fini=<unavailable>, rtld_fini=<unavailable>, stack_end=0x00007fffffffe108) at libc-start.c:308:16
+   frame #6: 0x0000000001f2dc7a lli`_start + 42
+(lldb) finish
+Process 21340 stopped
+* thread #1, name = 'lli', stop reason = step out
+Return value: (int) $2 = 120
+
+   frame #0: 0x00007ffff7fd0095 JIT(0x45c2cb0)`main(argc=2, argv=0x00000000046122f0) at showdebug.c:16:9
+   13       if (argc < 2)
+   14           return -1;
+   15       char firstletter = argv[1][0];
+-> 16       int result = compute_factorial(firstletter - '0');
+   17
+   18       // Returned result is clipped at 255...
+   19       return result;
+(lldb) p result
+(int) $3 = 73670648
+(lldb) n
+Process 21340 stopped
+* thread #1, name = 'lli', stop reason = step over
+   frame #0: 0x00007ffff7fd0098 JIT(0x45c2cb0)`main(argc=2, argv=0x00000000046122f0) at showdebug.c:19:12
+   16       int result = compute_factorial(firstletter - '0');
+   17
+   18       // Returned result is clipped at 255...
+-> 19       return result;
+   20   }
+(lldb) p result
+(int) $4 = 120
+(lldb) expr result=42
+(int) $5 = 42
+(lldb) p result
+(int) $6 = 42
+(lldb) c
+Process 21340 resuming
+Process 21340 exited with status = 42 (0x0000002a)
+(lldb) exit
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/DependenceGraphs/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/DependenceGraphs/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/DependenceGraphs/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/DependenceGraphs/index.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,294 @@ + + + + + + + + + Dependence Graphs in LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Dependence Graphs in LLVM

+ +
+

Introduction

+

Dependence graphs are useful tools in compilers for analyzing relationships +between various program elements to help guide optimizations. The ideas +behind these graphs are described in papers 1 and 2.

+

The implementation of these ideas in LLVM may be slightly different than +what is mentioned in the papers. These differences are documented in +the implementation details.

+
+
+

Data Dependence Graph

+

In its simplest form the Data Dependence Graph (or DDG) represents data +dependencies between individual instructions. Each node in such a graph +represents a single instruction and is referred to as an “atomic” node. +It is also possible to combine some atomic nodes that have a simple +def-use dependency between them into larger nodes that contain multiple +instructions.

+

As described in 1 the DDG uses graph abstraction to group nodes +that are part of a strongly connected component of the graph +into special nodes called pi-blocks. pi-blocks represent cycles of data +dependency that prevent reordering transformations. Since any strongly +connected component of the graph is a maximal subgraph of all the nodes +that form a cycle, pi-blocks are at most one level deep. In other words, +no pi-blocks are nested inside another pi-block, resulting in a +hierarchical representation that is at most one level deep.

+

For example, consider the following:

+
for (int i = 1; i < n; i++) {
+  b[i] = c[i] + b[i-1];
+}
+
+
+

This code contains a statement that has a loop carried dependence on +itself creating a cycle in the DDG. The figure below illustrates +how the cycle of dependency is carried through multiple def-use relations +and a memory access dependency.

+../_images/cycle.png +

The DDG corresponding to this example would have a pi-block that contains +all the nodes participating in the cycle, as shown below:

+../_images/cycle_pi.png +
+
+

Program Dependence Graph

+

The Program Dependence Graph (or PDG) has a similar structure to the +DDG, but it is capable of representing both data dependencies and +control-flow dependencies between program elements such as +instructions, groups of instructions, basic blocks or groups of +basic blocks.

+
+
+

High-Level Design

+

The DDG and the PDG are both directed graphs and they extend the +DirectedGraph class. Each implementation extends its corresponding +node and edge types resulting in the inheritance relationship depicted +in the UML diagram below:

+../_images/uml_nodes_and_edges.png +
+

Graph Construction

+

The graph build algorithm considers dependencies between elements of +a given set of instructions or basic blocks. Any dependencies coming +into or going out of instructions that do not belong to that range +are ignored. The steps in the build algorithm for the DDG are very +similar to the steps in the build algorithm for the PDG. As such, +one of the design goals is to reuse the build algorithm code to +allow creation of both DDG and PDG representations while allowing +the two implementations to define their own distinct and independent +node and edge types. This is achieved by using the well-known builder +design pattern to isolate the construction of the dependence graph +from its concrete representation.

+

The following UML diagram depicts the overall structure of the design +pattern as it applies to the dependence graph implementation.

+../_images/uml_builder_pattern.png +

Notice that the common code for building the two types of graphs is +provided in the DependenceGraphBuilder class, while the DDGBuilder +and PDGBuilder control some aspects of how the graph is constructed +by way of overriding virtual methods defined in DependenceGraphBuilder.

+

Note also that the steps and the names used in this diagram are for +illustrative purposes and may be different from those in the actual +implementation.

+
+
+

Design Trade-offs

+
+

Advantages:

+
+
    +
  • Builder allows graph construction code to be reused for DDG and PDG.

  • +
  • Builder allows us to create DDG and PDG as separate graphs.

  • +
  • DDG nodes and edges are completely disjoint from PDG nodes and edges allowing them to change easily and independently.

  • +
+
+
+
+

Disadvantages:

+
+
    +
  • Builder may be perceived as over-engineering at first.

  • +
  • There are some similarities between DDG nodes and edges compared to PDG nodes and edges, but there is little reuse of the class definitions.

    +
      +
    • This is tolerable given that the node and edge types are fairly simple and there is little code reuse opportunity anyway.

    • +
    +
  • +
+
+
+
+
+
+

Implementation Details

+

The current implementation of DDG differs slightly from the dependence +graph described in 1 in the following ways:

+
+
    +
  1. The graph nodes in the paper represent three main program components, namely assignment statements, for loop headers and while loop headers. In this implementation, DDG nodes naturally represent LLVM IR instructions. An assignment statement in this implementation typically involves a node representing the store instruction along with a number of individual nodes computing the right-hand-side of the assignment that connect to the store node via a def-use edge. The loop header instructions are not represented as special nodes in this implementation because they have limited uses and can be easily identified, for example, through LoopAnalysis.

  2. +
  3. The paper describes five types of dependency edges between nodes namely loop dependency, flow-, anti-, output-, and input- dependencies. In this implementation memory edges represent the flow-, anti-, output-, and input- dependencies. However, loop dependencies are not made explicit, because they mainly represent association between a loop structure and the program elements inside the loop and this association is fairly obvious in LLVM IR itself.

  4. +
  5. The paper describes two types of pi-blocks; recurrences whose bodies are SCCs and IN nodes whose bodies are not part of any SCC. In this implementation, pi-blocks are only created for recurrences. IN nodes remain as simple DDG nodes in the graph.

  6. +
+
+
+

References

+
+
1(1,2,3)
+

“D. J. Kuck, R. H. Kuhn, D. A. Padua, B. Leasure, and M. Wolfe (1981). DEPENDENCE GRAPHS AND COMPILER OPTIMIZATIONS.”

+
+
2
+

“J. FERRANTE (IBM), K. J. OTTENSTEIN (Michigan Technological University) and JOE D. WARREN (Rice University), 1987. The Program Dependence Graph and Its Use in Optimization.”

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/DeveloperPolicy.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/DeveloperPolicy.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/DeveloperPolicy.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/DeveloperPolicy.html 2021-09-19 16:16:34.000000000 +0000 @@ -0,0 +1,1173 @@ + + + + + + + + + LLVM Developer Policy — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Developer Policy

+ +
+

Introduction

+

This document contains the LLVM Developer Policy which defines the project’s +policy towards developers and their contributions. The intent of this policy is +to eliminate miscommunication, rework, and confusion that might arise from the +distributed nature of LLVM’s development. By stating the policy in clear terms, +we hope each developer can know ahead of time what to expect when making LLVM +contributions. This policy covers all llvm.org subprojects, including Clang, +LLDB, libc++, etc.

+

This policy is also designed to accomplish the following objectives:

+
    +
  1. Attract both users and developers to the LLVM project.

  2. +
  3. Make life as simple and easy for contributors as possible.

  4. +
  5. Keep the top of tree as stable as possible.

  6. +
  7. Establish awareness of the project’s copyright, license, and patent +policies with contributors to the project.

  8. +
+

This policy is aimed at frequent contributors to LLVM. People interested in +contributing one-off patches can do so in an informal way by sending them to the +llvm-commits mailing list and engaging another +developer to see it through the process.

+
+
+

Developer Policies

+

This section contains policies that pertain to frequent LLVM developers. We +always welcome one-off patches from people who do not routinely contribute to +LLVM, but we expect more from frequent contributors to keep the system as +efficient as possible for everyone. Frequent LLVM contributors are expected to +meet the following requirements in order for LLVM to maintain a high standard of +quality.

+
+

Stay Informed

+

Developers should stay informed by reading at least the “dev” mailing list for +the projects you are interested in, such as llvm-dev for LLVM, cfe-dev for Clang, or lldb-dev for LLDB. If you are +doing anything more than just casual work on LLVM, it is suggested that you also +subscribe to the “commits” mailing list for the subproject you’re interested in, +such as llvm-commits, cfe-commits, or lldb-commits. Reading the +“commits” list and paying attention to changes being made by others is a good +way to see what other people are interested in and to watch the flow of the +project as a whole.

+

We recommend that active developers register an email account with LLVM +Bugzilla and preferably subscribe to the llvm-bugs email list to keep track +of bugs and enhancements occurring in LLVM. We really appreciate people who are +proactive at catching incoming bugs in their components and dealing with them +promptly.

+

Please be aware that all public LLVM mailing lists are public and archived, and +that notices of confidentiality or non-disclosure cannot be respected.

+
+
+

Making and Submitting a Patch

+

When making a patch for review, the goal is to make it as easy for the reviewer +to read it as possible. As such, we recommend that you:

+
    +
  1. Make your patch against git main, not a branch, and not an old version +of LLVM. This makes it easy to apply the patch. For information on how to +clone from git, please see the Getting Started Guide.

  2. +
  3. Similarly, patches should be submitted soon after they are generated. Old +patches may not apply correctly if the underlying code changes between the +time the patch was created and the time it is applied.

  4. +
  5. Patches should be made with git format-patch, or similar (see special +commands for Requesting Phabricator review via the web interface ). If you use a +different tool, make sure it uses the diff -u format and that it +doesn’t contain clutter which makes it hard to read.

  6. +
+

Once your patch is ready, submit it by emailing it to the appropriate project’s +commit mailing list (or commit it directly if applicable). Alternatively, some +patches get sent to the project’s development list or component of the LLVM bug +tracker, but the commit list is the primary place for reviews and should +generally be preferred.

+

When sending a patch to a mailing list, it is a good idea to send it as an +attachment to the message, not embedded into the text of the message. This +ensures that your mailer will not mangle the patch when it sends it (e.g. by +making whitespace changes or by wrapping lines).

+

For Thunderbird users: Before submitting a patch, please open Preferences > +Advanced > General > Config Editor, find the key +mail.content_disposition_type, and set its value to 1. Without this +setting, Thunderbird sends your attachment using Content-Disposition: inline +rather than Content-Disposition: attachment. Apple Mail gamely displays such +a file inline, making it difficult to work with for reviewers using that +program.

+

When submitting patches, please do not add confidentiality or non-disclosure +notices to the patches themselves. These notices conflict with the LLVM +licensing terms and may result in your contribution being excluded.

+
+
+

Code Reviews

+

LLVM has a code-review policy. Code review is one way to increase the quality of +software. Please see LLVM Code-Review Policy and Practices for more information on LLVM’s code-review +process.

+
+
+

Code Owners

+

The LLVM Project relies on two features of its process to maintain rapid +development in addition to the high quality of its source base: the combination +of code review plus post-commit review for trusted maintainers. Having both is +a great way for the project to take advantage of the fact that most people do +the right thing most of the time, and only commit patches without pre-commit +review when they are confident they are right.

+

The trick to this is that the project has to guarantee that all patches that are +committed are reviewed after they go in: you don’t want everyone to assume +someone else will review it, allowing the patch to go unreviewed. To solve this +problem, we have a notion of an ‘owner’ for a piece of the code. The sole +responsibility of a code owner is to ensure that a commit to their area of the +code is appropriately reviewed, either by themself or by someone else. The list +of current code owners can be found in the file CODE_OWNERS.TXT in the +root of the LLVM source tree.

+

Note that code owners are completely different from reviewers: anyone can +review a piece of code, and we welcome code review from anyone who is +interested. Code owners are the “last line of defense” to guarantee that all +patches that are committed are actually reviewed.

+

Being a code owner is a somewhat unglamorous position, but it is incredibly +important for the ongoing success of the project. Because people get busy, +interests change, and unexpected things happen, code ownership is purely opt-in, +and anyone can choose to resign their “title” at any time. For now, we do not +have an official policy on how one gets elected to be a code owner.

+
+
+

Test Cases

+

Developers are required to create test cases for any bugs fixed and any new +features added. Some tips for getting your testcase approved:

+
    +
  • All feature and regression test cases are added to the llvm/test +directory. The appropriate sub-directory should be selected (see the +Testing Guide for details).

  • +
  • Test cases should be written in LLVM assembly language.

  • +
  • Test cases, especially for regressions, should be reduced as much as possible, +by bugpoint or manually. It is unacceptable to place an +entire failing program into llvm/test as this creates a time-to-test +burden on all developers. Please keep them short.

  • +
+

Note that llvm/test and clang/test are designed for regression and small feature +tests only. More extensive test cases (e.g., entire applications, benchmarks, +etc) should be added to the llvm-test test suite. The llvm-test suite is +for coverage (correctness, performance, etc) testing, not feature or regression +testing.

+
+
+

Quality

+

The minimum quality standards that any change must satisfy before being +committed to the main development branch are:

+
    +
  1. Code must adhere to the LLVM Coding Standards.

  2. +
  3. Code must compile cleanly (no errors, no warnings) on at least one platform.

  4. +
  5. Bug fixes and new features should include a testcase so we know if the +fix/feature ever regresses in the future.

  6. +
  7. Code must pass the llvm/test test suite.

  8. +
  9. The code must not cause regressions on a reasonable subset of llvm-test, +where “reasonable” depends on the contributor’s judgement and the scope of +the change (more invasive changes require more testing). A reasonable subset +might be something like “llvm-test/MultiSource/Benchmarks”.

  10. +
+

Additionally, the committer is responsible for addressing any problems found in +the future that the change is responsible for. For example:

+
    +
  • The code should compile cleanly on all supported platforms.

  • +
  • The changes should not cause any correctness regressions in the llvm-test +suite and must not cause any major performance regressions.

  • +
  • The change set should not cause performance or correctness regressions for the +LLVM tools.

  • +
  • The changes should not cause performance or correctness regressions in code +compiled by LLVM on all applicable targets.

  • +
  • You are expected to address any Bugzilla bugs that +result from your change.

  • +
+

We prefer for this to be handled before submission but understand that it isn’t +possible to test all of this for every submission. Our build bots and nightly +testing infrastructure normally find these problems. A good rule of thumb is +to check the nightly testers for regressions the day after your change. Build +bots will directly email you if a group of commits that included yours caused a +failure. You are expected to check the build bot messages to see if they are +your fault and, if so, fix the breakage.

+

Commits that violate these quality standards (e.g. are very broken) may be +reverted. This is necessary when the change blocks other developers from making +progress. The developer is welcome to re-commit the change after the problem has +been fixed.

+
+
+

Commit messages

+

Although we don’t enforce the format of commit messages, we prefer that +you follow these guidelines to help review, search in logs, email formatting +and so on. These guidelines are very similar to rules used by other open source +projects.

+

Most importantly, the contents of the message should be carefully written to +convey the rationale of the change (without delving too much into detail). It +also should avoid being vague or overly specific. For example, “bits were not +set right” will leave the reviewer wondering about which bits, and why they +weren’t right, while “Correctly set overflow bits in TargetInfo” conveys almost +all there is to the change.

+

Below are some guidelines about the format of the message itself:

+
    +
  • Separate the commit message into title and body separated by a blank line.

  • +
  • If you’re not the original author, ensure the ‘Author’ property of the commit is +set to the original author and the ‘Committer’ property is set to yourself. +You can use a command similar to +git commit --amend --author="John Doe <jdoe@llvm.org>" to correct the +author property if it is incorrect. See Attribution of Changes for more +information including the method we used for attribution before the project +migrated to git.

  • +
  • The title should be concise. Because all commits are emailed to the list with +the first line as the subject, long titles are frowned upon. Short titles +also look better in git log.

  • +
  • When the changes are restricted to a specific part of the code (e.g. a +back-end or optimization pass), it is customary to add a tag to the +beginning of the line in square brackets. For example, “[SCEV] …” +or “[OpenMP] …”. This helps email filters and searches for post-commit +reviews.

  • +
  • The body, if it exists, should be separated from the title by an empty line.

  • +
  • The body should be concise, but explanatory, including a complete +reasoning. Unless it is required to understand the change, examples, +code snippets and gory details should be left to bug comments, web +review or the mailing list.

  • +
  • If the patch fixes a bug in bugzilla, please include the PR# in the message.

  • +
  • Text formatting and spelling should follow the same rules as documentation +and in-code comments, ex. capitalization, full stop, etc.

  • +
  • If the commit is a bug fix on top of another recently committed patch, or a +revert or reapply of a patch, include the git commit hash of the prior +related commit. This could be as simple as “Revert commit NNNN because it +caused PR#”.

  • +
  • If the patch has been reviewed, add a link to its review page, as shown +here.

  • +
+

For minor violations of these recommendations, the community normally favors +reminding the contributor of this policy over reverting. Minor corrections and +omissions can be handled by sending a reply to the commits mailing list.

+
+
+

Patch reversion policy

+

As a community, we strongly value having the tip of tree in a good state while +allowing rapid iterative development. As such, we tend to make much heavier +use of reverts to keep the tree healthy than some other open source projects, +and our norms are a bit different.

+

How should you respond if someone reverted your change?

+
    +
  • Remember, it is normal and healthy to have patches reverted. Having a patch +reverted does not necessarily mean you did anything wrong.

  • +
  • We encourage explicitly thanking the person who reverted the patch for doing +the task on your behalf.

  • +
  • If you need more information to address the problem, please follow up in the +original commit thread with the reverting patch author.

  • +
+

When should you revert your own change?

+
    +
  • Any time you learn of a serious problem with a change, you should revert it. +We strongly encourage “revert to green” as opposed to “fixing forward”. We +encourage reverting first, investigating offline, and then reapplying the +fixed patch - possibly after another round of review if warranted.

  • +
  • If you break a buildbot in a way which can’t be quickly fixed, please revert.

  • +
  • If a test case that demonstrates a problem is reported in the commit thread, +please revert and investigate offline.

  • +
  • If you receive substantial post-commit review +feedback, please revert and address said feedback before recommitting. +(Possibly after another round of review.)

  • +
  • If you are asked to revert by another contributor, please revert and discuss +the merits of the request offline (unless doing so would further destabilize +tip of tree).

  • +
+

When should you revert someone else’s change?

+
    +
  • In general, if the author themselves would revert the change per these +guidelines, we encourage other contributors to do so as a courtesy to the +author. This is one of the major cases where our norms differ from others; +we generally consider reverting a normal part of development. We don’t +expect contributors to be always available, and the assurance that a +problematic patch will be reverted and we can return to it at our next +opportunity enables this.

  • +
+

What are the expectations around a revert?

+
    +
  • Use your best judgment. If you’re uncertain, please start an email on +the commit thread asking for assistance. We aren’t trying to enumerate +every case, but rather give a set of guidelines.

  • +
  • You should be sure that reverting the change improves the stability of tip +of tree. Sometimes reverting one change in a series can worsen things +instead of improving them. We expect reasonable judgment to ensure that +the proper patch or set of patches is being reverted.

  • +
  • The commit message for the reverting commit should explain why the patch +is being reverted.

  • +
  • It is customary to respond to the original commit email mentioning the +revert. This serves as both a notice to the original author that their +patch was reverted, and helps others following llvm-commits track context.

  • +
  • Ideally, you should have a publicly reproducible test case ready to share. +Where possible, we encourage sharing of test cases in commit threads, or +in PRs. We encourage the reverter to minimize the test case and to prune +dependencies where practical. This even applies when reverting your own +patch; documenting the reasons for others who might be following along +is critical.

  • +
  • It is not considered reasonable to revert without at least the promise to +provide a means for the patch author to debug the root issue. If a situation +arises where a public reproducer cannot be shared for some reason (e.g. +requires hardware the patch author doesn’t have access to, sharp regression in +compile time of internal workload, etc.), the reverter is expected to be +proactive about working with the patch author to debug and test candidate +patches.

  • +
  • Reverts should be reasonably timely. A change submitted two hours ago +can be reverted without prior discussion. A change submitted two years ago +should not be. Where exactly the transition point is is hard to say, but +it’s probably in the handful of days in tree territory. If you are unsure, +we encourage you to reply to the commit thread, give the author a bit to +respond, and then proceed with the revert if the author doesn’t seem to be +actively responding.

  • +
  • When re-applying a reverted patch, the commit message should be updated to +indicate the problem that was addressed and how it was addressed.

  • +
+
+
+

Obtaining Commit Access

+

We grant commit access to contributors with a track record of submitting high +quality patches. If you would like commit access, please send an email to +Chris with your GitHub username. This is true +for former contributors with SVN access as well as new contributors.

+

Prior to obtaining commit access, it is common practice to request that +someone with commit access commits on your behalf. When doing so, please +provide the name and email address you would like to use in the Author +property of the commit.

+

Your first commit to a repository may require the autogenerated email to be +approved by a moderator of the mailing list. +This is normal and will be done when the mailing list owner has time.

+

If you have recently been granted commit access, these policies apply:

+
    +
  1. You are granted commit-after-approval to all parts of LLVM. For +information on how to get approval for a patch, please see LLVM Code-Review Policy and Practices. +When approved, you may commit it yourself.

  2. +
  3. You are allowed to commit patches without approval which you think are +obvious. This is clearly a subjective decision — we simply expect you to +use good judgement. Examples include: fixing build breakage, reverting +obviously broken patches, documentation/comment changes, any other minor +changes. Avoid committing formatting- or whitespace-only changes outside of +code you plan to make subsequent changes to. Also, try to separate +formatting or whitespace changes from functional changes, either by +correcting the format first (ideally) or afterward. Such changes should be +highly localized and the commit message should clearly state that the commit +is not intended to change functionality, usually by stating it is +NFC.

  4. +
  5. You are allowed to commit patches without approval to those portions of LLVM +that you have contributed or maintain (i.e., have been assigned +responsibility for), with the proviso that such commits must not break the +build. This is a “trust but verify” policy, and commits of this nature are +reviewed after they are committed.

  6. +
  7. Multiple violations of these policies or a single egregious violation may +cause commit access to be revoked.

  8. +
+

In any case, your changes are still subject to code review (either before or +after they are committed, depending on the nature of the change). You are +encouraged to review other people’s patches as well, but you aren’t required +to do so.

+
+
+

Making a Major Change

+

When a developer begins a major new project with the aim of contributing it back +to LLVM, they should inform the community with an email to the llvm-dev email list, to the extent +possible. The reason for this is to:

+
    +
  1. keep the community informed about future changes to LLVM,

  2. +
  3. avoid duplication of effort by preventing multiple parties working on the +same thing and not knowing about it, and

  4. +
  5. ensure that any technical issues around the proposed work are discussed and +resolved before any significant work is done.

  6. +
+

The design of LLVM is carefully controlled to ensure that all the pieces fit +together well and are as consistent as possible. If you plan to make a major +change to the way LLVM works or want to add a major new extension, it is a good +idea to get consensus with the development community before you start working on +it.

+

Once the design of the new feature is finalized, the work itself should be done +as a series of incremental changes, not as a long-term development branch.

+
+
+

Incremental Development

+

In the LLVM project, we do all significant changes as a series of incremental +patches. We have a strong dislike for huge changes or long-term development +branches. Long-term development branches have a number of drawbacks:

+
    +
  1. Branches must have mainline merged into them periodically. If the branch +development and mainline development occur in the same pieces of code, +resolving merge conflicts can take a lot of time.

  2. +
  3. Other people in the community tend to ignore work on branches.

  4. +
  5. Huge changes (produced when a branch is merged back onto mainline) are +extremely difficult to code review.

  6. +
  7. Branches are not routinely tested by our nightly tester infrastructure.

  8. +
  9. Changes developed as monolithic large changes often don’t work until the +entire set of changes is done. Breaking it down into a set of smaller +changes increases the odds that any of the work will be committed to the main +repository.

  10. +
+

To address these problems, LLVM uses an incremental development style and we +require contributors to follow this practice when making a large/invasive +change. Some tips:

+
    +
  • Large/invasive changes usually have a number of secondary changes that are +required before the big change can be made (e.g. API cleanup, etc). These +sorts of changes can often be done before the major change is done, +independently of that work.

  • +
  • The remaining inter-related work should be decomposed into unrelated sets of +changes if possible. Once this is done, define the first increment and get +consensus on what the end goal of the change is.

  • +
  • Each change in the set can be stand alone (e.g. to fix a bug), or part of a +planned series of changes that works towards the development goal.

  • +
  • Each change should be kept as small as possible. This simplifies your work +(into a logical progression), simplifies code review and reduces the chance +that you will get negative feedback on the change. Small increments also +facilitate the maintenance of a high quality code base.

  • +
  • Often, an independent precursor to a big change is to add a new API and slowly +migrate clients to use the new API. Each change to use the new API is often +“obvious” and can be committed without review. Once the new API is in place +and used, it is much easier to replace the underlying implementation of the +API. This implementation change is logically separate from the API +change.

  • +
+

If you are interested in making a large change, and this scares you, please make +sure to first discuss the change/gather consensus then ask about the best way +to go about making the change.

+
+
+

Attribution of Changes

+

When contributors submit a patch to an LLVM project, other developers with +commit access may commit it for the author once appropriate (based on the +progression of code review, etc.). When doing so, it is important to retain +correct attribution of contributions to their contributors. However, we do not +want the source code to be littered with random attributions “this code written +by J. Random Hacker” (this is noisy and distracting). In practice, the revision +control system keeps a perfect history of who changed what, and the CREDITS.txt +file describes higher-level contributions. If you commit a patch for someone +else, please attribute the change in the simple manner outlined +in the commit messages section. Overall, please do not add contributor names +to the source code.

+

Also, don’t commit patches authored by others unless they have submitted the +patch to the project or you have been authorized to submit them on their behalf +(you work together and your company authorized you to contribute the patches, +etc.). The author should first submit them to the relevant project’s commit +list, development list, or LLVM bug tracker component. If someone sends you +a patch privately, encourage them to submit it to the appropriate list first.

+

Our previous version control system (subversion) did not distinguish between the +author and the committer like git does. As such, older commits used a different +attribution mechanism. The previous method was to include “Patch by John Doe.” +in a separate line of the commit message and there are automated processes that +rely on this format.

+
+
+

IR Backwards Compatibility

+

When the IR format has to be changed, keep in mind that we try to maintain some +backwards compatibility. The rules are intended as a balance between convenience +for llvm users and not imposing a big burden on llvm developers:

+
    +
  • The textual format is not backwards compatible. We don’t change it too often, +but there are no specific promises.

  • +
  • Additions and changes to the IR should be reflected in +test/Bitcode/compatibility.ll.

  • +
  • The current LLVM version supports loading any bitcode since version 3.0.

  • +
  • After each X.Y release, compatibility.ll must be copied to +compatibility-X.Y.ll. The corresponding bitcode file should be assembled +using the X.Y build and committed as compatibility-X.Y.ll.bc.

  • +
  • Newer releases can ignore features from older releases, but they cannot +miscompile them. For example, if nsw is ever replaced with something else, +dropping it would be a valid way to upgrade the IR.

  • +
  • Debug metadata is special in that it is currently dropped during upgrades.

  • +
  • Non-debug metadata is defined to be safe to drop, so a valid way to upgrade +it is to drop it. That is not very user friendly and a bit more effort is +expected, but no promises are made.

  • +
+
+
+

C API Changes

+
    +
  • Stability Guarantees: The C API is, in general, a “best effort” for stability. +This means that we make every attempt to keep the C API stable, but that +stability will be limited by the abstractness of the interface and the +stability of the C++ API that it wraps. In practice, this means that things +like “create debug info” or “create this type of instruction” are likely to be +less stable than “take this IR file and JIT it for my current machine”.

  • +
  • Release stability: We won’t break the C API on the release branch with patches +that go on that branch, with the exception that we will fix an unintentional +C API break that will keep the release consistent with both the previous and +next release.

  • +
  • Testing: Patches to the C API are expected to come with tests just like any +other patch.

  • +
  • Including new things into the API: If an LLVM subcomponent has a C API already +included, then expanding that C API is acceptable. Adding C API for +subcomponents that don’t currently have one needs to be discussed on the +mailing list for design and maintainability feedback prior to implementation.

  • +
  • Documentation: Any changes to the C API are required to be documented in the +release notes so that it’s clear to external users who do not follow the +project how the C API is changing and evolving.

  • +
+
+
+

Updating Toolchain Requirements

+

We intend to require newer toolchains as time goes by. This means LLVM’s +codebase can use newer versions of C++ as they get standardized. Requiring newer +toolchains to build LLVM can be painful for those building LLVM; therefore, it +will only be done through the following process:

+
+
    +
  • It is a general goal to support LLVM and GCC versions from the last 3 years +at a minimum. This time-based guideline is not strict: we may support much +older compilers, or decide to support fewer versions.

  • +
  • An RFC is sent to the llvm-dev mailing list

    +
      +
    • Detail upsides of the version increase (e.g. which newer C++ language or +library features LLVM should use; avoid miscompiles in particular compiler +versions, etc).

    • +
    • Detail downsides on important platforms (e.g. Ubuntu LTS status).

    • +
    +
  • +
  • Once the RFC reaches consensus, update the CMake toolchain version checks as +well as the getting started guide. This provides a +softer transition path for developers compiling LLVM, because the +error can be turned into a warning using a CMake flag. This is an important +step: LLVM still doesn’t have code which requires the new toolchains, but it +soon will. If you compile LLVM but don’t read the mailing list, we should +tell you!

  • +
  • Ensure that at least one LLVM release has had this soft-error. Not all +developers compile LLVM top-of-tree. These release-bound developers should +also be told about upcoming changes.

  • +
  • Turn the soft-error into a hard-error after said LLVM release has branched.

  • +
  • Update the coding standards to allow the new +features we’ve explicitly approved in the RFC.

  • +
  • Start using the new features in LLVM’s codebase.

  • +
+
+

Here’s a sample RFC and the +corresponding change.

+
+
+

Working with the CI system

+

The main continuous integration (CI) tool for the LLVM project is the +LLVM Buildbot. It uses different builders +to cover a wide variety of sub-projects and configurations. The builds are +executed on different workers. Builders and workers are configured and +provided by community members.

+

The Buildbot tracks the commits on the main branch and the release branches. +This means that patches are built and tested after they are merged to these +branches (aka post-merge testing). This also means it’s okay to break the build +occasionally, as it’s unreasonable to expect contributors to build and test +their patch with every possible configuration.

+

If your commit broke the build:

+
    +
  • Fix the build as soon as possible as this might block other contributors or +downstream users.

  • +
  • If you need more time to analyze and fix the bug, please revert your change to +unblock others.

  • +
+

If someone else broke the build and this blocks your work:

+
    +
  • Comment on the code review in Phabricator +(if available) or email the author, explain the problem and how this impacts +you. Add a link to the broken build and the error message so folks can +understand the problem.

  • +
  • Revert the commit if this blocks your work; see the Patch reversion policy section.

  • +
+

If a build/worker is permanently broken:

+
    +
  • 1st step: contact the owner of the worker. You can find the name and contact +information for the Admin of the worker on the page of the build in the +Worker tab:

    +_images/buildbot_worker_contact.png +
  • +
  • 2nd step: If the owner does not respond or fix the worker, please escalate +to Galina Kistanova, the maintainer of the BuildBot master.

  • +
  • 3rd step: If Galina could not help you, please escalate to the +Infrastructure Working Group.

  • +
+
+
+
+

Introducing New Components into LLVM

+

The LLVM community is a vibrant and exciting place to be, and we look to be +inclusive of new projects and foster new communities, and increase +collaboration across industry and academia.

+

That said, we need to strike a balance between being inclusive of new ideas and +people and the cost of ongoing maintenance that new code requires. As such, we +have a general support policy for introducing major new +components into the LLVM world, depending on the degree of detail and +responsibility required. Core projects need a higher degree of scrutiny +than peripheral projects, and the latter may have additional differences.

+

However, this is really only intended to cover common cases +that we have seen arise: different situations are different, and we are open +to discussing unusual cases as well - just start an RFC thread on the +llvm-dev mailing list.

+
+

Adding a New Target

+

LLVM is very receptive to new targets, even experimental ones, but a number of +problems can appear when adding new large portions of code, and back-ends are +normally added in bulk. New targets need the same level of support as other +core parts of the compiler, so they are covered in the core tier of our +support policy.

+

We have found that landing large pieces of new code and then trying to fix +emergent problems in-tree is problematic for a variety of reasons. For these +reasons, new targets are always added as experimental until they can be +proven stable, and later moved to non-experimental.

+

The differences between both classes are:

+
    +
  • Experimental targets are not built by default (they need to be explicitly +enabled at CMake time).

  • +
  • Test failures, bugs, and build breakages that only appear when the +experimental target is enabled, caused by changes unrelated to the target, are +the responsibility of the community behind the target to fix.

  • +
+

The basic rules for a back-end to be upstreamed in experimental mode are:

+
    +
  • Every target must have a code owner. The CODE_OWNERS.TXT +file has to be updated as part of the first merge. The code owner makes sure +that changes to the target get reviewed and steers the overall effort.

  • +
  • There must be an active community behind the target. This community +will help maintain the target by providing buildbots, fixing +bugs, answering the LLVM community’s questions and making sure the new +target doesn’t break any of the other targets, or generic code. This +behavior is expected to continue throughout the lifetime of the +target’s code.

  • +
  • The code must be free of contentious issues, for example, large +changes in how the IR behaves or should be formed by the front-ends, +unless agreed by the majority of the community via refactoring of the +(IR standard) before the merge of the new target changes, +following the IR Backwards Compatibility.

  • +
  • The code conforms to all of the policies laid out in this developer policy +document, including license, patent, and coding standards.

  • +
  • The target should have either reasonable documentation on how it +works (ISA, ABI, etc.) or a publicly available simulator/hardware +(either free or cheap enough) - preferably both. This allows +developers to validate assumptions, understand constraints and review code +that can affect the target.

  • +
+

In addition, the rules for a back-end to be promoted to official are:

+
    +
  • The target must have addressed every other minimum requirement and +have been stable in tree for at least 3 months. This cool down +period is to make sure that the back-end and the target community can +endure continuous upstream development for the foreseeable future.

  • +
  • The target’s code must have been completely adapted to this policy +as well as the coding standards. Any exceptions that +were made to move into experimental mode must have been fixed before +becoming official.

  • +
  • The test coverage needs to be broad and well written (small tests, +well documented). The build target check-all must pass with the +new target built, and where applicable, the test-suite must also +pass without errors, in at least one configuration (publicly +demonstrated, for example, via buildbots).

  • +
  • Public buildbots need to be created and actively maintained, unless +the target requires no additional buildbots (ex. check-all covers +all tests). The more relevant and public the new target’s CI infrastructure +is, the more the LLVM community will embrace it.

  • +
+

To continue as a supported and official target:

+
    +
  • The maintainer(s) must continue following these rules throughout the lifetime +of the target. Continuous violations of aforementioned rules and policies +could lead to complete removal of the target from the code base.

  • +
  • Degradation in support, documentation or test coverage will make the target a +nuisance to other targets, and the target will be considered a candidate for deprecation and +ultimately removal.

  • +
+

In essence, these rules are necessary for targets to gain and retain their +status, but they are also markers to define bit-rot, and will be used to clean up the +tree from unmaintained targets.

+
+
+

Adding an Established Project To the LLVM Monorepo

+

The LLVM monorepo is the centerpoint +of development in the LLVM world, and has all of the primary LLVM components, +including the LLVM optimizer and code generators, Clang, LLDB, etc. Monorepos +in general are great because they +allow atomic commits to the project, simplify CI, and make it easier for +subcommunities to collaborate.

+

Like new targets, most projects already in the monorepo are considered to be in +the core tier of our support policy. The burden to add +things to the LLVM monorepo needs to be very high - code that is added to this +repository is checked out by everyone in the community. As such, we hold +components to a high bar similar to “official targets”, they:

+
+
    +
  • Must be generally aligned with the mission of the LLVM project to advance +compilers, languages, tools, runtimes, etc.

  • +
  • Must conform to all of the policies laid out in this developer policy +document, including license, patent, coding standards, and code of conduct.

  • +
  • Must have an active community that maintains the code, including established +code owners.

  • +
  • Should have reasonable documentation about how it works, including a high +quality README file.

  • +
  • Should have CI to catch breakage within the project itself or due to +underlying LLVM dependencies.

  • +
  • Should have code free of issues the community finds contentious, or be on a +clear path to resolving them.

  • +
  • Must be proposed through the LLVM RFC process, and have its addition approved +by the LLVM community - this ultimately mediates the resolution of the +“should” concerns above.

  • +
+
+

If you have a project that you think would make sense to add to the LLVM +monorepo, please start an RFC thread on the llvm-dev mailing list to kick off +the discussion. This process can take some time and iteration - please don’t +be discouraged or intimidated by that!

+

If you have an earlier stage project that you think is aligned with LLVM, please +see the “Incubating New Projects” section.

+
+
+

Incubating New Projects

+

The burden to add a new project to the LLVM monorepo is intentionally very high, +but that can have a chilling effect on new and innovative projects. To help +foster these sorts of projects, LLVM supports an “incubator” process that is +much easier to get started with. It provides space for potentially valuable, +new top-level and sub-projects to reach a critical mass before they have enough +code to prove their utility and grow a community. This also allows +collaboration between teams that already have permissions to make contributions +to projects under the LLVM umbrella.

+

Projects which can be considered for the LLVM incubator meet the following +criteria:

+
+
    +
  • Must be generally aligned with the mission of the LLVM project to advance +compilers, languages, tools, runtimes, etc.

  • +
  • Must conform to the license, patent, and code of conduct policies laid out +in this developer policy document.

  • +
  • Must have a documented charter and development plan, e.g. in the form of a +README file, mission statement, and/or manifesto.

  • +
  • Should conform to coding standards, incremental development process, and +other expectations.

  • +
  • Should have a sense of the community that it hopes to eventually foster, and +there should be interest from members with different affiliations / +organizations.

  • +
  • Should have a feasible path to eventually graduate as a dedicated top-level +or sub-project within the LLVM monorepo.

  • +
  • Should include a notice (e.g. in the project README or web page) that the +project is in ‘incubation status’ and is not included in LLVM releases (see +suggested wording below).

  • +
  • Must be proposed through the LLVM RFC process, and have its addition +approved by the LLVM community - this ultimately mediates the resolution of +the “should” concerns above.

  • +
+
+

That said, the project need not have any code to get started, and need not have +an established community at all! Furthermore, incubating projects may pass +through transient states that violate the “Should” guidelines above, or would +otherwise make them unsuitable for direct inclusion in the monorepo (e.g. +dependencies that have not yet been factored appropriately, leveraging +experimental components or APIs that are not yet upstream, etc).

+
+
When approved, the llvm-admin group can grant the new project:
    +
  • A new repository in the LLVM Github Organization - but not the LLVM monorepo.

  • +
  • New mailing list, discourse forum, and/or discord chat hosted with other LLVM +forums.

  • +
  • Other infrastructure integration can be discussed on a case-by-case basis.

  • +
+
+
+

Graduation to the mono-repo would follow existing processes and standards for +becoming a first-class part of the monorepo. Similarly, an incubating project +may be eventually retired, but no process has been established for that yet. If +and when this comes up, please start an RFC discussion on llvm-dev.

+

This process is very new - please expect the details to change. It is always +safe to ask on the llvm-dev mailing list about this.

+

Suggested disclaimer for the project README and the main project web page:

+
This project is participating in the LLVM Incubator process: as such, it is
+not part of any official LLVM release.  While incubation status is not
+necessarily a reflection of the completeness or stability of the code, it
+does indicate that the project is not yet endorsed as a component of LLVM.
+
+
+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Docker.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Docker.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Docker.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Docker.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,302 @@ + + + + + + + + + A guide to Dockerfiles for building LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

A guide to Dockerfiles for building LLVM

+
+

Introduction

+

You can find a number of sources to build docker images with LLVM components in +llvm/utils/docker. They can be used by anyone who wants to build the docker +images for their own use, or as a starting point for someone who wants to write +their own Dockerfiles.

+

We currently provide Dockerfiles with debian8 and nvidia-cuda base images. +We also provide an example image, which contains placeholders that one would need +to fill out in order to produce Dockerfiles for a new docker image.

+
+

Why?

+

Docker images provide a way to produce binary distributions of +software inside a controlled environment. Having Dockerfiles to build docker images +inside the LLVM repo makes them much more discoverable than putting them into any other +place.

+
+
+

Docker basics

+

If you’ve never heard about Docker before, you might find this section helpful +to get a very basic explanation of it. +Docker is a popular solution for running programs in +an isolated and reproducible environment, especially to maintain releases for +software deployed to large distributed fleets. +It uses Linux kernel namespaces and cgroups to provide lightweight isolation +inside the currently running Linux kernel. +A single active instance of a dockerized environment is called a docker +container. +A snapshot of a docker container filesystem is called a docker image. +One can start a container from a prebuilt docker image.

+

Docker images are built from a so-called Dockerfile, a source file written in +a specialized language that defines instructions to be used when building +the docker image (see official +documentation for more +details). A minimal Dockerfile typically contains a base image and a number +of RUN commands that have to be executed to build the image. When building a new +image, docker will first download your base image, mount its filesystem as +read-only and then add a writable overlay on top of it to keep track of all +filesystem modifications performed while building your image. When the build +process is finished, a diff between your image’s final filesystem state and the +base image’s filesystem is stored in the resulting image.

+
+
+
+

Overview

+

The llvm/utils/docker folder contains Dockerfiles and simple bash scripts to +serve as a basis for anyone who wants to create their own Docker image with +LLVM components, compiled from sources. The sources are checked out from the +upstream git repository when building the image.

+

The resulting image contains only the requested LLVM components and a few extra +packages to make the image minimally useful for C++ development, e.g. libstdc++ +and binutils.

+

The interface to run the build is the build_docker_image.sh script. It accepts a +list of LLVM repositories to check out and arguments for the CMake invocation.

+

If you want to write your own docker image, start with an example/ subfolder. +It provides an incomplete Dockerfile with (very few) FIXMEs explaining the steps +you need to take in order to make your Dockerfiles functional.

+
+
+

Usage

+

The llvm/utils/docker/build_docker_image.sh script provides a rather high degree of +control on how to run the build. It allows you to specify the projects to +check out from git and provide a list of CMake arguments to use when +building LLVM inside a docker container.

+

Here’s a very simple example of getting a docker image with clang binary, +compiled by the system compiler in the debian8 image:

+
./llvm/utils/docker/build_docker_image.sh \
+    --source debian8 \
+    --docker-repository clang-debian8 --docker-tag "staging" \
+    -p clang -i install-clang -i install-clang-resource-headers \
+    -- \
+    -DCMAKE_BUILD_TYPE=Release
+
+
+

Note that a build like that doesn’t use a 2-stage build process that +you probably want for clang. Running a 2-stage build is a little more intricate, +this command will do that:

+
# Run a 2-stage build.
+#   LLVM_TARGETS_TO_BUILD=Native is to reduce stage1 compile time.
+#   Options, starting with BOOTSTRAP_* are passed to stage2 cmake invocation.
+./build_docker_image.sh \
+    --source debian8 \
+    --docker-repository clang-debian8 --docker-tag "staging" \
+    -p clang -i stage2-install-clang -i stage2-install-clang-resource-headers \
+    -- \
+    -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \
+    -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \
+    -DCLANG_ENABLE_BOOTSTRAP=ON -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-resource-headers"
+
+
+

This will produce a new image clang-debian8:staging from the latest +upstream revision. +After the image is built you can run bash inside a container based on your image +like this:

+
docker run -ti clang-debian8:staging bash
+
+
+

Now you can run bash commands as you normally would:

+
root@80f351b51825:/# clang -v
+clang version 5.0.0 (trunk 305064)
+Target: x86_64-unknown-linux-gnu
+Thread model: posix
+InstalledDir: /bin
+Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8
+Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.8.4
+Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
+Found candidate GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9.2
+Selected GCC installation: /usr/lib/gcc/x86_64-linux-gnu/4.9
+Candidate multilib: .;@m64
+Selected multilib: .;@m64
+
+
+
+
+

Which image should I choose?

+

We currently provide two images: debian8-based and nvidia-cuda-based. They +differ in the base image that they use, i.e. they have a different set of +preinstalled binaries. Debian8 is very minimal; nvidia-cuda is larger, but has +preinstalled CUDA libraries and allows access to a GPU installed on your +machine.

+

If you need a minimal linux distribution with only clang and libstdc++ included, +you should try debian8-based image.

+

If you want to use CUDA libraries and have access to a GPU on your machine, +you should choose the nvidia-cuda-based image and use nvidia-docker to run your docker containers. Note +that you don’t need nvidia-docker to build the images, but you need it in order +to have access to the GPU from a docker container that is running the built +image.

+

If you have a different use-case, you could create your own image based on +example/ folder.

+

Any docker image can be built and run using only the docker binary, i.e. you can +run debian8 build on Fedora or any other Linux distribution. You don’t need to +install CMake, compilers or any other clang dependencies. It is all handled +during the build process inside Docker’s isolated environment.

+
+
+

Stable build

+

If you want a somewhat recent and somewhat stable build, use the +branches/google/stable branch, i.e. the following command will produce a +debian8-based image using the latest google/stable sources for you:

+
./llvm/utils/docker/build_docker_image.sh \
+    -s debian8 -d clang-debian8 -t "staging" \
+    --branch branches/google/stable \
+    -p clang -i install-clang -i install-clang-resource-headers \
+    -- \
+    -DCMAKE_BUILD_TYPE=Release
+
+
+
+
+

Minimizing docker image size

+

Due to how Docker’s filesystem works, all intermediate writes are persisted in +the resulting image, even if they are removed in the following commands. +To minimize the resulting image size we use multi-stage Docker builds. +Internally Docker builds two images. The first image does all the work: installs +build dependencies, checks out LLVM source code, compiles LLVM, etc. +The first image is only used during build and does not have a descriptive name, +i.e. it is only accessible via the hash value after the build is finished. +The second image is our resulting image. It contains only the built binaries +and not any build dependencies. It is also accessible via a descriptive name +(specified by -d and -t flags).

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ExceptionHandling.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ExceptionHandling.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ExceptionHandling.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ExceptionHandling.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,971 @@ + + + + + + + + + Exception Handling in LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Exception Handling in LLVM

+ +
+

Introduction

+

This document is the central repository for all information pertaining to +exception handling in LLVM. It describes the format that LLVM exception +handling information takes, which is useful for those interested in creating +front-ends or dealing directly with the information. Further, this document +provides specific examples of what exception handling information is used for in +C and C++.

+
+

Itanium ABI Zero-cost Exception Handling

+

Exception handling for most programming languages is designed to recover from +conditions that rarely occur during general use of an application. To that end, +exception handling should not interfere with the main flow of an application’s +algorithm by performing checkpointing tasks, such as saving the current pc or +register state.

+

The Itanium ABI Exception Handling Specification defines a methodology for +providing outlying data in the form of exception tables without inlining +speculative exception handling code in the flow of an application’s main +algorithm. Thus, the specification is said to add “zero-cost” to the normal +execution of an application.

+

A more complete description of the Itanium ABI exception handling runtime +support can be found at Itanium C++ ABI: Exception Handling. A description of the +exception frame format can be found at Exception Frames, +with details of the DWARF 4 specification at DWARF 4 Standard. A description of the C++ exception +table formats can be found at Exception Handling Tables.

+
+
+

Setjmp/Longjmp Exception Handling

+

Setjmp/Longjmp (SJLJ) based exception handling uses LLVM intrinsics +llvm.eh.sjlj.setjmp and llvm.eh.sjlj.longjmp to handle control flow for +exception handling.

+

For each function which does exception processing — be it try/catch +blocks or cleanups — that function registers itself on a global frame +list. When exceptions are unwinding, the runtime uses this list to identify +which functions need processing.

+

Landing pad selection is encoded in the call site entry of the function +context. The runtime returns to the function via llvm.eh.sjlj.longjmp, where +a switch table transfers control to the appropriate landing pad based on the +index stored in the function context.

+

In contrast to DWARF exception handling, which encodes exception regions and +frame information in out-of-line tables, SJLJ exception handling builds and +removes the unwind frame context at runtime. This results in faster exception +handling at the expense of slower execution when no exceptions are thrown. As +exceptions are, by their nature, intended for uncommon code paths, DWARF +exception handling is generally preferred to SJLJ.

+
+
+

Windows Runtime Exception Handling

+

LLVM supports handling exceptions produced by the Windows runtime, but it +requires a very different intermediate representation. It is not based on the +“landingpad” instruction like the other two models, and is +described later in this document under Exception Handling using the Windows Runtime.

+
+
+

Overview

+

When an exception is thrown in LLVM code, the runtime does its best to find a +handler suited to processing the circumstance.

+

The runtime first attempts to find an exception frame corresponding to the +function where the exception was thrown. If the programming language supports +exception handling (e.g. C++), the exception frame contains a reference to an +exception table describing how to process the exception. If the language does +not support exception handling (e.g. C), or if the exception needs to be +forwarded to a prior activation, the exception frame contains information about +how to unwind the current activation and restore the state of the prior +activation. This process is repeated until the exception is handled. If the +exception is not handled and no activations remain, then the application is +terminated with an appropriate error message.

+

Because different programming languages have different behaviors when handling +exceptions, the exception handling ABI provides a mechanism for +supplying personalities. An exception handling personality is defined by +way of a personality function (e.g. __gxx_personality_v0 in C++), +which receives the context of the exception, an exception structure +containing the exception object type and value, and a reference to the exception +table for the current function. The personality function for the current +compile unit is specified in a common exception frame.

+

The organization of an exception table is language dependent. For C++, an +exception table is organized as a series of code ranges defining what to do if +an exception occurs in that range. Typically, the information associated with a +range defines which types of exception objects (using C++ type info) are +handled in that range, and an associated action that should take place. Actions +typically pass control to a landing pad.

+

A landing pad corresponds roughly to the code found in the catch portion of +a try/catch sequence. When execution resumes at a landing pad, it +receives an exception structure and a selector value corresponding to the +type of exception thrown. The selector is then used to determine which catch +should actually process the exception.

+
+
+
+

LLVM Code Generation

+

From a C++ developer’s perspective, exceptions are defined in terms of the +throw and try/catch statements. In this section we will describe the +implementation of LLVM exception handling in terms of C++ examples.

+
+

Throw

+

Languages that support exception handling typically provide a throw +operation to initiate the exception process. Internally, a throw operation +breaks down into two steps.

+
    +
  1. A request is made to allocate exception space for an exception structure. +This structure needs to survive beyond the current activation. This structure +will contain the type and value of the object being thrown.

  2. +
  3. A call is made to the runtime to raise the exception, passing the exception +structure as an argument.

  4. +
+

In C++, the allocation of the exception structure is done by the +__cxa_allocate_exception runtime function. The exception raising is handled +by __cxa_throw. The type of the exception is represented using a C++ RTTI +structure.

+
+
+

Try/Catch

+

A call within the scope of a try statement can potentially raise an +exception. In those circumstances, the LLVM C++ front-end replaces the call with +an invoke instruction. Unlike a call, the invoke has two potential +continuation points:

+
    +
  1. where to continue when the call succeeds as per normal, and

  2. +
  3. where to continue if the call raises an exception, either by a throw or the +unwinding of a throw

  4. +
+

The term used to define the place where an invoke continues after an +exception is called a landing pad. LLVM landing pads are conceptually +alternative function entry points where an exception structure reference and a +type info index are passed in as arguments. The landing pad saves the exception +structure reference and then proceeds to select the catch block that corresponds +to the type info of the exception object.

+

The LLVM ‘landingpad’ Instruction is used to convey information about the landing +pad to the back end. For C++, the landingpad instruction returns a pointer +and integer pair corresponding to the pointer to the exception structure and +the selector value respectively.

+

The landingpad instruction looks for a reference to the personality +function to be used for this try/catch sequence in the parent +function’s attribute list. The instruction contains a list of cleanup, +catch, and filter clauses. The exception is tested against the clauses +sequentially from first to last. The clauses have the following meanings:

+
    +
  • catch <type> @ExcType

    +
      +
    • This clause means that the landingpad block should be entered if the +exception being thrown is of type @ExcType or a subtype of +@ExcType. For C++, @ExcType is a pointer to the std::type_info +object (an RTTI object) representing the C++ exception type.

    • +
    • If @ExcType is null, any exception matches, so the landingpad +should always be entered. This is used for C++ catch-all blocks (“catch +(...)”).

    • +
    • When this clause is matched, the selector value will be equal to the value +returned by “@llvm.eh.typeid.for(i8* @ExcType)”. This will always be a +positive value.

    • +
    +
  • +
  • filter <type> [<type> @ExcType1, ..., <type> @ExcTypeN]

    +
      +
    • This clause means that the landingpad should be entered if the exception +being thrown does not match any of the types in the list (which, for C++, +are again specified as std::type_info pointers).

    • +
    • C++ front-ends use this to implement C++ exception specifications, such as +“void foo() throw (ExcType1, ..., ExcTypeN) { ... }”.

    • +
    • When this clause is matched, the selector value will be negative.

    • +
    • The array argument to filter may be empty; for example, “[0 x i8**] +undef”. This means that the landingpad should always be entered. (Note +that such a filter would not be equivalent to “catch i8* null”, +because filter and catch produce negative and positive selector +values respectively.)

    • +
    +
  • +
  • cleanup

    +
      +
    • This clause means that the landingpad should always be entered.

    • +
    • C++ front-ends use this for calling objects’ destructors.

    • +
    • When this clause is matched, the selector value will be zero.

    • +
    • The runtime may treat “cleanup” differently from “catch <type> +null”.

      +

      In C++, if an unhandled exception occurs, the language runtime will call +std::terminate(), but it is implementation-defined whether the runtime +unwinds the stack and calls object destructors first. For example, the GNU +C++ unwinder does not call object destructors when an unhandled exception +occurs. The reason for this is to improve debuggability: it ensures that +std::terminate() is called from the context of the throw, so that +this context is not lost by unwinding the stack. A runtime will typically +implement this by searching for a matching non-cleanup clause, and +aborting if it does not find one, before entering any landingpad blocks.

      +
    • +
    +
  • +
+

Once the landing pad has the type info selector, the code branches to the code +for the first catch. The catch then checks the value of the type info selector +against the index of type info for that catch. Since the type info index is not +known until all the type infos have been gathered in the backend, the catch code +must call the llvm.eh.typeid.for intrinsic to determine the index for a given +type info. If the catch fails to match the selector then control is passed on to +the next catch.

+

Finally, the entry and exit of catch code is bracketed with calls to +__cxa_begin_catch and __cxa_end_catch.

+
    +
  • __cxa_begin_catch takes an exception structure reference as an argument +and returns the value of the exception object.

  • +
  • __cxa_end_catch takes no arguments. This function:

    +
      +
    1. Locates the most recently caught exception and decrements its handler +count,

    2. +
    3. Removes the exception from the caught stack if the handler count goes to +zero, and

    4. +
    5. Destroys the exception if the handler count goes to zero and the exception +was not re-thrown by throw.

    6. +
    +
    +

    Note

    +

    a rethrow from within the catch may replace this call with a +__cxa_rethrow.

    +
    +
  • +
+
+
+

Cleanups

+

A cleanup is extra code which needs to be run as part of unwinding a scope. C++ +destructors are a typical example, but other languages and language extensions +provide a variety of different kinds of cleanups. In general, a landing pad may +need to run arbitrary amounts of cleanup code before actually entering a catch +block. To indicate the presence of cleanups, a ‘landingpad’ Instruction should have +a cleanup clause. Otherwise, the unwinder will not stop at the landing pad if +there are no catches or filters that require it to.

+
+

Note

+

Do not allow a new exception to propagate out of the execution of a +cleanup. This can corrupt the internal state of the unwinder. Different +languages describe different high-level semantics for these situations: for +example, C++ requires that the process be terminated, whereas Ada cancels both +exceptions and throws a third.

+
+

When all cleanups are finished, if the exception is not handled by the current +function, resume unwinding by calling the resume instruction, +passing in the result of the landingpad instruction for the original +landing pad.

+
+
+

Throw Filters

+

C++ allows the specification of which exception types may be thrown from a +function. To represent this, a top level landing pad may exist to filter out +invalid types. To express this in LLVM code the ‘landingpad’ Instruction will have a +filter clause. The clause consists of an array of type infos. +landingpad will return a negative value +if the exception does not match any of the type infos. If no match is found then +a call to __cxa_call_unexpected should be made, otherwise +_Unwind_Resume. Each of these functions requires a reference to the +exception structure. Note that the most general form of a landingpad +instruction can have any number of catch, cleanup, and filter clauses (though +having more than one cleanup is pointless). The LLVM C++ front-end can generate +such landingpad instructions due to inlining creating nested exception +handling scopes.

+
+
+

Restrictions

+

The unwinder delegates the decision of whether to stop in a call frame to that +call frame’s language-specific personality function. Not all unwinders guarantee +that they will stop to perform cleanups. For example, the GNU C++ unwinder +doesn’t do so unless the exception is actually caught somewhere further up the +stack.

+

In order for inlining to behave correctly, landing pads must be prepared to +handle selector results that they did not originally advertise. Suppose that a +function catches exceptions of type A, and it’s inlined into a function that +catches exceptions of type B. The inliner will update the landingpad +instruction for the inlined landing pad to include the fact that B is also +caught. If that landing pad assumes that it will only be entered to catch an +A, it’s in for a rude awakening. Consequently, landing pads must test for +the selector results they understand and then resume exception propagation with +the resume instruction if none of the conditions +match.

+
+
+
+

Exception Handling Intrinsics

+

In addition to the landingpad and resume instructions, LLVM uses several +intrinsic functions (name prefixed with llvm.eh) to provide exception +handling information at various points in generated code.

+
+

llvm.eh.typeid.for

+
i32 @llvm.eh.typeid.for(i8* %type_info)
+
+
+

This intrinsic returns the type info index in the exception table of the current +function. This value can be used to compare against the result of the +landingpad instruction. The single argument is a reference to a type info.

+

Uses of this intrinsic are generated by the C++ front-end.

+
+
+

llvm.eh.begincatch

+
void @llvm.eh.begincatch(i8* %ehptr, i8* %ehobj)
+
+
+

This intrinsic marks the beginning of catch handling code within the blocks +following a landingpad instruction. The exact behavior of this function +depends on the compilation target and the personality function associated +with the landingpad instruction.

+

The first argument to this intrinsic is a pointer that was previously extracted +from the aggregate return value of the landingpad instruction. The second +argument to the intrinsic is a pointer to stack space where the exception object +should be stored. The runtime handles the details of copying the exception +object into the slot. If the second parameter is null, no copy occurs.

+

Uses of this intrinsic are generated by the C++ front-end. Many targets will +use implementation-specific functions (such as __cxa_begin_catch) instead +of this intrinsic. The intrinsic is provided for targets that require a more +abstract interface.

+

When used in the native Windows C++ exception handling implementation, this +intrinsic serves as a placeholder to delimit code before a catch handler is +outlined. When the handler is outlined, this intrinsic will be replaced +by instructions that retrieve the exception object pointer from the frame +allocation block.

+
+
+

llvm.eh.endcatch

+
void @llvm.eh.endcatch()
+
+
+

This intrinsic marks the end of catch handling code within the current block, +which will be a successor of a block which called llvm.eh.begincatch. +The exact behavior of this function depends on the compilation target and the +personality function associated with the corresponding landingpad +instruction.

+

There may be more than one call to llvm.eh.endcatch for any given call to +llvm.eh.begincatch with each llvm.eh.endcatch call corresponding to the +end of a different control path. All control paths following a call to +llvm.eh.begincatch must reach a call to llvm.eh.endcatch.

+

Uses of this intrinsic are generated by the C++ front-end. Many targets will +use implementation-specific functions (such as __cxa_end_catch) instead +of this intrinsic. The intrinsic is provided for targets that require a more +abstract interface.

+

When used in the native Windows C++ exception handling implementation, this +intrinsic serves as a placeholder to delimit code before a catch handler is +outlined. After the handler is outlined, this intrinsic is simply removed.

+
+
+

llvm.eh.exceptionpointer

+
i8 addrspace(N)* @llvm.eh.exceptionpointer.pNi8(token %catchpad)
+
+
+

This intrinsic retrieves a pointer to the exception caught by the given +catchpad.

+
+
+

SJLJ Intrinsics

+

The llvm.eh.sjlj intrinsics are used internally within LLVM’s +backend. Uses of them are generated by the backend’s +SjLjEHPrepare pass.

+
+

llvm.eh.sjlj.setjmp

+
i32 @llvm.eh.sjlj.setjmp(i8* %setjmp_buf)
+
+
+

For SJLJ based exception handling, this intrinsic forces register saving for the +current function and stores the address of the following instruction for use as +a destination address by llvm.eh.sjlj.longjmp. The buffer format and the +overall functioning of this intrinsic are compatible with the GCC +__builtin_setjmp implementation, allowing code built with clang and GCC +to interoperate.

+

The single parameter is a pointer to a five word buffer in which the calling +context is saved. The front end places the frame pointer in the first word, and +the target implementation of this intrinsic should place the destination address +for a llvm.eh.sjlj.longjmp in the second word. The following three words are +available for use in a target-specific manner.

+
+
+

llvm.eh.sjlj.longjmp

+
void @llvm.eh.sjlj.longjmp(i8* %setjmp_buf)
+
+
+

For SJLJ based exception handling, the llvm.eh.sjlj.longjmp intrinsic is +used to implement __builtin_longjmp(). The single parameter is a pointer to +a buffer populated by llvm.eh.sjlj.setjmp. The frame pointer and stack +pointer are restored from the buffer, then control is transferred to the +destination address.

+
+
+

llvm.eh.sjlj.lsda

+
i8* @llvm.eh.sjlj.lsda()
+
+
+

For SJLJ based exception handling, the llvm.eh.sjlj.lsda intrinsic returns +the address of the Language Specific Data Area (LSDA) for the current +function. The SJLJ front-end code stores this address in the exception handling +function context for use by the runtime.

+
+
+

llvm.eh.sjlj.callsite

+
void @llvm.eh.sjlj.callsite(i32 %call_site_num)
+
+
+

For SJLJ based exception handling, the llvm.eh.sjlj.callsite intrinsic +identifies the callsite value associated with the following invoke +instruction. This is used to ensure that landing pad entries in the LSDA are +generated in matching order.

+
+
+
+
+

Asm Table Formats

+

There are two tables that are used by the exception handling runtime to +determine which actions should be taken when an exception is thrown.

+
+

Exception Handling Frame

+

An exception handling frame eh_frame is very similar to the unwind frame +used by DWARF debug info. The frame contains all the information necessary to +tear down the current frame and restore the state of the prior frame. There is +an exception handling frame for each function in a compile unit, plus a common +exception handling frame that defines information common to all functions in the +unit.

+

The format of this call frame information (CFI) is often platform-dependent, +however. ARM, for example, defines their own format. Apple has their own compact +unwind info format. On Windows, another format is used for all architectures +since 32-bit x86. LLVM will emit whatever information is required by the +target.

+
+
+

Exception Tables

+

An exception table contains information about what actions to take when an +exception is thrown in a particular part of a function’s code. This is typically +referred to as the language-specific data area (LSDA). The format of the LSDA +table is specific to the personality function, but the majority of personalities +out there use a variation of the tables consumed by __gxx_personality_v0. +There is one exception table per function, except leaf functions and functions +that have calls only to non-throwing functions. They do not need an exception +table.

+
+
+
+

Exception Handling using the Windows Runtime

+
+

Background on Windows exceptions

+

Interacting with exceptions on Windows is significantly more complicated than +on Itanium C++ ABI platforms. The fundamental difference between the two models +is that Itanium EH is designed around the idea of “successive unwinding,” while +Windows EH is not.

+

Under Itanium, throwing an exception typically involves allocating thread local +memory to hold the exception, and calling into the EH runtime. The runtime +identifies frames with appropriate exception handling actions, and successively +resets the register context of the current thread to the most recently active +frame with actions to run. In LLVM, execution resumes at a landingpad +instruction, which produces register values provided by the runtime. If a +function is only cleaning up allocated resources, the function is responsible +for calling _Unwind_Resume to transition to the next most recently active +frame after it is finished cleaning up. Eventually, the frame responsible for +handling the exception calls __cxa_end_catch to destroy the exception, +release its memory, and resume normal control flow.

+

The Windows EH model does not use these successive register context resets. +Instead, the active exception is typically described by a frame on the stack. +In the case of C++ exceptions, the exception object is allocated in stack memory +and its address is passed to __CxxThrowException. General purpose structured +exceptions (SEH) are more analogous to Linux signals, and they are dispatched by +userspace DLLs provided with Windows. Each frame on the stack has an assigned EH +personality routine, which decides what actions to take to handle the exception. +There are a few major personalities for C and C++ code: the C++ personality +(__CxxFrameHandler3) and the SEH personalities (_except_handler3, +_except_handler4, and __C_specific_handler). All of them implement +cleanups by calling back into a “funclet” contained in the parent function.

+

Funclets, in this context, are regions of the parent function that can be called +as though they were a function pointer with a very special calling convention. +The frame pointer of the parent frame is passed into the funclet either using +the standard EBP register or as the first parameter register, depending on the +architecture. The funclet implements the EH action by accessing local variables +in memory through the frame pointer, and returning some appropriate value, +continuing the EH process. No variables live in to or out of the funclet can be +allocated in registers.

+

The C++ personality also uses funclets to contain the code for catch blocks +(i.e. all user code between the braces in catch (Type obj) { ... }). The +runtime must use funclets for catch bodies because the C++ exception object is +allocated in a child stack frame of the function handling the exception. If the +runtime rewound the stack back to the frame of the catch, the memory holding the +exception would be overwritten quickly by subsequent function calls. The use of +funclets also allows __CxxFrameHandler3 to implement rethrow without +resorting to TLS. Instead, the runtime throws a special exception, and then uses +SEH (__try / __except) to resume execution with new information in the child +frame.

+

In other words, the successive unwinding approach is incompatible with Visual +C++ exceptions and general purpose Windows exception handling. Because the C++ +exception object lives in stack memory, LLVM cannot provide a custom personality +function that uses landingpads. Similarly, SEH does not provide any mechanism +to rethrow an exception or continue unwinding. Therefore, LLVM must use the IR +constructs described later in this document to implement compatible exception +handling.

+
+
+

SEH filter expressions

+

The SEH personality functions also use funclets to implement filter expressions, +which allow executing arbitrary user code to decide which exceptions to catch. +Filter expressions should not be confused with the filter clause of the LLVM +landingpad instruction. Typically filter expressions are used to determine +if the exception came from a particular DLL or code region, or if code faulted +while accessing a particular memory address range. LLVM does not currently have +IR to represent filter expressions because it is difficult to represent their +control dependencies. Filter expressions run during the first phase of EH, +before cleanups run, making it very difficult to build a faithful control flow +graph. For now, the new EH instructions cannot represent SEH filter +expressions, and frontends must outline them ahead of time. Local variables of +the parent function can be escaped and accessed using the llvm.localescape +and llvm.localrecover intrinsics.

+
+
+

New exception handling instructions

+

The primary design goal of the new EH instructions is to support funclet +generation while preserving information about the CFG so that SSA formation +still works. As a secondary goal, they are designed to be generic across MSVC +and Itanium C++ exceptions. They make very few assumptions about the data +required by the personality, so long as it uses the familiar core EH actions: +catch, cleanup, and terminate. However, the new instructions are hard to modify +without knowing details of the EH personality. While they can be used to +represent Itanium EH, the landingpad model is strictly better for optimization +purposes.

+

The following new instructions are considered “exception handling pads”, in that +they must be the first non-phi instruction of a basic block that may be the +unwind destination of an EH flow edge: +catchswitch, catchpad, and cleanuppad. +As with landingpads, when entering a try scope, if the +frontend encounters a call site that may throw an exception, it should emit an +invoke that unwinds to a catchswitch block. Similarly, inside the scope of a +C++ object with a destructor, invokes should unwind to a cleanuppad.

+

New instructions are also used to mark the points where control is transferred +out of a catch/cleanup handler (which will correspond to exits from the +generated funclet). A catch handler which reaches its end by normal execution +executes a catchret instruction, which is a terminator indicating where in +the function control is returned to. A cleanup handler which reaches its end +by normal execution executes a cleanupret instruction, which is a terminator +indicating where the active exception will unwind to next.

+

Each of these new EH pad instructions has a way to identify which action should +be considered after this action. The catchswitch instruction is a terminator +and has an unwind destination operand analogous to the unwind destination of an +invoke. The cleanuppad instruction is not +a terminator, so the unwind destination is stored on the cleanupret +instruction instead. Successfully executing a catch handler should resume +normal control flow, so neither catchpad nor catchret instructions can +unwind. All of these “unwind edges” may refer to a basic block that contains an +EH pad instruction, or they may unwind to the caller. Unwinding to the caller +has roughly the same semantics as the resume instruction in the landingpad +model. When inlining through an invoke, instructions that unwind to the caller +are hooked up to unwind to the unwind destination of the call site.

+

Putting things together, here is a hypothetical lowering of some C++ that uses +all of the new IR instructions:

+
struct Cleanup {
+  Cleanup();
+  ~Cleanup();
+  int m;
+};
+void may_throw();
+int f() noexcept {
+  try {
+    Cleanup obj;
+    may_throw();
+  } catch (int e) {
+    may_throw();
+    return e;
+  }
+  return 0;
+}
+
+
+
define i32 @f() nounwind personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+  %obj = alloca %struct.Cleanup, align 4
+  %e = alloca i32, align 4
+  %call = invoke %struct.Cleanup* @"??0Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj)
+          to label %invoke.cont unwind label %lpad.catch
+
+invoke.cont:                                      ; preds = %entry
+  invoke void @"?may_throw@@YAXXZ"()
+          to label %invoke.cont.2 unwind label %lpad.cleanup
+
+invoke.cont.2:                                    ; preds = %invoke.cont
+  call void @"??_DCleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind
+  br label %return
+
+return:                                           ; preds = %invoke.cont.3, %invoke.cont.2
+  %retval.0 = phi i32 [ 0, %invoke.cont.2 ], [ %3, %invoke.cont.3 ]
+  ret i32 %retval.0
+
+lpad.cleanup:                                     ; preds = %invoke.cont.2
+  %0 = cleanuppad within none []
+  call void @"??1Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind
+  cleanupret %0 unwind label %lpad.catch
+
+lpad.catch:                                       ; preds = %lpad.cleanup, %entry
+  %1 = catchswitch within none [label %catch.body] unwind label %lpad.terminate
+
+catch.body:                                       ; preds = %lpad.catch
+  %catch = catchpad within %1 [%rtti.TypeDescriptor2* @"??_R0H@8", i32 0, i32* %e]
+  invoke void @"?may_throw@@YAXXZ"()
+          to label %invoke.cont.3 unwind label %lpad.terminate
+
+invoke.cont.3:                                    ; preds = %catch.body
+  %3 = load i32, i32* %e, align 4
+  catchret from %catch to label %return
+
+lpad.terminate:                                   ; preds = %catch.body, %lpad.catch
+  cleanuppad within none []
+  call void @"?terminate@@YAXXZ"
+  unreachable
+}
+
+
+
+
+

Funclet parent tokens

+

In order to produce tables for EH personalities that use funclets, it is +necessary to recover the nesting that was present in the source. This funclet +parent relationship is encoded in the IR using tokens produced by the new “pad” +instructions. The token operand of a “pad” or “ret” instruction indicates which +funclet it is in, or “none” if it is not nested within another funclet.

+

The catchpad and cleanuppad instructions establish new funclets, and +their tokens are consumed by other “pad” instructions to establish membership. +The catchswitch instruction does not create a funclet, but it produces a +token that is always consumed by its immediate successor catchpad +instructions. This ensures that every catch handler modelled by a catchpad +belongs to exactly one catchswitch, which models the dispatch point after a +C++ try.

+

Here is an example of what this nesting looks like using some hypothetical +C++ code:

+
void f() {
+  try {
+    throw;
+  } catch (...) {
+    try {
+      throw;
+    } catch (...) {
+    }
+  }
+}
+
+
+
define void @f() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+entry:
+  invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+          to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch:                                   ; preds = %entry
+  %0 = catchswitch within none [label %catch] unwind to caller
+
+catch:                                            ; preds = %catch.dispatch
+  %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+  invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+          to label %unreachable unwind label %catch.dispatch2
+
+catch.dispatch2:                                  ; preds = %catch
+  %2 = catchswitch within %1 [label %catch3] unwind to caller
+
+catch3:                                           ; preds = %catch.dispatch2
+  %3 = catchpad within %2 [i8* null, i32 64, i8* null]
+  catchret from %3 to label %try.cont
+
+try.cont:                                         ; preds = %catch3
+  catchret from %1 to label %try.cont6
+
+try.cont6:                                        ; preds = %try.cont
+  ret void
+
+unreachable:                                      ; preds = %catch, %entry
+  unreachable
+}
+
+
+

The “inner” catchswitch consumes %1 which is produced by the outer +catchpad.

+
+
+

Funclet transitions

+

The EH tables for personalities that use funclets make implicit use of the +funclet nesting relationship to encode unwind destinations, and so are +constrained in the set of funclet transitions they can represent. The related +LLVM IR instructions accordingly have constraints that ensure encodability of +the EH edges in the flow graph.

+

A catchswitch, catchpad, or cleanuppad is said to be “entered” +when it executes. It may subsequently be “exited” by any of the following +means:

+
    +
  • A catchswitch is immediately exited when none of its constituent +catchpads are appropriate for the in-flight exception and it unwinds +to its unwind destination or the caller.

  • +
  • A catchpad and its parent catchswitch are both exited when a +catchret from the catchpad is executed.

  • +
  • A cleanuppad is exited when a cleanupret from it is executed.

  • +
  • Any of these pads is exited when control unwinds to the function’s caller, +either by a call which unwinds all the way to the function’s caller, +a nested catchswitch marked “unwinds to caller”, or a nested +cleanuppad’s cleanupret marked “unwinds to caller”.

  • +
  • Any of these pads is exited when an unwind edge (from an invoke, +nested catchswitch, or nested cleanuppad’s cleanupret) +unwinds to a destination pad that is not a descendant of the given pad.

  • +
+

Note that the ret instruction is not a valid way to exit a funclet pad; +it is undefined behavior to execute a ret when a pad has been entered but +not exited.

+

A single unwind edge may exit any number of pads (with the restrictions that +the edge from a catchswitch must exit at least itself, and the edge from +a cleanupret must exit at least its cleanuppad), and then must enter +exactly one pad, which must be distinct from all the exited pads. The parent +of the pad that an unwind edge enters must be the most-recently-entered +not-yet-exited pad (after exiting from any pads that the unwind edge exits), +or “none” if there is no such pad. This ensures that the stack of executing +funclets at run-time always corresponds to some path in the funclet pad tree +that the parent tokens encode.

+

All unwind edges which exit any given funclet pad (including cleanupret +edges exiting their cleanuppad and catchswitch edges exiting their +catchswitch) must share the same unwind destination. Similarly, any +funclet pad which may be exited by unwind to caller must not be exited by +any exception edges which unwind anywhere other than the caller. This +ensures that each funclet as a whole has only one unwind destination, which +EH tables for funclet personalities may require. Note that any unwind edge +which exits a catchpad also exits its parent catchswitch, so this +implies that for any given catchswitch, its unwind destination must also +be the unwind destination of any unwind edge that exits any of its constituent +catchpads. Because catchswitch has no nounwind variant, and +because IR producers are not required to annotate calls which will not +unwind as nounwind, it is legal to nest a call or an “unwind to +caller” catchswitch within a funclet pad that has an unwind +destination other than caller; it is undefined behavior for such a call +or catchswitch to unwind.

+

Finally, the funclet pads’ unwind destinations cannot form a cycle. This +ensures that EH lowering can construct “try regions” with a tree-like +structure, which funclet-based personalities may require.

+
+
+
+

Exception Handling support on the target

+

In order to support exception handling on a particular target, there are a few +items that need to be implemented.

+
    +
  • CFI directives

    +

    First, you have to assign each target register a unique DWARF number. +Then in TargetFrameLowering’s emitPrologue, you have to emit CFI +directives +to specify how to calculate the CFA (Canonical Frame Address) and how each register +is restored from the address pointed to by the CFA with an offset. The assembler +is instructed by CFI directives to build the .eh_frame section, which is used +by the unwinder to unwind the stack during exception handling.

    +
  • +
  • getExceptionPointerRegister and getExceptionSelectorRegister

    +

    TargetLowering must implement both functions. The personality function +passes the exception structure (a pointer) and selector value (an integer) +to the landing pad through the registers specified by getExceptionPointerRegister +and getExceptionSelectorRegister respectively. On most platforms, they +will be GPRs and will be the same as the ones specified in the calling convention.

    +
  • +
  • EH_RETURN

    +

    The ISD node represents the undocumented GCC extension __builtin_eh_return (offset, handler), +which adjusts the stack by offset and then jumps to the handler. __builtin_eh_return +is used in the GCC unwinder (libgcc), +but not in the LLVM unwinder (libunwind). +If you are building on top of libgcc and have particular requirements on your target, +you have to handle EH_RETURN in TargetLowering.

    +
  • +
+

If you don’t leverage the existing runtime (libstdc++ and libgcc), +you have to take a look at libc++ and +libunwind +to see what has to be done there. For libunwind, you have to do the following:

+
    +
  • __libunwind_config.h

    +

    Define macros for your target.

    +
  • +
  • include/libunwind.h

    +

    Define enum for the target registers.

    +
  • +
  • src/Registers.hpp

    +

    Define Registers class for your target, implement setter and getter functions.

    +
  • +
  • src/UnwindCursor.hpp

    +

    Define dwarfEncoding and stepWithCompactEncoding for your Registers +class.

    +
  • +
  • src/UnwindRegistersRestore.S

    +

    Write an assembly function to restore all your target registers from the memory.

    +
  • +
  • src/UnwindRegistersSave.S

    +

    Write an assembly function to save all your target registers on the memory.

    +
  • +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ExtendingLLVM.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ExtendingLLVM.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ExtendingLLVM.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ExtendingLLVM.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,415 @@ + + + + + + + + + Extending LLVM: Adding instructions, intrinsics, types, etc. — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Extending LLVM: Adding instructions, intrinsics, types, etc.

+
+

Introduction and Warning

+

During the course of using LLVM, you may wish to customize it for your research +project or for experimentation. At this point, you may realize that you need to +add something to LLVM, whether it be a new fundamental type, a new intrinsic +function, or a whole new instruction.

+

When you come to this realization, stop and think. Do you really need to extend +LLVM? Is it a new fundamental capability that LLVM does not support at its +current incarnation or can it be synthesized from already pre-existing LLVM +elements? If you are not sure, ask on the LLVM-dev list. The reason is that +extending LLVM will get involved as you need to update all the different passes +that you intend to use with your extension, and there are many LLVM analyses +and transformations, so it may be quite a bit of work.

+

Adding an intrinsic function is far easier than adding an +instruction, and is transparent to optimization passes. If your added +functionality can be expressed as a function call, an intrinsic function is the +method of choice for LLVM extension.

+

Before you invest a significant amount of effort into a non-trivial extension, +ask on the list if what you are looking to do can be done with +already-existing infrastructure, or if maybe someone else is already working on +it. You will save yourself a lot of time and effort by doing so.

+
+
+

Adding a new intrinsic function

+

Adding a new intrinsic function to LLVM is much easier than adding a new +instruction. Almost all extensions to LLVM should start as an intrinsic +function and then be turned into an instruction if warranted.

+
    +
  1. llvm/docs/LangRef.html:

    +

    Document the intrinsic. Decide whether it is code generator specific and +what the restrictions are. Talk to other people about it so that you are +sure it’s a good idea.

    +
  2. +
  3. llvm/include/llvm/IR/Intrinsics*.td:

    +

    Add an entry for your intrinsic. Describe its memory access +characteristics for optimization (this controls whether it will be +DCE’d, CSE’d, etc). If any arguments need to be immediates, these +must be indicated with the ImmArg property. Note that any intrinsic +using one of the llvm_any*_ty types for an argument or return +type will be deemed by tblgen as overloaded and the +corresponding suffix will be required on the intrinsic’s name.

    +
  4. +
  5. llvm/lib/Analysis/ConstantFolding.cpp:

    +

    If it is possible to constant fold your intrinsic, add support to it in the +canConstantFoldCallTo and ConstantFoldCall functions.

    +
  6. +
  7. llvm/test/*:

    +

    Add test cases for your intrinsic to the test suite

    +
  8. +
+

Once the intrinsic has been added to the system, you must add code generator +support for it. Generally you must do the following steps:

+

Add support to the .td file for the target(s) of your choice in +lib/Target/*/*.td.

+
+

This is usually a matter of adding a pattern to the .td file that matches the +intrinsic, though it may obviously require adding the instructions you want to +generate as well. There are lots of examples in the PowerPC and X86 backend +to follow.

+
+
+
+

Adding a new SelectionDAG node

+

As with intrinsics, adding a new SelectionDAG node to LLVM is much easier than +adding a new instruction. New nodes are often added to help represent +instructions common to many targets. These nodes often map to an LLVM +instruction (add, sub) or intrinsic (byteswap, population count). In other +cases, new nodes have been added to allow many targets to perform a common task +(converting between floating point and integer representation) or capture more +complicated behavior in a single node (rotate).

+
    +
  1. include/llvm/CodeGen/ISDOpcodes.h:

    +

    Add an enum value for the new SelectionDAG node.

    +
  2. +
  3. lib/CodeGen/SelectionDAG/SelectionDAG.cpp:

    +
    +
    Add code to print the node to getOperationName. If your new node can be

    evaluated at compile time when given constant arguments (such as an add of a +constant with another constant), find the getNode method that takes the +appropriate number of arguments, and add a case for your node to the switch +statement that performs constant folding for nodes that take the same number +of arguments as your new node.

    +
    +
    +
  4. +
  5. lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:

    +

    Add code to legalize, promote, and expand the node as necessary. At a +minimum, you will need to add a case statement for your node in +LegalizeOp which calls LegalizeOp on the node’s operands, and returns a +new node if any of the operands changed as a result of being legalized. It +is likely that not all targets supported by the SelectionDAG framework will +natively support the new node. In this case, you must also add code in your +node’s case statement in LegalizeOp to Expand your node into simpler, +legal operations. The case for ISD::UREM for expanding a remainder into +a divide, multiply, and a subtract is a good example.

    +
  6. +
  7. lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:

    +
    +
    If targets may support the new node being added only at certain sizes, you

    will also need to add code to your node’s case statement in LegalizeOp +to Promote your node’s operands to a larger size, and perform the correct +operation. You will also need to add code to PromoteOp to do this as +well. For a good example, see ISD::BSWAP, which promotes its operand to +a wider size, performs the byteswap, and then shifts the correct bytes right +to emulate the narrower byteswap in the wider type.

    +
    +
    +
  8. +
  9. lib/CodeGen/SelectionDAG/LegalizeDAG.cpp:

    +

    Add a case for your node in ExpandOp to teach the legalizer how to +perform the action represented by the new node on a value that has been split +into high and low halves. This case will be used to support your node with a +64 bit operand on a 32 bit target.

    +
  10. +
  11. lib/CodeGen/SelectionDAG/DAGCombiner.cpp:

    +

    If your node can be combined with itself, or other existing nodes in a +peephole-like fashion, add a visit function for it, and call that function +from DAGCombiner::visit. There are several good examples for simple combines you can do; +visitFABS and visitSRL are good starting places.

    +
  12. +
  13. lib/Target/PowerPC/PPCISelLowering.cpp:

    +

    Each target has an implementation of the TargetLowering class, usually in +its own file (although some targets include it in the same file as the +DAGToDAGISel). The default behavior for a target is to assume that your new +node is legal for all types that are legal for that target. If this target +does not natively support your node, then tell the target to either Promote +it (if it is supported at a larger type) or Expand it. This will cause the +code you wrote in LegalizeOp above to decompose your new node into other +legal nodes for this target.

    +
  14. +
  15. include/llvm/Target/TargetSelectionDAG.td:

    +

    Most current targets supported by LLVM generate code using the DAGToDAG +method, where SelectionDAG nodes are pattern matched to target-specific +nodes, which represent individual instructions. In order for the targets to +match an instruction to your new node, you must add a def for that node to +the list in this file, with the appropriate type constraints. Look at +add, bswap, and fadd for examples.

    +
  16. +
  17. lib/Target/PowerPC/PPCInstrInfo.td:

    +

    Each target has a tablegen file that describes the target’s instruction set. +For targets that use the DAGToDAG instruction selection framework, add a +pattern for your new node that uses one or more target nodes. Documentation +for this is a bit sparse right now, but there are several decent examples. +See the patterns for rotl in PPCInstrInfo.td.

    +
  18. +
  19. TODO: document complex patterns.

  20. +
  21. llvm/test/CodeGen/*:

    +

    Add test cases for your new node to the test suite. +llvm/test/CodeGen/X86/bswap.ll is a good example.

    +
  22. +
+
+
+

Adding a new instruction

+
+

Warning

+

Adding instructions changes the bitcode format, and it will take some effort +to maintain compatibility with the previous version. Only add an instruction +if it is absolutely necessary.

+
+
    +
  1. llvm/include/llvm/IR/Instruction.def:

    +

    add a number for your instruction and an enum name

    +
  2. +
  3. llvm/include/llvm/IR/Instructions.h:

    +

    add a definition for the class that will represent your instruction

    +
  4. +
  5. llvm/include/llvm/IR/InstVisitor.h:

    +

    add a prototype for a visitor to your new instruction type

    +
  6. +
  7. llvm/lib/AsmParser/LLLexer.cpp:

    +

    add a new token to parse your instruction from assembly text file

    +
  8. +
  9. llvm/lib/AsmParser/LLParser.cpp:

    +

    add the grammar on how your instruction can be read and what it will +construct as a result

    +
  10. +
  11. llvm/lib/Bitcode/Reader/BitcodeReader.cpp:

    +

    add a case for your instruction and how it will be parsed from bitcode

    +
  12. +
  13. llvm/lib/Bitcode/Writer/BitcodeWriter.cpp:

    +

    add a case for your instruction and how it will be written to bitcode

    +
  14. +
  15. llvm/lib/IR/Instruction.cpp:

    +

    add a case for how your instruction will be printed out to assembly

    +
  16. +
  17. llvm/lib/IR/Instructions.cpp:

    +

    implement the class you defined in llvm/include/llvm/IR/Instructions.h

    +
  18. +
  19. Test your instruction

  20. +
  21. llvm/lib/Target/*:

    +

    add support for your instruction to code generators, or add a lowering pass.

    +
  22. +
  23. llvm/test/*:

    +

    add your test cases to the test suite.

    +
  24. +
+

Also, you need to implement (or modify) any analyses or passes that you want to +understand this new instruction.

+
+
+

Adding a new type

+
+

Warning

+

Adding new types changes the bitcode format, and will break compatibility with +currently-existing LLVM installations. Only add new types if it is absolutely +necessary.

+
+
+

Adding a fundamental type

+
    +
  1. llvm/include/llvm/IR/Type.h:

    +

    add enum for the new type; add static Type* for this type

    +
  2. +
  3. llvm/lib/IR/Type.cpp and llvm/lib/CodeGen/ValueTypes.cpp:

    +

    add mapping from TypeID => Type*; initialize the static Type*

    +
  4. +
  5. llvm/include/llvm-c/Core.h and llvm/lib/IR/Core.cpp:

    +

    add enum LLVMTypeKind and modify +LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) for the new type

    +
  6. +
  7. llvm/lib/AsmParser/LLLexer.cpp:

    +

    add ability to parse in the type from text assembly

    +
  8. +
  9. llvm/lib/AsmParser/LLParser.cpp:

    +

    add a token for that type

    +
  10. +
  11. llvm/lib/Bitcode/Writer/BitcodeWriter.cpp:

    +

    modify void ModuleBitcodeWriter::writeTypeTable() to serialize your type

    +
  12. +
  13. llvm/lib/Bitcode/Reader/BitcodeReader.cpp:

    +

    modify Error BitcodeReader::parseTypeTableBody() to read your data type

    +
  14. +
  15. include/llvm/Bitcode/LLVMBitCodes.h:

    +

    add enum TypeCodes for the new type

    +
  16. +
+
+
+

Adding a derived type

+
    +
  1. llvm/include/llvm/IR/Type.h:

    +

    add enum for the new type; add a forward declaration of the type also

    +
  2. +
  3. llvm/include/llvm/IR/DerivedTypes.h:

    +

    add new class to represent new class in the hierarchy; add forward +declaration to the TypeMap value type

    +
  4. +
  5. llvm/lib/IR/Type.cpp and llvm/lib/CodeGen/ValueTypes.cpp:

    +

    add support for derived type, notably enum TypeID and is, get methods.

    +
  6. +
  7. llvm/include/llvm-c/Core.h and llvm/lib/IR/Core.cpp:

    +

    add enum LLVMTypeKind and modify +LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) for the new type

    +
  8. +
  9. llvm/lib/AsmParser/LLLexer.cpp:

    +

    modify lltok::Kind LLLexer::LexIdentifier() to add ability to +parse in the type from text assembly

    +
  10. +
  11. llvm/lib/Bitcode/Writer/BitcodeWriter.cpp:

    +

    modify void ModuleBitcodeWriter::writeTypeTable() to serialize your type

    +
  12. +
  13. llvm/lib/Bitcode/Reader/BitcodeReader.cpp:

    +

    modify Error BitcodeReader::parseTypeTableBody() to read your data type

    +
  14. +
  15. include/llvm/Bitcode/LLVMBitCodes.h:

    +

    add enum TypeCodes for the new type

    +
  16. +
  17. llvm/lib/IR/AsmWriter.cpp:

    +

    modify void TypePrinting::print(Type *Ty, raw_ostream &OS) +to output the new derived type

    +
  18. +
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Extensions.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Extensions.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Extensions.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Extensions.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,720 @@ + + + + + + + + + LLVM Extensions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Extensions

+ +
+
+
+

Introduction

+

This document describes extensions to tools and formats LLVM seeks compatibility +with.

+
+
+

General Assembly Syntax

+
+

C99-style Hexadecimal Floating-point Constants

+

LLVM’s assemblers allow floating-point constants to be written in C99’s +hexadecimal format instead of decimal if desired.

+
.section .data
+.float 0x1c2.2ap3
+
+
+
+
+
+

Machine-specific Assembly Syntax

+
+

X86/COFF-Dependent

+
+

Relocations

+

The following additional relocation types are supported:

+

@IMGREL (AT&T syntax only) generates an image-relative relocation that +corresponds to the COFF relocation types IMAGE_REL_I386_DIR32NB (32-bit) or +IMAGE_REL_AMD64_ADDR32NB (64-bit).

+
.text
+fun:
+  mov foo@IMGREL(%ebx, %ecx, 4), %eax
+
+.section .pdata
+  .long fun@IMGREL
+  .long (fun@imgrel + 0x3F)
+  .long $unwind$fun@imgrel
+
+
+

.secrel32 generates a relocation that corresponds to the COFF relocation +types IMAGE_REL_I386_SECREL (32-bit) or IMAGE_REL_AMD64_SECREL (64-bit).

+

.secidx relocation generates an index of the section that contains +the target. It corresponds to the COFF relocation types +IMAGE_REL_I386_SECTION (32-bit) or IMAGE_REL_AMD64_SECTION (64-bit).

+
.section .debug$S,"rn"
+  .long 4
+  .long 242
+  .long 40
+  .secrel32 _function_name + 0
+  .secidx   _function_name
+  ...
+
+
+
+
+

.linkonce Directive

+

Syntax:

+
+

.linkonce [ comdat type ]

+
+

Supported COMDAT types:

+
+
discard

Discards duplicate sections with the same COMDAT symbol. This is the default +if no type is specified.

+
+
one_only

If the symbol is defined multiple times, the linker issues an error.

+
+
same_size

Duplicates are discarded, but the linker issues an error if any have +different sizes.

+
+
same_contents

Duplicates are discarded, but the linker issues an error if any duplicates +do not have exactly the same content.

+
+
largest

Links the largest section from among the duplicates.

+
+
newest

Links the newest section from among the duplicates.

+
+
+
.section .text$foo
+.linkonce
+  ...
+
+
+
+
+

.section Directive

+

MC supports passing the information in .linkonce at the end of +.section. For example, these two code snippets are equivalent:

+
.section secName, "dr", discard, "Symbol1"
+.globl Symbol1
+Symbol1:
+.long 1
+
+
+
.section secName, "dr"
+.linkonce discard
+.globl Symbol1
+Symbol1:
+.long 1
+
+
+

Note that in the combined form the COMDAT symbol is explicit. This +extension exists to support multiple sections with the same name in +different COMDATs:

+
.section secName, "dr", discard, "Symbol1"
+.globl Symbol1
+Symbol1:
+.long 1
+
+.section secName, "dr", discard, "Symbol2"
+.globl Symbol2
+Symbol2:
+.long 1
+
+
+

In addition to the types allowed with .linkonce, .section also accepts +associative. The meaning is that the section is linked if a certain other +COMDAT section is linked. This other section is indicated by the comdat symbol +in this directive. It can be any symbol defined in the associated section, but +is usually the associated section’s comdat.

+
+

The following restrictions apply to the associated section:

+
    +
  1. It must be a COMDAT section.

  2. +
  3. It cannot be another associative COMDAT section.

  4. +
+
+

In the following example the symbol sym is the comdat symbol of .foo +and .bar is associated to .foo.

+
.section        .foo,"bw",discard, "sym"
+.section        .bar,"rd",associative, "sym"
+
+
+

MC supports these flags in the COFF .section directive:

+
+
    +
  • b: BSS section (IMAGE_SCN_CNT_UNINITIALIZED_DATA)

  • +
  • d: Data section (IMAGE_SCN_CNT_INITIALIZED_DATA)

  • +
  • n: Section is not loaded (IMAGE_SCN_LNK_REMOVE)

  • +
  • r: Read-only

  • +
  • s: Shared section

  • +
  • w: Writable

  • +
  • x: Executable section

  • +
  • y: Not readable

  • +
  • D: Discardable (IMAGE_SCN_MEM_DISCARDABLE)

  • +
+
+

These flags are all compatible with gas, with the exception of the D flag, +which gnu as does not support. For gas compatibility, sections with a name +starting with “.debug” are implicitly discardable.

+
+
+
+

ARM64/COFF-Dependent

+
+

Relocations

+

The following additional symbol variants are supported:

+

:secrel_lo12: generates a relocation that corresponds to the COFF relocation +types IMAGE_REL_ARM64_SECREL_LOW12A or IMAGE_REL_ARM64_SECREL_LOW12L.

+

:secrel_hi12: generates a relocation that corresponds to the COFF relocation +type IMAGE_REL_ARM64_SECREL_HIGH12A.

+
add x0, x0, :secrel_hi12:symbol
+ldr x0, [x0, :secrel_lo12:symbol]
+
+add x1, x1, :secrel_hi12:symbol
+add x1, x1, :secrel_lo12:symbol
+...
+
+
+
+
+
+

ELF-Dependent

+
+

.section Directive

+

In order to support creating multiple sections with the same name and comdat, +it is possible to add a unique number at the end of the .section directive. +For example, the following code creates two sections named .text.

+
.section        .text,"ax",@progbits,unique,1
+nop
+
+.section        .text,"ax",@progbits,unique,2
+nop
+
+
+

The unique number is not present in the resulting object at all. It is just used +in the assembler to differentiate the sections.

+

The ‘o’ flag is mapped to SHF_LINK_ORDER. If it is present, a symbol +must be given that identifies the section to be placed in the +.sh_link.

+
.section .foo,"a",@progbits
+.Ltmp:
+.section .bar,"ao",@progbits,.Ltmp
+
+
+

which is equivalent to just

+
.section .foo,"a",@progbits
+.section .bar,"ao",@progbits,.foo
+
+
+
+
+

.linker-options Section (linker options)

+

In order to support passing linker options from the frontend to the linker, a +special section of type SHT_LLVM_LINKER_OPTIONS (usually named +.linker-options though the name is not significant as it is identified by +the type) is used. The contents of this section are a simple pair-wise encoding of +directives for consideration by the linker. The strings are encoded as standard +null-terminated UTF-8 strings. They are emitted inline to avoid having the +linker traverse the object file for retrieving the value. The linker is +permitted to not honour the option and instead provide a warning/error to the +user that the requested option was not honoured.

+

The section has type SHT_LLVM_LINKER_OPTIONS and has the SHF_EXCLUDE +flag to ensure that the section is treated as opaque by linkers which do not +support the feature and will not be emitted into the final linked binary.

+

This would be equivalent to the following raw assembly:

+
.section ".linker-options","e",@llvm_linker_options
+.asciz "option 1"
+.asciz "value 1"
+.asciz "option 2"
+.asciz "value 2"
+
+
+

The following directives are specified:

+
+
    +
  • lib

    +

    The parameter identifies a library to be linked against. The library will +be looked up in the default and any specified library search paths +(specified to this point).

    +
  • +
  • libpath

    +

    The parameter identifies an additional library search path to be considered +when looking up libraries after the inclusion of this option.

    +
  • +
+
+
+
+

SHT_LLVM_DEPENDENT_LIBRARIES Section (Dependent Libraries)

+

This section contains strings specifying libraries to be added to the link by +the linker.

+

The section should be consumed by the linker and not written to the output.

+

The strings are encoded as standard null-terminated UTF-8 strings.

+

For example:

+
.section ".deplibs","MS",@llvm_dependent_libraries,1
+.asciz "library specifier 1"
+.asciz "library specifier 2"
+
+
+

The interpretation of the library specifiers is defined by the consuming linker.

+
+
+

SHT_LLVM_CALL_GRAPH_PROFILE Section (Call Graph Profile)

+

This section is used to pass a call graph profile to the linker which can be +used to optimize the placement of sections. It contains a sequence of +(from symbol, to symbol, weight) tuples.

+

It shall have a type of SHT_LLVM_CALL_GRAPH_PROFILE (0x6fff4c02), shall +have the SHF_EXCLUDE flag set, the sh_link member shall hold the section +header index of the associated symbol table, and shall have a sh_entsize of +16. It should be named .llvm.call-graph-profile.

+

The contents of the section shall be a sequence of Elf_CGProfile entries.

+
typedef struct {
+  Elf_Word cgp_from;
+  Elf_Word cgp_to;
+  Elf_Xword cgp_weight;
+} Elf_CGProfile;
+
+
+
+
cgp_from

The symbol index of the source of the edge.

+
+
cgp_to

The symbol index of the destination of the edge.

+
+
cgp_weight

The weight of the edge.

+
+
+

This is represented in assembly as:

+
.cg_profile from, to, 42
+
+
+

.cg_profile directives are processed at the end of the file. It is an error +if either from or to are undefined temporary symbols. If either symbol +is a temporary symbol, then the section symbol is used instead. If either +symbol is undefined, then that symbol is defined as if .weak symbol has been +written at the end of the file. This forces the symbol to show up in the symbol +table.

+
+
+

SHT_LLVM_ADDRSIG Section (address-significance table)

+

This section is used to mark symbols as address-significant, i.e. the address +of the symbol is used in a comparison or leaks outside the translation unit. It +has the same meaning as the absence of the LLVM attributes unnamed_addr +and local_unnamed_addr.

+

Any sections referred to by symbols that are not marked as address-significant +in any object file may be safely merged by a linker without breaking the +address uniqueness guarantee provided by the C and C++ language standards.

+

The contents of the section are a sequence of ULEB128-encoded integers +referring to the symbol table indexes of the address-significant symbols.

+

There are two associated assembly directives:

+
.addrsig
+
+
+

This instructs the assembler to emit an address-significance table. Without +this directive, all symbols are considered address-significant.

+
.addrsig_sym sym
+
+
+

This marks sym as address-significant.

+
+
+

SHT_LLVM_SYMPART Section (symbol partition specification)

+

This section is used to mark symbols with the partition that they +belong to. An .llvm_sympart section consists of a null-terminated string +specifying the name of the partition followed by a relocation referring to +the symbol that belongs to the partition. It may be constructed as follows:

+
.section ".llvm_sympart","",@llvm_sympart
+.asciz "libpartition.so"
+.word symbol_in_partition
+
+
+
+
+

SHT_LLVM_BB_ADDR_MAP Section (basic block address map)

+

This section stores the binary address of basic blocks along with other related +metadata. This information can be used to map binary profiles (like perf +profiles) directly to machine basic blocks. +This section is emitted with -basic-block-sections=labels and will contain +a BB address map table for every function which may be constructed as follows:

+
.section  ".llvm_bb_addr_map","",@llvm_bb_addr_map
+.quad     .Lfunc_begin0                 # address of the function
+.byte     2                             # number of basic blocks
+# BB record for BB_0
+ .uleb128  .Lfunc_begin0-.Lfunc_begin0  # BB_0 offset relative to function entry (always zero)
+ .uleb128  .LBB_END0_0-.Lfunc_begin0    # BB_0 size
+ .byte     x                            # BB_0 metadata
+# BB record for BB_1
+ .uleb128  .LBB0_1-.Lfunc_begin0        # BB_1 offset relative to function entry
+ .uleb128  .LBB_END0_1-.Lfunc_begin0    # BB_1 size
+ .byte     y                            # BB_1 metadata
+
+
+

This creates a BB address map table for a function with two basic blocks.

+
+
+
+

CodeView-Dependent

+
+

.cv_file Directive

+
+
Syntax:

.cv_file FileNumber FileName [ checksum ] [ checksumkind ]

+
+
+
+
+

.cv_func_id Directive

+

Introduces a function ID that can be used with .cv_loc.

+
+
Syntax:

.cv_func_id FunctionId

+
+
+
+
+

.cv_inline_site_id Directive

+

Introduces a function ID that can be used with .cv_loc. Includes +inlined at source location information for use in the line table of the +caller, whether the caller is a real function or another inlined call site.

+
+
Syntax:

.cv_inline_site_id FunctionId within Function inlined_at FileNumber Line [ Column ]

+
+
+
+
+

.cv_loc Directive

+

The first number is a file number, must have been previously assigned with a +.file directive, the second number is the line number and optionally the +third number is a column position (zero if not specified). The remaining +optional items are .loc sub-directives.

+
+
Syntax:

.cv_loc FunctionId FileNumber [ Line ] [ Column ] [ prologue_end ] [ is_stmt value ]

+
+
+
+
+

.cv_linetable Directive

+
+
Syntax:

.cv_linetable FunctionId , FunctionStart , FunctionEnd

+
+
+
+
+

.cv_inline_linetable Directive

+
+
Syntax:

.cv_inline_linetable PrimaryFunctionId , FileNumber Line FunctionStart FunctionEnd

+
+
+
+
+

.cv_def_range Directive

+

The GapStart and GapEnd options may be repeated as needed.

+
+
Syntax:

.cv_def_range RangeStart RangeEnd [ GapStart GapEnd ] , bytes

+
+
+
+ + +
+

.cv_filechecksumoffset Directive

+
+
Syntax:

.cv_filechecksumoffset FileNumber

+
+
+
+
+

.cv_fpo_data Directive

+
+
Syntax:

.cv_fpo_data procsym

+
+
+
+
+
+
+

Target Specific Behaviour

+
+

X86

+
+

Relocations

+

@ABS8 can be applied to symbols which appear as immediate operands to +instructions that have an 8-bit immediate form for that operand. It causes +the assembler to use the 8-bit form and an 8-bit relocation (e.g. R_386_8 +or R_X86_64_8) for the symbol.

+

For example:

+
cmpq $foo@ABS8, %rdi
+
+
+

This causes the assembler to select the form of the 64-bit cmpq instruction +that takes an 8-bit immediate operand that is sign extended to 64 bits, as +opposed to cmpq $foo, %rdi which takes a 32-bit immediate operand. This +is also not the same as cmpb $foo, %dil, which is an 8-bit comparison.

+
+
+
+

Windows on ARM

+
+

Stack Probe Emission

+

The reference implementation (Microsoft Visual Studio 2012) emits stack probes +in the following fashion:

+
movw r4, #constant
+bl __chkstk
+sub.w sp, sp, r4
+
+
+

However, this has the limitation of 32 MiB (±16MiB). In order to accommodate +larger binaries, LLVM supports the use of -mcmodel=large to allow a 4GiB +range via a slight deviation. It will generate an indirect jump as follows:

+
movw r4, #constant
+movw r12, :lower16:__chkstk
+movt r12, :upper16:__chkstk
+blx r12
+sub.w sp, sp, r4
+
+
+
+
+

Variable Length Arrays

+

The reference implementation (Microsoft Visual Studio 2012) does not permit the +emission of Variable Length Arrays (VLAs).

+

The Windows ARM Itanium ABI extends the base ABI by adding support for emitting +a dynamic stack allocation. When emitting a variable stack allocation, a call +to __chkstk is emitted unconditionally to ensure that guard pages are set up +properly. The emission of this stack probe is handled similarly to the +standard stack probe emission.

+

The MSVC environment does not emit code for VLAs currently.

+
+
+
+

Windows on ARM64

+
+

Stack Probe Emission

+

The reference implementation (Microsoft Visual Studio 2017) emits stack probes +in the following fashion:

+
mov x15, #constant
+bl __chkstk
+sub sp, sp, x15, lsl #4
+
+
+

However, this has the limitation of 256 MiB (±128MiB). In order to accommodate +larger binaries, LLVM supports the use of -mcmodel=large to allow an 8GiB +(±4GiB) range via a slight deviation. It will generate an indirect jump as +follows:

+
mov x15, #constant
+adrp x16, __chkstk
+add x16, x16, :lo12:__chkstk
+blr x16
+sub sp, sp, x15, lsl #4
+
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/FAQ.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/FAQ.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/FAQ.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/FAQ.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,472 @@ + + + + + + + + + Frequently Asked Questions (FAQ) — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Frequently Asked Questions (FAQ)

+ +
+

License

+
+

Can I modify LLVM source code and redistribute the modified source?

+

Yes. The modified source distribution must retain the copyright notice and +follow the conditions listed in the Apache License v2.0 with LLVM Exceptions.

+
+
+

Can I modify the LLVM source code and redistribute binaries or other tools based on it, without redistributing the source?

+

Yes. This is why we distribute LLVM under a less restrictive license than GPL, +as explained in the first question above.

+
+
+
+

Source Code

+
+

In what language is LLVM written?

+

All of the LLVM tools and libraries are written in C++ with extensive use of +the STL.

+
+
+

How portable is the LLVM source code?

+

The LLVM source code should be portable to most modern Unix-like operating +systems. LLVM also has excellent support on Windows systems. +Most of the code is written in standard C++ with operating system +services abstracted to a support library. The tools required to build and +test LLVM have been ported to a plethora of platforms.

+
+
+

What API do I use to store a value to one of the virtual registers in LLVM IR’s SSA representation?

+

In short: you can’t. It’s actually kind of a silly question once you grok +what’s going on. Basically, in code like:

+
%result = add i32 %foo, %bar
+
+
+

, %result is just a name given to the Value of the add +instruction. In other words, %result is the add instruction. The +“assignment” doesn’t explicitly “store” anything to any “virtual register”; +the “=” is more like the mathematical sense of equality.

+

Longer explanation: In order to generate a textual representation of the +IR, some kind of name has to be given to each instruction so that other +instructions can textually reference it. However, the isomorphic in-memory +representation that you manipulate from C++ has no such restriction since +instructions can simply keep pointers to any other Value’s that they +reference. In fact, the names of dummy numbered temporaries like %1 are +not explicitly represented in the in-memory representation at all (see +Value::getName()).

+
+
+
+

Source Languages

+
+

What source languages are supported?

+

LLVM currently has full support for C and C++ source languages through +Clang. Many other language frontends have +been written using LLVM, and an incomplete list is available at +projects with LLVM.

+
+
+

I’d like to write a self-hosting LLVM compiler. How should I interface with the LLVM middle-end optimizers and back-end code generators?

+

Your compiler front-end will communicate with LLVM by creating a module in the +LLVM intermediate representation (IR) format. Assuming you want to write your +language’s compiler in the language itself (rather than C++), there are 3 +major ways to tackle generating LLVM IR from a front-end:

+
    +
  1. Call into the LLVM libraries code using your language’s FFI (foreign +function interface).

  2. +
+
+
    +
  • for: best tracks changes to the LLVM IR, .ll syntax, and .bc format

  • +
  • for: enables running LLVM optimization passes without an emit/parse +overhead

  • +
  • for: adapts well to a JIT context

  • +
  • against: lots of ugly glue code to write

  • +
+
+
    +
  1. Emit LLVM assembly from your compiler’s native language.

  2. +
+
+
    +
  • for: very straightforward to get started

  • +
  • against: the .ll parser is slower than the bitcode reader when +interfacing to the middle end

  • +
  • against: it may be harder to track changes to the IR

  • +
+
+
    +
  1. Emit LLVM bitcode from your compiler’s native language.

  2. +
+
+
    +
  • for: can use the more-efficient bitcode reader when interfacing to the +middle end

  • +
  • against: you’ll have to re-engineer the LLVM IR object model and bitcode +writer in your language

  • +
  • against: it may be harder to track changes to the IR

  • +
+
+

If you go with the first option, the C bindings in include/llvm-c should help +a lot, since most languages have strong support for interfacing with C. The +most common hurdle with calling C from managed code is interfacing with the +garbage collector. The C interface was designed to require very little memory +management, and so is straightforward in this regard.

+
+
+

What support is there for higher level source language constructs for building a compiler?

+

Currently, there isn’t much. LLVM supports an intermediate representation +which is useful for code representation but will not support the high level +(abstract syntax tree) representation needed by most compilers. There are no +facilities for lexical or semantic analysis.

+
+ +
+
+

Using the C and C++ Front Ends

+
+

Can I compile C or C++ code to platform-independent LLVM bitcode?

+

No. C and C++ are inherently platform-dependent languages. The most obvious +example of this is the preprocessor. A very common way that C code is made +portable is by using the preprocessor to include platform-specific code. In +practice, information about other platforms is lost after preprocessing, so +the result is inherently dependent on the platform that the preprocessing was +targeting.

+

Another example is sizeof. It’s common for sizeof(long) to vary +between platforms. In most C front-ends, sizeof is expanded to a +constant immediately, thus hard-wiring a platform-specific detail.

+

Also, since many platforms define their ABIs in terms of C, and since LLVM is +lower-level than C, front-ends currently must emit platform-specific IR in +order to have the result conform to the platform ABI.

+
+
+
+

Questions about code generated by the demo page

+
+

What is this llvm.global_ctors and _GLOBAL__I_a... stuff that happens when I #include <iostream>?

+

If you #include the <iostream> header into a C++ translation unit, +the file will probably use the std::cin/std::cout/… global objects. +However, C++ does not guarantee an order of initialization between static +objects in different translation units, so if a static ctor/dtor in your .cpp +file used std::cout, for example, the object would not necessarily be +automatically initialized before your use.

+

To make std::cout and friends work correctly in these scenarios, the STL +that we use declares a static object that gets created in every translation +unit that includes <iostream>. This object has a static constructor +and destructor that initializes and destroys the global iostream objects +before they could possibly be used in the file. The code that you see in the +.ll file corresponds to the constructor and destructor registration code.

+

If you would like to make it easier to understand the LLVM code generated +by the compiler in the demo page, consider using printf() instead of +iostreams to print values.

+
+
+

Where did all of my code go??

+

If you are using the LLVM demo page, you may often wonder what happened to +all of the code that you typed in. Remember that the demo script is running +the code through the LLVM optimizers, so if your code doesn’t actually do +anything useful, it might all be deleted.

+

To prevent this, make sure that the code is actually needed. For example, if +you are computing some expression, return the value from the function instead +of leaving it in a local variable. If you really want to constrain the +optimizer, you can read from and assign to volatile global variables.

+
+
+

What is this “undef” thing that shows up in my code?

+

undef is the LLVM way of representing a value that is not defined. You +can get these if you do not initialize a variable before you use it. For +example, the C function:

+
int X() { int i; return i; }
+
+
+

Is compiled to “ret i32 undef” because “i” never has a value specified +for it.

+
+
+

Why does instcombine + simplifycfg turn a call to a function with a mismatched calling convention into “unreachable”? Why not make the verifier reject it?

+

This is a common problem run into by authors of front-ends that are using +custom calling conventions: you need to make sure to set the right calling +convention on both the function and on each call to the function. For +example, this code:

+
define fastcc void @foo() {
+    ret void
+}
+define void @bar() {
+    call void @foo()
+    ret void
+}
+
+
+

Is optimized to:

+
define fastcc void @foo() {
+    ret void
+}
+define void @bar() {
+    unreachable
+}
+
+
+

… with “opt -instcombine -simplifycfg”. This often bites people because +“all their code disappears”. Setting the calling convention on the caller and +callee is required for indirect calls to work, so people often ask why not +make the verifier reject this sort of thing.

+

The answer is that this code has undefined behavior, but it is not illegal. +If we made it illegal, then every transformation that could potentially create +this would have to ensure that it doesn’t, and there is valid code that can +create this sort of construct (in dead code). The sorts of things that can +cause this to happen are fairly contrived, but we still need to accept them. +Here’s an example:

+
define fastcc void @foo() {
+    ret void
+}
+define internal void @bar(void()* %FP, i1 %cond) {
+    br i1 %cond, label %T, label %F
+T:
+    call void %FP()
+    ret void
+F:
+    call fastcc void %FP()
+    ret void
+}
+define void @test() {
+    %X = or i1 false, false
+    call void @bar(void()* @foo, i1 %X)
+    ret void
+}
+
+
+

In this example, “test” always passes @foo/false into bar, which +ensures that it is dynamically called with the right calling conv (thus, the +code is perfectly well defined). If you run this through the inliner, you +get this (the explicit “or” is there so that the inliner doesn’t dead code +eliminate a bunch of stuff):

+
define fastcc void @foo() {
+    ret void
+}
+define void @test() {
+    %X = or i1 false, false
+    br i1 %X, label %T.i, label %F.i
+T.i:
+    call void @foo()
+    br label %bar.exit
+F.i:
+    call fastcc void @foo()
+    br label %bar.exit
+bar.exit:
+    ret void
+}
+
+
+

Here you can see that the inlining pass made an undefined call to @foo +with the wrong calling convention. We really don’t want to make the inliner +have to know about this sort of thing, so it needs to be valid code. In this +case, dead code elimination can trivially remove the undefined code. However, +if %X was an input argument to @test, the inliner would produce this:

+
define fastcc void @foo() {
+    ret void
+}
+
+define void @test(i1 %X) {
+    br i1 %X, label %T.i, label %F.i
+T.i:
+    call void @foo()
+    br label %bar.exit
+F.i:
+    call fastcc void @foo()
+    br label %bar.exit
+bar.exit:
+    ret void
+}
+
+
+

The interesting thing about this is that %X must be false for the +code to be well-defined, but no amount of dead code elimination will be able +to delete the broken call as unreachable. However, since +instcombine/simplifycfg turns the undefined call into unreachable, we +end up with a branch on a condition that goes to unreachable: a branch to +unreachable can never happen, so “-inline -instcombine -simplifycfg” is +able to produce:

+
define fastcc void @foo() {
+   ret void
+}
+define void @test(i1 %X) {
+F.i:
+   call fastcc void @foo()
+   ret void
+}
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/FaultMaps.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/FaultMaps.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/FaultMaps.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/FaultMaps.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,269 @@ + + + + + + + + + FaultMaps and implicit checks — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

FaultMaps and implicit checks

+ +
+

Motivation

+

Code generated by managed language runtimes tends to have checks that +are required for safety but never fail in practice. In such cases, it +is profitable to make the non-failing case cheaper even if it makes +the failing case significantly more expensive. This asymmetry can be +exploited by folding such safety checks into operations that can be +made to fault reliably if the check would have failed, and recovering +from such a fault by using a signal handler.

+

For example, Java requires null checks on objects before they are read +from or written to. If the object is null then a +NullPointerException has to be thrown, interrupting normal +execution. In practice, however, dereferencing a null pointer is +extremely rare in well-behaved Java programs, and typically the null +check can be folded into a nearby memory operation that operates on +the same memory location.

+
+
+

The Fault Map Section

+

Information about implicit checks generated by LLVM is put in a +special “fault map” section. On Darwin this section is named +__llvm_faultmaps.

+

The format of this section is

+
Header {
+  uint8  : Fault Map Version (current version is 1)
+  uint8  : Reserved (expected to be 0)
+  uint16 : Reserved (expected to be 0)
+}
+uint32 : NumFunctions
+FunctionInfo[NumFunctions] {
+  uint64 : FunctionAddress
+  uint32 : NumFaultingPCs
+  uint32 : Reserved (expected to be 0)
+  FunctionFaultInfo[NumFaultingPCs] {
+    uint32  : FaultKind
+    uint32  : FaultingPCOffset
+    uint32  : HandlerPCOffset
+  }
+}
+
+
+

FaultKind describes the reason for the expected fault. Currently three kinds +of faults are supported:

+
+
    +
  1. FaultMaps::FaultingLoad - fault due to load from memory.

  2. +
  3. FaultMaps::FaultingLoadStore - fault due to instruction load and store.

  4. +
  5. FaultMaps::FaultingStore - fault due to store to memory.

  6. +
+
+
+
+

The ImplicitNullChecks pass

+

The ImplicitNullChecks pass transforms explicit control flow for +checking if a pointer is null, like:

+
  %ptr = call i32* @get_ptr()
+  %ptr_is_null = icmp i32* %ptr, null
+  br i1 %ptr_is_null, label %is_null, label %not_null, !make.implicit !0
+
+not_null:
+  %t = load i32, i32* %ptr
+  br label %do_something_with_t
+
+is_null:
+  call void @HFC()
+  unreachable
+
+!0 = !{}
+
+
+

to control flow implicit in the instruction loading or storing through +the pointer being null checked:

+
  %ptr = call i32* @get_ptr()
+  %t = load i32, i32* %ptr  ;; handler-pc = label %is_null
+  br label %do_something_with_t
+
+is_null:
+  call void @HFC()
+  unreachable
+
+
+

This transform happens at the MachineInstr level, not the LLVM IR +level (so the above example is only representative, not literal). The +ImplicitNullChecks pass runs during codegen, if +-enable-implicit-null-checks is passed to llc.

+

The ImplicitNullChecks pass adds entries to the +__llvm_faultmaps section described above as needed.

+
+

make.implicit metadata

+

Making null checks implicit is an aggressive optimization, and it can +be a net performance pessimization if too many memory operations end +up faulting because of it. A language runtime typically needs to +ensure that only a negligible number of implicit null checks actually +fault once the application has reached a steady state. A standard way +of doing this is by healing failed implicit null checks into explicit +null checks via code patching or recompilation. It follows that there +are two requirements an explicit null check needs to satisfy for it to +be profitable to convert it to an implicit null check:

+
+
    +
  1. The case where the pointer is actually null (i.e. the “failing” +case) is extremely rare.

  2. +
  3. The failing path heals the implicit null check into an explicit +null check so that the application does not repeatedly page +fault.

  4. +
+
+

The frontend is expected to mark branches that satisfy (1) and (2) +using a !make.implicit metadata node (the actual content of the +metadata node is ignored). Only branches that are marked with +!make.implicit metadata are considered as candidates for +conversion into implicit null checks.

+

(Note that while we could deal with (1) using profiling data, dealing +with (2) requires some information not present in branch profiles.)

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Frontend/PerformanceTips.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Frontend/PerformanceTips.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Frontend/PerformanceTips.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Frontend/PerformanceTips.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,420 @@ + + + + + + + + + Performance Tips for Frontend Authors — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Performance Tips for Frontend Authors

+ +
+

Abstract

+

The intended audience of this document is developers of language frontends +targeting LLVM IR. This document is home to a collection of tips on how to +generate IR that optimizes well.

+
+
+

IR Best Practices

+

As with any optimizer, LLVM has its strengths and weaknesses. In some cases, +surprisingly small changes in the source IR can have a large effect on the +generated code.

+

Beyond the specific items on the list below, it’s worth noting that the most +mature frontend for LLVM is Clang. As a result, the further your IR gets from +what Clang might emit, the less likely it is to be effectively optimized. It +can often be useful to write a quick C program with the semantics you’re trying +to model and see what decisions Clang’s IRGen makes about what IR to emit. +Studying Clang’s CodeGen directory can also be a good source of ideas. Note +that Clang and LLVM are explicitly version locked so you’ll need to make sure +you’re using a Clang built from the same git revision or release as the LLVM +library you’re using. As always, it’s strongly recommended that you track +tip of tree development, particularly during bring up of a new project.

+
+

The Basics

+
    +
  1. Make sure that your Modules contain both a data layout specification and +target triple. Without these pieces, none of the target specific optimizations +will be enabled. This can have a major effect on the generated code quality.

  2. +
  3. For each function or global emitted, use the most private linkage type +possible (private, internal or linkonce_odr preferably). Doing so will +make LLVM’s inter-procedural optimizations much more effective.

  4. +
  5. Avoid high in-degree basic blocks (e.g. basic blocks with dozens or hundreds +of predecessors). Among other issues, the register allocator is known to +perform badly when confronted with such structures. The only exception to +this guidance is that a unified return block with high in-degree is fine.

  6. +
+
+
+

Use of allocas

+

An alloca instruction can be used to represent a function scoped stack slot, +but can also represent dynamic frame expansion. When representing function +scoped variables or locations, placing alloca instructions at the beginning of +the entry block should be preferred. In particular, place them before any +call instructions. Call instructions might get inlined and replaced with +multiple basic blocks. The end result is that a following alloca instruction +would no longer be in the entry basic block afterward.

+

The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt +to eliminate alloca instructions that are in the entry basic block. Given that +SSA is the canonical form expected by much of the optimizer, if allocas can +not be eliminated by Mem2Reg or SROA, the optimizer is likely to be less +effective than it could be.

+
+
+

Avoid loads and stores of large aggregate type

+

LLVM currently does not optimize loads and stores of large aggregate +types (i.e. structs and arrays) well. As an alternative, consider +loading individual fields from memory.

+

Aggregates that are smaller than the largest (performant) load or store +instruction supported by the targeted hardware are well supported. These can +be an effective way to represent collections of small packed fields.

+
+ +
+

Zext GEP indices to machine register width

+

Internally, LLVM often promotes the width of GEP indices to machine register +width. When it does so, it will default to using sign extension (sext) +operations for safety. If your source language provides information about +the range of the index, you may wish to manually extend indices to machine +register width using a zext instruction.

+
+
+

When to specify alignment

+

LLVM will always generate correct code if you don’t specify alignment, but may +generate inefficient code. For example, if you are targeting MIPS (or older +ARM ISAs) then the hardware does not handle unaligned loads and stores, and +so you will enter a trap-and-emulate path if you do a load or store with +lower-than-natural alignment. To avoid this, LLVM will emit a slower +sequence of loads, shifts and masks (or load-right + load-left on MIPS) for +all cases where the load / store does not have a sufficiently high alignment +in the IR.

+

The alignment is used to guarantee the alignment on allocas and globals, +though in most cases this is unnecessary (most targets have a sufficiently +high default alignment that they’ll be fine). It is also used to provide a +contract to the back end saying ‘either this load/store has this alignment, or +it is undefined behavior’. This means that the back end is free to emit +instructions that rely on that alignment (and mid-level optimizers are free to +perform transforms that require that alignment). For x86, it doesn’t make +much difference, as almost all instructions are alignment-independent. For +MIPS, it can make a big difference.

+

Note that if your loads and stores are atomic, the backend will be unable to +lower an under aligned access into a sequence of natively aligned accesses. +As a result, alignment is mandatory for atomic loads and stores.

+
+
+

Other Things to Consider

+
    +
  1. Use ptrtoint/inttoptr sparingly (they interfere with pointer aliasing +analysis), prefer GEPs

  2. +
  3. Prefer globals over inttoptr of a constant address - this gives you +dereferenceability information. In MCJIT, use getSymbolAddress to provide +the actual address.

  4. +
  5. Be wary of ordered and atomic memory operations. They are hard to optimize +and may not be well optimized by the current optimizer. Depending on your +source language, you may consider using fences instead.

  6. +
  7. If calling a function which is known to throw an exception (unwind), use +an invoke with a normal destination which contains an unreachable +instruction. This form conveys to the optimizer that the call returns +abnormally. For an invoke which neither returns normally nor requires unwind +code in the current function, you can use a noreturn call instruction if +desired. This is generally not required because the optimizer will convert +an invoke with an unreachable unwind destination to a call instruction.

  8. +
  9. Use profile metadata to indicate statically known cold paths, even if +dynamic profiling information is not available. This can make a large +difference in code placement and thus the performance of tight loops.

  10. +
  11. When generating code for loops, try to avoid terminating the header block of +the loop earlier than necessary. If the terminator of the loop header +block is a loop exiting conditional branch, the effectiveness of LICM will +be limited for loads not in the header. (This is due to the fact that LLVM +may not know such a load is safe to speculatively execute and thus can’t +lift an otherwise loop invariant load unless it can prove the exiting +condition is not taken.) It can be profitable, in some cases, to emit such +instructions into the header even if they are not used along a rarely +executed path that exits the loop. This guidance specifically does not +apply if the condition which terminates the loop header is itself invariant, +or can be easily discharged by inspecting the loop index variables.

  12. +
  13. In hot loops, consider duplicating instructions from small basic blocks +which end in highly predictable terminators into their successor blocks. +If a hot successor block contains instructions which can be vectorized +with the duplicated ones, this can provide a noticeable throughput +improvement. Note that this is not always profitable and does involve a +potentially large increase in code size.

  14. +
  15. When checking a value against a constant, emit the check using a consistent +comparison type. The GVN pass will optimize redundant equalities even if +the type of comparison is inverted, but GVN only runs late in the pipeline. +As a result, you may miss the opportunity to run other important +optimizations. Improvements to EarlyCSE to remove this issue are tracked in +Bug 23333.

  16. +
  17. Avoid using arithmetic intrinsics unless you are required by your source +language specification to emit a particular code sequence. The optimizer +is quite good at reasoning about general control flow and arithmetic, it is +not anywhere near as strong at reasoning about the various intrinsics. If +profitable for code generation purposes, the optimizer will likely form the +intrinsics itself late in the optimization pipeline. It is very rarely +profitable to emit these directly in the language frontend. This item +explicitly includes the use of the overflow intrinsics.

  18. +
  19. Avoid using the assume intrinsic until you’ve +established that a) there’s no other way to express the given fact and b) +that fact is critical for optimization purposes. Assumes are a great +prototyping mechanism, but they can have negative effects on both compile +time and optimization effectiveness. The former is fixable with enough +effort, but the latter is fairly fundamental to their designed purpose.

  20. +
+
+
+
+

Describing Language Specific Properties

+

When translating a source language to LLVM, finding ways to express concepts +and guarantees available in your source language which are not natively +provided by LLVM IR will greatly improve LLVM’s ability to optimize your code. +As an example, C/C++’s ability to mark every add as “no signed wrap (nsw)” goes +a long way to assisting the optimizer in reasoning about loop induction +variables and thus generating more optimal code for loops.

+

The LLVM LangRef includes a number of mechanisms for annotating the IR with +additional semantic information. It is strongly recommended that you become +highly familiar with this document. The list below is intended to highlight a +couple of items of particular interest, but is by no means exhaustive.

+
+

Restricted Operation Semantics

+
    +
  1. Add nsw/nuw flags as appropriate. Reasoning about overflow is +generally hard for an optimizer so providing these facts from the frontend +can be very impactful.

  2. +
  3. Use fast-math flags on floating point operations if legal. If you don’t +need strict IEEE floating point semantics, there are a number of additional +optimizations that can be performed. This can be highly impactful for +floating point intensive computations.

  4. +
+
+
+

Describing Aliasing Properties

+
    +
  1. Add noalias/align/dereferenceable/nonnull to function arguments and return +values as appropriate

  2. +
  3. Use pointer aliasing metadata, especially tbaa metadata, to communicate +otherwise-non-deducible pointer aliasing facts

  4. +
  5. Use inbounds on geps. This can help to disambiguate some aliasing queries.

  6. +
+
+
+

Modeling Memory Effects

+
    +
  1. Mark functions as readnone/readonly/argmemonly or noreturn/nounwind when +known. The optimizer will try to infer these flags, but may not always be +able to. Manual annotations are particularly important for external +functions that the optimizer can not analyze.

  2. +
  3. Use the lifetime.start/lifetime.end and invariant.start/invariant.end +intrinsics where possible. Common profitable uses are for stack-like data +structures (thus allowing dead store elimination) and for describing +lifetimes of allocas (thus allowing smaller stack sizes).

  4. +
  5. Mark invariant locations using !invariant.load and TBAA’s constant flags

  6. +
+
+
+

Pass Ordering

+

One of the most common mistakes made by new language frontend projects is to +use the existing -O2 or -O3 pass pipelines as is. These pass pipelines make a +good starting point for an optimizing compiler for any language, but they have +been carefully tuned for C and C++, not your target language. You will almost +certainly need to use a custom pass order to achieve optimal performance. A +couple specific suggestions:

+
    +
  1. For languages with numerous rarely executed guard conditions (e.g. null +checks, type checks, range checks) consider adding an extra execution or +two of LoopUnswitch and LICM to your pass order. The standard pass order, +which is tuned for C and C++ applications, may not be sufficient to remove +all dischargeable checks from loops.

  2. +
  3. If your language uses range checks, consider using the IRCE pass. It is not +currently part of the standard pass order.

  4. +
  5. A useful sanity check to run is to run your optimized IR back through the +-O2 pipeline again. If you see noticeable improvement in the resulting IR, +you likely need to adjust your pass order.

  6. +
+
+
+

I Still Can’t Find What I’m Looking For

+

If you didn’t find what you were looking for above, consider proposing a piece +of metadata which provides the optimization hint you need. Such extensions are +relatively common and are generally well received by the community. You will +need to ensure that your proposal is sufficiently general so that it benefits +others if you wish to contribute it upstream.

+

You should also consider describing the problem you’re facing on llvm-dev and asking for advice. +It’s entirely possible someone has encountered your problem before and can +give good advice. If there are multiple interested parties, that also +increases the chances that a metadata extension would be well received by the +community as a whole.

+
+
+
+

Adding to this document

+

If you run across a case that you feel deserves to be covered here, please send +a patch to llvm-commits for review.

+

If you have questions on these items, please direct them to llvm-dev. The more relevant +context you are able to give to your question, the more likely it is to be +answered.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/FuzzingLLVM.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/FuzzingLLVM.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/FuzzingLLVM.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/FuzzingLLVM.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,369 @@ + + + + + + + + + Fuzzing LLVM libraries and tools — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Fuzzing LLVM libraries and tools

+ +
+

Introduction

+

The LLVM tree includes a number of fuzzers for various components. These are +built on top of LibFuzzer. In order to build and run these +fuzzers, see Configuring LLVM to Build Fuzzers.

+
+
+

Available Fuzzers

+
+

clang-fuzzer

+

A generic fuzzer that tries to compile textual input as C++ code. Some of the +bugs this fuzzer has reported are on bugzilla and on OSS Fuzz’s +tracker.

+
+
+

clang-proto-fuzzer

+

A libprotobuf-mutator based fuzzer that compiles valid C++ programs generated from a protobuf +class that describes a subset of the C++ language.

+

This fuzzer accepts clang command line options after ignore_remaining_args=1. +For example, the following command will fuzz clang with a higher optimization +level:

+
% bin/clang-proto-fuzzer <corpus-dir> -ignore_remaining_args=1 -O3
+
+
+
+
+

clang-format-fuzzer

+

A generic fuzzer that runs clang-format on C++ text fragments. Some of the +bugs this fuzzer has reported are on bugzilla +and on OSS Fuzz’s tracker.

+
+
+

llvm-as-fuzzer

+

A generic fuzzer that tries to parse text as LLVM assembly. +Some of the bugs this fuzzer has reported are on bugzilla.

+
+
+

llvm-dwarfdump-fuzzer

+

A generic fuzzer that interprets inputs as object files and runs +llvm-dwarfdump on them. Some of the bugs +this fuzzer has reported are on OSS Fuzz’s tracker.

+
+
+

llvm-demangle-fuzzer

+

A generic fuzzer for the Itanium demangler used in various LLVM tools. We’ve +fuzzed __cxa_demangle to death, why not fuzz LLVM’s implementation of the same +function!

+
+
+

llvm-isel-fuzzer

+

A structured LLVM IR fuzzer aimed at finding bugs in instruction selection.

+

This fuzzer accepts flags after ignore_remaining_args=1. The flags match +those of llc and the triple is required. For example, +the following command would fuzz AArch64 with Global Instruction Selection:

+
% bin/llvm-isel-fuzzer <corpus-dir> -ignore_remaining_args=1 -mtriple aarch64 -global-isel -O0
+
+
+

Some flags can also be specified in the binary name itself in order to support +OSS Fuzz, which has trouble with required arguments. To do this, you can copy +or move llvm-isel-fuzzer to llvm-isel-fuzzer--x-y-z, separating options +from the binary name using “--”. The valid options are architecture names +(aarch64, x86_64), optimization levels (O0, O2), or specific +keywords, like gisel for enabling global instruction selection. In this +mode, the same example could be run like so:

+
% bin/llvm-isel-fuzzer--aarch64-O0-gisel <corpus-dir>
+
+
+
+
+

llvm-opt-fuzzer

+

A structured LLVM IR fuzzer aimed at finding bugs in optimization passes.

+

It receives an optimization pipeline and runs it for each fuzzer input.

+

The interface of this fuzzer almost directly mirrors llvm-isel-fuzzer. Both +mtriple and passes arguments are required. Passes are specified in a +format suitable for the new pass manager. You can find some documentation about +this format in the doxygen for PassBuilder::parsePassPipeline.

+
% bin/llvm-opt-fuzzer <corpus-dir> -ignore_remaining_args=1 -mtriple x86_64 -passes instcombine
+
+
+

Similarly to llvm-isel-fuzzer, arguments in some predefined configurations +might be embedded directly into the binary file name:

+
% bin/llvm-opt-fuzzer--x86_64-instcombine <corpus-dir>
+
+
+
+
+

llvm-mc-assemble-fuzzer

+

A generic fuzzer that fuzzes the MC layer’s assemblers by treating inputs as +target specific assembly.

+

Note that this fuzzer has an unusual command line interface which is not fully +compatible with all of libFuzzer’s features. Fuzzer arguments must be passed +after --fuzzer-args, and any llc flags must use two dashes. For +example, to fuzz the AArch64 assembler you might use the following command:

+
llvm-mc-fuzzer --triple=aarch64-linux-gnu --fuzzer-args -max_len=4
+
+
+

This scheme will likely change in the future.

+
+
+

llvm-mc-disassemble-fuzzer

+

A generic fuzzer that fuzzes the MC layer’s disassemblers by treating inputs +as assembled binary data.

+

Note that this fuzzer has an unusual command line interface which is not fully +compatible with all of libFuzzer’s features. See the notes above about +llvm-mc-assemble-fuzzer for details.

+
+
+
+

Mutators and Input Generators

+

The inputs for a fuzz target are generated via random mutations of a +corpus. There are a few options for the kinds of +mutations that a fuzzer in LLVM might want.

+
+

Generic Random Fuzzing

+

The most basic form of input mutation is to use the built in mutators of +LibFuzzer. These simply treat the input corpus as a bag of bits and make random +mutations. This type of fuzzer is good for stressing the surface layers of a +program, and is good at testing things like lexers, parsers, or binary +protocols.

+

Some of the in-tree fuzzers that use this type of mutator are clang-fuzzer, +clang-format-fuzzer, llvm-as-fuzzer, llvm-dwarfdump-fuzzer, +llvm-mc-assemble-fuzzer, and llvm-mc-disassemble-fuzzer.

+
+
+

Structured Fuzzing using libprotobuf-mutator

+

We can use libprotobuf-mutator in order to perform structured fuzzing and +stress deeper layers of programs. This works by defining a protobuf class that +translates arbitrary data into structurally interesting input. Specifically, we +use this to work with a subset of the C++ language and perform mutations that +produce valid C++ programs in order to exercise parts of clang that are more +interesting than parser error handling.

+

To build this kind of fuzzer you need protobuf and its dependencies +installed, and you need to specify some extra flags when configuring the build +with CMake. For example, clang-proto-fuzzer can be enabled by +adding -DCLANG_ENABLE_PROTO_FUZZER=ON to the flags described in +Configuring LLVM to Build Fuzzers.

+

The only in-tree fuzzer that uses libprotobuf-mutator today is +clang-proto-fuzzer.

+
+
+

Structured Fuzzing of LLVM IR

+

We also use a more direct form of structured fuzzing for fuzzers that take +LLVM IR as input. This is achieved through the FuzzMutate +library, which was discussed at EuroLLVM 2017.

+

The FuzzMutate library is used to structurally fuzz backends in +llvm-isel-fuzzer.

+
+
+
+

Building and Running

+
+

Configuring LLVM to Build Fuzzers

+

Fuzzers will be built and linked to libFuzzer by default as long as you build +LLVM with sanitizer coverage enabled. You would typically also enable at least +one sanitizer to find bugs faster. The most common way to build the fuzzers is +by adding the following two flags to your CMake invocation: +-DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=On.

+
+

Note

+

If you have compiler-rt checked out in an LLVM tree when building +with sanitizers, you’ll want to specify -DLLVM_BUILD_RUNTIME=Off +to avoid building the sanitizers themselves with sanitizers enabled.

+
+
+

Note

+

You may run into issues if you build with BFD ld, which is the +default linker on many unix systems. These issues are being tracked +in https://llvm.org/PR34636.

+
+
+
+

Continuously Running and Finding Bugs

+

There used to be a public buildbot running LLVM fuzzers continuously, and while +this did find issues, it didn’t have a very good way to report problems in an +actionable way. Because of this, we’re moving towards using OSS Fuzz more +instead.

+

You can browse the LLVM project issue list for the bugs found by +LLVM on OSS Fuzz. These are also mailed to the llvm-bugs mailing +list.

+
+
+
+

Utilities for Writing Fuzzers

+

There are some utilities available for writing fuzzers in LLVM.

+

Some helpers for handling the command line interface are available in +include/llvm/FuzzMutate/FuzzerCLI.h, including functions to parse command +line options in a consistent way and to implement standalone main functions so +your fuzzer can be built and tested when not built against libFuzzer.

+

There is also some handling of the CMake config for fuzzers, where you should +use add_llvm_fuzzer to set up fuzzer targets. This function works +similarly to functions such as add_llvm_tool, but it takes care of linking +to LibFuzzer when appropriate and can be passed the DUMMY_MAIN argument to +enable standalone testing.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GarbageCollection.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GarbageCollection.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GarbageCollection.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GarbageCollection.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,1216 @@ + + + + + + + + + Garbage Collection with LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Garbage Collection with LLVM

+ +
+

Abstract

+

This document covers how to integrate LLVM into a compiler for a language which +supports garbage collection. Note that LLVM itself does not provide a +garbage collector. You must provide your own.

+
+
+

Quick Start

+

First, you should pick a collector strategy. LLVM includes a number of built +in ones, but you can also implement a loadable plugin with a custom definition. +Note that the collector strategy is a description of how LLVM should generate +code such that it interacts with your collector and runtime, not a description +of the collector itself.

+

Next, mark your generated functions as using your chosen collector strategy. +From c++, you can call:

+
F.setGC(<collector description name>);
+
+
+

This will produce IR like the following fragment:

+
define void @foo() gc "<collector description name>" { ... }
+
+
+

When generating LLVM IR for your functions, you will need to:

+
    +
  • Use @llvm.gcread and/or @llvm.gcwrite in place of standard load and +store instructions. These intrinsics are used to represent load and store +barriers. If your collector does not require such barriers, you can skip +this step.

  • +
  • Use the memory allocation routines provided by your garbage collector’s +runtime library.

  • +
  • If your collector requires them, generate type maps according to your +runtime’s binary interface. LLVM is not involved in the process. In +particular, the LLVM type system is not suitable for conveying such +information through the compiler.

  • +
  • Insert any coordination code required for interacting with your collector. +Many collectors require running application code to periodically check a +flag and conditionally call a runtime function. This is often referred to +as a safepoint poll.

  • +
+

You will need to identify roots (i.e. references to heap objects your collector +needs to know about) in your generated IR, so that LLVM can encode them into +your final stack maps. Depending on the collector strategy chosen, this is +accomplished by using either the @llvm.gcroot intrinsics or a +gc.statepoint relocation sequence.

+

Don’t forget to create a root for each intermediate value that is generated when +evaluating an expression. In h(f(), g()), the result of f() could +easily be collected if evaluating g() triggers a collection.

+

Finally, you need to link your runtime library with the generated program +executable (for a static compiler) or ensure the appropriate symbols are +available for the runtime linker (for a JIT compiler).

+
+
+

Introduction

+
+

What is Garbage Collection?

+

Garbage collection is a widely used technique that frees the programmer from +having to know the lifetimes of heap objects, making software easier to produce +and maintain. Many programming languages rely on garbage collection for +automatic memory management. There are two primary forms of garbage collection: +conservative and accurate.

+

Conservative garbage collection often does not require any special support from +either the language or the compiler: it can handle non-type-safe programming +languages (such as C/C++) and does not require any special information from the +compiler. The Boehm collector is an example of a +state-of-the-art conservative collector.

+

Accurate garbage collection requires the ability to identify all pointers in the +program at run-time (which requires that the source-language be type-safe in +most cases). Identifying pointers at run-time requires compiler support to +locate all places that hold live pointer variables at run-time, including the +processor stack and registers.

+

Conservative garbage collection is attractive because it does not require any +special compiler support, but it does have problems. In particular, because the +conservative garbage collector cannot know that a particular word in the +machine is a pointer, it cannot move live objects in the heap (preventing the +use of compacting and generational GC algorithms) and it can occasionally suffer +from memory leaks due to integer values that happen to point to objects in the +program. In addition, some aggressive compiler transformations can break +conservative garbage collectors (though these seem rare in practice).

+

Accurate garbage collectors do not suffer from any of these problems, but they +can suffer from degraded scalar optimization of the program. In particular, +because the runtime must be able to identify and update all pointers active in +the program, some optimizations are less effective. In practice, however, the +locality and performance benefits of using aggressive garbage collection +techniques dominate any low-level losses.

+

This document describes the mechanisms and interfaces provided by LLVM to +support accurate garbage collection.

+
+
+

Goals and non-goals

+

LLVM’s intermediate representation provides garbage collection intrinsics that offer support for a broad class of collector models. For +instance, the intrinsics permit:

+
    +
  • semi-space collectors

  • +
  • mark-sweep collectors

  • +
  • generational collectors

  • +
  • incremental collectors

  • +
  • concurrent collectors

  • +
  • cooperative collectors

  • +
  • reference counting

  • +
+

We hope that the support built into the LLVM IR is sufficient to support a +broad class of garbage collected languages including Scheme, ML, Java, C#, +Perl, Python, Lua, Ruby, other scripting languages, and more.

+

Note that LLVM does not itself provide a garbage collector — this should +be part of your language’s runtime library. LLVM provides a framework for +describing the garbage collector’s requirements to the compiler. In particular, +LLVM provides support for generating stack maps at call sites, polling for a +safepoint, and emitting load and store barriers. You can also extend LLVM - +possibly through a loadable code generation plugin - to +generate code and data structures which conform to the binary interface +specified by the runtime library. This is similar to the relationship between +LLVM and DWARF debugging info, for example. The difference primarily lies in +the lack of an established standard in the domain of garbage collection — thus +the need for a flexible extension mechanism.

+

The aspects of the binary interface with which LLVM’s GC support is +concerned are:

+
    +
  • Creation of GC safepoints within code where collection is allowed to execute +safely.

  • +
  • Computation of the stack map. For each safe point in the code, object +references within the stack frame must be identified so that the collector may +traverse and perhaps update them.

  • +
  • Write barriers when storing object references to the heap. These are commonly +used to optimize incremental scans in generational collectors.

  • +
  • Emission of read barriers when loading object references. These are useful +for interoperating with concurrent collectors.

  • +
+

There are additional areas that LLVM does not directly address:

+
    +
  • Registration of global roots with the runtime.

  • +
  • Registration of stack map entries with the runtime.

  • +
  • The functions used by the program to allocate memory, trigger a collection, +etc.

  • +
  • Computation or compilation of type maps, or registration of them with the +runtime. These are used to crawl the heap for object references.

  • +
+

In general, LLVM’s support for GC does not include features which can be +adequately addressed with other features of the IR and does not specify a +particular binary interface. On the plus side, this means that you should be +able to integrate LLVM with an existing runtime. On the other hand, it can +have the effect of leaving a lot of work for the developer of a novel +language. We try to mitigate this by providing built in collector strategy +descriptions that can work with many common collector designs and easy +extension points. If you don’t already have a specific binary interface +you need to support, we recommend trying to use one of these built in collector +strategies.

+
+
+
+

LLVM IR Features

+

This section describes the garbage collection facilities provided by the +LLVM intermediate representation. The exact behavior of these +IR features is specified by the selected GC strategy description.

+
+

Specifying GC code generation: gc "..."

+
define <returntype> @name(...) gc "name" { ... }
+
+
+

The gc function attribute is used to specify the desired GC strategy to the +compiler. Its programmatic equivalent is the setGC method of Function.

+

Setting gc "name" on a function triggers a search for a matching subclass +of GCStrategy. Some collector strategies are built in. You can add others +using either the loadable plugin mechanism, or by patching your copy of LLVM. +It is the selected GC strategy which defines the exact nature of the code +generated to support GC. If none is found, the compiler will raise an error.

+

Specifying the GC style on a per-function basis allows LLVM to link together +programs that use different garbage collection algorithms (or none at all).

+
+
+

Identifying GC roots on the stack

+

LLVM currently supports two different mechanisms for describing references in +compiled code at safepoints. llvm.gcroot is the older mechanism; +gc.statepoint has been added more recently. At the moment, you can choose +either implementation (on a per GC strategy basis). Longer +term, we will probably either migrate away from llvm.gcroot entirely, or +substantially merge their implementations. Note that most new development +work is focused on gc.statepoint.

+
+

Using gc.statepoint

+

This page contains detailed documentation for +gc.statepoint.

+
+
+

Using llvm.gcroot

+
void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
+
+
+

The llvm.gcroot intrinsic is used to inform LLVM that a stack variable +references an object on the heap and is to be tracked for garbage collection. +The exact impact on generated code is specified by the Function’s selected +GC strategy. All calls to llvm.gcroot must reside +inside the first basic block.

+

The first argument must be a value referring to an alloca instruction or a +bitcast of an alloca. The second contains a pointer to metadata that should be +associated with the pointer, and must be a constant or global value +address. If your target collector uses tags, use a null pointer for metadata.

+

A compiler which performs manual SSA construction must ensure that SSA +values representing GC references are stored into the alloca passed to the +respective gcroot before every call site and reloaded after every call. +A compiler which uses mem2reg to raise imperative code using alloca into +SSA form need only add a call to @llvm.gcroot for those variables which +are pointers into the GC heap.

+

It is also important to mark intermediate values with llvm.gcroot. For +example, consider h(f(), g()). Beware leaking the result of f() in the +case that g() triggers a collection. Note that stack variables must be +initialized and marked with llvm.gcroot in the function’s prologue.

+

The %metadata argument can be used to avoid requiring heap objects to have +‘isa’ pointers or tag bits. [Appel89, Goldberg91, Tolmach94] If specified, +its value will be tracked along with the location of the pointer in the stack +frame.

+

Consider the following fragment of Java code:

+
{
+  Object X;   // A null-initialized reference to an object
+  ...
+}
+
+
+

This block (which may be located in the middle of a function or in a loop nest), +could be compiled to this LLVM code:

+
Entry:
+   ;; In the entry block for the function, allocate the
+   ;; stack space for X, which is an LLVM pointer.
+   %X = alloca %Object*
+
+   ;; Tell LLVM that the stack space is a stack root.
+   ;; Java has type-tags on objects, so we pass null as metadata.
+   %tmp = bitcast %Object** %X to i8**
+   call void @llvm.gcroot(i8** %tmp, i8* null)
+   ...
+
+   ;; "CodeBlock" is the block corresponding to the start
+   ;;  of the scope above.
+CodeBlock:
+   ;; Java null-initializes pointers.
+   store %Object* null, %Object** %X
+
+   ...
+
+   ;; As the pointer goes out of scope, store a null value into
+   ;; it, to indicate that the value is no longer live.
+   store %Object* null, %Object** %X
+   ...
+
+
+
+
+
+

Reading and writing references in the heap

+

Some collectors need to be informed when the mutator (the program that needs +garbage collection) either reads a pointer from or writes a pointer to a field +of a heap object. The code fragments inserted at these points are called read +barriers and write barriers, respectively. The amount of code that needs to +be executed is usually quite small and not on the critical path of any +computation, so the overall performance impact of the barrier is tolerable.

+

Barriers often require access to the object pointer rather than the derived +pointer (which is a pointer to the field within the object). Accordingly, +these intrinsics take both pointers as separate arguments for completeness. In +this snippet, %object is the object pointer, and %derived is the derived +pointer:

+
;; An array type.
+%class.Array = type { %class.Object, i32, [0 x %class.Object*] }
+...
+
+;; Load the object pointer from a gcroot.
+%object = load %class.Array** %object_addr
+
+;; Compute the derived pointer.
+%derived = getelementptr %object, i32 0, i32 2, i32 %n
+
+
+

LLVM does not enforce this relationship between the object and derived pointer +(although a particular collector strategy might). However, it +would be an unusual collector that violated it.

+

The use of these intrinsics is naturally optional if the target GC does not +require the corresponding barrier. The GC strategy used with such a collector +should replace the intrinsic calls with the corresponding load or +store instruction if they are used.

+

One known deficiency with the current design is that the barrier intrinsics do +not include the size or alignment of the underlying operation performed. It is +currently assumed that the operation is of pointer size and the alignment is +assumed to be the target machine’s default alignment.

+
+

Write barrier: llvm.gcwrite

+
void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
+
+
+

For write barriers, LLVM provides the llvm.gcwrite intrinsic function. It +has exactly the same semantics as a non-volatile store to the derived +pointer (the third argument). The exact code generated is specified by the +Function’s selected GC strategy.

+

Many important algorithms require write barriers, including generational and +concurrent collectors. Additionally, write barriers could be used to implement +reference counting.

+
+
+

Read barrier: llvm.gcread

+
i8* @llvm.gcread(i8* %object, i8** %derived)
+
+
+

For read barriers, LLVM provides the llvm.gcread intrinsic function. It has +exactly the same semantics as a non-volatile load from the derived pointer +(the second argument). The exact code generated is specified by the Function’s +selected GC strategy.

+

Read barriers are needed by fewer algorithms than write barriers, and may have a +greater performance impact since pointer reads are more frequent than writes.

+
+
+
+
+

Built In GC Strategies

+

LLVM includes built in support for several varieties of garbage collectors.

+
+

The Shadow Stack GC

+

To use this collector strategy, mark your functions with:

+
F.setGC("shadow-stack");
+
+
+

Unlike many GC algorithms which rely on a cooperative code generator to compile +stack maps, this algorithm carefully maintains a linked list of stack roots +[Henderson2002]. This so-called “shadow stack” mirrors the +machine stack. Maintaining this data structure is slower than using a stack map +compiled into the executable as constant data, but has a significant portability +advantage because it requires no special support from the target code generator, +and does not require tricky platform-specific code to crawl the machine stack.

+

The tradeoff for this simplicity and portability is:

+
    +
  • High overhead per function call.

  • +
  • Not thread-safe.

  • +
+

Still, it’s an easy way to get started. After your compiler and runtime are up +and running, writing a plugin will allow you to take advantage +of more advanced GC features of LLVM in order to +improve performance.

+

The shadow stack doesn’t imply a memory allocation algorithm. A semispace +collector or building atop malloc are great places to start, and can be +implemented with very little code.

+

When it comes time to collect, however, your runtime needs to traverse the stack +roots, and for this it needs to integrate with the shadow stack. Luckily, doing +so is very simple. (This code is heavily commented to help you understand the +data structure, but there are only 20 lines of meaningful code.)

+
/// The map for a single function's stack frame.  One of these is
+///        compiled as constant data into the executable for each function.
+///
+/// Storage of metadata values is elided if the %metadata parameter to
+/// @llvm.gcroot is null.
+struct FrameMap {
+  int32_t NumRoots;    //< Number of roots in stack frame.
+  int32_t NumMeta;     //< Number of metadata entries.  May be < NumRoots.
+  const void *Meta[0]; //< Metadata for each root.
+};
+
+/// A link in the dynamic shadow stack.  One of these is embedded in
+///        the stack frame of each function on the call stack.
+struct StackEntry {
+  StackEntry *Next;    //< Link to next stack entry (the caller's).
+  const FrameMap *Map; //< Pointer to constant FrameMap.
+  void *Roots[0];      //< Stack roots (in-place array).
+};
+
+/// The head of the singly-linked list of StackEntries.  Functions push
+///        and pop onto this in their prologue and epilogue.
+///
+/// Since there is only a global list, this technique is not threadsafe.
+StackEntry *llvm_gc_root_chain;
+
+/// Calls Visitor(root, meta) for each GC root on the stack.
+///        root and meta are exactly the values passed to
+///        @llvm.gcroot.
+///
+/// Visitor could be a function to recursively mark live objects.  Or it
+/// might copy them to another heap or generation.
+///
+/// @param Visitor A function to invoke for every GC root on the stack.
+void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
+  for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
+    unsigned i = 0;
+
+    // For roots [0, NumMeta), the metadata pointer is in the FrameMap.
+    for (unsigned e = R->Map->NumMeta; i != e; ++i)
+      Visitor(&R->Roots[i], R->Map->Meta[i]);
+
+    // For roots [NumMeta, NumRoots), the metadata pointer is null.
+    for (unsigned e = R->Map->NumRoots; i != e; ++i)
+      Visitor(&R->Roots[i], NULL);
+  }
+}
+
+
+
+
+

The ‘Erlang’ and ‘Ocaml’ GCs

+

LLVM ships with two example collectors which leverage the gcroot +mechanisms. To our knowledge, these are not actually used by any language +runtime, but they do provide a reasonable starting point for someone interested +in writing a gcroot compatible GC plugin. In particular, these are the +only in-tree examples of how to produce a custom binary stack map format using +a gcroot strategy.

+

As their names imply, the binary format produced is intended to model that +used by the Erlang and OCaml compilers respectively.

+
+
+

The Statepoint Example GC

+
F.setGC("statepoint-example");
+
+
+

This GC provides an example of how one might use the infrastructure provided +by gc.statepoint. This example GC is compatible with the +PlaceSafepoints and RewriteStatepointsForGC utility passes +which simplify gc.statepoint sequence insertion. If you need to build a +custom GC strategy around the gc.statepoints mechanisms, it is recommended +that you use this one as a starting point.

+

This GC strategy does not support read or write barriers. As a result, these +intrinsics are lowered to normal loads and stores.

+

The stack map format generated by this GC strategy can be found in the +Stack Map Section using a format documented here. This format is intended to be the standard +format supported by LLVM going forward.

+
+
+

The CoreCLR GC

+
F.setGC("coreclr");
+
+
+

This GC leverages the gc.statepoint mechanism to support the +CoreCLR runtime.

+

Support for this GC strategy is a work in progress. This strategy will +differ from the +statepoint-example GC strategy in +certain aspects like:

+
    +
  • Base-pointers of interior pointers are not explicitly +tracked and reported.

  • +
  • A different format is used for encoding stack maps.

  • +
  • Safe-point polls are only needed before loop-back edges +and before tail-calls (not needed at function-entry).

  • +
+
+
+
+

Custom GC Strategies

+

If none of the built in GC strategy descriptions above meet your needs, you will +need to define a custom GCStrategy and possibly, a custom LLVM pass to perform +lowering. Your best example of where to start defining a custom GCStrategy +would be to look at one of the built in strategies.

+

You may be able to structure this additional code as a loadable plugin library. +Loadable plugins are sufficient if all you need is to enable a different +combination of built in functionality, but if you need to provide a custom +lowering pass, you will need to build a patched version of LLVM. If you think +you need a patched build, please ask for advice on llvm-dev. There may be an +easy way we can extend the support to make it work for your use case without +requiring a custom build.

+
+

Collector Requirements

+

You should be able to leverage any existing collector library that includes the following elements:

+
    +
  1. A memory allocator which exposes an allocation function your compiled +code can call.

  2. +
  3. A binary format for the stack map. A stack map describes the location +of references at a safepoint and is used by precise collectors to identify +references within a stack frame on the machine stack. Note that collectors +which conservatively scan the stack don’t require such a structure.

  4. +
  5. A stack crawler to discover functions on the call stack, and enumerate the +references listed in the stack map for each call site.

  6. +
  7. A mechanism for identifying references in global locations (e.g. global +variables).

  8. +
  9. If your collector requires them, an LLVM IR implementation of your collector’s +load and store barriers. Note that since many collectors don’t require +barriers at all, LLVM defaults to lowering such barriers to normal loads +and stores unless you arrange otherwise.

  10. +
+
+
+

Implementing a collector plugin

+

User code specifies which GC code generation to use with the gc function +attribute or, equivalently, with the setGC method of Function.

+

To implement a GC plugin, it is necessary to subclass llvm::GCStrategy, +which can be accomplished in a few lines of boilerplate code. LLVM’s +infrastructure provides access to several important algorithms. For an +uncontroversial collector, all that remains may be to compile LLVM’s computed +stack map to assembly code (using the binary representation expected by the +runtime library). This can be accomplished in about 100 lines of code.

+

This is not the appropriate place to implement a garbage collected heap or a +garbage collector itself. That code should exist in the language’s runtime +library. The compiler plugin is responsible for generating code which conforms +to the binary interface defined by the library, most essentially the stack map.

+

To subclass llvm::GCStrategy and register it with the compiler:

+
// lib/MyGC/MyGC.cpp - Example LLVM GC plugin
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+  class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
+  public:
+    MyGC() {}
+  };
+
+  GCRegistry::Add<MyGC>
+  X("mygc", "My bespoke garbage collector.");
+}
+
+
+

This boilerplate collector does nothing. More specifically:

+
    +
  • llvm.gcread calls are replaced with the corresponding load +instruction.

  • +
  • llvm.gcwrite calls are replaced with the corresponding store +instruction.

  • +
  • No safe points are added to the code.

  • +
  • The stack map is not compiled into the executable.

  • +
+

Using the LLVM makefiles, this code +can be compiled as a plugin using a simple makefile:

+
# lib/MyGC/Makefile
+
+LEVEL := ../..
+LIBRARYNAME = MyGC
+LOADABLE_MODULE = 1
+
+include $(LEVEL)/Makefile.common
+
+
+

Once the plugin is compiled, code using it may be compiled using llc +-load=MyGC.so (though MyGC.so may have some other platform-specific +extension):

+
$ cat sample.ll
+define void @f() gc "mygc" {
+entry:
+  ret void
+}
+$ llvm-as < sample.ll | llc -load=MyGC.so
+
+
+

It is also possible to statically link the collector plugin into tools, such as +a language-specific compiler front-end.

+
+
+

Overview of available features

+

GCStrategy provides a range of features through which a plugin may do useful +work. Some of these are callbacks, some are algorithms that can be enabled, +disabled, or customized. This matrix summarizes the supported (and planned) +features and correlates them with the collection techniques which typically +require them.

+ +++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Algorithm

Done

Shadow +stack

refcount

mark- +sweep

copying

incremental

threaded

concurrent

stack map

initialize +roots

derived +pointers

NO

N*

N*

custom +lowering

gcroot

gcwrite

gcread

safe +points

in +calls

before +calls

for +loops

NO

N

N

before +escape

emit code +at safe +points

NO

N

N

output

assembly

JIT

NO

?

?

?

?

?

obj

NO

?

?

?

?

?

live +analysis

NO

?

?

?

?

?

register +map

NO

?

?

?

?

?

* Derived pointers only pose a hazard to copying collections.

? denotes a feature which could be utilized if available.

+

To be clear, the collection techniques above are defined as:

+
+
Shadow Stack

The mutator carefully maintains a linked list of stack roots.

+
+
Reference Counting

The mutator maintains a reference count for each object and frees an object +when its count falls to zero.

+
+
Mark-Sweep

When the heap is exhausted, the collector marks reachable objects starting +from the roots, then deallocates unreachable objects in a sweep phase.

+
+
Copying

As reachability analysis proceeds, the collector copies objects from one heap +area to another, compacting them in the process. Copying collectors enable +highly efficient “bump pointer” allocation and can improve locality of +reference.

+
+
Incremental

(Including generational collectors.) Incremental collectors generally have all +the properties of a copying collector (regardless of whether the mature heap +is compacting), but bring the added complexity of requiring write barriers.

+
+
Threaded

Denotes a multithreaded mutator; the collector must still stop the mutator +(“stop the world”) before beginning reachability analysis. Stopping a +multithreaded mutator is a complicated problem. It generally requires highly +platform-specific code in the runtime, and the production of carefully +designed machine code at safe points.

+
+
Concurrent

In this technique, the mutator and the collector run concurrently, with the +goal of eliminating pause times. In a cooperative collector, the mutator +further aids with collection should a pause occur, allowing collection to take +advantage of multiprocessor hosts. The “stop the world” problem of threaded +collectors is generally still present to a limited extent. Sophisticated +marking algorithms are necessary. Read barriers may be necessary.

+
+
+

As the matrix indicates, LLVM’s garbage collection infrastructure is already +suitable for a wide variety of collectors, but does not currently extend to +multithreaded programs. This will be added in the future as there is +interest.

+
+
+

Computing stack maps

+

LLVM automatically computes a stack map. One of the most important features +of a GCStrategy is to compile this information into the executable in +the binary representation expected by the runtime library.

+

The stack map consists of the location and identity of each GC root in +each function in the module. For each root:

+
    +
  • RootNum: The index of the root.

  • +
  • StackOffset: The offset of the object relative to the frame pointer.

  • +
  • RootMetadata: The value passed as the %metadata parameter to the +@llvm.gcroot intrinsic.

  • +
+

Also, for the function as a whole:

+
    +
  • +
    getFrameSize(): The overall size of the function’s initial stack frame,

    not accounting for any dynamic allocation.

    +
    +
    +
  • +
  • roots_size(): The count of roots in the function.

  • +
+

To access the stack map, use GCFunctionMetadata::roots_begin() and +-end() from the GCMetadataPrinter:

+
for (iterator I = begin(), E = end(); I != E; ++I) {
+  GCFunctionInfo *FI = *I;
+  unsigned FrameSize = FI->getFrameSize();
+  size_t RootCount = FI->roots_size();
+
+  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+                                      RE = FI->roots_end();
+                                      RI != RE; ++RI) {
+    int RootNum = RI->Num;
+    int RootStackOffset = RI->StackOffset;
+    Constant *RootMetadata = RI->Metadata;
+  }
+}
+
+
+

If the llvm.gcroot intrinsic is eliminated before code generation by a +custom lowering pass, LLVM will compute an empty stack map. This may be useful +for collector plugins which implement reference counting or a shadow stack.

+
+
+

Initializing roots to null

+

It is recommended that frontends initialize roots explicitly to avoid +potentially confusing the optimizer. This prevents the GC from visiting +uninitialized pointers, which will almost certainly cause it to crash.

+

As a fallback, LLVM will automatically initialize each root to null +upon entry to the function. Support for this mode in code generation is +largely a legacy detail to keep old collector implementations working.

+
+
+

Custom lowering of intrinsics

+

For GCs which use barriers or unusual treatment of stack roots, the +implementor is responsible for providing a custom pass to lower the +intrinsics with the desired semantics. If you have opted in to custom +lowering of a particular intrinsic, your pass must eliminate all +instances of the corresponding intrinsic in functions which opt in to +your GC. The best example of such a pass is the ShadowStackGC and its +ShadowStackGCLowering pass.

+

There is currently no way to register such a custom lowering pass +without building a custom copy of LLVM.

+
+
+

Generating safe points

+

LLVM provides support for associating stackmaps with the return address of +a call. Any loop or return safepoints required by a given collector design +can be modeled via calls to runtime routines, or potentially patchable call +sequences. Using gcroot, all call instructions are inferred to be possible +safepoints and will thus have an associated stackmap.

+
+
+

Emitting assembly code: GCMetadataPrinter

+

LLVM allows a plugin to print arbitrary assembly code before and after the rest +of a module’s assembly code. At the end of the module, the GC can compile the +LLVM stack map into assembly code. (At the beginning, this information is not +yet computed.)

+

Since AsmWriter and CodeGen are separate components of LLVM, a separate abstract +base class and registry are provided for printing assembly code, the +GCMetadataPrinter and GCMetadataPrinterRegistry. The AsmWriter will look +for such a subclass if the GCStrategy sets UsesMetadata:

+
MyGC::MyGC() {
+  UsesMetadata = true;
+}
+
+
+

This separation allows JIT-only clients to be smaller.

+

Note that LLVM does not currently have analogous APIs to support code generation +in the JIT, nor using the object writers.

+
// lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+  class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
+  public:
+    virtual void beginAssembly(AsmPrinter &AP);
+
+    virtual void finishAssembly(AsmPrinter &AP);
+  };
+
+  GCMetadataPrinterRegistry::Add<MyGCPrinter>
+  X("mygc", "My bespoke garbage collector.");
+}
+
+
+

The collector should use AsmPrinter to print portable assembly code. The +collector itself contains the stack map for the entire module, and may access +the GCFunctionInfo using its own begin() and end() methods. Here’s +a realistic example:

+
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+void MyGCPrinter::beginAssembly(AsmPrinter &AP) {
+  // Nothing to do.
+}
+
+void MyGCPrinter::finishAssembly(AsmPrinter &AP) {
+  MCStreamer &OS = AP.OutStreamer;
+  unsigned IntPtrSize = AP.getPointerSize();
+
+  // Put this in the data section.
+  OS.SwitchSection(AP.getObjFileLowering().getDataSection());
+
+  // For each function...
+  for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+    GCFunctionInfo &MD = **FI;
+
+    // A compact GC layout. Emit this data structure:
+    //
+    // struct {
+    //   int32_t PointCount;
+    //   void *SafePointAddress[PointCount];
+    //   int32_t StackFrameSize; // in words
+    //   int32_t StackArity;
+    //   int32_t LiveCount;
+    //   int32_t LiveOffsets[LiveCount];
+    // } __gcmap_<FUNCTIONNAME>;
+
+    // Align to address width.
+    AP.emitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+    // Emit PointCount.
+    OS.AddComment("safe point count");
+    AP.emitInt32(MD.size());
+
+    // And each safe point...
+    for (GCFunctionInfo::iterator PI = MD.begin(),
+                                  PE = MD.end(); PI != PE; ++PI) {
+      // Emit the address of the safe point.
+      OS.AddComment("safe point address");
+      MCSymbol *Label = PI->Label;
+      AP.emitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/);
+    }
+
+    // Stack information never change in safe points! Only print info from the
+    // first call-site.
+    GCFunctionInfo::iterator PI = MD.begin();
+
+    // Emit the stack frame size.
+    OS.AddComment("stack frame size (in words)");
+    AP.emitInt32(MD.getFrameSize() / IntPtrSize);
+
+    // Emit stack arity, i.e. the number of stacked arguments.
+    unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
+    unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
+                          MD.getFunction().arg_size() - RegisteredArgs : 0;
+    OS.AddComment("stack arity");
+    AP.emitInt32(StackArity);
+
+    // Emit the number of live roots in the function.
+    OS.AddComment("live root count");
+    AP.emitInt32(MD.live_size(PI));
+
+    // And for each live root...
+    for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+                                       LE = MD.live_end(PI);
+                                       LI != LE; ++LI) {
+      // Emit live root's offset within the stack frame.
+      OS.AddComment("stack index (offset / wordsize)");
+      AP.emitInt32(LI->StackOffset);
+    }
+  }
+}
+
+
+
+
+
+

References

+

[Appel89] Runtime Tags Aren’t Necessary. Andrew W. Appel. Lisp and Symbolic +Computation 19(7):703-705, July 1989.

+

[Goldberg91] Tag-free garbage collection for strongly typed programming +languages. Benjamin Goldberg. ACM SIGPLAN PLDI’91.

+

[Tolmach94] Tag-free garbage collection using explicit type parameters. Andrew +Tolmach. Proceedings of the 1994 ACM conference on LISP and functional +programming.

+

[Henderson2002] Accurate Garbage Collection in an Uncooperative Environment

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/genindex.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/genindex.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/genindex.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/genindex.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,10376 @@ + + + + + + + + + Index — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ + +

Index

+ +
+ Symbols + | A + | B + | C + | D + | E + | F + | G + | I + | L + | M + | N + | O + | P + | Q + | R + | S + | T + | U + | V + | X + +
+

Symbols

+ + + +
+ +

A

+ + + +
+ +

B

+ + +
+ +

C

+ + + +
+ +

D

+ + +
+ +

E

+ + + +
+ +

F

+ + +
+ +

G

+ + + +
+ +

I

+ + + +
+ +

L

+ + + +
+ +

M

+ + +
+ +

N

+ + +
+ +

O

+ + + +
+ +

P

+ + + +
+ +

Q

+ + +
+ +

R

+ + +
+ +

S

+ + + +
+ +

T

+ + + +
+ +

U

+ + + +
+ +

V

+ + + +
+ +

X

+ + +
+ + + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GetElementPtr.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GetElementPtr.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GetElementPtr.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GetElementPtr.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,619 @@ + + + + + + + + + The Often Misunderstood GEP Instruction — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The Often Misunderstood GEP Instruction

+ +
+

Introduction

+

This document seeks to dispel the mystery and confusion surrounding LLVM’s +GetElementPtr (GEP) instruction. +Questions about the wily GEP instruction are probably the most frequently +occurring questions once a developer gets down to coding with LLVM. Here we lay +out the sources of confusion and show that the GEP instruction is really quite +simple.

+
+
+

Address Computation

+

When people are first confronted with the GEP instruction, they tend to relate +it to known concepts from other programming paradigms, most notably C array +indexing and field selection. GEP closely resembles C array indexing and field +selection, however it is a little different and this leads to the following +questions.

+
+

What is the first index of the GEP instruction?

+

Quick answer: The index stepping through the second operand.

+

The confusion with the first index usually arises from thinking about the +GetElementPtr instruction as if it was a C index operator. They aren’t the +same. For example, when we write, in “C”:

+
AType *Foo;
+...
+X = &Foo->F;
+
+
+

it is natural to think that there is only one index, the selection of the field +F. However, in this example, Foo is a pointer. That pointer +must be indexed explicitly in LLVM. C, on the other hand, indexes through it +transparently. To arrive at the same address location as the C code, you would +provide the GEP instruction with two index operands. The first operand indexes +through the pointer; the second operand indexes the field F of the +structure, just as if you wrote:

+
X = &Foo[0].F;
+
+
+

Sometimes this question gets rephrased as:

+
+

Why is it okay to index through the first pointer, but subsequent pointers +won’t be dereferenced?

+
+

The answer is simply because memory does not have to be accessed to perform the +computation. The second operand to the GEP instruction must be a value of a +pointer type. The value of the pointer is provided directly to the GEP +instruction as an operand without any need for accessing memory. It must, +therefore, be indexed and requires an index operand. Consider this example:

+
struct munger_struct {
+  int f1;
+  int f2;
+};
+void munge(struct munger_struct *P) {
+  P[0].f1 = P[1].f1 + P[2].f2;
+}
+...
+struct munger_struct Array[3];
+...
+munge(Array);
+
+
+

In this “C” example, the front end compiler (Clang) will generate three GEP +instructions for the three indices through “P” in the assignment statement. The +function argument P will be the second operand of each of these GEP +instructions. The third operand indexes through that pointer. The fourth +operand will be the field offset into the struct munger_struct type, for +either the f1 or f2 field. So, in LLVM assembly the munge function +looks like:

+
define void @munge(%struct.munger_struct* %P) {
+entry:
+  %tmp = getelementptr %struct.munger_struct, %struct.munger_struct* %P, i32 1, i32 0
+  %tmp1 = load i32, i32* %tmp
+  %tmp2 = getelementptr %struct.munger_struct, %struct.munger_struct* %P, i32 2, i32 1
+  %tmp3 = load i32, i32* %tmp2
+  %tmp4 = add i32 %tmp3, %tmp1
+  %tmp5 = getelementptr %struct.munger_struct, %struct.munger_struct* %P, i32 0, i32 0
+  store i32 %tmp4, i32* %tmp5
+  ret void
+}
+
+
+

In each case the second operand is the pointer through which the GEP instruction +starts. The same is true whether the second operand is an argument, allocated +memory, or a global variable.

+

To make this clear, let’s consider a more obtuse example:

+
%MyVar = uninitialized global i32
+...
+%idx1 = getelementptr i32, i32* %MyVar, i64 0
+%idx2 = getelementptr i32, i32* %MyVar, i64 1
+%idx3 = getelementptr i32, i32* %MyVar, i64 2
+
+
+

These GEP instructions are simply making address computations from the base +address of MyVar. They compute, as follows (using C syntax):

+
idx1 = (char*) &MyVar + 0
+idx2 = (char*) &MyVar + 4
+idx3 = (char*) &MyVar + 8
+
+
+

Since the type i32 is known to be four bytes long, the indices 0, 1 and 2 +translate into memory offsets of 0, 4, and 8, respectively. No memory is +accessed to make these computations because the address of %MyVar is passed +directly to the GEP instructions.

+

The obtuse part of this example is in the cases of %idx2 and %idx3. They +result in the computation of addresses that point to memory past the end of the +%MyVar global, which is only one i32 long, not three i32s long. +While this is legal in LLVM, it is inadvisable because any load or store with +the pointer that results from these GEP instructions would produce undefined +results.

+
+
+

Why is the extra 0 index required?

+

Quick answer: there are no superfluous indices.

+

This question arises most often when the GEP instruction is applied to a global +variable which is always a pointer type. For example, consider this:

+
%MyStruct = uninitialized global { float*, i32 }
+...
+%idx = getelementptr { float*, i32 }, { float*, i32 }* %MyStruct, i64 0, i32 1
+
+
+

The GEP above yields an i32* by indexing the i32 typed field of the +structure %MyStruct. When people first look at it, they wonder why the i64 +0 index is needed. However, a closer inspection of how globals and GEPs work +reveals the need. Becoming aware of the following facts will dispel the +confusion:

+
    +
  1. The type of %MyStruct is not { float*, i32 } but rather { float*, +i32 }*. That is, %MyStruct is a pointer to a structure containing a +pointer to a float and an i32.

  2. +
  3. Point #1 is evidenced by noticing the type of the second operand of the GEP +instruction (%MyStruct) which is { float*, i32 }*.

  4. +
  5. The first index, i64 0 is required to step over the global variable +%MyStruct. Since the second argument to the GEP instruction must always +be a value of pointer type, the first index steps through that pointer. A +value of 0 means 0 elements offset from that pointer.

  6. +
  7. The second index, i32 1 selects the second field of the structure (the +i32).

  8. +
+
+
+

What is dereferenced by GEP?

+

Quick answer: nothing.

+

The GetElementPtr instruction dereferences nothing. That is, it doesn’t access +memory in any way. That’s what the Load and Store instructions are for. GEP is +only involved in the computation of addresses. For example, consider this:

+
%MyVar = uninitialized global { [40 x i32 ]* }
+...
+%idx = getelementptr { [40 x i32]* }, { [40 x i32]* }* %MyVar, i64 0, i32 0, i64 0, i64 17
+
+
+

In this example, we have a global variable, %MyVar that is a pointer to a +structure containing a pointer to an array of 40 ints. The GEP instruction seems +to be accessing the 18th integer of the structure’s array of ints. However, this +is actually an illegal GEP instruction. It won’t compile. The reason is that the +pointer in the structure must be dereferenced in order to index into the +array of 40 ints. Since the GEP instruction never accesses memory, it is +illegal.

+

In order to access the 18th integer in the array, you would need to do the +following:

+
%idx = getelementptr { [40 x i32]* }, { [40 x i32]* }* %, i64 0, i32 0
+%arr = load [40 x i32]*, [40 x i32]** %idx
+%idx = getelementptr [40 x i32], [40 x i32]* %arr, i64 0, i64 17
+
+
+

In this case, we have to load the pointer in the structure with a load +instruction before we can index into the array. If the example was changed to:

+
%MyVar = uninitialized global { [40 x i32 ] }
+...
+%idx = getelementptr { [40 x i32] }, { [40 x i32] }*, i64 0, i32 0, i64 17
+
+
+

then everything works fine. In this case, the structure does not contain a +pointer and the GEP instruction can index through the global variable, into the +first field of the structure and access the 18th i32 in the array there.

+
+
+

Why don’t GEP x,0,0,1 and GEP x,1 alias?

+

Quick Answer: They compute different address locations.

+

If you look at the first indices in these GEP instructions you find that they +are different (0 and 1), therefore the address computation diverges with that +index. Consider this example:

+
%MyVar = global { [10 x i32] }
+%idx1 = getelementptr { [10 x i32] }, { [10 x i32] }* %MyVar, i64 0, i32 0, i64 1
+%idx2 = getelementptr { [10 x i32] }, { [10 x i32] }* %MyVar, i64 1
+
+
+

In this example, idx1 computes the address of the second integer in the +array that is in the structure in %MyVar, that is MyVar+4. The type of +idx1 is i32*. However, idx2 computes the address of the next +structure after %MyVar. The type of idx2 is { [10 x i32] }* and its +value is equivalent to MyVar + 40 because it indexes past the ten 4-byte +integers in MyVar. Obviously, in such a situation, the pointers don’t +alias.

+
+
+

Why do GEP x,1,0,0 and GEP x,1 alias?

+

Quick Answer: They compute the same address location.

+

These two GEP instructions will compute the same address because indexing +through the 0th element does not change the address. However, it does change the +type. Consider this example:

+
%MyVar = global { [10 x i32] }
+%idx1 = getelementptr { [10 x i32] }, { [10 x i32] }* %MyVar, i64 1, i32 0, i64 0
+%idx2 = getelementptr { [10 x i32] }, { [10 x i32] }* %MyVar, i64 1
+
+
+

In this example, the value of %idx1 is %MyVar+40 and its type is +i32*. The value of %idx2 is also MyVar+40 but its type is { [10 x +i32] }*.

+
+
+

Can GEP index into vector elements?

+

This hasn’t always been forcefully disallowed, though it’s not recommended. It +leads to awkward special cases in the optimizers, and fundamental inconsistency +in the IR. In the future, it will probably be outright disallowed.

+
+
+

What effect do address spaces have on GEPs?

+

None, except that the address space qualifier on the second operand pointer type +always matches the address space qualifier on the result type.

+
+
+

How is GEP different from ptrtoint, arithmetic, and inttoptr?

+

It’s very similar; there are only subtle differences.

+

With ptrtoint, you have to pick an integer type. One approach is to pick i64; +this is safe on everything LLVM supports (LLVM internally assumes pointers are +never wider than 64 bits in many places), and the optimizer will actually narrow +the i64 arithmetic down to the actual pointer size on targets which don’t +support 64-bit arithmetic in most cases. However, there are some cases where it +doesn’t do this. With GEP you can avoid this problem.

+

Also, GEP carries additional pointer aliasing rules. It’s invalid to take a GEP +from one object, address into a different separately allocated object, and +dereference it. IR producers (front-ends) must follow this rule, and consumers +(optimizers, specifically alias analysis) benefit from being able to rely on +it. See the Rules section for more information.

+

And, GEP is more concise in common cases.

+

However, for the underlying integer computation implied, there is no +difference.

+
+
+

I’m writing a backend for a target which needs custom lowering for GEP. How do I do this?

+

You don’t. The integer computation implied by a GEP is target-independent. +Typically what you’ll need to do is make your backend pattern-match expression +trees involving ADD, MUL, etc., which are what GEP is lowered into. This has the +advantage of letting your code work correctly in more cases.

+

GEP does use target-dependent parameters for the size and layout of data types, +which targets can customize.

+

If you require support for addressing units which are not 8 bits, you’ll need to +fix a lot of code in the backend, with GEP lowering being only a small piece of +the overall picture.

+
+
+

How does VLA addressing work with GEPs?

+

GEPs don’t natively support VLAs. LLVM’s type system is entirely static, and GEP +address computations are guided by an LLVM type.

+

VLA indices can be implemented as linearized indices. For example, an expression +like X[a][b][c], must be effectively lowered into a form like +X[a*m+b*n+c], so that it appears to the GEP as a single-dimensional array +reference.

+

This means if you want to write an analysis which understands array indices and +you want to support VLAs, your code will have to be prepared to reverse-engineer +the linearization. One way to solve this problem is to use the ScalarEvolution +library, which always presents VLA and non-VLA indexing in the same manner.

+
+
+
+

Rules

+
+

What happens if an array index is out of bounds?

+

There are two senses in which an array index can be out of bounds.

+

First, there’s the array type which comes from the (static) type of the first +operand to the GEP. Indices greater than the number of elements in the +corresponding static array type are valid. There is no problem with out of +bounds indices in this sense. Indexing into an array only depends on the size of +the array element, not the number of elements.

+

A common example of how this is used is arrays where the size is not known. +It’s common to use array types with zero length to represent these. The fact +that the static type says there are zero elements is irrelevant; it’s perfectly +valid to compute arbitrary element indices, as the computation only depends on +the size of the array element, not the number of elements. Note that zero-sized +arrays are not a special case here.

+

This sense is unconnected with the inbounds keyword. The inbounds keyword is +designed to describe low-level pointer arithmetic overflow conditions, rather +than high-level array indexing rules.

+

Analysis passes which wish to understand array indexing should not assume that +the static array type bounds are respected.

+

The second sense of being out of bounds is computing an address that’s beyond +the actual underlying allocated object.

+

With the inbounds keyword, the result value of the GEP is undefined if the +address is outside the actual underlying allocated object and not the address +one-past-the-end.

+

Without the inbounds keyword, there are no restrictions on computing +out-of-bounds addresses. Obviously, performing a load or a store requires an +address of allocated and sufficiently aligned memory. But the GEP itself is only +concerned with computing addresses.

+
+
+

Can array indices be negative?

+

Yes. This is basically a special case of array indices being out of bounds.

+
+
+

Can I compare two values computed with GEPs?

+

Yes. If both addresses are within the same allocated object, or +one-past-the-end, you’ll get the comparison result you expect. If either is +outside of it, integer arithmetic wrapping may occur, so the comparison may not +be meaningful.

+
+
+

Can I do GEP with a different pointer type than the type of the underlying object?

+

Yes. There are no restrictions on bitcasting a pointer value to an arbitrary +pointer type. The types in a GEP serve only to define the parameters for the +underlying integer computation. They need not correspond with the actual type of +the underlying object.

+

Furthermore, loads and stores don’t have to use the same types as the type of +the underlying object. Types in this context serve only to specify memory size +and alignment. Beyond that they are merely a hint to the optimizer indicating +how the value will likely be used.

+
+
+

Can I cast an object’s address to integer and add it to null?

+

You can compute an address that way, but if you use GEP to do the add, you can’t +use that pointer to actually access the object, unless the object is managed +outside of LLVM.

+

The underlying integer computation is sufficiently defined; null has a defined +value — zero — and you can add whatever value you want to it.

+

However, it’s invalid to access (load from or store to) an LLVM-aware object +with such a pointer. This includes GlobalVariables, Allocas, and objects +pointed to by noalias pointers.

+

If you really need this functionality, you can do the arithmetic with explicit +integer instructions, and use inttoptr to convert the result to an address. Most +of GEP’s special aliasing rules do not apply to pointers computed from ptrtoint, +arithmetic, and inttoptr sequences.

+
+
+

Can I compute the distance between two objects, and add that value to one address to compute the other address?

+

As with arithmetic on null, you can use GEP to compute an address that way, but +you can’t use that pointer to actually access the object if you do, unless the +object is managed outside of LLVM.

+

Also as above, ptrtoint and inttoptr provide an alternative way to do this which +does not have this restriction.

+
+
+

Can I do type-based alias analysis on LLVM IR?

+

You can’t do type-based alias analysis using LLVM’s built-in type system, +because LLVM has no restrictions on mixing types in addressing, loads or stores.

+

LLVM’s type-based alias analysis pass uses metadata to describe a different type +system (such as the C type system), and performs type-based aliasing on top of +that. Further details are in the +language reference.

+
+
+

What happens if a GEP computation overflows?

+

If the GEP lacks the inbounds keyword, the value is the result from +evaluating the implied two’s complement integer computation. However, since +there’s no guarantee of where an object will be allocated in the address space, +such values have limited meaning.

+

If the GEP has the inbounds keyword, the result value is undefined (a “trap +value”) if the GEP overflows (i.e. wraps around the end of the address space).

+

As such, there are some ramifications of this for inbounds GEPs: scales implied +by array/vector/pointer indices are always known to be “nsw” since they are +signed values that are scaled by the element size. These values are also +allowed to be negative (e.g. “gep i32 *%P, i32 -1”) but the pointer itself +is logically treated as an unsigned value. This means that GEPs have an +asymmetric relation between the pointer base (which is treated as unsigned) and +the offset applied to it (which is treated as signed). The result of the +additions within the offset calculation cannot have signed overflow, but when +applied to the base pointer, there can be signed overflow.

+
+
+

How can I tell if my front-end is following the rules?

+

There is currently no checker for the getelementptr rules. Currently, the only +way to do this is to manually check each place in your front-end where +GetElementPtr operators are created.

+

It’s not possible to write a checker which could find all rule violations +statically. It would be possible to write a checker which works by instrumenting +the code with dynamic checks though. Alternatively, it would be possible to +write a static checker which catches a subset of possible problems. However, no +such checker exists today.

+
+
+
+

Rationale

+
+

Why is GEP designed this way?

+

The design of GEP has the following goals, in rough unofficial order of +priority:

+
    +
  • Support C, C-like languages, and languages which can be conceptually lowered +into C (this covers a lot).

  • +
  • Support optimizations such as those that are common in C compilers. In +particular, GEP is a cornerstone of LLVM’s pointer aliasing +model.

  • +
  • Provide a consistent method for computing addresses so that address +computations don’t need to be a part of load and store instructions in the IR.

  • +
  • Support non-C-like languages, to the extent that it doesn’t interfere with +other goals.

  • +
  • Minimize target-specific information in the IR.

  • +
+
+
+

Why do struct member indices always use i32?

+

The specific type i32 is probably just a historical artifact, however it’s wide +enough for all practical purposes, so there’s been no need to change it. It +doesn’t necessarily imply i32 address arithmetic; it’s just an identifier which +identifies a field in a struct. Requiring that all struct indices be the same +reduces the range of possibilities for cases where two GEPs are effectively the +same but have distinct operand types.

+
+
+

What’s an uglygep?

+

Some LLVM optimizers operate on GEPs by internally lowering them into more +primitive integer expressions, which allows them to be combined with other +integer expressions and/or split into multiple separate integer expressions. If +they’ve made non-trivial changes, translating back into LLVM IR can involve +reverse-engineering the structure of the addressing in order to fit it into the +static type of the original first operand. It isn’t always possible to fully +reconstruct this structure; sometimes the underlying addressing doesn’t +correspond with the static type at all. In such cases the optimizer instead will +emit a GEP with the base pointer casted to a simple address-unit pointer, using +the name “uglygep”. This isn’t pretty, but it’s just as valid, and it’s +sufficient to preserve the pointer aliasing guarantees that GEP provides.

+
+
+
+

Summary

+

In summary, here are some things to always remember about the GetElementPtr +instruction:

+
    +
  1. The GEP instruction never accesses memory, it only provides pointer +computations.

  2. +
  3. The second operand to the GEP instruction is always a pointer and it must be +indexed.

  4. +
  5. There are no superfluous indices for the GEP instruction.

  6. +
  7. Trailing zero indices are superfluous for pointer aliasing, but not for the +types of the pointers.

  8. +
  9. Leading zero indices are not superfluous for pointer aliasing nor the types +of the pointers.

  10. +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingInvolved.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingInvolved.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingInvolved.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingInvolved.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,381 @@ + + + + + + + + + Getting Involved — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Getting Involved

+

LLVM welcomes contributions of all kinds. To get started, please review the following topics:

+ +
+
+
+
Contributing to LLVM

An overview on how to contribute to LLVM.

+
+
LLVM Developer Policy

The LLVM project’s policy towards developers and their contributions.

+
+
LLVM Code-Review Policy and Practices

The LLVM project’s code-review process.

+
+
LLVM Community Support Policy

The LLVM support policy for core and non-core components.

+
+
Sphinx Quickstart Template

A template + tutorial for writing new Sphinx documentation. It is meant +to be read in source form.

+
+
Code Reviews with Phabricator

Describes how to use the Phabricator code review tool hosted on +http://reviews.llvm.org/ and its command line interface, Arcanist.

+
+
How to submit an LLVM bug report

Instructions for properly submitting information about any bugs you run into +in the LLVM system.

+
+
LLVM Bug Life Cycle

Describes how bugs are reported, triaged and closed.

+
+
LLVM Coding Standards

Details the LLVM coding standards and provides useful information on writing +efficient C++ code.

+
+
Bisecting LLVM code

Describes how to use git bisect on LLVM’s repository.

+
+
+
+

Development Process

+

Information about LLVM’s development process.

+
+
+
+
Creating an LLVM Project

How-to guide and templates for new projects that use the LLVM +infrastructure. The templates (directory organization, Makefiles, and test +tree) allow the project code to be located outside (or inside) the llvm/ +tree, while using LLVM header files and libraries.

+
+
How To Release LLVM To The Public

This is a guide to preparing LLVM releases. Most developers can ignore it.

+
+
How To Validate a New Release

This is a guide to validate a new release, during the release process. Most developers can ignore it.

+
+
How To Add Your Build Configuration To LLVM Buildbot Infrastructure

Instructions for adding a new builder to the LLVM buildbot master.

+
+
Advice on Packaging LLVM

Advice on packaging LLVM into a distribution.

+
+
Release notes for the current release

This describes new features, known bugs, and other limitations.

+
+
+
+
+

Mailing Lists

+

If you can’t find what you need in these docs, try consulting the mailing +lists. In addition to the traditional mailing lists there is also a +Discourse server available.

+
+
Developer’s List (llvm-dev)

This list is for people who want to be included in technical discussions of +LLVM. People post to this list when they have questions about writing code +for or using the LLVM tools. It is relatively low volume.

+
+
Commits Archive (llvm-commits)

This list contains all commit messages that are made when LLVM developers +commit code changes to the repository. It also serves as a forum for +patch review (i.e. send patches here). It is useful for those who want to +stay on the bleeding edge of LLVM development. This list is very high +volume.

+
+
Bugs & Patches Archive (llvm-bugs)

This list gets emailed every time a bug is opened and closed. It is +higher volume than the LLVM-dev list.

+
+
Test Results Archive (llvm-testresults)

A message is automatically sent to this list by every active nightly tester +when it completes. As such, this list gets email several times each day, +making it a high volume list.

+
+
LLVM Announcements List (llvm-announce)

This is a low volume list that provides important announcements regarding +LLVM. It gets email about once a month.

+
+
+
+
+

Online Sync-Ups

+

A number of regular calls are organized on specific topics. It should be +expected that the range of topics will change over time. At the time of +writing, the following sync-ups are organized:

+ + ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
LLVM regular sync-up calls

Topic

Frequency

Calendar link

Minutes/docs link

RISC-V

Every 2 weeks on Thursday

ics +gcal

Scalable Vectors and Arm SVE

Monthly, every 3rd Tuesday

ics +gcal

Minutes/docs

ML Guided Compiler Optimizations

Monthly

Minutes/docs

LLVM security group

Monthly, every 3rd Tuesday

ics +gcal

Minutes/docs

CIRCT

Weekly, on Wednesday

Minutes/docs

MLIR design meetings

Weekly, on Thursdays

Minutes/docs

flang

Multiple meeting series, documented here

OpenMP

Multiple meeting series, documented here

LLVM Alias Analysis

Every 4 weeks on Tuesdays

ics

Minutes/docs

Windows/COFF related developments

Every 2 months on Thursday

Minutes/docs

Vector Predication

Every 2 weeks on Tuesdays, 3pm UTC

Minutes/docs

LLVM Pointer Authentication

Every month on Mondays

ics

Minutes/docs

MemorySSA in LLVM

Every 8 weeks on Mondays

ics +gcal

Minutes/docs

+
+
+

IRC

+

Users and developers of the LLVM project (including subprojects such as Clang) +can be found in #llvm on irc.oftc.net.

+

This channel has several bots.

+
    +
  • Buildbot reporters

    + +
  • +
  • robot - Bugzilla linker. %bug <number>

  • +
  • clang-bot - A geordi instance running +near-trunk clang instead of gcc.

  • +
+

In addition to the traditional IRC there is a +Discord +chat server available. To sign up, please use this +invitation link.

+
+
+

Meetups and social events

+
+
+

Besides developer meetings and conferences, +there are several user groups called +LLVM Socials. We greatly encourage you to +join one in your city. Or start a new one if there is none:

+

How to start LLVM Social in your town

+
+
+

Community wide proposals

+

Proposals for massive changes in how the community behaves and how the workflow +can be improved.

+
+
+
+
LLVM Community Code of Conduct

Proposal to adopt a code of conduct on the LLVM social spaces (lists, events, +IRC, etc).

+
+
Moving LLVM Projects to GitHub

Proposal to move from SVN/Git to GitHub.

+
+
Bugpoint Redesign

Design doc for a redesign of the Bugpoint tool.

+
+
“llvm-libc” C Standard Library

Proposal to add a libc implementation under the LLVM project.

+
+
Test-Suite Extensions

Proposals for additional benchmarks/programs for llvm’s test-suite.

+
+
Variable Names Plan

Proposal to change the variable names coding standard.

+
+
Vectorization Plan

Proposal to model the process and upgrade the infrastructure of LLVM’s Loop Vectorizer.

+
+
Vector Predication Roadmap

Proposal for predicated vector instructions in LLVM.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingStarted.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingStarted.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingStarted.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingStarted.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,1385 @@ + + + + + + + + + Getting Started with the LLVM System — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Getting Started with the LLVM System

+ +
+

Overview

+

Welcome to the LLVM project!

+

The LLVM project has multiple components. The core of the project is +itself called “LLVM”. This contains all of the tools, libraries, and header +files needed to process intermediate representations and convert them into +object files. Tools include an assembler, disassembler, bitcode analyzer, and +bitcode optimizer. It also contains basic regression tests.

+

C-like languages use the Clang front end. This +component compiles C, C++, Objective C, and Objective C++ code into LLVM bitcode +– and from there into object files, using LLVM.

+

Other components include: +the libc++ C++ standard library, +the LLD linker, and more.

+
+
+

Getting the Source Code and Building LLVM

+

The LLVM Getting Started documentation may be out of date. The Clang +Getting Started page might have more +accurate information.

+

This is an example workflow and configuration to get and build the LLVM source:

+
    +
  1. Checkout LLVM (including related subprojects like Clang):

    +
      +
    • git clone https://github.com/llvm/llvm-project.git

    • +
    • Or, on windows, git clone --config core.autocrlf=false +https://github.com/llvm/llvm-project.git

    • +
    • To save storage and speed-up the checkout time, you may want to do a +shallow clone. +For example, to get the latest revision of the LLVM project, use +git clone --depth 1 https://github.com/llvm/llvm-project.git

    • +
    +
  2. +
  3. Configure and build LLVM and Clang:

    +
      +
    • cd llvm-project

    • +
    • mkdir build

    • +
    • cd build

    • +
    • cmake -G <generator> [options] ../llvm

      +

      Some common build system generators are:

      +
        +
      • Ninja — for generating Ninja +build files. Most llvm developers use Ninja.

      • +
      • Unix Makefiles — for generating make-compatible parallel makefiles.

      • +
      • Visual Studio — for generating Visual Studio projects and +solutions.

      • +
      • Xcode — for generating Xcode projects.

      • +
      +

      Some Common options:

      +
        +
      • -DLLVM_ENABLE_PROJECTS='...' — semicolon-separated list of the LLVM +subprojects you’d like to additionally build. Can include any of: clang, +clang-tools-extra, libcxx, libcxxabi, libunwind, lldb, compiler-rt, lld, +polly, or cross-project-tests.

        +

        For example, to build LLVM, Clang, libcxx, and libcxxabi, use +-DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi".

        +
      • +
      • -DCMAKE_INSTALL_PREFIX=directory — Specify for directory the full +pathname of where you want the LLVM tools and libraries to be installed +(default /usr/local).

      • +
      • -DCMAKE_BUILD_TYPE=type — Valid options for type are Debug, +Release, RelWithDebInfo, and MinSizeRel. Default is Debug.

      • +
      • -DLLVM_ENABLE_ASSERTIONS=On — Compile with assertion checks enabled +(default is Yes for Debug builds, No for all other build types).

      • +
      +
    • +
    • cmake --build . [--target <target>] or the build system specified +above directly.

      +
        +
      • The default target (i.e. cmake --build . or make) will build all of +LLVM.

      • +
      • The check-all target (i.e. ninja check-all) will run the +regression tests to ensure everything is in working order.

      • +
      • CMake will generate build targets for each tool and library, and most +LLVM sub-projects generate their own check-<project> target.

      • +
      • Running a serial build will be slow. To improve speed, try running a +parallel build. That’s done by default in Ninja; for make, use the +option -j NN, where NN is the number of parallel jobs, e.g. the +number of available CPUs.

      • +
      +
    • +
    • For more information see CMake

    • +
    • If you get an “internal compiler error (ICE)” or test failures, see +below.

    • +
    +
  4. +
+

Consult the Getting Started with LLVM section for detailed information on +configuring and compiling LLVM. Go to Directory Layout to learn about the +layout of the source code tree.

+
+
+

Requirements

+

Before you begin to use the LLVM system, review the requirements given below. +This may save you some trouble by knowing ahead of time what hardware and +software you will need.

+
+

Hardware

+

LLVM is known to work on the following host platforms:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

OS

Arch

Compilers

Linux

x861

GCC, Clang

Linux

amd64

GCC, Clang

Linux

ARM

GCC, Clang

Linux

Mips

GCC, Clang

Linux

PowerPC

GCC, Clang

Linux

SystemZ

GCC, Clang

Solaris

V9 (Ultrasparc)

GCC

FreeBSD

x861

GCC, Clang

FreeBSD

amd64

GCC, Clang

NetBSD

x861

GCC, Clang

NetBSD

amd64

GCC, Clang

OpenBSD

x861

GCC, Clang

OpenBSD

amd64

GCC, Clang

macOS2

PowerPC

GCC

macOS

x86

GCC, Clang

Cygwin/Win32

x861, 3

GCC

Windows

x861

Visual Studio

Windows x64

x86-64

Visual Studio

+
+

Note

+
    +
  1. Code generation supported for Pentium processors and up

  2. +
  3. Code generation supported for 32-bit ABI only

  4. +
  5. To use LLVM modules on Win32-based system, you may configure LLVM +with -DBUILD_SHARED_LIBS=On.

  6. +
+
+

Note that Debug builds require a lot of time and disk space. An LLVM-only build +will need about 1-3 GB of space. A full build of LLVM and Clang will need around +15-20 GB of disk space. The exact space requirements will vary by system. (It +is so large because of all the debugging information and the fact that the +libraries are statically linked into multiple tools).

+

If you are space-constrained, you can build only selected tools or only +selected targets. The Release build requires considerably less space.

+

The LLVM suite may compile on other platforms, but it is not guaranteed to do +so. If compilation is successful, the LLVM utilities should be able to +assemble, disassemble, analyze, and optimize LLVM bitcode. Code generation +should work as well, although the generated native code may not work on your +platform.

+
+
+

Software

+

Compiling LLVM requires that you have several software packages installed. The +table below lists those required packages. The Package column is the usual name +for the software package that LLVM depends on. The Version column provides +“known to work” versions of the package. The Notes column describes how LLVM +uses the package and provides other details.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Package

Version

Notes

CMake

>=3.13.4

Makefile/workspace generator

GCC

>=5.1.0

C/C++ compiler1

python

>=3.6

Automated test suite2

zlib

>=1.2.3.4

Compression library3

GNU Make

3.79, 3.79.1

Makefile/build processor4

+
+

Note

+
    +
  1. Only the C and C++ languages are needed so there’s no need to build the +other languages for LLVM’s purposes. See below for specific version +info.

  2. +
  3. Only needed if you want to run the automated test suite in the +llvm/test directory.

  4. +
  5. Optional, adds compression / decompression capabilities to selected LLVM +tools.

  6. +
  7. Optional, you can use any other build tool supported by CMake.

  8. +
+
+

Additionally, your compilation host is expected to have the usual plethora of +Unix utilities. Specifically:

+
    +
  • ar — archive library builder

  • +
  • bzip2 — bzip2 command for distribution generation

  • +
  • bunzip2 — bunzip2 command for distribution checking

  • +
  • chmod — change permissions on a file

  • +
  • cat — output concatenation utility

  • +
  • cp — copy files

  • +
  • date — print the current date/time

  • +
  • echo — print to standard output

  • +
  • egrep — extended regular expression search utility

  • +
  • find — find files/dirs in a file system

  • +
  • grep — regular expression search utility

  • +
  • gzip — gzip command for distribution generation

  • +
  • gunzip — gunzip command for distribution checking

  • +
  • install — install directories/files

  • +
  • mkdir — create a directory

  • +
  • mv — move (rename) files

  • +
  • ranlib — symbol table builder for archive libraries

  • +
  • rm — remove (delete) files and directories

  • +
  • sed — stream editor for transforming output

  • +
  • sh — Bourne shell for make build scripts

  • +
  • tar — tape archive for distribution generation

  • +
  • test — test things in file system

  • +
  • unzip — unzip command for distribution checking

  • +
  • zip — zip command for distribution generation

  • +
+
+
+

Host C++ Toolchain, both Compiler and Standard Library

+

LLVM is very demanding of the host C++ compiler, and as such tends to expose +bugs in the compiler. We also attempt to follow improvements and developments in +the C++ language and library reasonably closely. As such, we require a modern +host C++ toolchain, both compiler and standard library, in order to build LLVM.

+

LLVM is written using the subset of C++ documented in coding +standards. To enforce this language version, we check the most +popular host toolchains for specific minimum versions in our build systems:

+
    +
  • Clang 3.5

  • +
  • Apple Clang 6.0

  • +
  • GCC 5.1

  • +
  • Visual Studio 2017

  • +
+

Anything older than these toolchains may work, but will require forcing the +build system with a special option and is not really a supported host platform. +Also note that older versions of these compilers have often crashed or +miscompiled LLVM.

+

For less widely used host toolchains such as ICC or xlC, be aware that a very +recent version may be required to support all of the C++ features used in LLVM.

+

We track certain versions of software that are known to fail when used as +part of the host toolchain. These even include linkers at times.

+

GNU ld 2.16.X. Some 2.16.X versions of the ld linker will produce very long +warning messages complaining that some “.gnu.linkonce.t.*” symbol was +defined in a discarded section. You can safely ignore these messages as they are +erroneous and the linkage is correct. These messages disappear using ld 2.17.

+

GNU binutils 2.17: Binutils 2.17 contains a bug which causes huge link +times (minutes instead of seconds) when building LLVM. We recommend upgrading +to a newer version (2.17.50.0.4 or later).

+

GNU Binutils 2.19.1 Gold: This version of Gold contained a bug which causes +intermittent failures when building LLVM with position independent code. The +symptom is an error about cyclic dependencies. We recommend upgrading to a +newer version of Gold.

+
+

Getting a Modern Host C++ Toolchain

+

This section mostly applies to Linux and older BSDs. On macOS, you should +have a sufficiently modern Xcode, or you will likely need to upgrade until you +do. Windows does not have a “system compiler”, so you must install either Visual +Studio 2017 or a recent version of mingw64. FreeBSD 10.0 and newer have a modern +Clang as the system compiler.

+

However, some Linux distributions and some other or older BSDs sometimes have +extremely old versions of GCC. These steps attempt to help you upgrade your +compiler even on such a system. However, if at all possible, we encourage you +to use a recent version of a distribution with a modern system compiler that +meets these requirements. Note that it is tempting to install a prior +version of Clang and libc++ to be the host compiler, however libc++ was not +well tested or set up to build on Linux until relatively recently. As +a consequence, this guide suggests just using libstdc++ and a modern GCC as the +initial host in a bootstrap, and then using Clang (and potentially libc++).

+

The first step is to get a recent GCC toolchain installed. The most common +distribution on which users have struggled with the version requirements is +Ubuntu Precise, 12.04 LTS. For this distribution, one easy option is to install +the toolchain testing PPA and use it to install a modern GCC. There is +a really nice discussion of this on the ask ubuntu stack exchange and a +github gist with updated commands. However, not all users can use PPAs and +there are many other distributions, so it may be necessary (or just useful, if +you’re here you are doing compiler development after all) to build and install +GCC from source. It is also quite easy to do these days.

+

Easy steps for installing GCC 5.1.0:

+
% gcc_version=5.1.0
+% wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2
+% wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2.sig
+% wget https://ftp.gnu.org/gnu/gnu-keyring.gpg
+% signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-${gcc_version}.tar.bz2.sig`
+% if [ $signature_invalid ]; then echo "Invalid signature" ; exit 1 ; fi
+% tar -xvjf gcc-${gcc_version}.tar.bz2
+% cd gcc-${gcc_version}
+% ./contrib/download_prerequisites
+% cd ..
+% mkdir gcc-${gcc_version}-build
+% cd gcc-${gcc_version}-build
+% $PWD/../gcc-${gcc_version}/configure --prefix=$HOME/toolchains --enable-languages=c,c++
+% make -j$(nproc)
+% make install
+
+
+

For more details, check out the excellent GCC wiki entry, where I got most +of this information from.

+

Once you have a GCC toolchain, configure your build of LLVM to use the new +toolchain for your host compiler and C++ standard library. Because the new +version of libstdc++ is not on the system library search path, you need to pass +extra linker flags so that it can be found at link time (-L) and at runtime +(-rpath). If you are using CMake, this invocation should produce working +binaries:

+
% mkdir build
+% cd build
+% CC=$HOME/toolchains/bin/gcc CXX=$HOME/toolchains/bin/g++ \
+  cmake .. -DCMAKE_CXX_LINK_FLAGS="-Wl,-rpath,$HOME/toolchains/lib64 -L$HOME/toolchains/lib64"
+
+
+

If you fail to set rpath, most LLVM binaries will fail on startup with a message +from the loader similar to libstdc++.so.6: version `GLIBCXX_3.4.20' not +found. This means you need to tweak the -rpath linker flag.

+

This method will add an absolute path to the rpath of all executables. That’s +fine for local development. If you want to distribute the binaries you build +so that they can run on older systems, copy libstdc++.so.6 into the +lib/ directory. All of LLVM’s shipping binaries have an rpath pointing at +$ORIGIN/../lib, so they will find libstdc++.so.6 there. Non-distributed +binaries don’t have an rpath set and won’t find libstdc++.so.6. Pass +-DLLVM_LOCAL_RPATH="$HOME/toolchains/lib64" to cmake to add an absolute +path to libstdc++.so.6 as above. Since these binaries are not distributed, +having an absolute local path is fine for them.

+

When you build Clang, you will need to give it access to a modern C++ +standard library in order to use it as your new host in part of a bootstrap. +There are two easy ways to do this, either build (and install) libc++ along +with Clang and then use it with the -stdlib=libc++ compile and link flag, +or install Clang into the same prefix ($HOME/toolchains above) as GCC. +Clang will look within its own prefix for libstdc++ and use it if found. You +can also add an explicit prefix for Clang to look in for a GCC toolchain with +the --gcc-toolchain=/opt/my/gcc/prefix flag, passing it to both compile and +link commands when using your just-built-Clang to bootstrap.

+
+
+
+
+

Getting Started with LLVM

+

The remainder of this guide is meant to get you up and running with LLVM and to +give you some basic information about the LLVM environment.

+

The later sections of this guide describe the general layout of the LLVM +source tree, a simple example using the LLVM tool chain, and links to find +more information about LLVM or to get help via e-mail.

+
+

Terminology and Notation

+

Throughout this manual, the following names are used to denote paths specific to +the local system and working environment. These are not environment variables +you need to set but just strings used in the rest of this document below. In +any of the examples below, simply replace each of these names with the +appropriate pathname on your local system. All these paths are absolute:

+

SRC_ROOT

+
+

This is the top level directory of the LLVM source tree.

+
+

OBJ_ROOT

+
+

This is the top level directory of the LLVM object tree (i.e. the tree where +object files and compiled programs will be placed. It can be the same as +SRC_ROOT).

+
+
+
+

Unpacking the LLVM Archives

+

If you have the LLVM distribution, you will need to unpack it before you can +begin to compile it. LLVM is distributed as a number of different +subprojects. Each one has its own download which is a TAR archive that is +compressed with the gzip program.

+

The files are as follows, with x.y marking the version number:

+

llvm-x.y.tar.gz

+
+

Source release for the LLVM libraries and tools.

+
+

cfe-x.y.tar.gz

+
+

Source release for the Clang frontend.

+
+
+
+

Checkout LLVM from Git

+

You can also checkout the source code for LLVM from Git.

+
+

Note

+

Passing --config core.autocrlf=false should not be required in +the future after we adjust the .gitattribute settings correctly, but +is required for Windows users at the time of this writing.

+
+

Simply run:

+
% git clone https://github.com/llvm/llvm-project.git
+
+
+

or on Windows,

+
% git clone --config core.autocrlf=false https://github.com/llvm/llvm-project.git
+
+
+

This will create an ‘llvm-project’ directory in the current directory and +fully populate it with all of the source code, test directories, and local +copies of documentation files for LLVM and all the related subprojects. Note +that unlike the tarballs, which contain each subproject in a separate file, the +git repository contains all of the projects together.

+

If you want to get a specific release (as opposed to the most recent revision), +you can check out a tag after cloning the repository. E.g., git checkout +llvmorg-6.0.1 inside the llvm-project directory created by the above +command. Use git tag -l to list all of them.

+
+

Sending patches

+

Please read Developer Policy, too.

+

We don’t currently accept github pull requests, so you’ll need to send patches +either via emailing to llvm-commits, or, preferably, via Phabricator.

+

You’ll generally want to make sure your branch has a single commit, +corresponding to the review you wish to send, up-to-date with the upstream +origin/main branch, and doesn’t contain merges. Once you have that, you +can start a Phabricator review (or use git show or +git format-patch to output the diff, and attach it to an email message).

+

However, using the “Arcanist” tool is often easier. After installing +arcanist, you can upload the latest commit using:

+
% arc diff HEAD~1
+
+
+

Additionally, before sending a patch for review, please also try to ensure it’s +formatted properly. We use clang-format for this, which has git integration +through the git-clang-format script. On some systems, it may already be +installed (or be installable via your package manager). If so, you can simply +run it – the following command will format only the code changed in the most +recent commit:

+
% git clang-format HEAD~1
+
+
+

Note that this modifies the files, but doesn’t commit them – you’ll likely want +to run

+
% git commit --amend -a
+
+
+

in order to update the last commit with all pending changes.

+
+

Note

+

If you don’t already have clang-format or git clang-format installed +on your system, the clang-format binary will be built alongside clang, and +the git integration can be run from +clang/tools/clang-format/git-clang-format.

+
+
+
+

For developers to commit changes from Git

+

Once a patch is reviewed, you should rebase it, re-test locally, and commit the +changes to LLVM’s main branch. This is done using git push if you have the +required access rights. See committing a change for Phabricator based commits or +obtaining commit access +for commit access.

+

Here is an example workflow using git. This workflow assumes you have an +accepted commit on the branch named branch-with-change.

+
# Go to the branch with your accepted commit.
+% git checkout branch-with-change
+# Rebase your change onto the latest commits on Github.
+% git pull --rebase origin main
+# Rerun the appropriate tests if needed.
+% ninja check-$whatever
+# Check that the list of commits about to be pushed is correct.
+% git log origin/main...HEAD --oneline
+# Push to Github.
+% git push origin HEAD:main
+
+
+

LLVM currently has a linear-history policy, which means that merge commits are +not allowed. The llvm-project repo on github is configured to reject pushes +that include merges, so the git rebase step above is required.

+

Please ask for help if you’re having trouble with your particular git workflow.

+
+
+

Git pre-push hook

+

We include an optional pre-push hook that runs some sanity checks on the revisions +you are about to push and asks for confirmation if you push multiple commits at once. +You can set it up (on Unix systems) by running from the repository root:

+
% ln -sf ../../llvm/utils/git/pre-push.py .git/hooks/pre-push
+
+
+
+
+

Bisecting commits

+

See Bisecting LLVM code for how to use git bisect +on LLVM.

+
+
+

Reverting a change

+

When reverting changes using git, the default message will say “This reverts +commit XYZ”. Leave this at the end of the commit message, but add some details +before it as to why the commit is being reverted. A brief explanation and/or +links to bots that demonstrate the problem are sufficient.

+
+
+
+

Local LLVM Configuration

+

Once you have checked out the repository, the LLVM suite source code must be configured +before being built. This process uses CMake. Unlike the normal configure +script, CMake generates the build files in whatever format you request as well +as various *.inc files, and llvm/include/Config/config.h.

+

Variables are passed to cmake on the command line using the format +-D<variable name>=<value>. The following variables are some common options +used by people developing LLVM.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Variable

Purpose

CMAKE_C_COMPILER

Tells cmake which C compiler to use. By +default, this will be /usr/bin/cc.

CMAKE_CXX_COMPILER

Tells cmake which C++ compiler to use. By +default, this will be /usr/bin/c++.

CMAKE_BUILD_TYPE

Tells cmake what type of build you are trying +to generate files for. Valid options are Debug, +Release, RelWithDebInfo, and MinSizeRel. Default +is Debug.

CMAKE_INSTALL_PREFIX

Specifies the install directory to target when +running the install action of the build files.

PYTHON_EXECUTABLE

Forces CMake to use a specific Python version by +passing a path to a Python interpreter. By default +the Python version of the interpreter in your PATH +is used.

LLVM_TARGETS_TO_BUILD

A semicolon delimited list controlling which +targets will be built and linked into llvm. +The default list is defined as +LLVM_ALL_TARGETS, and can be set to include +out-of-tree targets. The default value includes: +AArch64, AMDGPU, ARM, AVR, BPF, Hexagon, Lanai, +Mips, MSP430, NVPTX, PowerPC, RISCV, Sparc, +SystemZ, WebAssembly, X86, XCore.

LLVM_ENABLE_DOXYGEN

Build doxygen-based documentation from the source +code. This is disabled by default because it is +slow and generates a lot of output.

LLVM_ENABLE_PROJECTS

A semicolon-delimited list selecting which of the +other LLVM subprojects to additionally build. (Only +effective when using a side-by-side project layout +e.g. via git). The default list is empty. Can +include: clang, libcxx, libcxxabi, libunwind, lldb, +compiler-rt, lld, polly, or debuginfo-tests.

LLVM_ENABLE_SPHINX

Build sphinx-based documentation from the source +code. This is disabled by default because it is +slow and generates a lot of output. Sphinx version +1.5 or later recommended.

LLVM_BUILD_LLVM_DYLIB

Generate libLLVM.so. This library contains a +default set of LLVM components that can be +overridden with LLVM_DYLIB_COMPONENTS. The +default contains most of LLVM and is defined in +tools/llvm-shlib/CMakeLists.txt. This option is +not available on Windows.

LLVM_OPTIMIZED_TABLEGEN

Builds a release tablegen that gets used during +the LLVM build. This can dramatically speed up +debug builds.

+

To configure LLVM, follow these steps:

+
    +
  1. Change directory into the object root directory:

    +
    % cd OBJ_ROOT
    +
    +
    +
  2. +
  3. Run the cmake:

    +
    % cmake -G "Unix Makefiles" -DCMAKE_INSTALL_PREFIX=/install/path
    +  [other options] SRC_ROOT
    +
    +
    +
  4. +
+
+
+

Compiling the LLVM Suite Source Code

+

Unlike with autotools, with CMake your build type is defined at configuration. +If you want to change your build type, you can re-run cmake with the following +invocation:

+
+
% cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=type SRC_ROOT
+
+
+
+

Between runs, CMake preserves the values set for all options. CMake has the +following build types defined:

+

Debug

+
+

These builds are the default. The build system will compile the tools and +libraries unoptimized, with debugging information, and asserts enabled.

+
+

Release

+
+

For these builds, the build system will compile the tools and libraries +with optimizations enabled and not generate debug info. CMake’s default +optimization level is -O3. This can be configured by setting the +CMAKE_CXX_FLAGS_RELEASE variable on the CMake command line.

+
+

RelWithDebInfo

+
+

These builds are useful when debugging. They generate optimized binaries with +debug information. CMake’s default optimization level is -O2. This can be +configured by setting the CMAKE_CXX_FLAGS_RELWITHDEBINFO variable on the +CMake command line.

+
+

Once you have LLVM configured, you can build it by entering the OBJ_ROOT +directory and issuing the following command:

+
% make
+
+
+

If the build fails, please check here to see if you are using a version of +GCC that is known not to compile LLVM.

+

If you have multiple processors in your machine, you may wish to use some of the +parallel build options provided by GNU Make. For example, you could use the +command:

+
% make -j2
+
+
+

There are several special targets which are useful when working with the LLVM +source code:

+

make clean

+
+

Removes all files generated by the build. This includes object files, +generated C/C++ files, libraries, and executables.

+
+

make install

+
+

Installs LLVM header files, libraries, tools, and documentation in a hierarchy +under $PREFIX, specified with CMAKE_INSTALL_PREFIX, which +defaults to /usr/local.

+
+

make docs-llvm-html

+
+

If configured with -DLLVM_ENABLE_SPHINX=On, this will generate a directory +at OBJ_ROOT/docs/html which contains the HTML formatted documentation.

+
+
+
+

Cross-Compiling LLVM

+

It is possible to cross-compile LLVM itself. That is, you can create LLVM +executables and libraries to be hosted on a platform different from the platform +where they are built (a Canadian Cross build). To generate build files for +cross-compiling CMake provides a variable CMAKE_TOOLCHAIN_FILE which can +define compiler flags and variables used during the CMake test operations.

+

The result of such a build is executables that are not runnable on the build +host but can be executed on the target. As an example the following CMake +invocation can generate build files targeting iOS. This will work on macOS +with the latest Xcode:

+
% cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES="armv7;armv7s;arm64"
+  -DCMAKE_TOOLCHAIN_FILE=<PATH_TO_LLVM>/cmake/platforms/iOS.cmake
+  -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_RUNTIME=Off -DLLVM_INCLUDE_TESTS=Off
+  -DLLVM_INCLUDE_EXAMPLES=Off -DLLVM_ENABLE_BACKTRACES=Off [options]
+  <PATH_TO_LLVM>
+
+
+

Note: There are some additional flags that need to be passed when building for +iOS due to limitations in the iOS SDK.

+

Check How To Cross-Compile Clang/LLVM using Clang/LLVM and Clang docs on how to cross-compile in general for more information +about cross-compiling.

+
+
+

The Location of LLVM Object Files

+

The LLVM build system is capable of sharing a single LLVM source tree among +several LLVM builds. Hence, it is possible to build LLVM for several different +platforms or configurations using the same source tree.

+
    +
  • Change directory to where the LLVM object files should live:

    +
    % cd OBJ_ROOT
    +
    +
    +
  • +
  • Run cmake:

    +
    % cmake -G "Unix Makefiles" SRC_ROOT
    +
    +
    +
  • +
+

The LLVM build will create a structure underneath OBJ_ROOT that matches the +LLVM source tree. At each level where source files are present in the source +tree there will be a corresponding CMakeFiles directory in the OBJ_ROOT. +Underneath that directory there is another directory with a name ending in +.dir under which you’ll find object files for each source.

+

For example:

+
+
% cd llvm_build_dir
+% find lib/Support/ -name APFloat*
+lib/Support/CMakeFiles/LLVMSupport.dir/APFloat.cpp.o
+
+
+
+
+
+

Optional Configuration Items

+

If you’re running on a Linux system that supports the binfmt_misc +module, and you have root access on the system, you can set your system up to +execute LLVM bitcode files directly. To do this, use commands like this (the +first command may not be required if you are already using the module):

+
% mount -t binfmt_misc none /proc/sys/fs/binfmt_misc
+% echo ':llvm:M::BC::/path/to/lli:' > /proc/sys/fs/binfmt_misc/register
+% chmod u+x hello.bc   (if needed)
+% ./hello.bc
+
+
+

This allows you to execute LLVM bitcode files directly. On Debian, you can also +use this command instead of the ‘echo’ command above:

+
% sudo update-binfmts --install llvm /path/to/lli --magic 'BC'
+
+
+
+
+
+

Directory Layout

+

One useful source of information about the LLVM source base is the LLVM doxygen documentation available at +https://llvm.org/doxygen/. The following is a brief introduction to code +layout:

+
+

llvm/cmake

+

Generates system build files.

+
+
llvm/cmake/modules

Build configuration for llvm user defined options. Checks compiler version and +linker flags.

+
+
llvm/cmake/platforms

Toolchain configuration for Android NDK, iOS systems and non-Windows hosts to +target MSVC.

+
+
+
+
+

llvm/examples

+
    +
  • Some simple examples showing how to use LLVM as a compiler for a custom +language - including lowering, optimization, and code generation.

  • +
  • Kaleidoscope Tutorial: The Kaleidoscope language tutorial runs through the +implementation of a nice little compiler for a non-trivial language +including a hand-written lexer, parser, AST, as well as code generation +support using LLVM, both static (ahead of time) and various approaches to +Just In Time (JIT) compilation. +Kaleidoscope Tutorial for complete beginners.

  • +
  • BuildingAJIT: Examples of the BuildingAJIT tutorial that shows how LLVM’s +ORC JIT APIs interact with other parts of LLVM. It also teaches how to +recombine them to build a custom JIT that is suited to your use-case.

  • +
+
+
+

llvm/include

+

Public header files exported from the LLVM library. The three main subdirectories:

+

llvm/include/llvm

+
+

All LLVM-specific header files, and subdirectories for different portions of +LLVM: Analysis, CodeGen, Target, Transforms, etc…

+
+

llvm/include/llvm/Support

+
+

Generic support libraries provided with LLVM but not necessarily specific to +LLVM. For example, some C++ STL utilities and a Command Line option processing +library store header files here.

+
+

llvm/include/llvm/Config

+
+

Header files configured by cmake. They wrap “standard” UNIX and +C header files. Source code can include these header files which +automatically take care of the conditional #includes that cmake +generates.

+
+
+
+

llvm/lib

+

Most source files are here. By putting code in libraries, LLVM makes it easy to +share code among the tools.

+

llvm/lib/IR/

+
+

Core LLVM source files that implement core classes like Instruction and +BasicBlock.

+
+

llvm/lib/AsmParser/

+
+

Source code for the LLVM assembly language parser library.

+
+

llvm/lib/Bitcode/

+
+

Code for reading and writing bitcode.

+
+

llvm/lib/Analysis/

+
+

A variety of program analyses, such as Call Graphs, Induction Variables, +Natural Loop Identification, etc.

+
+

llvm/lib/Transforms/

+
+

IR-to-IR program transformations, such as Aggressive Dead Code Elimination, +Sparse Conditional Constant Propagation, Inlining, Loop Invariant Code Motion, +Dead Global Elimination, and many others.

+
+

llvm/lib/Target/

+
+

Files describing target architectures for code generation. For example, +llvm/lib/Target/X86 holds the X86 machine description.

+
+

llvm/lib/CodeGen/

+
+

The major parts of the code generator: Instruction Selector, Instruction +Scheduling, and Register Allocation.

+
+

llvm/lib/MC/

+
+

The libraries represent and process code at machine code level. Handles +assembly and object-file emission.

+
+

llvm/lib/ExecutionEngine/

+
+

Libraries for directly executing bitcode at runtime in interpreted and +JIT-compiled scenarios.

+
+

llvm/lib/Support/

+
+

Source code corresponding to the header files in llvm/include/llvm/ADT/ +and llvm/include/llvm/Support/.

+
+
+
+

llvm/bindings

+

Contains bindings for the LLVM compiler infrastructure to allow +programs written in languages other than C or C++ to take advantage of the LLVM +infrastructure. +LLVM project provides language bindings for Go, OCaml and Python.

+
+
+

llvm/projects

+

Projects not strictly part of LLVM but shipped with LLVM. This is also the +directory for creating your own LLVM-based projects which leverage the LLVM +build system.

+
+
+

llvm/test

+

Feature and regression tests and other sanity checks on LLVM infrastructure. These +are intended to run quickly and cover a lot of territory without being exhaustive.

+
+
+

test-suite

+

A comprehensive correctness, performance, and benchmarking test suite +for LLVM. This comes in a separate git repository +<https://github.com/llvm/llvm-test-suite>, because it contains a +large amount of third-party code under a variety of licenses. For +details see the Testing Guide document.

+
+
+

llvm/tools

+

Executables built out of the libraries +above, which form the main part of the user interface. You can always get help +for a tool by typing tool_name -help. The following is a brief introduction +to the most important tools. More detailed information is in +the Command Guide.

+

bugpoint

+
+

bugpoint is used to debug optimization passes or code generation backends +by narrowing down the given test case to the minimum number of passes and/or +instructions that still cause a problem, whether it is a crash or +miscompilation. See HowToSubmitABug.html for more information on using +bugpoint.

+
+

llvm-ar

+
+

The archiver produces an archive containing the given LLVM bitcode files, +optionally with an index for faster lookup.

+
+

llvm-as

+
+

The assembler transforms the human readable LLVM assembly to LLVM bitcode.

+
+

llvm-dis

+
+

The disassembler transforms the LLVM bitcode to human readable LLVM assembly.

+
+

llvm-link

+
+

llvm-link, not surprisingly, links multiple LLVM modules into a single +program.

+
+

lli

+
+

lli is the LLVM interpreter, which can directly execute LLVM bitcode +(although very slowly…). For architectures that support it (currently x86, +Sparc, and PowerPC), by default, lli will function as a Just-In-Time +compiler (if the functionality was compiled in), and will execute the code +much faster than the interpreter.

+
+

llc

+
+

llc is the LLVM backend compiler, which translates LLVM bitcode to a +native code assembly file.

+
+

opt

+
+

opt reads LLVM bitcode, applies a series of LLVM to LLVM transformations +(which are specified on the command line), and outputs the resultant +bitcode. ‘opt -help’ is a good way to get a list of the +program transformations available in LLVM.

+

opt can also run a specific analysis on an input LLVM bitcode +file and print the results. Primarily useful for debugging +analyses, or familiarizing yourself with what an analysis does.

+
+
+
+

llvm/utils

+

Utilities for working with LLVM source code; some are part of the build process +because they are code generators for parts of the infrastructure.

+

codegen-diff

+
+

codegen-diff finds differences between code that LLC +generates and code that LLI generates. This is useful if you are +debugging one of them, assuming that the other generates correct output. For +the full user manual, run `perldoc codegen-diff'.

+
+

emacs/

+
+

Emacs and XEmacs syntax highlighting for LLVM assembly files and TableGen +description files. See the README for information on using them.

+
+

getsrcs.sh

+
+

Finds and outputs all non-generated source files, +useful if one wishes to do a lot of development across directories +and does not want to find each file. One way to use it is to run, +for example: xemacs `utils/getsrcs.sh` from the top of the LLVM source +tree.

+
+

llvmgrep

+
+

Performs an egrep -H -n on each source file in LLVM and +passes to it a regular expression provided on llvmgrep’s command +line. This is an efficient way of searching the source base for a +particular regular expression.

+
+

TableGen/

+
+

Contains the tool used to generate register +descriptions, instruction set descriptions, and even assemblers from common +TableGen description files.

+
+

vim/

+
+

vim syntax-highlighting for LLVM assembly files +and TableGen description files. See the README for how to use them.

+
+
+
+
+

An Example Using the LLVM Tool Chain

+

This section gives an example of using LLVM with the Clang front end.

+
+

Example with clang

+
    +
  1. First, create a simple C file, name it ‘hello.c’:

    +
    #include <stdio.h>
    +
    +int main() {
    +  printf("hello world\n");
    +  return 0;
    +}
    +
    +
    +
  2. +
  3. Next, compile the C file into a native executable:

    +
    % clang hello.c -o hello
    +
    +
    +
    +

    Note

    +

    Clang works just like GCC by default. The standard -S and -c arguments +work as usual (producing a native .s or .o file, respectively).

    +
    +
  4. +
  5. Next, compile the C file into an LLVM bitcode file:

    +
    % clang -O3 -emit-llvm hello.c -c -o hello.bc
    +
    +
    +

    The -emit-llvm option can be used with the -S or -c options to emit an LLVM +.ll or .bc file (respectively) for the code. This allows you to use +the standard LLVM tools on the bitcode file.

    +
  6. +
  7. Run the program in both forms. To run the program, use:

    +
    % ./hello
    +
    +
    +

    and

    +
    % lli hello.bc
    +
    +
    +

    The second example shows how to invoke the LLVM JIT, lli.

    +
  8. +
  9. Use the llvm-dis utility to take a look at the LLVM assembly code:

    +
    % llvm-dis < hello.bc | less
    +
    +
    +
  10. +
  11. Compile the program to native assembly using the LLC code generator:

    +
    % llc hello.bc -o hello.s
    +
    +
    +
  12. +
  13. Assemble the native assembly language file into a program:

    +
    % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native   # On Solaris
    +
    +% gcc hello.s -o hello.native                              # On others
    +
    +
    +
  14. +
  15. Execute the native code program:

    +
    % ./hello.native
    +
    +
    +

    Note that using clang to compile directly to native code (i.e. when the +-emit-llvm option is not present) does steps 6/7/8 for you.

    +
  16. +
+
+
+
+

Common Problems

+

If you are having problems building or using LLVM, or if you have any other +general questions about LLVM, please consult the Frequently Asked +Questions page.

+

If you are having problems with limited memory and build time, please try +building with ninja instead of make. Please consider configuring the +following options with cmake:

+
+
    +
  • -G Ninja +Setting this option will allow you to build with ninja instead of make. +Building with ninja significantly improves your build time, especially with +incremental builds, and improves your memory usage.

  • +
  • -DLLVM_USE_LINKER +Setting this option to lld will significantly reduce linking time for LLVM +executables on ELF-based platforms, such as Linux. If you are building LLVM +for the first time and lld is not available to you as a binary package, then +you may want to use the gold linker as a faster alternative to GNU ld.

  • +
  • +
    -DCMAKE_BUILD_TYPE
    +
      +
    • Debug — This is the default build type. This disables optimizations while +compiling LLVM and enables debug info. On ELF-based platforms (e.g. Linux) +linking with debug info may consume a large amount of memory.

    • +
    • Release — Turns on optimizations and disables debug info. Combining the +Release build type with -DLLVM_ENABLE_ASSERTIONS=ON may be a good trade-off +between speed and debuggability during development, particularly for running +the test suite.

    • +
    +
    +
    +
  • +
  • -DLLVM_ENABLE_ASSERTIONS +This option defaults to ON for Debug builds and defaults to OFF for Release +builds. As mentioned in the previous option, using the Release build type and +enabling assertions may be a good alternative to using the Debug build type.

  • +
  • -DLLVM_PARALLEL_LINK_JOBS +Set this equal to the number of jobs you wish to run simultaneously. This is +similar to the -j option used with make, but only for link jobs. This option +can only be used with ninja. You may wish to use a very low number of jobs, +as this will greatly reduce the amount of memory used during the build +process. If you have limited memory, you may wish to set this to 1.

  • +
  • -DLLVM_TARGETS_TO_BUILD +Set this equal to the target you wish to build. You may wish to set this to +X86; however, you will find a full list of targets within the +llvm-project/llvm/lib/Target directory.

  • +
  • -DLLVM_OPTIMIZED_TABLEGEN +Set this to ON to generate a fully optimized tablegen during your build. This +will significantly improve your build time. This is only useful if you are +using the Debug build type.

  • +
  • -DLLVM_ENABLE_PROJECTS +Set this equal to the projects you wish to compile (e.g. clang, lld, etc.). If +compiling more than one project, separate the items with a semicolon. Should +you run into issues with the semicolon, try surrounding it with single quotes.

  • +
  • -DCLANG_ENABLE_STATIC_ANALYZER +Set this option to OFF if you do not require the clang static analyzer. This +should improve your build time slightly.

  • +
  • -DLLVM_USE_SPLIT_DWARF +Consider setting this to ON if you require a debug build, as this will ease +memory pressure on the linker. This will make linking much faster, as the +binaries will not contain any of the debug information; however, this will +generate the debug information in the form of a DWARF object file (with the +extension .dwo). This only applies to host platforms using ELF, such as Linux.

  • +
+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingStartedTutorials.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingStartedTutorials.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingStartedTutorials.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingStartedTutorials.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,166 @@ + + + + + + + + + Getting Started/Tutorials — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Getting Started/Tutorials

+

For those new to the LLVM system.

+
+
+
+
Getting Started with the LLVM System

Discusses how to get up and running quickly with the LLVM infrastructure. +Everything from unpacking and compilation of the distribution to execution +of some tools.

+
+
LLVM Tutorial: Table of Contents

Tutorials about using LLVM. Includes a tutorial about making a custom +language with LLVM.

+
+
LLVM Programmer’s Manual

Introduction to the general layout of the LLVM sourcebase, important classes +and APIs, and some tips & tricks.

+
+
Performance Tips for Frontend Authors

A collection of tips for frontend authors on how to generate IR +which LLVM is able to effectively optimize.

+
+
Getting Started with the LLVM System using Microsoft Visual Studio

An addendum to the main Getting Started guide for those using Visual Studio +on Windows.

+
+
Architecture & Platform Information for Compiler Writers

A list of helpful links for compiler writers.

+
+
MyFirstTypoFix

This tutorial will guide you through the process of making a change to +LLVM, and contributing it back to the LLVM project.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingStartedVS.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingStartedVS.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GettingStartedVS.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GettingStartedVS.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,412 @@ + + + + + + + + + Getting Started with the LLVM System using Microsoft Visual Studio — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Getting Started with the LLVM System using Microsoft Visual Studio

+ +
+

Overview

+

Welcome to LLVM on Windows! This document only covers LLVM on Windows using +Visual Studio, not WSL, mingw or cygwin. In order to get started, you first need +to know some basic information.

+

There are many different projects that compose LLVM. The first piece is the +LLVM suite. This contains all of the tools, libraries, and header files needed +to use LLVM. It contains an assembler, disassembler, bitcode analyzer and +bitcode optimizer. It also contains basic regression tests that can be used to +test the LLVM tools and the Clang front end.

+

The second piece is the Clang front end. This +component compiles C, C++, Objective C, and Objective C++ code into LLVM +bitcode. Clang typically uses LLVM libraries to optimize the bitcode and emit +machine code. LLVM fully supports the COFF object file format, which is +compatible with all other existing Windows toolchains.

+

There are more LLVM projects which this document does not discuss.

+
+
+

Requirements

+

Before you begin to use the LLVM system, review the requirements given +below. This may save you some trouble by knowing ahead of time what hardware +and software you will need.

+
+

Hardware

+

Any system that can adequately run Visual Studio 2017 is fine. The LLVM +source tree including the git index consumes approximately 3GB. +Object files, libraries and executables consume approximately 5GB in +Release mode and much more in Debug mode. An SSD drive and >16GB RAM are +recommended.

+
+
+

Software

+

You will need Visual Studio 2017 or +higher, with the latest Update installed. Visual Studio Community Edition +suffices.

+

You will also need the CMake build system since it +generates the project files you will use to build with. CMake is bundled with +Visual Studio 2019 so separate installation is not required.

+

If you would like to run the LLVM tests you will need Python. Versions 3.6 and newer are known to work. You can +install Python with Visual Studio 2019, from the Microsoft store or from +the Python web site. We recommend the latter since it +allows you to adjust installation options.

+

You will need Git for Windows with bash tools, too. +Git for Windows is also bundled with Visual Studio 2019.

+
+
+
+

Getting Started

+

Here’s the short story for getting up and running quickly with LLVM. +These instructions were tested with Visual Studio 2019 and Python 3.9.6:

+
    +
  1. Download and install Visual Studio.

  2. +
  3. In the Visual Studio installer, Workloads tab, select the +Desktop development with C++ workload. Under Individual components tab, +select Git for Windows.

  4. +
  5. Complete the Visual Studio installation.

  6. +
  7. Download and install the latest Python 3 release.

  8. +
  9. In the first install screen, select both Install launcher for all users +and Add Python to the PATH. This will allow installing psutil for all +users for the regression tests and make Python available from the command +line.

  10. +
  11. In the second install screen, select (again) Install for all users; +if you want to develop lldb, selecting +Download debug binaries is useful.

  12. +
  13. Complete the Python installation.

  14. +
  15. +
    Run a “Developer Command Prompt for VS 2019” as administrator. This command

    prompt provides correct path and environment variables to Visual Studio and +the installed tools.

    +
    +
    +
  16. +
  17. In the terminal window, type the commands:

    +
    c:
    +cd \
    +
    +
    +
  18. +
+
+

You may install the LLVM sources in a location other than c:\llvm, but do not +install into a path containing spaces (e.g. c:\Documents and Settings\...), +as it will fail.

+
+
    +
  1. Register the Microsoft Debug Interface Access (DIA) DLLs

    +
    regsvr32 "%VSINSTALLDIR%\DIA SDK\bin\msdia140.dll"
    +regsvr32 "%VSINSTALLDIR%\DIA SDK\bin\amd64\msdia140.dll"
    +
    +
    +
  2. +
+
+

The DIA library is required for LLVM PDB tests and +LLDB development.

+
+
    +
  1. Install psutil and obtain LLVM source code:

    +
    pip install psutil
    +git clone https://github.com/llvm/llvm-project.git llvm
    +
    +
    +
  2. +
+
+

Instead of git clone you may download a compressed source distribution +from the releases page. +Select the last link: Source code (zip) and unpack the downloaded file using +Windows Explorer built-in zip support or any other unzip tool.

+
+
    +
  1. Finally, configure LLVM using CMake:

    +
    cmake -S llvm\llvm -B build -DLLVM_ENABLE_PROJECTS=clang -DLLVM_TARGETS_TO_BUILD=X86 -Thost=x64
    +exit
    +
    +
    +
  2. +
+
+

LLVM_ENABLE_PROJECTS specifies any additional LLVM projects you want to +build while LLVM_TARGETS_TO_BUILD selects the compiler targets. If +LLVM_TARGETS_TO_BUILD is omitted, all targets are built by default, +slowing compilation and using more disk space. +See the LLVM CMake guide for detailed information about +how to configure the LLVM build.

+

The cmake command line tool is bundled with Visual Studio but its GUI is +not. You may install CMake to use its GUI to change +CMake variables or modify the above command line.

+
    +
  • Once CMake is installed, the simplest way is to just start the +CMake GUI and select the directory where you have extracted LLVM; +the default options should all be fine. One option you may really +want to change, regardless of anything else, might be the +CMAKE_INSTALL_PREFIX setting to select a directory to INSTALL to +once compiling is complete, although installation is not mandatory for +using LLVM. Another important option is LLVM_TARGETS_TO_BUILD, +which controls the LLVM target architectures that are included in the +build.

  • +
  • CMake generates project files for all build types. To select a specific +build type, use the Configuration manager from the VS IDE or the +/property:Configuration command line option when using MSBuild.

  • +
  • By default, the Visual Studio project files generated by CMake use the +32-bit toolset. If you are developing on a 64-bit version of Windows and +want to use the 64-bit toolset, pass the -Thost=x64 flag when +generating the Visual Studio solution. This requires CMake 3.8.0 or later.

  • +
+
+
    +
  1. Start Visual Studio and select configuration:

  2. +
+
+

The directory in which you created the project files will contain an llvm.sln +file; just double-click on it to open Visual Studio. The default Visual +Studio configuration is Debug, which is slow and generates a huge amount +of debug information on disk. For now, we recommend selecting the Release +configuration for the LLVM project, which will build the fastest, or +RelWithDebInfo, which is also several times larger than Release. +Another technique is to build all of LLVM in Release mode and change +compiler flags, disabling optimization and enabling debug information, only +for the specific libraries or source files you actually need to debug.

+
+
    +
  1. Test LLVM in Visual Studio:

  2. +
+
+

You can run LLVM tests by merely building the project “check-all”. The test +results will be shown in the VS output window. Once the build succeeds, you +have verified a working LLVM development environment!

+

You should not see any unexpected failures, but will see many unsupported +tests and expected failures:

+
114>Testing Time: 1124.66s
+114>  Skipped          :    39
+114>  Unsupported      : 21649
+114>  Passed           : 51615
+114>  Expectedly Failed:    93
+========== Build: 114 succeeded, 0 failed, 321 up-to-date, 0 skipped ==========
+
+
+
+
+
+

Alternatives to manual installation

+

Instead of the steps above, to simplify the installation procedure you can use +Chocolatey as package manager. +After the installation of Chocolatey, +run these commands in an admin shell to install the required tools:

+
choco install -y git cmake python3
+pip3 install psutil
+
+
+

There is also a Windows +Dockerfile +with the entire build tool chain. This can be used to test the build with a +tool chain different from your host installation or to create build servers.

+
+
+

Next steps

+
    +
  1. Read the documentation.

  2. +
  3. Seriously, read the documentation.

  4. +
  5. Remember that you were warned twice about reading the documentation.

  6. +
+
+

Test LLVM on the command line:

+

The LLVM tests can be run by changing directory to the llvm source +directory and running:

+
c:\llvm> python ..\build\Release\bin\llvm-lit.py llvm\test
+
+
+

This example assumes that Python is in your PATH variable, which will be +the case if Add Python to the PATH was selected during Python installation. +If you opened a command window prior to Python installation, you will +have to close and reopen it to get the updated PATH.

+

A specific test or test directory can be run with:

+
c:\llvm> python ..\build\Release\bin\llvm-lit.py llvm\test\Transforms\Util
+
+
+
+
+

Build the LLVM Suite:

+
    +
  • The projects may still be built individually, but to build them all do +not just select all of them in batch build (as some are meant as +configuration projects), but rather select and build just the +ALL_BUILD project to build everything, or the INSTALL project, +which first builds the ALL_BUILD project, then installs the LLVM +headers, libs, and other useful things to the directory set by the +CMAKE_INSTALL_PREFIX setting when you first configured CMake.

  • +
  • The Fibonacci project is a sample program that uses the JIT. Modify the +project’s debugging properties to provide a numeric command line argument +or run it from the command line. The program will print the +corresponding fibonacci value.

  • +
+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GitBisecting.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GitBisecting.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GitBisecting.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GitBisecting.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,246 @@ + + + + + + + + + Bisecting LLVM code — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Bisecting LLVM code

+
+

Introduction

+

git bisect is a useful tool for finding which revision caused a bug.

+

This document describes how to use git bisect. In particular, while LLVM +has a mostly linear history, it has a few merge commits that added projects – +and these merged the linear history of those projects. As a consequence, the +LLVM repository has multiple roots: One “normal” root, and then one for each +toplevel project that was developed out-of-tree and then merged later. +As of early 2020, the only such merged project is MLIR, but flang will likely +be merged in a similar way soon.

+
+
+

Basic operation

+

See https://git-scm.com/docs/git-bisect for a good overview. In summary:

+
+
git bisect start
+git bisect bad main
+git bisect good f00ba
+
+
+
+

git will check out a revision in between. Try to reproduce your problem at +that revision, and run git bisect good or git bisect bad.

+

If you can’t repro at the current commit (maybe the build is broken), run +git bisect skip and git will pick a nearby alternate commit.

+

(To abort a bisect, run git bisect reset, and if git complains about not +being able to reset, do the usual git checkout -f main; git reset --hard +origin/main dance and try again).

+
+
+

git bisect run

+

A single bisect step often requires first building clang, and then compiling +a large code base with just-built clang. This can take a long time, so it’s +good if it can happen completely automatically. git bisect run can do +this for you if you write a run script that reproduces the problem +automatically. Writing the script can take 10-20 minutes, but it’s almost +always worth it – you can do something else while the bisect runs (such +as writing this document).

+

Here’s an example run script. It assumes that you’re in llvm-project and +that you have a sibling llvm-build-project build directory where you +configured CMake to use Ninja. You have a file repro.c in the current +directory that makes clang crash at trunk, but it worked fine at revision +f00ba.

+
+
# Build clang. If the build fails, `exit 125` causes this
+# revision to be skipped
+ninja -C ../llvm-build-project clang || exit 125
+
+../llvm-build-project/bin/clang repro.c
+
+
+
+

To make sure your run script works, it’s a good idea to run ./run.sh by +hand and tweak the script until it works, then run git bisect good or +git bisect bad manually once based on the result of the script +(check echo $? after your script ran), and only then run git bisect run +./run.sh. Don’t forget to mark your run script as executable – git bisect +run doesn’t check for that, it just assumes the run script failed each time.

+

Once your run script works, run git bisect run ./run.sh and a few hours +later you’ll know which commit caused the regression.

+

(This is a very simple run script. Often, you want to use just-built clang +to build a different project and then run a built executable of that project +in the run script.)

+
+
+

Bisecting across multiple roots

+

Here’s how LLVM’s history currently looks:

+
+
A-o-o-......-o-D-o-o-HEAD
+              /
+  B-o-...-o-C-
+
+
+
+

A is the first commit in LLVM ever, 97724f18c79c.

+

B is the first commit in MLIR, aed0d21a62db.

+

D is the merge commit that merged MLIR into the main LLVM repository, +0f0d0ed1c78f.

+

C is the last commit in MLIR before it got merged, 0f0d0ed1c78f^2. (The +^n modifier selects the n’th parent of a merge commit.)

+

git bisect goes through all parent revisions. Due to the way MLIR was +merged, at every revision at C or earlier, only the mlir/ directory +exists, and nothing else does.

+

As of early 2020, there is no flag to git bisect to tell it to not +descend into all reachable commits. Ideally, we’d want to tell it to only +follow the first parent of D.

+

The best workaround is to pass a list of directories to git bisect: +If you know the bug is due to a change in llvm, clang, or compiler-rt, use

+
+
git bisect start -- clang llvm compiler-rt
+
+
+
+

That way, the commits in mlir are never evaluated.

+

Alternatively, git bisect skip aed0d21a6 aed0d21a6..0f0d0ed1c78f explicitly +skips all commits on that branch. It takes 1.5 minutes to run on a fast +machine, and makes git bisect log output unreadable. (aed0d21a6 is +listed twice because git ranges exclude the revision listed on the left, +so it needs to be ignored explicitly.)

+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/GenericOpcode.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/GenericOpcode.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/GenericOpcode.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/GenericOpcode.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,1036 @@ + + + + + + + + + Generic Opcodes — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Generic Opcodes

+
+ +
+
+

Note

+

This documentation does not yet fully account for vectors. Many of the +scalar/integer/floating-point operations can also take vectors.

+
+
+

Constants

+
+

G_IMPLICIT_DEF

+

An undefined value.

+
%0:_(s32) = G_IMPLICIT_DEF
+
+
+
+
+

G_CONSTANT

+

An integer constant.

+
%0:_(s32) = G_CONSTANT i32 1
+
+
+
+
+

G_FCONSTANT

+

A floating point constant.

+
%0:_(s32) = G_FCONSTANT float 1.0
+
+
+
+
+

G_FRAME_INDEX

+

The address of an object in the stack frame.

+
%1:_(p0) = G_FRAME_INDEX %stack.0.ptr0
+
+
+
+
+

G_GLOBAL_VALUE

+

The address of a global value.

+
%0(p0) = G_GLOBAL_VALUE @var_local
+
+
+
+
+

G_BLOCK_ADDR

+

The address of a basic block.

+
%0:_(p0) = G_BLOCK_ADDR blockaddress(@test_blockaddress, %ir-block.block)
+
+
+
+
+
+

Integer Extension and Truncation

+
+

G_ANYEXT

+

Extend the underlying scalar type of an operation, leaving the high bits +unspecified.

+
%1:_(s32) = G_ANYEXT %0:_(s16)
+
+
+
+
+

G_SEXT

+

Sign extend the underlying scalar type of an operation, copying the sign bit +into the newly-created space.

+
%1:_(s32) = G_SEXT %0:_(s16)
+
+
+
+
+

G_SEXT_INREG

+

Sign extend the value from an arbitrary bit position, copying the sign bit +into all bits above it. This is equivalent to a shl + ashr pair with an +appropriate shift amount. $sz is an immediate (MachineOperand::isImm() +returns true) to allow targets to have some bitwidths legal and others +lowered. This opcode is particularly useful if the target has sign-extension +instructions that are cheaper than the constituent shifts as the optimizer is +able to make decisions on whether it’s better to hang on to the G_SEXT_INREG +or to lower it and optimize the individual shifts.

+
%1:_(s32) = G_SEXT_INREG %0:_(s32), 16
+
+
+
+
+

G_ZEXT

+

Zero extend the underlying scalar type of an operation, putting zero bits +into the newly-created space.

+
%1:_(s32) = G_ZEXT %0:_(s16)
+
+
+
+
+

G_TRUNC

+

Truncate the underlying scalar type of an operation. This is equivalent to +G_EXTRACT for scalar types, but acts elementwise on vectors.

+
%1:_(s16) = G_TRUNC %0:_(s32)
+
+
+
+
+
+

Type Conversions

+
+

G_INTTOPTR

+

Convert an integer to a pointer.

+
%1:_(p0) = G_INTTOPTR %0:_(s32)
+
+
+
+
+

G_PTRTOINT

+

Convert a pointer to an integer.

+
%1:_(s32) = G_PTRTOINT %0:_(p0)
+
+
+
+
+

G_BITCAST

+

Reinterpret a value as a new type. This is usually done without +changing any bits but this is not always the case due to a subtlety in the +definition of the LLVM-IR Bitcast Instruction. It +is allowed to bitcast between pointers with the same size, but +different address spaces.

+
%1:_(s64) = G_BITCAST %0:_(<2 x s32>)
+
+
+
+
+

G_ADDRSPACE_CAST

+

Convert a pointer to an address space to a pointer to another address space.

+
%1:_(p1) = G_ADDRSPACE_CAST %0:_(p0)
+
+
+
+

Caution

+

‘addrspacecast .. to’ Instruction doesn’t mention what happens if the cast is simply +invalid (i.e. if the address spaces are disjoint).

+
+
+
+
+

Scalar Operations

+
+

G_EXTRACT

+

Extract a register of the specified size, starting from the block given by +index. This will almost certainly be mapped to sub-register COPYs after +register banks have been selected.

+
%3:_(s32) = G_EXTRACT %2:_(s64), 32
+
+
+
+
+

G_INSERT

+

Insert a smaller register into a larger one at the specified bit-index.

+
%2:_(s64) = G_INSERT %0:_(s64), %1:_(s32), 0
+
+
+
+
+

G_MERGE_VALUES

+

Concatenate multiple registers of the same size into a wider register. +The input operands are always ordered from lowest bits to highest:

+
%0:(s32) = G_MERGE_VALUES %bits_0_7:(s8), %bits_8_15:(s8),
+                          %bits_16_23:(s8), %bits_24_31:(s8)
+
+
+
+
+

G_UNMERGE_VALUES

+

Extract multiple registers of the specified size, starting from blocks given by +indexes. This will almost certainly be mapped to sub-register COPYs after +register banks have been selected. +The output operands are always ordered from lowest bits to highest:

+
%bits_0_7:(s8), %bits_8_15:(s8),
+    %bits_16_23:(s8), %bits_24_31:(s8) = G_UNMERGE_VALUES %0:(s32)
+
+
+
+
+

G_BSWAP

+

Reverse the order of the bytes in a scalar.

+
%1:_(s32) = G_BSWAP %0:_(s32)
+
+
+
+
+

G_BITREVERSE

+

Reverse the order of the bits in a scalar.

+
%1:_(s32) = G_BITREVERSE %0:_(s32)
+
+
+
+
+

G_SBFX, G_UBFX

+

Extract a range of bits from a register.

+

The source operands are registers as follows:

+
    +
  • Source

  • +
  • The least-significant bit for the extraction

  • +
  • The width of the extraction

  • +
+

The least-significant bit (lsb) and width operands are in the range:

+
0 <= lsb < lsb + width <= source bitwidth, where all values are unsigned
+
+
+

G_SBFX sign-extends the result, while G_UBFX zero-extends the result.

+
; Extract 5 bits starting at bit 1 from %x and store them in %a.
+; Sign-extend the result.
+;
+; Example:
+; %x = 0...0000[10110]1 ---> %a = 1...111111[10110]
+%lsb_one = G_CONSTANT i32 1
+%width_five = G_CONSTANT i32 5
+%a:_(s32) = G_SBFX %x, %lsb_one, %width_five
+
+; Extract 3 bits starting at bit 2 from %x and store them in %b. Zero-extend
+; the result.
+;
+; Example:
+; %x = 1...11111[100]11 ---> %b = 0...00000[100]
+%lsb_two = G_CONSTANT i32 2
+%width_three = G_CONSTANT i32 3
+%b:_(s32) = G_UBFX %x, %lsb_two, %width_three
+
+
+
+
+
+

Integer Operations

+
+

G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SDIV, G_UDIV, G_SREM, G_UREM

+

These each perform their respective integer arithmetic on a scalar.

+
%dst:_(s32) = G_ADD %src0:_(s32), %src1:_(s32)
+
+
+

The above example adds %src1 to %src0 and stores the result in %dst.

+
+
+

G_SDIVREM, G_UDIVREM

+

Perform integer division and remainder thereby producing two results.

+
%div:_(s32), %rem:_(s32) = G_SDIVREM %0:_(s32), %1:_(s32)
+
+
+
+
+

G_SADDSAT, G_UADDSAT, G_SSUBSAT, G_USUBSAT, G_SSHLSAT, G_USHLSAT

+

Signed and unsigned addition, subtraction and left shift with saturation.

+
%2:_(s32) = G_SADDSAT %0:_(s32), %1:_(s32)
+
+
+
+
+

G_SHL, G_LSHR, G_ASHR

+

Shift the bits of a scalar left or right inserting zeros (sign-bit for G_ASHR).

+
+
+

G_ROTR, G_ROTL

+

Rotate the bits right (G_ROTR) or left (G_ROTL).

+
+
+

G_ICMP

+

Perform integer comparison producing non-zero (true) or zero (false). It’s +target specific whether a true value is 1, ~0U, or some other non-zero value.

+
+
+

G_SELECT

+

Select between two values depending on a zero/non-zero value.

+
%5:_(s32) = G_SELECT %4(s1), %6, %2
+
+
+
+
+

G_PTR_ADD

+

Add a scalar offset in addressable units to a pointer. Addressable units are +typically bytes but this may vary between targets.

+
%1:_(p0) = G_PTR_ADD %0:_(p0), %1:_(s32)
+
+
+
+

Caution

+

There are currently no in-tree targets that use this with addressable units +not equal to 8 bit.

+
+
+
+

G_PTRMASK

+

Zero out an arbitrary mask of bits of a pointer. The mask type must be +an integer, and the number of vector elements must match for all +operands. This corresponds to i_intr_llvm_ptrmask.

+
%2:_(p0) = G_PTRMASK %0, %1
+
+
+
+
+

G_SMIN, G_SMAX, G_UMIN, G_UMAX

+

Take the minimum/maximum of two values.

+
%5:_(s32) = G_SMIN %6, %2
+
+
+
+
+

G_ABS

+

Take the absolute value of a signed integer. The absolute value of the minimum +negative value (e.g. the 8-bit value 0x80) is defined to be itself.

+
%1:_(s32) = G_ABS %0
+
+
+
+
+

G_UADDO, G_SADDO, G_USUBO, G_SSUBO, G_SMULO, G_UMULO

+

Perform the requested arithmetic and produce a carry output in addition to the +normal result.

+
%3:_(s32), %4:_(s1) = G_UADDO %0, %1
+
+
+
+
+

G_UADDE, G_SADDE, G_USUBE, G_SSUBE

+

Perform the requested arithmetic and consume a carry input in addition to the +normal input. Also produce a carry output in addition to the normal result.

+
%4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3:_(s1)
+
+
+
+
+

G_UMULH, G_SMULH

+

Multiply two numbers at twice the incoming bit width (unsigned or signed) and return +the high half of the result.

+
%3:_(s32) = G_UMULH %0, %1
+
+
+
+
+

G_CTLZ, G_CTTZ, G_CTPOP

+

Count leading zeros, trailing zeros, or number of set bits.

+
%2:_(s33) = G_CTLZ %1
+%2:_(s33) = G_CTTZ %1
+%2:_(s33) = G_CTPOP %1
+
+
+
+
+

G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF

+

Count leading zeros or trailing zeros. If the value is zero then the result is +undefined.

+
%2:_(s33) = G_CTLZ_ZERO_UNDEF %1
+%2:_(s33) = G_CTTZ_ZERO_UNDEF %1
+
+
+
+
+
+

Floating Point Operations

+
+

G_FCMP

+

Perform floating point comparison producing non-zero (true) or zero +(false). It’s target specific whether a true value is 1, ~0U, or some other +non-zero value.

+
+
+

G_FNEG

+

Floating point negation.

+
+
+

G_FPEXT

+

Convert a floating point value to a larger type.

+
+
+

G_FPTRUNC

+

Convert a floating point value to a narrower type.

+
+
+

G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP

+

Convert between integer and floating point.

+
+
+

G_FABS

+

Take the absolute value of a floating point value.

+
+
+

G_FCOPYSIGN

+

Copy the value of the first operand, replacing the sign bit with that of the +second operand.

+
+ +
+

G_FMINNUM

+

Perform floating-point minimum on two values.

+

In the case where a single input is a NaN (either signaling or quiet), +the non-NaN input is returned.

+

The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0.

+
+
+

G_FMAXNUM

+

Perform floating-point maximum on two values.

+

In the case where a single input is a NaN (either signaling or quiet), +the non-NaN input is returned.

+

The return value of (FMAXNUM 0.0, -0.0) could be either 0.0 or -0.0.

+
+
+

G_FMINNUM_IEEE

+

Perform floating-point minimum on two values, following the IEEE-754 2008 +definition. This differs from FMINNUM in the handling of signaling NaNs. If one +input is a signaling NaN, returns a quiet NaN.

+
+
+

G_FMAXNUM_IEEE

+

Perform floating-point maximum on two values, following the IEEE-754 2008 +definition. This differs from FMAXNUM in the handling of signaling NaNs. If one +input is a signaling NaN, returns a quiet NaN.

+
+
+

G_FMINIMUM

+

NaN-propagating minimum that also treats -0.0 as less than 0.0. While +FMINNUM_IEEE follows IEEE 754-2008 semantics, FMINIMUM follows IEEE 754-2018 +draft semantics.

+
+
+

G_FMAXIMUM

+

NaN-propagating maximum that also treats -0.0 as less than 0.0. While +FMAXNUM_IEEE follows IEEE 754-2008 semantics, FMAXIMUM follows IEEE 754-2018 +draft semantics.

+
+
+

G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FREM

+

Perform the specified floating point arithmetic.

+
+
+

G_FMA

+

Perform a fused multiply add (i.e. without the intermediate rounding step).

+
+
+

G_FMAD

+

Perform a non-fused multiply add (i.e. with the intermediate rounding step).

+
+
+

G_FPOW

+

Raise the first operand to the power of the second.

+
+
+

G_FEXP, G_FEXP2

+

Calculate the base-e or base-2 exponential of a value

+
+
+

G_FLOG, G_FLOG2, G_FLOG10

+

Calculate the base-e, base-2, or base-10 logarithm of a value, respectively.

+
+
+

G_FCEIL, G_FCOS, G_FSIN, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT

+

These correspond to the standard C functions of the same name.

+
+
+

G_INTRINSIC_TRUNC

+

Returns the operand rounded to the nearest integer not larger in magnitude than the operand.

+
+
+

G_INTRINSIC_ROUND

+

Returns the operand rounded to the nearest integer.

+
+
+

G_LROUND, G_LLROUND

+

Returns the source operand rounded to the nearest integer with ties away from +zero.

+

See the LLVM LangRef entry on ‘llvm.lround.*’ for details on behaviour.

+
%rounded_32:_(s32) = G_LROUND %round_me:_(s64)
+%rounded_64:_(s64) = G_LLROUND %round_me:_(s64)
+
+
+
+
+
+

Vector Specific Operations

+
+

G_CONCAT_VECTORS

+

Concatenate two vectors to form a longer vector.

+
+
+

G_BUILD_VECTOR, G_BUILD_VECTOR_TRUNC

+

Create a vector from multiple scalar registers. No implicit +conversion is performed (i.e. the result element type must be the +same as all source operands)

+

The _TRUNC version truncates the larger operand types to fit the +destination vector elt type.

+
+
+

G_INSERT_VECTOR_ELT

+

Insert an element into a vector

+
+
+

G_EXTRACT_VECTOR_ELT

+

Extract an element from a vector

+
+
+

G_SHUFFLE_VECTOR

+

Concatenate two vectors and shuffle the elements according to the mask operand. +The mask operand should be an IR Constant which exactly matches the +corresponding mask for the IR shufflevector instruction.

+
+
+
+

Vector Reduction Operations

+

These operations represent horizontal vector reduction, producing a scalar result.

+
+

G_VECREDUCE_SEQ_FADD, G_VECREDUCE_SEQ_FMUL

+

The SEQ variants perform reductions in sequential order. The first operand is +an initial scalar accumulator value, and the second operand is the vector to reduce.

+
+
+

G_VECREDUCE_FADD, G_VECREDUCE_FMUL

+

These reductions are relaxed variants which may reduce the elements in any order.

+
+
+

G_VECREDUCE_FMAX, G_VECREDUCE_FMIN

+

FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.

+
+
+

Integer/bitwise reductions

+
    +
  • G_VECREDUCE_ADD

  • +
  • G_VECREDUCE_MUL

  • +
  • G_VECREDUCE_AND

  • +
  • G_VECREDUCE_OR

  • +
  • G_VECREDUCE_XOR

  • +
  • G_VECREDUCE_SMAX

  • +
  • G_VECREDUCE_SMIN

  • +
  • G_VECREDUCE_UMAX

  • +
  • G_VECREDUCE_UMIN

  • +
+

Integer reductions may have a result type larger than the vector element type. +However, the reduction is performed using the vector element type and the value +in the top bits is unspecified.

+
+
+
+

Memory Operations

+
+

G_LOAD, G_SEXTLOAD, G_ZEXTLOAD

+

Generic load. Expects a MachineMemOperand in addition to explicit +operands. If the result size is larger than the memory size, the +high bits are undefined, sign-extended, or zero-extended respectively.

+

Only G_LOAD is valid if the result is a vector type. If the result is larger +than the memory size, the high elements are undefined (i.e. this is not a +per-element, vector anyextload)

+
+
+

G_INDEXED_LOAD

+

Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset. +If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed) +then the value is loaded from $newaddr.

+
+
+

G_INDEXED_SEXTLOAD

+

Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD.

+
+
+

G_INDEXED_ZEXTLOAD

+

Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD.

+
+
+

G_STORE

+

Generic store. Expects a MachineMemOperand in addition to explicit +operands. If the stored value size is greater than the memory size, +the high bits are implicitly truncated. If this is a vector store, the +high elements are discarded (i.e. this does not function as a per-lane +vector, truncating store)

+
+
+

G_INDEXED_STORE

+

Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour.

+
+
+

G_ATOMIC_CMPXCHG_WITH_SUCCESS

+

Generic atomic cmpxchg with internal success check. Expects a +MachineMemOperand in addition to explicit operands.

+
+
+

G_ATOMIC_CMPXCHG

+

Generic atomic cmpxchg. Expects a MachineMemOperand in addition to explicit +operands.

+
+ +
+

G_FENCE

+
+

Caution

+

I couldn’t find any documentation on this at the time of writing.

+
+
+
+

G_MEMCPY

+

Generic memcpy. Expects two MachineMemOperands covering the store and load +respectively, in addition to explicit operands.

+
+
+

G_MEMCPY_INLINE

+

Generic inlined memcpy. Like G_MEMCPY, but it is guaranteed that this version +will not be lowered as a call to an external function. Currently the size +operand is required to evaluate as a constant (not an immediate), though that is +expected to change when llvm.memcpy.inline is taught to support dynamic sizes.

+
+
+

G_MEMMOVE

+

Generic memmove. Similar to G_MEMCPY, but the source and destination memory +ranges are allowed to overlap.

+
+
+

G_MEMSET

+

Generic memset. Expects a MachineMemOperand in addition to explicit operands.

+
+
+

G_BZERO

+

Generic bzero. Expects a MachineMemOperand in addition to explicit operands.

+
+
+
+

Control Flow

+
+

G_PHI

+

Implement the φ node in the SSA graph representing the function.

+
%dst(s8) = G_PHI %src1(s8), %bb.<id1>, %src2(s8), %bb.<id2>
+
+
+
+
+

G_BR

+

Unconditional branch

+
G_BR %bb.<id>
+
+
+
+
+

G_BRCOND

+

Conditional branch

+
G_BRCOND %condition, %basicblock.<id>
+
+
+
+
+

G_BRINDIRECT

+

Indirect branch

+
G_BRINDIRECT %src(p0)
+
+
+
+
+

G_BRJT

+

Indirect branch to jump table entry

+
G_BRJT %ptr(p0), %jti, %idx(s64)
+
+
+
+
+

G_JUMP_TABLE

+

Generates a pointer to the address of the jump table specified by the source +operand. The source operand is a jump table index. +G_JUMP_TABLE can be used in conjunction with G_BRJT to support jump table +codegen with GlobalISel.

+
%dst:_(p0) = G_JUMP_TABLE %jump-table.0
+
+
+

The above example generates a pointer to the source jump table index.

+
+
+

G_INTRINSIC, G_INTRINSIC_W_SIDE_EFFECTS

+

Call an intrinsic

+

The _W_SIDE_EFFECTS version is considered to have unknown side-effects and +as such cannot be reordered across other side-effecting instructions.

+
+

Note

+

Unlike SelectionDAG, there is no _VOID variant. Both of these are permitted +to have zero, one, or multiple results.

+
+
+
+
+

Variadic Arguments

+
+

G_VASTART

+
+

Caution

+

I found no documentation for this instruction at the time of writing.

+
+
+
+

G_VAARG

+
+

Caution

+

I found no documentation for this instruction at the time of writing.

+
+
+
+
+

Other Operations

+
+

G_DYN_STACKALLOC

+

Dynamically realigns the stack pointer to the specified size and alignment. +An alignment value of 0 or 1 means no specific alignment.

+
%8:_(p0) = G_DYN_STACKALLOC %7(s64), 32
+
+
+
+
+
+

Optimization Hints

+

These instructions do not correspond to any target instructions. They act as +hints for various combines.

+
+

G_ASSERT_SEXT, G_ASSERT_ZEXT

+

This signifies that the contents of a register were previously extended from a +smaller type.

+

The smaller type is denoted using an immediate operand. For scalars, this is the +width of the entire smaller type. For vectors, this is the width of the smaller +element type.

+
%x_was_zexted:_(s32) = G_ASSERT_ZEXT %x(s32), 16
+%y_was_zexted:_(<2 x s32>) = G_ASSERT_ZEXT %y(<2 x s32>), 16
+
+%z_was_sexted:_(s32) = G_ASSERT_SEXT %z(s32), 8
+
+
+

G_ASSERT_SEXT and G_ASSERT_ZEXT act like copies, albeit with some restrictions.

+

The source and destination registers must

+
    +
  • Be virtual

  • +
  • Belong to the same register class

  • +
  • Belong to the same register bank

  • +
+

It should always be safe to

+
    +
  • Look through the source register

  • +
  • Replace the destination register with the source register

  • +
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/GMIR.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/GMIR.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/GMIR.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/GMIR.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,386 @@ + + + + + + + + + Generic Machine IR — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Generic Machine IR

+ +

Generic MIR (gMIR) is an intermediate representation that shares the same data +structures as MachineIR (MIR) but has more relaxed +constraints. As the compilation pipeline proceeds, these constraints are +gradually tightened until gMIR has become MIR.

+

The rest of this document will assume that you are familiar with the concepts +in MachineIR (MIR) and will highlight the differences +between MIR and gMIR.

+
+

Generic Machine Instructions

+
+

Note

+

This section expands on Machine Instructions from the MIR Language +Reference.

+
+

Whereas MIR deals largely in Target Instructions and only has a small set of +target independent opcodes such as COPY, PHI, and REG_SEQUENCE, +gMIR defines a rich collection of Generic Opcodes which are target +independent and describe operations which are typically supported by targets. +One example is G_ADD which is the generic opcode for an integer addition. +More information on each of the generic opcodes can be found at +Generic Opcodes.

+

The MachineIRBuilder class wraps the MachineInstrBuilder and provides +a convenient way to create these generic instructions.

+
+
+

Generic Virtual Registers

+
+

Note

+

This section expands on Registers from the MIR Language +Reference.

+
+

Generic virtual registers are like virtual registers but they are not assigned a +Register Class constraint. Instead, generic virtual registers have less strict +constraints starting with a Low Level Type and then further constrained to a +Register Bank. Eventually they will be constrained to a register class +at which point they become normal virtual registers.

+

Generic virtual registers can be used with all the virtual register APIs +provided by MachineRegisterInfo. In particular, the def-use chain APIs can +be used without needing to distinguish them from non-generic virtual registers.

+

For simplicity, most generic instructions only accept virtual registers (both +generic and non-generic). There are some exceptions to this but in general:

+
    +
  • instead of immediates, they use a generic virtual register defined by an +instruction that materializes the immediate value (see +Translation of Constants). Typically this is a G_CONSTANT or a +G_FCONSTANT. One example of an exception to this rule is G_SEXT_INREG where +having an immediate is mandatory.

  • +
  • instead of physical register, they use a generic virtual register that is +either defined by a COPY from the physical register or used by a COPY +that defines the physical register.

  • +
+
+

Historical Note

+

We started with an alternative representation, where MRI tracks a size for +each generic virtual register, and instructions have lists of types. +That had two flaws: the type and size are redundant, and there was no generic +way of getting a given operand’s type (as there was no 1:1 mapping between +instruction types and operands). +We considered putting the type in some variant of MCInstrDesc instead: +See PR26576: [GlobalISel] Generic MachineInstrs +need a type but this increases the memory footprint of the related objects

+
+
+
+

Register Bank

+

A Register Bank is a set of register classes defined by the target. This +definition is rather loose so let’s talk about what they can achieve.

+

Suppose we have a processor that has two register files, A and B. These are +equal in every way and support the same instructions for the same cost. They’re +just physically stored apart and each instruction can only access registers from +A or B but never a mix of the two. If we want to perform an operation on data +that’s split between the two register files, we must first copy all the data +into a single register file.

+

Given a processor like this, we would benefit from clustering related data +together into one register file so that we minimize the cost of copying data +back and forth to satisfy the (possibly conflicting) requirements of all the +instructions. Register Banks are a means to constrain the register allocator to +use a particular register file for a virtual register.

+

In practice, register files A and B are rarely equal. They can typically store +the same data but there are usually some restrictions on what operations you can +do on each register file. A fairly common pattern is for one of them to be +accessible to integer operations and the other accessible to floating point +operations. To accommodate this, let’s rename A and B to GPR (general purpose +registers) and FPR (floating point registers).

+

We now have some additional constraints that limit us. An operation like G_FMUL +has to happen in FPR and G_ADD has to happen in GPR. However, even though this +prescribes a lot of the assignments we still have some freedom. A G_LOAD can +happen in both GPR and FPR, and which we want depends on who is going to consume +the loaded data. Similarly, G_FNEG can happen in both GPR and FPR. If we assign +it to FPR, then we’ll use floating point negation. However, if we assign it to +GPR then we can equivalently G_XOR the sign bit with 1 to invert it.

+

In summary, Register Banks are a means of disambiguating between seemingly +equivalent choices based on some analysis of the differences when each choice +is applied in a given context.

+

To give some concrete examples:

+

AArch64

+
+

AArch64 has three main banks. GPR for integer operations, FPR for floating +point and also for the NEON vector instruction set. The third is CCR and +describes the condition code register used for predication.

+
+

MIPS

+
+

MIPS has five main banks of which many programs only really use one or two. +GPR is the general purpose bank for integer operations. FGR or CP1 is for +the floating point operations as well as the MSA vector instructions and a +few other application specific extensions. CP0 is for system registers and +few programs will use it. CP2 and CP3 are for any application specific +coprocessors that may be present in the chip. Arguably, there is also a sixth +for the LO and HI registers but these are only used for the result of a few +operations and it’s of questionable value to model distinctly from GPR.

+
+

X86

+
+

X86 can be seen as having 3 main banks: general-purpose, x87, and +vector (which could be further split into a bank per domain for single vs +double precision instructions). It also looks like there are arguably a few +more potential banks such as one for the AVX512 Mask Registers.

+
+

Register banks are described by a target-provided API, +RegisterBankInfo.

+
+
+

Low Level Type

+

Additionally, every generic virtual register has a type, represented by an +instance of the LLT class.

+

Like EVT/MVT/Type, it has no distinction between unsigned and signed +integer types. Furthermore, it also has no distinction between integer and +floating-point types: it mainly conveys absolutely necessary information, such +as size and number of vector lanes:

+
    +
  • sN for scalars

  • +
  • pN for pointers

  • +
  • <N x sM> for vectors

  • +
+

LLT is intended to replace the usage of EVT in SelectionDAG.

+

Here are some LLT examples and their EVT and Type equivalents:

+
+
+++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

LLT

EVT

IR Type

s1

i1

i1

s8

i8

i8

s32

i32

i32

s32

f32

float

s17

i17

i17

s16

N/A

{i8, i8} 1

s32

N/A

[4 x i8] 1

p0

iPTR

i8*, i32*, %opaque*

p2

iPTR

i8 addrspace(2)*

<4 x s32>

v4f32

<4 x float>

s64

v1f64

<1 x double>

<3 x s32>

v3i32

<3 x i32>

+
+

Rationale: instructions already encode a specific interpretation of types +(e.g., add vs. fadd, or sdiv vs. udiv). Also encoding that +information in the type system requires introducing bitcast with no real +advantage for the selector.

+

Pointer types are distinguished by address space. This matches IR, as opposed +to SelectionDAG where address space is an attribute on operations. +This representation better supports pointers having different sizes depending +on their addressspace.

+
+

Note

+
+

Caution

+

Is this still true? I thought we’d removed the 1-element vector concept. +Hypothetically, it could be distinct from a scalar but I think we failed to +find a real occurrence.

+
+

Currently, LLT requires at least 2 elements in vectors, but some targets have +the concept of a ‘1-element vector’. Representing them as their underlying +scalar type is a nice simplification.

+
+

Footnotes

+
+
1(1,2)
+

This mapping is ABI dependent. Here we’ve assumed no additional padding is required.

+
+
+
+
+

Generic Opcode Reference

+

The Generic Opcodes that are available are described at Generic Opcodes.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/index.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,235 @@ + + + + + + + + + Global Instruction Selection — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Global Instruction Selection

+
+

Warning

+

This document is a work in progress. It reflects the current state of the +implementation, as well as open design and implementation issues.

+
+ +
+

Introduction

+

GlobalISel is a framework that provides a set of reusable passes and utilities +for instruction selection — translation from LLVM IR to target-specific +Machine IR (MIR).

+

GlobalISel is intended to be a replacement for SelectionDAG and FastISel, to +solve three major problems:

+
    +
  • Performance — SelectionDAG introduces a dedicated intermediate +representation, which has a compile-time cost.

    +

    GlobalISel directly operates on the post-isel representation used by the +rest of the code generator, MIR. +It does require extensions to that representation to support arbitrary +incoming IR: Generic Machine IR.

    +
  • +
  • Granularity — SelectionDAG and FastISel operate on individual basic +blocks, losing some global optimization opportunities.

    +

    GlobalISel operates on the whole function.

    +
  • +
  • Modularity — SelectionDAG and FastISel are radically different and share +very little code.

    +

    GlobalISel is built in a way that enables code reuse. For instance, both the +optimized and fast selectors share the Core Pipeline, and targets can +configure that pipeline to better suit their needs.

    +
  • +
+
+
+

Design and Implementation Reference

+

More information on the design and implementation of GlobalISel can be found in +the following sections.

+ +

More information on specific passes can be found in the following sections:

+ +
+
+

Progress and Future Work

+

The initial goal is to replace FastISel on AArch64. The next step will be to +replace SelectionDAG as the optimized ISel.

+

NOTE: +While we iterate on GlobalISel, we strive to avoid affecting the performance of +SelectionDAG, FastISel, or the other MIR passes. For instance, the types of +Generic Virtual Registers are stored in a separate table in MachineRegisterInfo, +that is destroyed after InstructionSelect.

+
+

FastISel Replacement

+

For the initial FastISel replacement, we intend to fall back to SelectionDAG on +selection failures.

+

Currently, compile-time of the fast pipeline is within 1.5x of FastISel. +We’re optimistic we can get to within 1.1/1.2x, but beating FastISel will be +challenging given the multi-pass approach. +Still, supporting all IR (via a complete legalizer) and avoiding the fallback +to SelectionDAG in the worst case should enable better amortized performance +than SelectionDAG+FastISel.

+

NOTE: +We considered never having a fallback to SelectionDAG, instead deciding early +whether a given function is supported by GlobalISel or not. The decision would +be based on Legalizer queries. +We abandoned that for two reasons: +a) on IR inputs, we’d need to basically simulate the IRTranslator; +b) to be robust against unforeseen failures and to enable iterative +improvements.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/InstructionSelect.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/InstructionSelect.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/InstructionSelect.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/InstructionSelect.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,224 @@ + + + + + + + + + InstructionSelect — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

InstructionSelect

+

This pass transforms generic machine instructions into equivalent +target-specific instructions. It traverses the MachineFunction bottom-up, +selecting uses before definitions, enabling trivial dead code elimination.

+
+

API: InstructionSelector

+

The target implements the InstructionSelector class, containing the +target-specific selection logic proper.

+

The instance is provided by the subtarget, so that it can specialize the +selector by subtarget feature (with, e.g., a vector selector overriding parts +of a general-purpose common selector). +We might also want to parameterize it by MachineFunction, to enable selector +variants based on function attributes like optsize.

+

The simple API consists of:

+
+
virtual bool select(MachineInstr &MI)
+
+
+
+

This target-provided method is responsible for mutating (or replacing) a +possibly-generic MI into a fully target-specific equivalent. +It is also responsible for doing the necessary constraining of gvregs into the +appropriate register classes as well as passing through COPY instructions to +the register allocator.

+

The InstructionSelector can fold other instructions into the selected MI, +by walking the use-def chain of the vreg operands. +As GlobalISel is Global, this folding can occur across basic blocks.

+
+
+

SelectionDAG Rule Imports

+

TableGen will import SelectionDAG rules and provide the following function to +execute them:

+
+
bool selectImpl(MachineInstr &MI)
+
+
+
+

The --stats option can be used to determine what proportion of rules were +successfully imported. The easiest way to use this is to copy the +-gen-globalisel tablegen command from ninja -v and modify it.

+

Similarly, the --warn-on-skipped-patterns option can be used to obtain the +reasons that rules weren’t imported. This can be used to focus on the most +important rejection reasons.

+
+
+

PatLeaf Predicates

+

PatLeafs cannot be imported because their C++ is implemented in terms of +SDNode objects. PatLeafs that handle immediate predicates should be +replaced by ImmLeaf, IntImmLeaf, or FPImmLeaf as appropriate.

+

There’s no standard answer for other PatLeafs. Some standard predicates have +been baked into TableGen but this should not generally be done.

+
+
+

Custom SDNodes

+

Custom SDNodes should be mapped to Target Pseudos using GINodeEquiv. This +will cause the instruction selector to import them but you will also need to +ensure the target pseudo is introduced to the MIR before the instruction +selector. Any preceding pass is suitable but the legalizer will be a +particularly common choice.

+
+
+

ComplexPatterns

+

ComplexPatterns cannot be imported because their C++ is implemented in terms of +SDNode objects. GlobalISel versions should be defined with +GIComplexOperandMatcher and mapped to ComplexPattern with +GIComplexPatternEquiv.

+

The following predicates are useful for porting ComplexPattern:

+
    +
  • isBaseWithConstantOffset() - Check for base+offset structures

  • +
  • isOperandImmEqual() - Check for a particular constant

  • +
  • isObviouslySafeToFold() - Check for reasons an instruction can’t be sunk and folded into another.

  • +
+

There are some important points for the C++ implementation:

+
    +
  • Don’t modify MIR in the predicate

  • +
  • Renderer lambdas should capture by value to avoid use-after-free. They will be used after the predicate returns.

  • +
  • Only create instructions in a renderer lambda. GlobalISel won’t clean up things you create but don’t use.

  • +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/IRTranslator.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/IRTranslator.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/IRTranslator.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/IRTranslator.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,226 @@ + + + + + + + + + IRTranslator — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

IRTranslator

+ +

This pass translates the input LLVM-IR Function to a GMIR +MachineFunction. This is typically a direct translation but does +occasionally get a bit more involved. For example:

+
%2 = add i32 %0, %1
+
+
+

becomes:

+
%2:_(s32) = G_ADD %0:_(s32), %1:_(s32)
+
+
+

whereas

+
call i32 @puts(i8* %cast210)
+
+
+

is translated according to the ABI rules of the target.

+
+

Note

+

The currently implemented portion of the LLVM Language Reference Manual is sufficient for +many compilations but it is not 100% complete. Users seeking to compile +LLVM-IR containing some of the rarer features may need to implement the +translation.

+
+
+

Target Intrinsics

+

There has been some (off-list) debate about whether to add target hooks for +translating target intrinsics. Among those who discussed it, it was generally +agreed that the IRTranslator should be able to lower target intrinsics in a +customizable way but no work has happened to implement this at the time of +writing.

+
+
+

Translating Function Calls

+

The IRTranslator also implements the ABI’s calling convention by lowering +calls, returns, and arguments to the appropriate physical register usage and +instruction sequences. This is achieved using the CallLowering +implementation.

+
+

Aggregates

+
+

Caution

+

This has changed since it was written and is no longer accurate. It has not +been refreshed in this pass of improving the documentation as I haven’t +worked much in this part of the codebase and it should have attention from +someone more knowledgeable about it.

+
+

Aggregates are lowered into multiple virtual registers, similar to +SelectionDAG’s multiple vregs via GetValueVTs.

+

TODO: +As some of the bits are undef (padding), we should consider augmenting the +representation with additional metadata (in effect, caching computeKnownBits +information on vregs). +See PR26161: [GlobalISel] Value to vreg during +IR to MachineInstr translation for aggregate type

+
+
+
+

Translation of Constants

+

Constant operands are translated as a use of a virtual register that is defined +by a G_CONSTANT or G_FCONSTANT instruction. These instructions are +placed in the entry block to allow them to be subject to the continuous CSE +implementation (CSEMIRBuilder). Their debug location information is removed +to prevent this from confusing debuggers.

+

This is beneficial as it allows us to fold constants into immediate operands +during InstructionSelect, while still avoiding redundant materializations +for expensive non-foldable constants. However, this can lead to unnecessary +spills and reloads in an -O0 pipeline, as these virtual registers can have long +live ranges. This can be mitigated by running a localizer +after the translator.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/KnownBits.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/KnownBits.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/KnownBits.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/KnownBits.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,225 @@ + + + + + + + + + Known Bits Analysis — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Known Bits Analysis

+

The Known Bits Analysis pass makes information about the known values of bits +available to other passes to enable transformations like those in the examples +below. The information is lazily computed so you should only pay for what you +use.

+
+

Examples

+

A simple example is that transforming:

+
a + 1
+
+
+

into:

+
a | 1
+
+
+

is only valid when the addition doesn’t carry. In other words it’s only valid +if a & 1 is zero.

+

Another example is:

+
%1:(s32) = G_CONSTANT i32 0xFF0
+%2:(s32) = G_AND %0, %1
+%3:(s32) = G_CONSTANT i32 0x0FF
+%4:(s32) = G_AND %2, %3
+
+
+

We can use the constants and the definition of G_AND to determine the known +bits:

+
                                 ; %0 = 0x????????
+%1:(s32) = G_CONSTANT i32 0xFF0  ; %1 = 0x00000FF0
+%2:(s32) = G_AND %0, %1          ; %2 = 0x00000??0
+%3:(s32) = G_CONSTANT i32 0x0FF  ; %3 = 0x000000FF
+%4:(s32) = G_AND %2, %3          ; %4 = 0x000000?0
+
+
+

and then use this to simplify the expression:

+
                                 ; %0 = 0x????????
+%5:(s32) = G_CONSTANT i32 0x0F0  ; %5 = 0x000000F0
+%4:(s32) = G_AND %0, %5          ; %4 = 0x000000?0
+
+
+

Note that %4 still has the same known bits as before the transformation. +Many transformations share this property. The main exception is when the +transform causes undefined bits to become defined to either zero, one, or +defined but unknown.

+
+
+

Usage

+

To use Known Bits Analysis in a pass, first include the header and register the +dependency with INITIALIZE_PASS_DEPENDENCY.

+
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+
+...
+
+INITIALIZE_PASS_BEGIN(...)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
+INITIALIZE_PASS_END(...)
+
+
+

and require the pass in getAnalysisUsage.

+
void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<GISelKnownBitsAnalysis>();
+  // Optional: If your pass preserves known bits analysis (many do) then
+  //           indicate that it's preserved for re-use by another pass here.
+  AU.addPreserved<GISelKnownBitsAnalysis>();
+}
+
+
+

Then it’s just a matter of fetching the analysis and using it:

+
bool MyPass::runOnMachineFunction(MachineFunction &MF) {
+  ...
+  GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+  ...
+  MachineInstr *MI = ...;
+  KnownBits Known = KB.getKnownBits(MI->getOperand(0).getReg());
+  if (Known.Zero[0]) {
+    // Bit 0 is known to be zero
+  }
+  ...
+}
+
+
+

There are many more APIs beyond getKnownBits(). See the API reference for more information.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Legalizer.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Legalizer.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Legalizer.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Legalizer.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,448 @@ + + + + + + + + + Legalizer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Legalizer

+

This pass transforms the generic machine instructions such that they are legal.

+

A legal instruction is defined as:

+
    +
  • selectable — the target will later be able to select it to a +target-specific (non-generic) instruction. This doesn’t necessarily mean that +InstructionSelect has to handle it though. It just means that +something must handle it.

  • +
  • operating on vregs that can be loaded and stored – if necessary, the +target can select a G_LOAD/G_STORE of each gvreg operand.

  • +
+

As opposed to SelectionDAG, there are no legalization phases. In particular, +‘type’ and ‘operation’ legalization are not separate.

+

Legalization is iterative, and all state is contained in GMIR. To maintain the +validity of the intermediate code, instructions are introduced:

+
    +
  • G_MERGE_VALUES — concatenate multiple registers of the same +size into a single wider register.

  • +
  • G_UNMERGE_VALUES — extract multiple registers of the same size +from a single wider register.

  • +
  • G_EXTRACT — extract a simple register (as contiguous sequences of bits) +from a single wider register.

  • +
+

As they are expected to be temporary byproducts of the legalization process, +they are combined at the end of the Legalizer pass. +If any remain, they are expected to always be selectable, using loads and stores +if necessary.

+

The legality of an instruction may only depend on the instruction itself and +must not depend on any context in which the instruction is used. However, after +deciding that an instruction is not legal, using the context of the instruction +to decide how to legalize the instruction is permitted. As an example, if we +have a G_FOO instruction of the form:

+
%1:_(s32) = G_CONSTANT i32 1
+%2:_(s32) = G_FOO %0:_(s32), %1:_(s32)
+
+
+

it’s impossible to say that G_FOO is legal iff %1 is a G_CONSTANT with +value 1. However, the following:

+
%2:_(s32) = G_FOO %0:_(s32), i32 1
+
+
+

can say that it’s legal iff operand 2 is an immediate with value 1 because +that information is entirely contained within the single instruction.

+
+

API: LegalizerInfo

+

The recommended 1 API looks like this:

+
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL})
+    .legalFor({s32, s64, v2s32, v4s32, v2s64})
+    .clampScalar(0, s32, s64)
+    .widenScalarToNextPow2(0)
+    .clampNumElements(0, v2s32, v4s32)
+    .clampNumElements(0, v2s64, v2s64)
+    .moreElementsToNextPow2(0);
+
+
+

and describes a set of rules by which we can either declare an instruction legal +or decide which action to take to make it more legal.

+

At the core of this ruleset is the LegalityQuery which describes the +instruction. We use a description rather than the instruction to both allow other +passes to determine legality without having to create an instruction and also to +limit the information available to the predicates to that which is safe to rely +on. Currently, the information available to the predicates that determine +legality contains:

+
    +
  • The opcode for the instruction

  • +
  • The type of each type index (see type0, type1, etc.)

  • +
  • The size in bytes and atomic ordering for each MachineMemOperand

  • +
+
+

Note

+

An alternative worth investigating is to generalize the API to represent +actions using std::function that implements the action, instead of explicit +enum tokens (Legal, WidenScalar, …) that instruct it to call a +function. This would have some benefits, the most notable being that Custom could +be removed.

+
+

Footnotes

+
+
1
+

An API that is broadly similar to +SelectionDAG/TargetLowering is available but is not recommended as a more +powerful API is available.

+
+
+
+

Rule Processing and Declaring Rules

+

The getActionDefinitionsBuilder function generates a ruleset for the given +opcode(s) that rules can be added to. If multiple opcodes are given, they are +all permanently bound to the same ruleset. The rules in a ruleset are executed +from top to bottom and will start again from the top if an instruction is +legalized as a result of the rules. If the ruleset is exhausted without +satisfying any rule, then it is considered unsupported.

+

When it doesn’t declare the instruction legal, each pass over the rules may +request that one type changes to another type. Sometimes this can cause multiple +types to change but we avoid this as much as possible as making multiple changes +can make it difficult to avoid infinite loops where, for example, narrowing one +type causes another to be too small and widening that type causes the first one +to be too big.

+

In general, it’s advisable to declare instructions legal as close to the top of +the rule as possible and to place any expensive rules as low as possible. This +helps with performance as testing for legality happens more often than +legalization and legalization can require multiple passes over the rules.

+

As a concrete example, consider the rule:

+
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL})
+    .legalFor({s32, s64, v2s32, v4s32, v2s64})
+    .clampScalar(0, s32, s64)
+    .widenScalarToNextPow2(0);
+
+
+

and the instruction:

+
%2:_(s7) = G_ADD %0:_(s7), %1:_(s7)
+
+
+

this doesn’t meet the predicate for the .legalFor() as s7 +is not one of the listed types so it falls through to the +.clampScalar(). It does meet the predicate for this rule +as the type is smaller than the s32 and this rule instructs the legalizer +to change type 0 to s32. It then restarts from the top. This time it does +satisfy .legalFor() and the resulting output is:

+
%3:_(s32) = G_ANYEXT %0:_(s7)
+%4:_(s32) = G_ANYEXT %1:_(s7)
+%5:_(s32) = G_ADD %3:_(s32), %4:_(s32)
+%2:_(s7) = G_TRUNC %5:_(s32)
+
+
+

where the G_ADD is legal and the other instructions are scheduled for +processing by the legalizer.

+
+
+

Rule Actions

+

There are various rule factories that append rules to a ruleset but they have a +few actions in common:

+
    +
  • legalIf(), legalFor(), etc. declare an instruction to be legal if the +predicate is satisfied.

  • +
  • narrowScalarIf(), narrowScalarFor(), etc. declare an instruction to be illegal +if the predicate is satisfied and indicates that narrowing the scalars in one +of the types to a specific type would make it more legal. This action supports +both scalars and vectors.

  • +
  • widenScalarIf(), widenScalarFor(), etc. declare an instruction to be illegal +if the predicate is satisfied and indicates that widening the scalars in one +of the types to a specific type would make it more legal. This action supports +both scalars and vectors.

  • +
  • fewerElementsIf(), fewerElementsFor(), etc. declare an instruction to be +illegal if the predicate is satisfied and indicates reducing the number of +vector elements in one of the types to a specific type would make it more +legal. This action supports vectors.

  • +
  • moreElementsIf(), moreElementsFor(), etc. declare an instruction to be illegal +if the predicate is satisfied and indicates increasing the number of vector +elements in one of the types to a specific type would make it more legal. +This action supports vectors.

  • +
  • lowerIf(), lowerFor(), etc. declare an instruction to be +illegal if the predicate is satisfied and indicates that replacing +it with equivalent instruction(s) would make it more legal. Support +for this action differs for each opcode. These may provide an +optional LegalizeMutation containing a type to attempt to perform +the expansion in a different type.

  • +
  • libcallIf(), libcallFor(), etc. declare an instruction to be illegal if the +predicate is satisfied and indicates that replacing it with a libcall would +make it more legal. Support for this action differs for +each opcode.

  • +
  • customIf(), customFor(), etc. declare an instruction to be illegal if the +predicate is satisfied and indicates that the backend developer will supply +a means of making it more legal.

  • +
  • unsupportedIf(), unsupportedFor(), etc. declare an instruction to be illegal +if the predicate is satisfied and indicates that there is no way to make it +legal and the compiler should fail.

  • +
  • fallback() falls back on an older API and should only be used while porting +existing code from that API.

  • +
+
+
+

Rule Predicates

+

The rule factories also have predicates in common:

+
    +
  • legal(), lower(), etc. are always satisfied.

  • +
  • legalIf(), narrowScalarIf(), etc. are satisfied if the user-supplied +LegalityPredicate function returns true. This predicate has access to the +information in the LegalityQuery to make its decision. +User-supplied predicates can also be combined using all(P0, P1, ...).

  • +
  • legalFor(), narrowScalarFor(), etc. are satisfied if the type matches one in +a given set of types. For example .legalFor({s16, s32}) declares the +instruction legal if type 0 is either s16 or s32. Additional versions for two +and three type indices are generally available. For these, all the type +indices considered together must match all the types in one of the tuples. So +.legalFor({{s16, s32}, {s32, s64}}) will only accept {s16, s32}, or +{s32, s64} but will not accept {s16, s64}.

  • +
  • legalForTypesWithMemSize(), narrowScalarForTypesWithMemSize(), etc. are +similar to legalFor(), narrowScalarFor(), etc. but additionally require a +MachineMemOperand to have a given size in each tuple.

  • +
  • legalForCartesianProduct(), narrowScalarForCartesianProduct(), etc. are +satisfied if each type index matches one element in each of the independent +sets. So .legalForCartesianProduct({s16, s32}, {s32, s64}) will accept +{s16, s32}, {s16, s64}, {s32, s32}, and {s32, s64}.

  • +
+
+
+

Composite Rules

+

There are some composite rules for common situations built out of the above facilities:

+
    +
  • widenScalarToNextPow2() is like widenScalarIf() but is satisfied iff the type +size in bits is not a power of 2 and selects a target type that is the next +largest power of 2.

  • +
+
    +
  • minScalar() is like widenScalarIf() but is satisfied iff the type +size in bits is smaller than the given minimum and selects the minimum as the +target type. Similarly, there is also a maxScalar() for the maximum and a +clampScalar() to do both at once.

  • +
  • minScalarSameAs() is like minScalar() but the minimum is taken from another +type index.

  • +
  • moreElementsToNextMultiple() is like moreElementsToNextPow2() but is based on +multiples of X rather than powers of 2.

  • +
+
+
+
+

Minimum Rule Set

+

GlobalISel’s legalizer has a great deal of flexibility in how a given target +shapes the GMIR that the rest of the backend must handle. However, there are +a small number of requirements that all targets must meet.

+

Before discussing the minimum requirements, we’ll need some terminology:

+
+
Producer Type Set

The set of types which is the union of all possible types produced by at +least one legal instruction.

+
+
Consumer Type Set

The set of types which is the union of all possible types consumed by at +least one legal instruction.

+
+
+

Both sets are often identical but there’s no guarantee of that. For example, +it’s not uncommon to be unable to consume s64 but still be able to produce it +for a few specific instructions.

+
+

Minimum Rules For Scalars

+
    +
  • G_ANYEXT must be legal for all inputs from the producer type set and all larger +outputs from the consumer type set.

  • +
  • G_TRUNC must be legal for all inputs from the producer type set and all +smaller outputs from the consumer type set.

  • +
+

G_ANYEXT and G_TRUNC have mandatory legality since the GMIR requires a means to +connect operations with different type sizes. They are usually trivial to support +since G_ANYEXT doesn’t define the value of the additional bits and G_TRUNC is +discarding bits. The other conversions can be lowered into G_ANYEXT/G_TRUNC +with some additional operations that are subject to further legalization. For +example, G_SEXT can lower to:

+
%1 = G_ANYEXT %0
+%2 = G_CONSTANT ...
+%3 = G_SHL %1, %2
+%4 = G_ASHR %3, %2
+
+
+

and the G_CONSTANT/G_SHL/G_ASHR can further lower to other operations or target +instructions. Similarly, G_FPEXT has no legality requirement since it can lower +to a G_ANYEXT followed by a target instruction.

+

G_MERGE_VALUES and G_UNMERGE_VALUES do not have legality requirements since the +former can lower to G_ANYEXT and some other legalizable instructions, while the +latter can lower to some legalizable instructions followed by G_TRUNC.

+
+
+

Minimum Legality For Vectors

+

Within the vector types, there aren’t any defined conversions in LLVM IR as +vectors are often converted by reinterpreting the bits or by decomposing the +vector and reconstituting it as a different type. As such, G_BITCAST is the +only operation to account for. We generally don’t require that it’s legal +because it can usually be lowered to COPY (or to nothing using +replaceAllUses()). However, there are situations where G_BITCAST is non-trivial +(e.g. little-endian vectors of big-endian data such as on big-endian MIPS MSA and +big-endian ARM NEON, see _i_bitcast). To account for this G_BITCAST must be +legal for all type combinations that change the bit pattern in the value.

+

There are no legality requirements for G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC +since these can be handled by: +* Declaring them legal. +* Scalarizing them. +* Lowering them to G_TRUNC+G_ANYEXT and some legalizable instructions. +* Lowering them to target instructions which are legal by definition.

+

The same reasoning also allows G_UNMERGE_VALUES to lack legality requirements +for vector inputs.

+
+
+

Minimum Legality for Pointers

+

There are no minimum rules for pointers since G_INTTOPTR and G_PTRTOINT can +be selected to a COPY from one register class to another by the legalizer.

+
+
+

Minimum Legality For Operations

+

The rules for G_ANYEXT, G_MERGE_VALUES, G_BITCAST, G_BUILD_VECTOR, +G_BUILD_VECTOR_TRUNC, G_CONCAT_VECTORS, G_UNMERGE_VALUES, G_PTRTOINT, and +G_INTTOPTR have already been noted above. In addition to those, the following +operations have requirements:

+
    +
  • At least one G_IMPLICIT_DEF must be legal. This is usually trivial as it +requires no code to be selected.

  • +
  • G_PHI must be legal for all types in the producer and consumer typesets. This +is usually trivial as it requires no code to be selected.

  • +
  • At least one G_FRAME_INDEX must be legal

  • +
  • At least one G_BLOCK_ADDR must be legal

  • +
+

There are many other operations you’d expect to have legality requirements but +they can be lowered to target instructions which are legal by definition.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Pipeline.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Pipeline.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Pipeline.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Pipeline.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,295 @@ + + + + + + + + + Core Pipeline — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Core Pipeline

+
+
+

The core pipeline of GlobalISel is:

+../_images/pipeline-overview.png +

The four passes shown in the diagram consist of:

+

IRTranslator

+
+

Converts LLVM-IR into gMIR (Generic MIR). +This is largely a direct translation and has little target customization. +It’s somewhat analogous to SelectionDAGBuilder but builds a flavour of MIR +called gMIR instead of a specialized representation. gMIR uses exactly the +same data structures as MIR but has more relaxed constraints. For example, +a virtual register may be constrained to a particular type without also +constraining it to a specific register class.

+
+

Legalizer

+
+

Replaces unsupported operations with supported ones. In other words, it shapes +the gMIR to suit what the backend can support. There is a very small set of +operations which targets are required to support but aside from that targets +can shape the MIR as they wish.

+
+

Register Bank Selector

+
+

Binds virtual registers to register banks. This pass is intended to minimize +cross-register-bank copies by clustering portions of the MIR together.

+
+

Instruction Select

+
+

Select target instructions using the gMIR. At this point, the gMIR has been +constrained enough that it becomes MIR.

+
+

Although we tend to talk about them as distinct passes, it should be noted that +there’s a good deal of flexibility here and it’s ok for things to happen +earlier than described below. For example, it’s not unusual for the legalizer to +legalize an intrinsic directly to a target instruction. The concrete +requirement is that the following additional constraints are preserved after +each of these passes:

+

IRTranslator

+
+

The representation must be gMIR, MIR, or a mixture of the two after this pass. +The majority will typically be gMIR to begin with but later passes will +gradually transition the gMIR to MIR.

+
+

Legalizer

+
+

No illegal operations must remain or be introduced after this pass.

+
+

Register Bank Selector

+
+

All virtual registers must have a register bank assigned after this pass.

+
+

Instruction Select

+
+

No gMIR must remain or be introduced after this pass. In other words, we must +have completed the conversion from gMIR to MIR.

+
+

In addition to these passes, there are also some optional passes that perform +an optimization. The current optional passes are:

+

Combiner

+
+

Replaces patterns of instructions with a better alternative. Typically, this +means improving run time performance by replacing instructions with faster +alternatives but Combiners can also focus on code size or other metrics.

+
+

Additional passes such as these can be inserted to support higher optimization +levels or target specific needs. A likely pipeline is:

+../_images/pipeline-overview-with-combiners.png +

Of course, combiners can be inserted in other places too. Also passes can be +replaced entirely so long as their task is complete as shown in this (more +customized) example pipeline.

+../_images/pipeline-overview-customized.png +
+

MachineVerifier

+

The pass approach lets us use the MachineVerifier to enforce invariants +that are required beyond certain points of the pipeline. For example, a +function with the legalized property can have the MachineVerifier +enforce that no illegal instructions occur. Similarly, a +regBankSelected function may not have virtual registers without a register +bank assigned.

+
+

Note

+

For layering reasons, MachineVerifier isn’t able to be the sole verifier +in GlobalISel. Currently some of the passes also perform verification while +we find a way to solve this problem.

+

The main issue is that GlobalISel is a separate library, so we can’t +directly reference it from CodeGen.

+
+
+
+

Testing

+

The ability to test GlobalISel is significantly improved over SelectionDAG. +SelectionDAG is something of a black box and there’s a lot going on inside it. +This makes it difficult to write a test that reliably tests a particular aspect +of its behaviour. For comparison, see the following diagram:

+../_images/testing-pass-level.png +

Each of the grey boxes indicates an opportunity to serialize the current state +and test the behaviour between two points in the pipeline. The current state +can be serialized using -stop-before or -stop-after and loaded using +-start-before, -start-after, and -run-pass.

+

We can also go further still, as many of GlobalISel’s passes are readily unit +testable:

+../_images/testing-unit-level.png +

It’s possible to create an imaginary target such as in LegalizerHelperTest.cpp +and perform a single step of the algorithm and check the result. The MIR and +FileCheck directives can be embedded using strings so you still have access to +the convenience available in llvm-lit.

+
+
+

Debugging

+

One debugging technique that’s proven particularly valuable is to use the +BlockExtractor to extract basic blocks into new functions. This can be used +to track down correctness bugs and can also be used to track down performance +regressions. It can also be coupled with function attributes to disable +GlobalISel for one or more of the extracted functions.

+../_images/block-extract.png +

The command to do the extraction is:

+
./bin/llvm-extract -o - -S -b 'foo:bb1;bb4' <input> > extracted.ll
+
+
+

This particular example extracts two basic blocks from a function named foo. +The new LLVM-IR can then be modified to add the failedISel attribute to the +extracted function containing bb4 to make that function use SelectionDAG.

+

This can prevent some optimizations as GlobalISel is generally able to work on a +single function at a time. This technique can be repeated for different +combinations of basic blocks until you have identified the critical blocks +involved in a bug.

+

Once the critical blocks have been identified, you can further increase the +resolution to the critical instructions by splitting the blocks, going from:

+
bb1:
+  ... instructions group 1 ...
+  ... instructions group 2 ...
+
+
+

into:

+
bb1:
+  ... instructions group 1 ...
+  br %bb2
+
+bb2:
+  ... instructions group 2 ...
+
+
+

and then repeating the process for the new blocks.

+

It’s also possible to use this technique in a mode where the main function +is compiled with GlobalISel and the extracted basic blocks are compiled with +SelectionDAG (or the other way around) to leverage the existing quality of +another code generator to track down bugs. This technique can also be used to +improve the similarity between fast and slow code when tracking down performance +regressions and help you zero in on a particular cause of the regression.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Porting.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Porting.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Porting.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Porting.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,166 @@ + + + + + + + + + Porting GlobalISel to A New Target — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Porting GlobalISel to A New Target

+

There are four major classes to implement by the target:

+
    +
  • CallLowering — lower calls, returns, and +arguments according to the ABI.

  • +
  • RegisterBankInfo — describe +Register Bank coverage, cross-bank copy cost, and the mapping of +operands onto banks for each instruction.

  • +
  • LegalizerInfo — describe what is legal, and how +to legalize what isn’t.

  • +
  • InstructionSelector — select generic MIR +to target-specific MIR.

  • +
+

Additionally:

+
    +
  • TargetPassConfig — create the passes constituting the pipeline, +including additional passes not included in the Core Pipeline.

  • +
+
+
+

Tutorials

+

We’d recommend watching this tutorial from the 2017 LLVM DevMeeting +which gave an overview of how to bring up a new backend in GlobalISel.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/RegBankSelect.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/RegBankSelect.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/RegBankSelect.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/RegBankSelect.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,199 @@ + + + + + + + + + RegBankSelect — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

RegBankSelect

+

This pass constrains the Generic Virtual Registers operands of generic +instructions to some Register Bank.

+

It iteratively maps instructions to a set of per-operand bank assignments. +The possible mappings are determined by the target-provided +RegisterBankInfo. +The mapping is then applied, possibly introducing COPY instructions if +necessary.

+

It traverses the MachineFunction top down so that all operands are already +mapped when analyzing an instruction.

+

This pass could also remap target-specific instructions when beneficial. +In the future, this could replace the ExeDepsFix pass, as we can directly +select the best variant for an instruction that’s available on multiple banks.

+
+

API: RegisterBankInfo

+

The RegisterBankInfo class describes multiple aspects of register banks.

+
    +
  • Banks: addRegBankCoverage — which register bank covers each +register class.

  • +
  • Cross-Bank Copies: copyCost — the cost of a COPY from one bank +to another.

  • +
  • Default Mapping: getInstrMapping — the default bank assignments for +a given instruction.

  • +
  • Alternative Mapping: getInstrAlternativeMapping — the other +possible bank assignments for a given instruction.

  • +
+

TODO: +All this information should eventually be static and generated by TableGen, +mostly using existing information augmented by bank descriptions.

+

TODO: +getInstrMapping is currently separate from getInstrAlternativeMapping +because the latter is more expensive: as we move to static mapping info, +both methods should be free, and we should merge them.

+
+
+

RegBankSelect Modes

+

RegBankSelect currently has two modes:

+
    +
  • Fast — For each instruction, pick a target-provided “default” bank +assignment. This is the default at -O0.

  • +
  • Greedy — For each instruction, pick the cheapest of several +target-provided bank assignment alternatives.

  • +
+

We intend to eventually introduce an additional optimizing mode:

+
    +
  • Global — Across multiple instructions, pick the cheapest combination of +bank assignments.

  • +
+

NOTE: +On AArch64, we are considering using the Greedy mode even at -O0 (or perhaps at +backend -O1): because Low Level Type doesn’t distinguish floating point from +integer scalars, the default assignment for loads and stores is the integer +bank, introducing cross-bank copies on most floating point operations.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Resources.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Resources.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GlobalISel/Resources.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GlobalISel/Resources.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,152 @@ + + + + + + + + + Resources — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GoldPlugin.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GoldPlugin.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GoldPlugin.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GoldPlugin.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,297 @@ + + + + + + + + + The LLVM gold plugin — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The LLVM gold plugin

+
+

Introduction

+

Building with link time optimization requires cooperation from +the system linker. LTO support on Linux systems is available via the +gold linker which supports LTO via plugins. This is the same mechanism +used by the GCC LTO project.

+

The LLVM gold plugin implements the gold plugin interface on top of +libLTO. The same plugin can also be used by other tools such as +ar and nm. Note that ld.bfd from binutils version 2.21.51.0.2 +and above also supports LTO via plugins. However, usage of the LLVM +gold plugin with ld.bfd is not tested and therefore not officially +supported or recommended.

+
+
+

How to build it

+

You need to have gold with plugin support and build the LLVMgold plugin. +The gold linker is installed as ld.gold. To see whether gold is the default +on your system, run /usr/bin/ld -v. It will report “GNU +gold” or else “GNU ld” if not. If gold is already installed at +/usr/bin/ld.gold, one option is to simply make that the default by +backing up your existing /usr/bin/ld and creating a symbolic link +with ln -s /usr/bin/ld.gold /usr/bin/ld. Alternatively, you can build +with clang’s -fuse-ld=gold or add -fuse-ld=gold to LDFLAGS, which will +cause the clang driver to invoke /usr/bin/ld.gold directly.

+

If you have gold installed, check for plugin support by running +/usr/bin/ld.gold -plugin. If it complains “missing argument” then +you have plugin support. If not, and you get an error such as “unknown option”, +then you will either need to build gold or install a version with plugin +support.

+
    +
  • Download, configure and build gold with plugin support:

    +
    $ git clone --depth 1 git://sourceware.org/git/binutils-gdb.git binutils
    +$ mkdir build
    +$ cd build
    +$ ../binutils/configure --enable-gold --enable-plugins --disable-werror
    +$ make all-gold
    +
    +
    +

    That should leave you with build/gold/ld-new which supports +the -plugin option. Running make will additionally build +build/binutils/ar and nm-new binaries supporting plugins.

    +

    Once you’re ready to switch to using gold, back up your existing +/usr/bin/ld then replace it with ld-new. Alternatively, install +in /usr/bin/ld.gold and use -fuse-ld=gold as described earlier.

    +

    Optionally, add --enable-gold=default to the above configure invocation +to automatically install the newly built gold as the default linker with +make install.

    +
  • +
  • Build the LLVMgold plugin. Run CMake with +-DLLVM_BINUTILS_INCDIR=/path/to/binutils/include. The correct include +path will contain the file plugin-api.h.

  • +
+
+
+

Usage

+

You should produce bitcode files from clang with the option +-flto. This flag will also cause clang to look for the gold plugin in +the lib directory under its prefix and pass the -plugin option to +ld. It will not look for an alternate linker without -fuse-ld=gold, +which is why you otherwise need gold to be the installed system linker in +your path.

+

ar and nm also accept the -plugin option and it’s possible +to install LLVMgold.so to /usr/lib/bfd-plugins for a seamless setup. +If you built your own gold, be sure to install the ar and nm-new you +built to /usr/bin.

+ +
+
+

Quickstart for using LTO with autotooled projects

+

Once your system ld, ar, and nm all support LLVM bitcode, +everything is in place for an easy to use LTO build of autotooled projects:

+
    +
  • Follow the instructions on how to build LLVMgold.so.

  • +
  • Install the newly built binutils to $PREFIX

  • +
  • Copy Release/lib/LLVMgold.so to $PREFIX/lib/bfd-plugins/

  • +
  • Set environment variables ($PREFIX is where you installed clang and +binutils):

    +
    export CC="$PREFIX/bin/clang -flto"
    +export CXX="$PREFIX/bin/clang++ -flto"
    +export AR="$PREFIX/bin/ar"
    +export NM="$PREFIX/bin/nm"
    +export RANLIB=/bin/true #ranlib is not needed, and doesn't support .bc files in .a
    +
    +
    +
  • +
  • Or you can just set your path:

    +
    export PATH="$PREFIX/bin:$PATH"
    +export CC="clang -flto"
    +export CXX="clang++ -flto"
    +export RANLIB=/bin/true
    +
    +
    +
  • +
  • Configure and build the project as usual:

    +
    % ./configure && make && make check
    +
    +
    +
  • +
+

The environment variable settings may work for non-autotooled projects too, +but you may need to set the LD environment variable as well.

+
+
+

Licensing

+

Gold is licensed under the GPLv3. LLVMgold uses the interface file +plugin-api.h from gold which means that the resulting LLVMgold.so +binary is also GPLv3. This can still be used to link non-GPLv3 programs +just as much as gold could without the plugin.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GwpAsan.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GwpAsan.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/GwpAsan.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/GwpAsan.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,425 @@ + + + + + + + + + GWP-ASan — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

GWP-ASan

+ +
+

Introduction

+

GWP-ASan is a sampled allocator framework that assists in finding use-after-free +and heap-buffer-overflow bugs in production environments. It informally is a +recursive acronym, “GWP-ASan Will Provide Allocation +SANity”.

+

GWP-ASan is based on the classic +Electric Fence Malloc Debugger, with a +key adaptation. Notably, we only choose a very small percentage of allocations +to sample, and apply guard pages to these sampled allocations only. The sampling +is small enough to allow us to have very low performance overhead.

+

There is a small, tunable memory overhead that is fixed for the lifetime of the +process. This is approximately 40KiB per process using the default settings, +depending on the average size of your allocations.

+
+
+

GWP-ASan vs. ASan

+

Unlike AddressSanitizer, +GWP-ASan does not induce a significant performance overhead. ASan often requires +the use of dedicated canaries to be viable in production environments, and as +such is often impractical.

+

GWP-ASan is only capable of finding a subset of the memory issues detected by +ASan. Furthermore, GWP-ASan’s bug detection capabilities are only probabilistic. +As such, we recommend using ASan over GWP-ASan in testing, as well as anywhere +else that guaranteed error detection is more valuable than the 2x execution +slowdown/binary size bloat. For the majority of production environments, this +impact is too high, and GWP-ASan proves extremely useful.

+
+
+

Design

+

Please note: The implementation of GWP-ASan is largely in-flux, and these +details are subject to change. There are currently other implementations of +GWP-ASan, such as the implementation featured in +Chromium. The +long-term support goal is to ensure feature-parity where reasonable, and to +support compiler-rt as the reference implementation.

+
+

Allocator Support

+

GWP-ASan is not a replacement for a traditional allocator. Instead, it works by +inserting stubs into a supporting allocator to redirect allocations to GWP-ASan +when they’re chosen to be sampled. These stubs are generally implemented in the +implementation of malloc(), free() and realloc(). The stubs are +extremely small, which makes using GWP-ASan in most allocators fairly trivial. +The stubs follow the same general pattern (example malloc() pseudocode +below):

+
#ifdef INSTALL_GWP_ASAN_STUBS
+  gwp_asan::GuardedPoolAllocator GWPASanAllocator;
+#endif
+
+void* YourAllocator::malloc(..) {
+#ifdef INSTALL_GWP_ASAN_STUBS
+  if (GWPASanAllocator.shouldSample(..))
+    return GWPASanAllocator.allocate(..);
+#endif
+
+  // ... the rest of your allocator code here.
+}
+
+
+

Then, all the supporting allocator needs to do is compile with +-DINSTALL_GWP_ASAN_STUBS and link against the GWP-ASan library! For +performance reasons, we strongly recommend static linkage of the GWP-ASan +library.

+
+
+

Guarded Allocation Pool

+

The core of GWP-ASan is the guarded allocation pool. Each sampled allocation is +backed using its own guarded slot, which may consist of one or more accessible +pages. Each guarded slot is surrounded by two guard pages, which are mapped as +inaccessible. The collection of all guarded slots makes up the guarded +allocation pool.

+
+
+

Buffer Underflow/Overflow Detection

+

We gain buffer-overflow and buffer-underflow detection through these guard +pages. When a memory access overruns the allocated buffer, it will touch the +inaccessible guard page, causing a memory exception. This exception is caught and +handled by the internal crash handler. Because each allocation is recorded with +metadata about where (and by what thread) it was allocated and deallocated, we +can provide information that will help identify the root cause of the bug.

+

Allocations are randomly selected to be either left- or right-aligned to provide +equal detection of both underflows and overflows.

+
+
+

Use after Free Detection

+

The guarded allocation pool also provides use-after-free detection. Whenever a +sampled allocation is deallocated, we map its guarded slot as inaccessible. Any +memory accesses after deallocation will thus trigger the crash handler, and we +can provide useful information about the source of the error.

+

Please note that the use-after-free detection for a sampled allocation is +transient. To keep memory overhead fixed while still detecting bugs, deallocated +slots are randomly reused to guard future allocations.

+
+
+
+

Usage

+

GWP-ASan already ships by default in the +Scudo Hardened Allocator, +so building with -fsanitize=scudo is the quickest and easiest way to try out +GWP-ASan.

+
+

Options

+

GWP-ASan’s configuration is managed by the supporting allocator. We provide a +generic configuration management library that is used by Scudo. It allows +several aspects of GWP-ASan to be configured through the following methods:

+
    +
  • When the GWP-ASan library is compiled, by setting +-DGWP_ASAN_DEFAULT_OPTIONS to the options string you want set by default. +If you’re building GWP-ASan as part of a compiler-rt/LLVM build, add it during +cmake configure time (e.g. cmake ... -DGWP_ASAN_DEFAULT_OPTIONS="..."). If +you’re building GWP-ASan outside of compiler-rt, simply ensure that you +specify -DGWP_ASAN_DEFAULT_OPTIONS="..." when building +optional/options_parser.cpp.

  • +
  • By defining a __gwp_asan_default_options function in one’s program that +returns the options string to be parsed. Said function must have the following +prototype: extern "C" const char* __gwp_asan_default_options(void), with a +default visibility. This will override the compile time define;

  • +
  • Depending on allocator support (Scudo has support for this mechanism): Through +the environment variable GWP_ASAN_OPTIONS, containing the options string +to be parsed. Options defined this way will override any definition made +through __gwp_asan_default_options.

  • +
+

The options string follows a syntax similar to ASan, where distinct options +can be assigned in the same string, separated by colons.

+

For example, using the environment variable:

+
GWP_ASAN_OPTIONS="MaxSimultaneousAllocations=16:SampleRate=5000" ./a.out
+
+
+

Or using the function:

+
extern "C" const char *__gwp_asan_default_options() {
+  return "MaxSimultaneousAllocations=16:SampleRate=5000";
+}
+
+
+

The following options are available:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + +

Option

Default

Description

Enabled

true

Is GWP-ASan enabled?

PerfectlyRightAlign

false

When allocations are right-aligned, should we perfectly align them up to the +page boundary? By default (false), we round up allocation size to the nearest +power of two (2, 4, 8, 16) up to a maximum of 16-byte alignment for +performance reasons. Setting this to true can find single byte +buffer-overflows at the cost of performance, and may be incompatible with +some architectures.

MaxSimultaneousAllocations

16

Number of simultaneously-guarded allocations available in the pool.

SampleRate

5000

The probability (1 / SampleRate) that a page is selected for GWP-ASan +sampling. Sample rates up to (2^31 - 1) are supported.

InstallSignalHandlers

true

Install GWP-ASan signal handlers for SIGSEGV during dynamic loading. This +allows better error reports by providing stack traces for allocation and +deallocation when reporting a memory error. GWP-ASan’s signal handler will +forward the signal to any previously-installed handler, and user programs +that install further signal handlers should make sure they do the same. Note, +if the previously installed SIGSEGV handler is SIG_IGN, we terminate the +process after dumping the error report.

+
+
+

Example

+

The below code has a use-after-free bug, where the string_view is created as +a reference to the temporary result of the string+ operator. The +use-after-free occurs when sv is dereferenced on line 8.

+
1: #include <iostream>
+2: #include <string>
+3: #include <string_view>
+4:
+5: int main() {
+6:   std::string s = "Hellooooooooooooooo ";
+7:   std::string_view sv = s + "World\n";
+8:   std::cout << sv;
+9: }
+
+
+

Compiling this code with Scudo+GWP-ASan will probabilistically catch this bug +and provide us a detailed error report:

+
$ clang++ -fsanitize=scudo -std=c++17 -g buggy_code.cpp
+$ for i in `seq 1 200`; do
+    GWP_ASAN_OPTIONS="SampleRate=100" ./a.out > /dev/null;
+  done
+|
+| *** GWP-ASan detected a memory error ***
+| Use after free at 0x7feccab26000 (0 bytes into a 41-byte allocation at 0x7feccab26000) by thread 31027 here:
+|   ...
+|   #9 ./a.out(_ZStlsIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_St17basic_string_viewIS3_S4_E+0x45) [0x55585c0afa55]
+|   #10 ./a.out(main+0x9f) [0x55585c0af7cf]
+|   #11 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xeb) [0x7fecc966952b]
+|   #12 ./a.out(_start+0x2a) [0x55585c0867ba]
+|
+| 0x7feccab26000 was deallocated by thread 31027 here:
+|   ...
+|   #7 ./a.out(main+0x83) [0x55585c0af7b3]
+|   #8 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xeb) [0x7fecc966952b]
+|   #9 ./a.out(_start+0x2a) [0x55585c0867ba]
+|
+| 0x7feccab26000 was allocated by thread 31027 here:
+|   ...
+|   #12 ./a.out(main+0x57) [0x55585c0af787]
+|   #13 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xeb) [0x7fecc966952b]
+|   #14 ./a.out(_start+0x2a) [0x55585c0867ba]
+|
+| *** End GWP-ASan report ***
+| Segmentation fault
+
+
+

To symbolize these stack traces, some care has to be taken. Scudo currently uses +GNU’s backtrace_symbols() from <execinfo.h> to unwind. The unwinder +provides human-readable stack traces in function+offset form, rather than +the normal binary+offset form. In order to use addr2line or similar tools to +recover the exact line number, we must convert the function+offset to +binary+offset. A helper script is available at +compiler-rt/lib/gwp_asan/scripts/symbolize.sh. Using this script will +attempt to symbolize each possible line, falling back to the previous output if +anything fails. This results in the following output:

+
$ cat my_gwp_asan_error.txt | symbolize.sh
+|
+| *** GWP-ASan detected a memory error ***
+| Use after free at 0x7feccab26000 (0 bytes into a 41-byte allocation at 0x7feccab26000) by thread 31027 here:
+| ...
+| #9 /usr/lib/gcc/x86_64-linux-gnu/8.0.1/../../../../include/c++/8.0.1/string_view:547
+| #10 /tmp/buggy_code.cpp:8
+|
+| 0x7feccab26000 was deallocated by thread 31027 here:
+| ...
+| #7 /tmp/buggy_code.cpp:8
+| #8 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xeb) [0x7fecc966952b]
+| #9 ./a.out(_start+0x2a) [0x55585c0867ba]
+|
+| 0x7feccab26000 was allocated by thread 31027 here:
+| ...
+| #12 /tmp/buggy_code.cpp:7
+| #13 /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xeb) [0x7fecc966952b]
+| #14 ./a.out(_start+0x2a) [0x55585c0867ba]
+|
+| *** End GWP-ASan report ***
+| Segmentation fault
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToAddABuilder.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToAddABuilder.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToAddABuilder.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToAddABuilder.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,239 @@ + + + + + + + + + How To Add Your Build Configuration To LLVM Buildbot Infrastructure — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Add Your Build Configuration To LLVM Buildbot Infrastructure

+
+

Introduction

+

This document contains information about adding a build configuration and +buildbot-worker to private worker builder to LLVM Buildbot Infrastructure.

+
+
+

Buildmasters

+

There are two buildmasters running.

+
    +
  • The main buildmaster at http://lab.llvm.org:8011. All builders attached +to this machine will notify commit authors every time they break the build.

  • +
  • The staging buildbot at http://lab.llvm.org:8014. All builders attached +to this machine will be completely silent by default when the build is broken. +Builders for experimental backends should generally be attached to this +buildmaster.

  • +
+
+
+

Steps To Add Builder To LLVM Buildbot

+

Volunteers can provide their build machines to work as build workers for the +public LLVM Buildbot.

+

Here are the steps you can follow to do so:

+
    +
  1. Check the existing build configurations to make sure the one you are +interested in is not covered yet or gets built on your computer much +faster than on the existing one. We prefer faster builds so developers +will get feedback sooner after changes get committed.

  2. +
  3. The computer you will be registering with the LLVM buildbot +infrastructure should have all dependencies installed and you can +actually build your configuration successfully. Please check what degree +of parallelism (-j param) would give the fastest build. You can build +multiple configurations on one computer.

  4. +
  5. Install buildbot-worker (currently we are using buildbot version 2.8.5). +Depending on the platform, buildbot-worker could be available to download and +install with your package manager, or you can download it directly from +http://trac.buildbot.net and install it manually.

  6. +
  7. Create a designated user account that your buildbot-worker will run under, +and set appropriate permissions.

  8. +
  9. Choose the buildbot-worker root directory (all builds will be placed under +it), buildbot-worker access name and password the build master will be using +to authenticate your buildbot-worker.

  10. +
  11. Create a buildbot-worker in context of that buildbot-worker account. Point it +to the lab.llvm.org port 9990 (see Buildbot documentation, +Creating a worker +for more details) by running the following command:

    +
    +
    $ buildbot-worker create-worker <buildbot-worker-root-directory> \
    +             lab.llvm.org:9990 \
    +             <buildbot-worker-access-name> \
    +             <buildbot-worker-access-password>
    +
    +
    +
    +

    To point a worker to silent master please use lab.llvm.org:9994 instead +of lab.llvm.org:9990.

    +
  12. +
  13. Fill the buildbot-worker description and admin name/e-mail. Here is an +example of the buildbot-worker description:

    +
    Windows 7 x64
    +Core i7 (2.66GHz), 16GB of RAM
    +
    +g++.exe (TDM-1 mingw32) 4.4.0
    +GNU Binutils 2.19.1
    +cmake version 2.8.4
    +Microsoft(R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86
    +
    +
    +
  14. +
  15. Make sure you can actually start the buildbot-worker successfully. Then set +up your buildbot-worker to start automatically at the start up time. See the +buildbot documentation for help. You may want to restart your computer +to see if it works.

  16. +
  17. Send a patch which adds your build worker and your builder to +zorg. Use the typical LLVM +workflow.

    +
      +
    • workers are added to buildbot/osuosl/master/config/workers.py

    • +
    • builders are added to buildbot/osuosl/master/config/builders.py

    • +
    +

    Please make sure your builder name and its builddir are unique throughout the +file.

    +

    It is possible to allow email addresses to unconditionally receive +notifications on build failure; for this you’ll need to add an +InformativeMailNotifier to buildbot/osuosl/master/config/status.py. +This is particularly useful for the staging buildmaster which is silent +otherwise.

    +
  18. +
  19. Send the buildbot-worker access name and the access password directly to +Galina Kistanova, and wait until she +lets you know that your changes are applied and the buildmaster is +reconfigured.

  20. +
  21. Check the status of your buildbot-worker on the Waterfall Display to make sure it is connected, and +http://lab.llvm.org:8011/#/workers to see if administrator contact and +worker information are correct.

  22. +
  23. Wait for the first build to succeed and enjoy.

  24. +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToBuildOnARM.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToBuildOnARM.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToBuildOnARM.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToBuildOnARM.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,213 @@ + + + + + + + + + How To Build On ARM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Build On ARM

+
+

Introduction

+

This document contains information about building/testing LLVM and +Clang on an ARM machine.

+

This document is NOT tailored to help you cross-compile LLVM/Clang +to ARM on another architecture, for example an x86_64 machine. To find +out more about cross-compiling, please check How To Cross-Compile Clang/LLVM using Clang/LLVM.

+
+
+

Notes On Building LLVM/Clang on ARM

+

Here are some notes on building/testing LLVM/Clang on ARM. Note that +ARM encompasses a wide variety of CPUs; this advice is primarily based +on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.

+
    +
  1. The most popular Linaro/Ubuntu OS’s for ARM boards, e.g., the +Pandaboard, have become hard-float platforms. There are a number of +choices when using CMake. Autoconf usage is deprecated as of 3.8.

    +

    Building LLVM/Clang in Release mode is preferred since it consumes +a lot less memory. Otherwise, the building process will very likely +fail due to insufficient memory. It’s also a lot quicker to only build +the relevant back-ends (ARM and AArch64), since it’s very unlikely that +you’ll use an ARM board to cross-compile to other arches. If you’re +running Compiler-RT tests, also include the x86 back-end, or some tests +will fail.

    +
    cmake $LLVM_SRC_DIR -DCMAKE_BUILD_TYPE=Release \
    +                    -DLLVM_TARGETS_TO_BUILD="ARM;X86;AArch64"
    +
    +
    +

    Other options you can use are:

    +
    Use Ninja instead of Make: "-G Ninja"
    +Build with assertions on: "-DLLVM_ENABLE_ASSERTIONS=True"
    +Local (non-sudo) install path: "-DCMAKE_INSTALL_PREFIX=$HOME/llvm/install"
    +CPU flags: "-DCMAKE_C_FLAGS=-mcpu=cortex-a15" (same for CXX_FLAGS)
    +
    +
    +

    After that, just typing make -jN or ninja will build everything. +make -jN check-all or ninja check-all will run all compiler tests. For +running the test suite, please refer to LLVM Testing Infrastructure Guide.

    +
  2. +
  3. If you are building LLVM/Clang on an ARM board with 1G of memory or less, +please use gold rather than GNU ld. In any case it is probably a good +idea to set up a swap partition, too.

    +
    $ sudo ln -sf /usr/bin/ld.gold /usr/bin/ld
    +
    +
    +
  4. +
  5. ARM development boards can be unstable and you may experience that cores +are disappearing, caches being flushed on every big.LITTLE switch, and +other similar issues. To help ease the effect of this, set the Linux +scheduler to “performance” on all cores using this little script:

    +
    # The code below requires the package 'cpufrequtils' to be installed.
    +for ((cpu=0; cpu<`grep -c proc /proc/cpuinfo`; cpu++)); do
    +    sudo cpufreq-set -c $cpu -g performance
    +done
    +
    +
    +

    Remember to turn that off after the build, or you may risk burning your +CPU. Most modern kernels don’t need that, so only use it if you have +problems.

    +
  6. +
  7. Running the build on SD cards is ok, but they are more prone to failures +than good quality USB sticks, and those are more prone to failures than +external hard-drives (those are also a lot faster). So, at least, you +should consider buying a fast USB stick. On systems with a fast eMMC, +that’s a good option too.

  8. +
  9. Make sure you have a decent power supply (dozens of dollars worth) that can +provide at least 4 amperes; this is especially important if you use USB +devices with your board. Externally powered USB/SATA harddrives are even +better than having a good power supply.

  10. +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToBuildWindowsItaniumPrograms.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToBuildWindowsItaniumPrograms.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToBuildWindowsItaniumPrograms.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToBuildWindowsItaniumPrograms.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,311 @@ + + + + + + + + + How to build Windows Itanium applications. — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How to build Windows Itanium applications.

+
+

Introduction

+

This document contains information describing how to create a Windows Itanium toolchain.

+

Windows Itanium allows you to deploy Itanium C++ ABI applications on top of the MS VS CRT. +This environment can use the Windows SDK headers directly and does not require additional +headers or additional runtime machinery (such as is used by mingw).

+

Windows Itanium Stack:

+
    +
  • Uses the Itanium C++ abi.

  • +
  • libc++.

  • +
  • libc++-abi.

  • +
  • libunwind.

  • +
  • The MS VS CRT.

  • +
  • Is compatible with MS Windows SDK include headers.

  • +
  • COFF/PE file format.

  • +
  • LLD

  • +
+

Note: compiler-rt is not used. This functionality is supplied by the MS VCRT.

+
+
+

Prerequisites

+
    +
  • The MS SDK is installed as part of MS Visual Studio.

  • +
  • Clang with support for the windows-itanium triple.

  • +
  • COFF LLD with support for the -auto-import switch.

  • +
+
+
+

Known issues:

+

SJLJ exceptions, “-fsjlj-exceptions”, are the only currently supported model.

+

link.exe (the MS linker) is unsuitable as it doesn’t support auto-importing which +is currently required to link correctly. However, if that limitation is removed +then there are no other known issues with using link.exe.

+

Currently, there is a lack of a usable Windows compiler driver for Windows Itanium. +A reasonable work-around is to build clang with a windows-msvc default target and +then override the triple with e.g. “-Xclang -triple -Xclang x86_64-unknown-windows-itanium”. +The linker can be specified with: “-fuse-ld=lld”.

+

In the Itanium C++ ABI the first member of an object is a pointer to the vtable +for its class. The vtable is often emitted into the object file with the key function +and must be imported for classes marked dllimport. The pointers must be globally +unique. Unfortunately, the COFF/PE file format does not provide a mechanism to +store a runtime address from another DLL into this pointer (although runtime +addresses are patched into the IAT). Therefore, the compiler must emit some code, +that runs after IAT patching but before anything that might use the vtable pointers, +and sets the vtable pointer to the address from the IAT. For the special case of +the references to vtables for __cxxabiv1::__class_type_info from typeinfo objects +there is no declaration available to the compiler so this can’t be done. To allow +programs to link we currently rely on the -auto-import switch in LLD to auto-import +references to __cxxabiv1::__class_type_info pointers (see: https://reviews.llvm.org/D43184 +for a related discussion). This allows for linking; but, code that actually uses +such fields will not work as these will not be fixed up at runtime. See +_pei386_runtime_relocator which handles the runtime component of the autoimporting +scheme used for mingw and comments in https://reviews.llvm.org/D43184 and +https://reviews.llvm.org/D89518 for more.

+
+
+

Assembling a Toolchain:

+

The procedure is:

+

# Build an LLVM toolchain with support for Windows Itanium. +# Use the toolchain from step 1. to build libc++, libc++abi, and libunwind.

+

It is also possible to cross-compile from Linux.

+

One method of building the libraries in step 2. is to build them “stand-alone”. +A stand-alone build doesn’t involve the rest of the LLVM tree. The steps are:

+
    +
  • cd build-dir

  • +
  • cmake -DLLVM_PATH=<path to llvm checkout e.g. /llvm-project/> -DCMAKE_INSTALL_PREFIX=<install path> <other options> <path to project e.g. /llvm-project/libcxxabi>

  • +
  • <make program e.g. ninja>

  • +
  • <make program> install

  • +
+

More information on standalone builds can be found in the build documentation for +the respective libraries. The next section discusses the salient options and modifications +required for building and installing the libraries using standalone builds. This assumes +that we are building libunwind and libc++ as DLLs and statically linking libc++abi into +libc++. Other build configurations are possible, but they are not discussed here.

+
+

Common CMake configuration options:

+
    +
  • -D_LIBCPP_ABI_FORCE_ITANIUM

  • +
+

Tell the libc++ headers that the Itanium C++ ABI is being used.

+
    +
  • -DCMAKE_C_FLAGS="-lmsvcrt -llegacy_stdio_definitions -D_NO_CRT_STDIO_INLINE"

  • +
+

Supply CRT definitions including stdio definitions that have been removed from the MS VS CRT. +We don’t want the stdio functions declared inline as they will cause multiple definition +errors when the same symbols are pulled in from legacy_stdio_definitions.lib.

+
    +
  • -DCMAKE_INSTALL_PREFIX=<install path>

  • +
+

Where to install the library and headers.

+
+
+

Building libunwind:

+
    +
  • -DLIBUNWIND_ENABLE_SHARED=ON

  • +
  • -DLIBUNWIND_ENABLE_STATIC=OFF

  • +
+

libunwind can be built as a DLL. It is not dependent on other projects.

+
    +
  • -DLIBUNWIND_USE_COMPILER_RT=OFF

  • +
+

We use the MS runtime.

+

The CMake files will need to be edited to prevent them adding GNU specific libraries to the link line.

+
+
+

Building libc++abi:

+
    +
  • -DLIBCXXABI_ENABLE_SHARED=OFF

  • +
  • -DLIBCXXABI_ENABLE_STATIC=ON

  • +
  • -DLIBCXX_ENABLE_SHARED=ON

  • +
  • -DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON

  • +
+

To break the symbol dependency between libc++abi and libc++ we +build libc++abi as a static library and then statically link it +into the libc++ DLL. This necessitates setting the CMake file +to ensure that the visibility macros (which expand to dllexport/import) +are expanded as they will be needed when creating the final libc++ +DLL later, see: https://reviews.llvm.org/D90021.

+
    +
  • -DLIBCXXABI_LIBCXX_INCLUDES=<path to libcxx>/include

  • +
+

Where to find the libc++ headers

+
+
+

Building libc++:

+
    +
  • -DLIBCXX_ENABLE_SHARED=ON

  • +
  • -DLIBCXX_ENABLE_STATIC=OFF

  • +
+

We build libc++ as a DLL and statically link libc++abi into it.

+
    +
  • -DLIBCXX_INSTALL_HEADERS=ON

  • +
+

Install the headers.

+
    +
  • -DLIBCXX_USE_COMPILER_RT=OFF

  • +
+

We use the MS runtime.

+
    +
  • -DLIBCXX_HAS_WIN32_THREAD_API=ON

  • +
+

Windows Itanium does not offer a POSIX-like layer over WIN32.

+
    +
  • -DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON

  • +
  • -DLIBCXX_CXX_ABI=libcxxabi

  • +
  • -DLIBCXX_CXX_ABI_INCLUDE_PATHS=<libcxxabi src path>/include

  • +
  • -DLIBCXX_CXX_ABI_LIBRARY_PATH=<libcxxabi build path>/lib

  • +
+

Use the static libc++abi library built earlier.

+
    +
  • -DLIBCXX_NO_VCRUNTIME=ON

  • +
+

Remove any dependency on the VC runtime - we need libc++abi to supply the C++ runtime.

+
    +
  • -DCMAKE_C_FLAGS=<path to installed unwind.lib>

  • +
+

As we are statically linking against libcxxabi we need to link +against the unwind import library to resolve unwind references +from the libcxxabi objects.

+
    +
  • -DCMAKE_C_FLAGS+=' -UCLOCK_REALTIME'

  • +
+

Prevent the inclusion of sys/time that MS doesn’t provide.

+
+
+

Notes:

+

An example build recipe is available here: https://reviews.llvm.org/D88124

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToBuildWithPGO.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToBuildWithPGO.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToBuildWithPGO.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToBuildWithPGO.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,308 @@ + + + + + + + + + How To Build Clang and LLVM with Profile-Guided Optimizations — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Build Clang and LLVM with Profile-Guided Optimizations

+
+

Introduction

+

PGO (Profile-Guided Optimization) allows your compiler to better optimize code +for how it actually runs. Users report that applying this to Clang and LLVM can +decrease overall compile time by 20%.

+

This guide walks you through how to build Clang with PGO, though it also applies +to other subprojects, such as LLD.

+

If you want to build other software with PGO, see the end-user documentation +for PGO.

+
+
+

Using preconfigured CMake caches

+

See https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo

+
+
+

Using the script

+

We have a script at utils/collect_and_build_with_pgo.py. This script is +tested on a few Linux flavors, and requires a checkout of LLVM, Clang, and +compiler-rt. Despite the name, it performs four clean builds of Clang, so it +can take a while to run to completion. Please see the script’s --help for +more information on how to run it, and the different options available to you. +If you want to get the most out of PGO for a particular use-case (e.g. compiling +a specific large piece of software), please do read the section below on +‘benchmark’ selection.

+

Please note that this script is only tested on a few Linux distros. Patches to +add support for other platforms, as always, are highly appreciated. :)

+

This script also supports a --dry-run option, which causes it to print +important commands instead of running them.

+
+
+

Selecting ‘benchmarks’

+

PGO does best when the profiles gathered represent how the user plans to use the +compiler. Notably, highly accurate profiles of llc building x86_64 code aren’t +incredibly helpful if you’re going to be targeting ARM.

+

By default, the script above does two things to get solid coverage. It:

+
    +
  • runs all of Clang and LLVM’s lit tests, and

  • +
  • uses the instrumented Clang to build Clang, LLVM, and all of the other +LLVM subprojects available to it.

  • +
+

Together, these should give you:

+
    +
  • solid coverage of building C++,

  • +
  • good coverage of building C,

  • +
  • great coverage of running optimizations,

  • +
  • great coverage of the backend for your host’s architecture, and

  • +
  • some coverage of other architectures (if other arches are supported backends).

  • +
+

Altogether, this should cover a diverse set of uses for Clang and LLVM. If you +have very specific needs (e.g. your compiler is meant to compile a large browser +for four different platforms, or similar), you may want to do something else. +This is configurable in the script itself.

+
+
+

Building Clang with PGO

+

If you prefer to not use the script or the cmake cache, this briefly goes over +how to build Clang/LLVM with PGO.

+

First, you should have at least LLVM, Clang, and compiler-rt checked out +locally.

+

Next, at a high level, you’re going to need to do the following:

+
    +
  1. Build a standard Release Clang and the relevant libclang_rt.profile library

  2. +
  3. Build Clang using the Clang you built above, but with instrumentation

  4. +
  5. Use the instrumented Clang to generate profiles, which consists of two steps:

  6. +
+
+
    +
  • Running the instrumented Clang/LLVM/lld/etc. on tasks that represent how +users will use said tools.

  • +
  • Using a tool to convert the “raw” profiles generated above into a single, +final PGO profile.

  • +
+
+
    +
  1. Build a final release Clang (along with whatever other binaries you need) +using the profile collected from your benchmark

  2. +
+

In more detailed steps:

+
    +
  1. Configure a Clang build as you normally would. It’s highly recommended that +you use the Release configuration for this, since it will be used to build +another Clang. Because you need Clang and supporting libraries, you’ll want +to build the all target (e.g. ninja all or make -j4 all).

  2. +
  3. Configure a Clang build as above, but add the following CMake args:

    +
      +
    • -DLLVM_BUILD_INSTRUMENTED=IR – This causes us to build everything +with instrumentation.

    • +
    • -DLLVM_BUILD_RUNTIME=No – A few projects have bad interactions when +built with profiling, and aren’t necessary to build. This flag turns them +off.

    • +
    • -DCMAKE_C_COMPILER=/path/to/stage1/clang - Use the Clang we built in +step 1.

    • +
    • -DCMAKE_CXX_COMPILER=/path/to/stage1/clang++ - Same as above.

    • +
    +
  4. +
+
+

In this build directory, you simply need to build the clang target (and +whatever supporting tooling your benchmark requires).

+
+
    +
  1. As mentioned above, this has two steps: gathering profile data, and then +massaging it into a useful form:

    +
      +
    1. Build your benchmark using the Clang generated in step 2. The ‘standard’ +benchmark recommended is to run check-clang and check-llvm in your +instrumented Clang’s build directory, and to do a full build of Clang/LLVM +using your instrumented Clang. So, create yet another build directory, +with the following CMake arguments:

      +
        +
      • -DCMAKE_C_COMPILER=/path/to/stage2/clang - Use the Clang we built in +step 2.

      • +
      • -DCMAKE_CXX_COMPILER=/path/to/stage2/clang++ - Same as above.

      • +
      +

      If your users are fans of debug info, you may want to consider using +-DCMAKE_BUILD_TYPE=RelWithDebInfo instead of +-DCMAKE_BUILD_TYPE=Release. This will grant better coverage of +debug info pieces of clang, but will take longer to complete and will +result in a much larger build directory.

      +

      It’s recommended to build the all target with your instrumented Clang, +since more coverage is often better.

      +
    2. +
    +
  2. +
+
+
    +
  1. You should now have a few *.profraw files in +path/to/stage2/profiles/. You need to merge these using +llvm-profdata (even if you only have one! The profile merge transforms +profraw into actual profile data, as well). This can be done with +/path/to/stage1/llvm-profdata merge +-output=/path/to/output/profdata.prof path/to/stage2/profiles/*.profraw.

  2. +
+
+
    +
  1. Now, build your final, PGO-optimized Clang. To do this, you’ll want to pass +the following additional arguments to CMake.

    +
      +
    • -DLLVM_PROFDATA_FILE=/path/to/output/profdata.prof - Use the PGO +profile from the previous step.

    • +
    • -DCMAKE_C_COMPILER=/path/to/stage1/clang - Use the Clang we built in +step 1.

    • +
    • -DCMAKE_CXX_COMPILER=/path/to/stage1/clang++ - Same as above.

    • +
    +

    From here, you can build whatever targets you need.

    +
    +

    Note

    +

    You may see warnings about a mismatched profile in the build output. These +are generally harmless. To silence them, you can add +-DCMAKE_C_FLAGS='-Wno-backend-plugin' +-DCMAKE_CXX_FLAGS='-Wno-backend-plugin' to your CMake invocation.

    +
    +
  2. +
+

Congrats! You now have a Clang built with profile-guided optimizations, and you +can delete all but the final build directory if you’d like.

+

If this worked well for you and you plan on doing it often, there’s a slight +optimization that can be made: LLVM and Clang have a tool called tblgen that’s +built and run during the build process. While it’s potentially nice to build +this for coverage as part of step 3, none of your other builds should benefit +from building it. You can pass the CMake options +-DCLANG_TABLEGEN=/path/to/stage1/bin/clang-tblgen +-DLLVM_TABLEGEN=/path/to/stage1/bin/llvm-tblgen to steps 2 and onward to avoid +these useless rebuilds.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToCrossCompileBuiltinsOnArm.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToCrossCompileBuiltinsOnArm.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToCrossCompileBuiltinsOnArm.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToCrossCompileBuiltinsOnArm.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,409 @@ + + + + + + + + + How to Cross Compile Compiler-rt Builtins For Arm — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How to Cross Compile Compiler-rt Builtins For Arm

+
+

Introduction

+

This document contains information about building and testing the builtins part +of compiler-rt for an Arm target, from an x86_64 Linux machine.

+

While this document concentrates on Arm and Linux the general principles should +apply to other targets supported by compiler-rt. Further contributions for other +targets are welcome.

+

The instructions in this document depend on libraries and programs external to +LLVM, there are many ways to install and configure these dependencies so you +may need to adapt the instructions here to fit your own local situation.

+
+
+

Prerequisites

+

In this use case we’ll be using cmake on a Debian-based Linux system, +cross-compiling from an x86_64 host to a hard-float Armv7-A target. We’ll be +using as many of the LLVM tools as we can, but it is possible to use GNU +equivalents.

+
+
    +
  • A build of LLVM/clang for the llvm-tools and llvm-config

  • +
  • A clang executable with support for the ARM target

  • +
  • compiler-rt sources

  • +
  • The qemu-arm user mode emulator

  • +
  • An arm-linux-gnueabihf sysroot

  • +
+
+

In this example we will be using ninja.

+

See https://compiler-rt.llvm.org/ for more information about the dependencies +on clang and LLVM.

+

See https://llvm.org/docs/GettingStarted.html for information about obtaining +the source for LLVM and compiler-rt. Note that the getting started guide +places compiler-rt in the projects subdirectory, but this is not essential and +if you are using the BaremetalARM.cmake cache for v6-M, v7-M and v7-EM then +compiler-rt must be placed in the runtimes directory.

+

qemu-arm should be available as a package for your Linux distribution.

+

The most complicated of the prerequisites to satisfy is the arm-linux-gnueabihf +sysroot. In theory it is possible to use the Linux distributions multiarch +support to fulfill the dependencies for building but unfortunately due to +/usr/local/include being added some host includes are selected. The easiest way +to supply a sysroot is to download the arm-linux-gnueabihf toolchain. This can +be found at: +* https://developer.arm.com/open-source/gnu-toolchain/gnu-a/downloads for gcc 8 and above +* https://releases.linaro.org/components/toolchain/binaries/ for gcc 4.9 to 7.3

+
+
+

Building compiler-rt builtins for Arm

+

We will be doing a standalone build of compiler-rt using the following cmake +options.

+
    +
  • path/to/compiler-rt

  • +
  • -G Ninja

  • +
  • -DCMAKE_AR=/path/to/llvm-ar

  • +
  • -DCMAKE_ASM_COMPILER_TARGET="arm-linux-gnueabihf"

  • +
  • -DCMAKE_ASM_FLAGS="build-c-flags"

  • +
  • -DCMAKE_C_COMPILER=/path/to/clang

  • +
  • -DCMAKE_C_COMPILER_TARGET="arm-linux-gnueabihf"

  • +
  • -DCMAKE_C_FLAGS="build-c-flags"

  • +
  • -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld"

  • +
  • -DCMAKE_NM=/path/to/llvm-nm

  • +
  • -DCMAKE_RANLIB=/path/to/llvm-ranlib

  • +
  • -DCOMPILER_RT_BUILD_BUILTINS=ON

  • +
  • -DCOMPILER_RT_BUILD_LIBFUZZER=OFF

  • +
  • -DCOMPILER_RT_BUILD_MEMPROF=OFF

  • +
  • -DCOMPILER_RT_BUILD_PROFILE=OFF

  • +
  • -DCOMPILER_RT_BUILD_SANITIZERS=OFF

  • +
  • -DCOMPILER_RT_BUILD_XRAY=OFF

  • +
  • -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON

  • +
  • -DLLVM_CONFIG_PATH=/path/to/llvm-config

  • +
+

The build-c-flags need to be sufficient to pass the C-make compiler check, +compile compiler-rt, and if you are running the tests, compile and link the +tests. When cross-compiling with clang we will need to pass sufficient +information to generate code for the Arm architecture we are targeting. We will +need to select the Arm target, select the Armv7-A architecture and choose +between using Arm or Thumb +instructions. For example:

+
    +
  • --target=arm-linux-gnueabihf

  • +
  • -march=armv7a

  • +
  • -mthumb

  • +
+

When using a GCC arm-linux-gnueabihf toolchain the following flags are +needed to pick up the includes and libraries:

+
    +
  • --gcc-toolchain=/path/to/dir/toolchain

  • +
  • --sysroot=/path/to/toolchain/arm-linux-gnueabihf/libc

  • +
+

In this example we will be adding all of the command line options to both +CMAKE_C_FLAGS and CMAKE_ASM_FLAGS. There are cmake flags to pass some of +these options individually which can be used to simplify the build-c-flags:

+
    +
  • -DCMAKE_C_COMPILER_TARGET="arm-linux-gnueabihf"

  • +
  • -DCMAKE_ASM_COMPILER_TARGET="arm-linux-gnueabihf"

  • +
  • -DCMAKE_C_COMPILER_EXTERNAL_TOOLCHAIN=/path/to/dir/toolchain

  • +
  • -DCMAKE_SYSROOT=/path/to/dir/toolchain/arm-linux-gnueabihf/libc

  • +
+

Once cmake has completed the builtins can be built with ninja builtins

+
+
+

Testing compiler-rt builtins using qemu-arm

+

To test the builtins library we need to add a few more cmake flags to enable +testing and set up the compiler and flags for test case. We must also tell +cmake that we wish to run the tests on qemu-arm.

+
    +
  • -DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armhf/sysroot"

  • +
  • -DCOMPILER_RT_INCLUDE_TESTS=ON

  • +
  • -DCOMPILER_RT_TEST_COMPILER="/path/to/clang"

  • +
  • -DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"

  • +
+

The /path/to/armhf/sysroot should be the same as the one passed to +--sysroot in the “build-c-flags”.

+

The “test-c-flags” need to include the target, architecture, gcc-toolchain, +sysroot and arm/thumb state. The additional cmake defines such as +CMAKE_C_COMPILER_EXTERNAL_TOOLCHAIN do not apply when building the tests. If +you have put all of these in “build-c-flags” then these can be repeated. If you +wish to use lld to link the tests then add "-fuse-ld=lld".

+

Once cmake has completed the tests can be built and run using +ninja check-builtins

+
+
+

Troubleshooting

+
+

The cmake try compile stage fails

+

At an early stage cmake will attempt to compile and link a simple C program to +test if the toolchain is working.

+

This stage can often fail at link time if the --sysroot= and +--gcc-toolchain= options are not passed to the compiler. Check the +CMAKE_C_FLAGS and CMAKE_C_COMPILER_TARGET flags.

+

It can be useful to build a simple example outside of cmake with your toolchain +to make sure it is working. For example: clang --target=arm-linux-gnueabi -march=armv7a --gcc-toolchain=/path/to/gcc-toolchain --sysroot=/path/to/gcc-toolchain/arm-linux-gnueabihf/libc helloworld.c

+
+
+

Clang uses the host header files

+

On debian based systems it is possible to install multiarch support for +arm-linux-gnueabi and arm-linux-gnueabihf. In many cases clang can successfully +use this multiarch support when --gcc-toolchain= and --sysroot= are not supplied. +Unfortunately clang adds /usr/local/include before +/usr/include/arm-linux-gnueabihf leading to errors when compiling the hosts +header files.

+

The multiarch support is not sufficient to build the builtins; you will need to +use a separate arm-linux-gnueabihf toolchain.

+
+
+

No target passed to clang

+

If clang is not given a target it will typically use the host target, this will +not understand the Arm assembly language files resulting in error messages such +as error: unknown directive .syntax unified.

+

You can check the clang invocation in the error message to see if there is no +--target or if it is set incorrectly. The cause is usually +CMAKE_ASM_FLAGS not containing --target or CMAKE_ASM_COMPILER_TARGET not being present.

+
+
+

Arm architecture not given

+

The --target=arm-linux-gnueabihf will default to arm architecture v4t which +cannot assemble the barrier instructions used in the synch_and_fetch source +files.

+

The cause is usually a missing -march=armv7a from the CMAKE_ASM_FLAGS.

+
+
+

Compiler-rt builds but the tests fail to build

+

The flags used to build the tests are not the same as those used to build the +builtins. The c flags are provided by COMPILER_RT_TEST_COMPILER_CFLAGS and +the CMAKE_C_COMPILER_TARGET, CMAKE_ASM_COMPILER_TARGET, +CMAKE_C_COMPILER_EXTERNAL_TOOLCHAIN and CMAKE_SYSROOT flags are not +applied.

+

Make sure that COMPILER_RT_TEST_COMPILER_CFLAGS contains all the necessary +information.

+
+
+
+

Modifications for other Targets

+
+

Arm Soft-Float Target

+

The instructions for the Arm hard-float target can be used for the soft-float +target by substituting soft-float equivalents for the sysroot and target. The +target to use is:

+
    +
  • -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi

  • +
+

Depending on whether you want to use floating point instructions or not you +may need extra c-flags such as -mfloat-abi=softfp for use of floating-point +instructions, and -mfloat-abi=soft -mfpu=none for software floating-point +emulation.

+

You will need to use an arm-linux-gnueabi GNU toolchain for soft-float.

+
+
+

AArch64 Target

+

The instructions for Arm can be used for AArch64 by substituting AArch64 +equivalents for the sysroot, emulator and target.

+
    +
  • -DCMAKE_C_COMPILER_TARGET=aarch64-linux-gnu

  • +
  • -DCOMPILER_RT_EMULATOR="qemu-aarch64 -L /path/to/aarch64/sysroot"

  • +
+

The CMAKE_C_FLAGS and COMPILER_RT_TEST_COMPILER_CFLAGS may also need: +"--sysroot=/path/to/aarch64/sysroot --gcc-toolchain=/path/to/gcc-toolchain"

+
+
+

Armv6-m, Armv7-m and Armv7E-M targets

+

To build and test the libraries using a similar method to Armv7-A is possible +but more difficult. The main problems are:

+
    +
  • There isn’t a qemu-arm user-mode emulator for bare-metal systems. The qemu-system-arm can be used but this is significantly more difficult to set up.

  • +
  • The targets to compile compiler-rt have the suffix -none-eabi. This uses the BareMetal driver in clang and by default won’t find the libraries needed to pass the cmake compiler check.

  • +
+

As the Armv6-M, Armv7-M and Armv7E-M builds of compiler-rt only use instructions +that are supported on Armv7-A we can still get most of the value of running the +tests using the same qemu-arm that we used for Armv7-A by building and +running the test cases for Armv7-A but using the builtins compiled for +Armv6-M, Armv7-M or Armv7E-M. This will test that the builtins can be linked +into a binary and execute the tests correctly but it will not catch if the +builtins use instructions that are supported on Armv7-A but not Armv6-M, +Armv7-M and Armv7E-M.

+

To get the cmake compile test to pass you will need to pass the libraries +needed to successfully link the cmake test via CMAKE_C_FLAGS. It is +strongly recommended that you use version 3.6 or above of cmake so you can use +CMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY to skip the link step.

+
    +
  • -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY

  • +
  • -DCOMPILER_RT_OS_DIR="baremetal"

  • +
  • -DCOMPILER_RT_BUILD_BUILTINS=ON

  • +
  • -DCOMPILER_RT_BUILD_SANITIZERS=OFF

  • +
  • -DCOMPILER_RT_BUILD_XRAY=OFF

  • +
  • -DCOMPILER_RT_BUILD_LIBFUZZER=OFF

  • +
  • -DCOMPILER_RT_BUILD_PROFILE=OFF

  • +
  • -DCMAKE_C_COMPILER=${host_install_dir}/bin/clang

  • +
  • -DCMAKE_C_COMPILER_TARGET="your *-none-eabi target"

  • +
  • -DCMAKE_ASM_COMPILER_TARGET="your *-none-eabi target"

  • +
  • -DCMAKE_AR=/path/to/llvm-ar

  • +
  • -DCMAKE_NM=/path/to/llvm-nm

  • +
  • -DCMAKE_RANLIB=/path/to/llvm-ranlib

  • +
  • -DCOMPILER_RT_BAREMETAL_BUILD=ON

  • +
  • -DCOMPILER_RT_DEFAULT_TARGET_ONLY=ON

  • +
  • -DLLVM_CONFIG_PATH=/path/to/llvm-config

  • +
  • -DCMAKE_C_FLAGS="build-c-flags"

  • +
  • -DCMAKE_ASM_FLAGS="build-c-flags"

  • +
  • -DCOMPILER_RT_EMULATOR="qemu-arm -L /path/to/armv7-A/sysroot"

  • +
  • -DCOMPILER_RT_INCLUDE_TESTS=ON

  • +
  • -DCOMPILER_RT_TEST_COMPILER="/path/to/clang"

  • +
  • -DCOMPILER_RT_TEST_COMPILER_CFLAGS="test-c-flags"

  • +
+

The Armv6-M builtins will use the soft-float ABI. When compiling the tests for +Armv7-A we must include "-mthumb -mfloat-abi=soft -mfpu=none" in the +test-c-flags. We must use an Armv7-A soft-float abi sysroot for qemu-arm.

+

Depending on the linker used for the test cases you may encounter BuildAttribute +mismatches between the M-profile objects from compiler-rt and the A-profile +objects from the test. The lld linker does not check the profile +BuildAttribute so it can be used to link the tests by adding -fuse-ld=lld to the +COMPILER_RT_TEST_COMPILER_CFLAGS.

+
+
+

Alternative using a cmake cache

+

If you wish to build, but not test compiler-rt for Armv6-M, Armv7-M or Armv7E-M +the easiest way is to use the BaremetalARM.cmake recipe in clang/cmake/caches.

+

You will need a bare metal sysroot such as that provided by the GNU ARM +Embedded toolchain.

+

The libraries can be built with the cmake options:

+
    +
  • -DBAREMETAL_ARMV6M_SYSROOT=/path/to/bare/metal/toolchain/arm-none-eabi

  • +
  • -DBAREMETAL_ARMV7M_SYSROOT=/path/to/bare/metal/toolchain/arm-none-eabi

  • +
  • -DBAREMETAL_ARMV7EM_SYSROOT=/path/to/bare/metal/toolchain/arm-none-eabi

  • +
  • -C /path/to/llvm/source/tools/clang/cmake/caches/BaremetalARM.cmake

  • +
  • /path/to/llvm

  • +
+

Note that for the recipe to work the compiler-rt source must be checked out +into the directory llvm/runtimes. You will also need clang and lld checked out.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToCrossCompileLLVM.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToCrossCompileLLVM.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToCrossCompileLLVM.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToCrossCompileLLVM.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,331 @@ + + + + + + + + + How To Cross-Compile Clang/LLVM using Clang/LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Cross-Compile Clang/LLVM using Clang/LLVM

+
+

Introduction

+

This document contains information about building LLVM and +Clang on a host machine, targeting another platform.

+

For more information on how to use Clang as a cross-compiler, +please check https://clang.llvm.org/docs/CrossCompilation.html.

+

TODO: Add MIPS and other platforms to this document.

+
+
+

Cross-Compiling from x86_64 to ARM

+

In this use case, we’ll be using CMake and Ninja, on a Debian-based Linux +system, cross-compiling from an x86_64 host (most Intel and AMD chips +nowadays) to a hard-float ARM target (most ARM targets nowadays).

+

The packages you’ll need are:

+
+
    +
  • cmake

  • +
  • ninja-build (from backports in Ubuntu)

  • +
  • gcc-4.7-arm-linux-gnueabihf

  • +
  • gcc-4.7-multilib-arm-linux-gnueabihf

  • +
  • binutils-arm-linux-gnueabihf

  • +
  • libgcc1-armhf-cross

  • +
  • libsfgcc1-armhf-cross

  • +
  • libstdc++6-armhf-cross

  • +
  • libstdc++6-4.7-dev-armhf-cross

  • +
+
+
+

Configuring CMake

+

For more information on how to configure CMake for LLVM/Clang, +see Building LLVM with CMake.

+

The CMake options you need to add are:

+
+
    +
  • -DCMAKE_CROSSCOMPILING=True

  • +
  • -DCMAKE_INSTALL_PREFIX=<install-dir>

  • +
  • -DLLVM_TABLEGEN=<path-to-host-bin>/llvm-tblgen

  • +
  • -DCLANG_TABLEGEN=<path-to-host-bin>/clang-tblgen

  • +
  • -DLLVM_DEFAULT_TARGET_TRIPLE=arm-linux-gnueabihf

  • +
  • -DLLVM_TARGET_ARCH=ARM

  • +
  • -DLLVM_TARGETS_TO_BUILD=ARM

  • +
+
+

If you’re compiling with GCC, you can use architecture options for your target, +and the compiler driver will detect everything that it needs:

+
+
    +
  • -DCMAKE_CXX_FLAGS='-march=armv7-a -mcpu=cortex-a9 -mfloat-abi=hard'

  • +
+
+

However, if you’re using Clang, the driver might not be up-to-date with your +specific Linux distribution, version or GCC layout, so you’ll need to fudge.

+

In addition to the ones above, you’ll also need:

+
+
    +
  • '-target arm-linux-gnueabihf' or whatever is the triple of your cross GCC.

  • +
  • '--sysroot=/usr/arm-linux-gnueabihf', '--sysroot=/opt/gcc/arm-linux-gnueabihf' +or whatever is the location of your GCC’s sysroot (where /lib, /bin etc are).

  • +
  • Appropriate use of -I and -L, depending on how the cross GCC is installed, +and where are the libraries and headers.

  • +
+
+

The TableGen options are required to compile it with the host compiler, +so you’ll need to compile LLVM (or at least llvm-tblgen) to your host +platform before you start. The CXX flags define the target, cpu (which in this case +defaults to fpu=VFP3 with NEON), and forcing the hard-float ABI. If you’re +using Clang as a cross-compiler, you will also have to set --sysroot +to make sure it picks the correct linker.

+

When using Clang, it’s important that you choose the triple to be identical +to the GCC triple and the sysroot. This will make it easier for Clang to +find the correct tools and include headers. But that won’t mean all headers and +libraries will be found. You’ll still need to use -I and -L to locate +those extra ones, depending on your distribution.

+

Most of the time, what you want is to have a native compiler to the +platform itself, but not others. So there’s rarely a point in compiling +all back-ends. For that reason, you should also set the +TARGETS_TO_BUILD to only build the back-end you’re targeting to.

+

You must set the CMAKE_INSTALL_PREFIX, otherwise a ninja install +will copy ARM binaries to your root filesystem, which is not what you +want.

+
+
+

Hacks

+

There are some bugs in current LLVM, which require some fiddling before +running CMake:

+
    +
  1. If you’re using Clang as the cross-compiler, there is a problem in +the LLVM ARM back-end that is producing absolute relocations on +position-independent code (R_ARM_THM_MOVW_ABS_NC), so for now, you +should disable PIC:

    +
    -DLLVM_ENABLE_PIC=False
    +
    +
    +

    This is not a problem, since Clang/LLVM libraries are statically +linked anyway, it shouldn’t affect much.

    +
  2. +
  3. The ARM libraries won’t be installed in your system. +But the CMake prepare step, which checks for +dependencies, will check the host libraries, not the target +ones. Below there’s a list of some dependencies, but your project could +have more, or this document could be outdated. You’ll see the errors +while linking as an indication of that.

    +

    Debian based distros have a way to add multiarch, which adds +a new architecture and allows you to install packages for those +systems. See https://wiki.debian.org/Multiarch/HOWTO for more info.

    +

    But not all distros will have that, and possibly not an easy way to +install them anyway, so you’ll have to build/download +them separately.

    +

    A quick way of getting the libraries is to download them from +a distribution repository, like Debian (http://packages.debian.org/jessie/), +and download the missing libraries. Note that the libXXX +will have the shared objects (.so) and the libXXX-dev will +give you the headers and the static (.a) library. Just in +case, download both.

    +

    The ones you need for ARM are: libtinfo, zlib1g, +libxml2 and liblzma. In the Debian repository you’ll +find downloads for all architectures.

    +

    After you download and unpack all .deb packages, copy all +.so and .a to a directory, make the appropriate +symbolic links (if necessary), and add the relevant -L +and -I paths to -DCMAKE_CXX_FLAGS above.

    +
  4. +
+
+
+

Running CMake and Building

+

Finally, if you’re using your platform compiler, run:

+
+
$ cmake -G Ninja <source-dir> <options above>
+
+
+
+

If you’re using Clang as the cross-compiler, run:

+
+
$ CC='clang' CXX='clang++' cmake -G Ninja <source-dir> <options above>
+
+
+
+

If you have clang/clang++ on the path, it should just work, and special +Ninja files will be created in the build directory. I strongly suggest +that you run cmake in a separate build directory, not inside the +source tree.

+

To build, simply type:

+
+
$ ninja
+
+
+
+

It should automatically find out how many cores you have and which +rules need building, and will build the whole thing.

+

You can’t run ninja check-all on this tree because the created +binaries are targeted to ARM, not x86_64.

+
+
+

Installing and Using

+

After the LLVM/Clang has built successfully, you should install it +via:

+
+
$ ninja install
+
+
+
+

which will create a sysroot on the install-dir. You can then tar +that directory into a binary with the full triple name (for easy +identification), like:

+
+
$ ln -sf <install-dir> arm-linux-gnueabihf-clang
+$ tar zchf arm-linux-gnueabihf-clang.tar.gz arm-linux-gnueabihf-clang
+
+
+
+

If you copy that tarball to your target board, you’ll be able to use +it for running the test-suite, for example. Follow the guidelines at +https://llvm.org/docs/lnt/quickstart.html, unpack the tarball in the +test directory, and use options:

+
+
$ ./sandbox/bin/python sandbox/bin/lnt runtest nt \
+    --sandbox sandbox \
+    --test-suite `pwd`/test-suite \
+    --cc `pwd`/arm-linux-gnueabihf-clang/bin/clang \
+    --cxx `pwd`/arm-linux-gnueabihf-clang/bin/clang++
+
+
+
+

Remember to add the -jN option to lnt, with N set to the number of CPUs +on your board. Also, the path to your clang has to be absolute, so +you’ll need the pwd trick above.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToReleaseLLVM.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToReleaseLLVM.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToReleaseLLVM.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToReleaseLLVM.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,468 @@ + + + + + + + + + How To Release LLVM To The Public — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Release LLVM To The Public

+
+

Introduction

+

This document contains information about successfully releasing LLVM — +including sub-projects: e.g., clang and compiler-rt — to the public. +It is the Release Manager’s responsibility to ensure that a high quality build +of LLVM is released.

+

If you’re looking for the document on how to test the release candidates and +create the binary packages, please refer to the How To Validate a New Release instead.

+
+
+

Release Timeline

+

LLVM is released on a time based schedule — with major releases roughly +every 6 months. In between major releases there may be dot releases. +The release manager will determine if and when to make a dot release based +on feedback from the community. Typically, dot releases should be made if +there are a large number of bug-fixes in the stable branch or a critical bug +has been discovered that affects a large number of users.

+

Unless otherwise stated, dot releases will follow the same procedure as +major releases.

+

The release process is roughly as follows:

+
    +
  • Set code freeze and branch creation date for 6 months after last code freeze +date. Announce release schedule to the LLVM community and update the website.

  • +
  • Create release branch and begin release process.

  • +
  • Send out release candidate sources for first round of testing. Testing lasts +7-10 days. During the first round of testing, any regressions found should be +fixed. Patches are merged from mainline into the release branch. Also, all +features need to be completed during this time. Any features not completed at +the end of the first round of testing will be removed or disabled for the +release.

  • +
  • Generate and send out the second release candidate sources. Only critical +bugs found during this testing phase will be fixed. Any bugs introduced by +merged patches will be fixed. If any such fixes are made, a third round of testing is needed.

  • +
  • The release notes are updated.

  • +
  • Finally, release!

  • +
  • Announce bug fix release schedule to the LLVM community and update the website.

  • +
  • Tag bug fix -rc1 after 4 weeks have passed.

  • +
  • Tag bug fix -rc2 4 weeks after -rc1.

  • +
  • Tag additional -rc candidates, if needed, to fix critical issues in +previous -rc releases.

  • +
  • Tag final release.

  • +
+
+
+

Release Process

+ +
+

Release Administrative Tasks

+

This section describes a few administrative tasks that need to be done for the +release process to begin. Specifically, it involves:

+
    +
  • Updating version numbers,

  • +
  • Creating the release branch, and

  • +
  • Tagging release candidates for the release team to begin testing.

  • +
+
+

Create Release Branch

+

Branch the Git trunk using the following procedure:

+
    +
  1. Remind developers that the release branching is imminent and to refrain from +committing patches that might break the build. E.g., new features, large +patches for works in progress, an overhaul of the type system, an exciting +new TableGen feature, etc.

  2. +
  3. Verify that the current git trunk is in decent shape by +examining nightly tester and buildbot results.

  4. +
  5. Bump the version in trunk to N.0.0git and tag the commit with llvmorg-N-init. +If X is the version to be released, then N is X + 1.

  6. +
+
$ git tag -a llvmorg-N-init
+
+
+
    +
  1. Clear the release notes in trunk.

  2. +
  3. Create the release branch from the last known good revision from before the +version bump. The branch’s name is release/X.x where X is the major version +number and x is just the letter x.

  4. +
  5. All tags and branches need to be created in both the llvm/llvm-project and +llvm/llvm-test-suite repos.

  6. +
+
+
+

Update LLVM Version

+

After creating the LLVM release branch, update the release branches’ +CMakeLists.txt versions from ‘X.0.0git’ to ‘X.0.0’.

+

In addition, the version numbers of all the Bugzilla components must be updated +for the next release.

+
+
+

Tagging the LLVM Release Candidates

+

Tag release candidates:

+
$ git tag -a llvmorg-X.Y.Z-rcN
+
+
+

The Release Manager must supply pre-packaged source tarballs for users. This can +be done with the export.sh script in utils/release.

+

Tarballs, release binaries, or any other release artifacts must be uploaded to +GitHub. This can be done using the github-upload-release.py script in utils/release.

+
$ github-upload-release.py upload --token <github-token> --release X.Y.Z-rcN --files <release_files>
+
+
+
$ ./export.sh -release X.Y.Z -rc $RC
+
+
+

This will generate source tarballs for each LLVM project being validated, which +can be uploaded to github for further testing.

+
+
+

Build The Binary Distribution

+

Creating the binary distribution requires following the instructions +here.

+

That process will perform both Release+Asserts and Release builds but only +pack the Release build for upload. You should use the Release+Asserts sysroot, +normally under final/Phase3/Release+Asserts/llvmCore-3.8.1-RCn.install/, +for test-suite and run-time benchmarks, to make sure nothing serious has +passed through the net. For compile-time benchmarks, use the Release version.

+

The minimum required version of the tools you’ll need are here

+
+
+
+

Release Qualification Criteria

+

There are no official release qualification criteria. It is up to the +release manager to determine when a release is ready. The release manager +should pay attention to the results of community testing, the number of outstanding +bugs, and the number of regressions when determining whether or not to make a +release.

+

The community values time based releases, so releases should not be delayed for +too long unless there are critical issues remaining. In most cases, the only +kind of bugs that are critical enough to block a release would be a major regression +from a previous release.

+
+
+

Official Testing

+

A few developers in the community have dedicated time to validate the release +candidates and volunteered to be the official release testers for each +architecture.

+

These will be the ones testing, generating and uploading the official binaries +to the server, and will be the minimum tests necessary for the release to +proceed.

+

This will obviously not cover all OSs and distributions, so additional community +validation is important. However, if community input is not reached before the +release is out, all bugs reported will have to go on the next stable release.

+

The official release managers are:

+
    +
  • Major releases (X.0): Hans Wennborg

  • +
  • Stable releases (X.n): Tom Stellard

  • +
+

The official release testers are volunteered from the community and have +consistently validated and released binaries for their targets/OSs. To contact +them, you should email the release-testers@lists.llvm.org mailing list.

+

The official testers list is in the file RELEASE_TESTERS.TXT, in the LLVM +repository.

+
+
+

Community Testing

+

Once all testing has been completed and appropriate bugs filed, the release +candidate tarballs are put on the website and the LLVM community is notified.

+

We ask that all LLVM developers test the release in any of the following ways:

+
    +
  1. Download llvm-X.Y, llvm-test-X.Y, and the appropriate clang +binary. Build LLVM. Run make check and the full LLVM test suite (make +TEST=nightly report).

  2. +
  3. Download llvm-X.Y, llvm-test-X.Y, and the clang sources. Compile +everything. Run make check and the full LLVM test suite (make +TEST=nightly report).

  4. +
  5. Download llvm-X.Y, llvm-test-X.Y, and the appropriate clang +binary. Build whole programs with it (ex. Chromium, Firefox, Apache) for +your platform.

  6. +
  7. Download llvm-X.Y, llvm-test-X.Y, and the appropriate clang +binary. Build your programs with it and check for conformance and +performance regressions.

  8. +
  9. Run the release process, if your platform is +different than that which is officially supported, and report back errors +only if they were not reported by the official release tester for that +architecture.

  10. +
+

We also ask that the OS distribution release managers test their packages with +the first candidate of every release, and report any new errors in Bugzilla. +If the bug can be reproduced with an unpatched upstream version of the release +candidate (as opposed to the distribution’s own build), the priority should be +release blocker.

+

During the first round of testing, all regressions must be fixed before the +second release candidate is tagged.

+

In the subsequent stages, the testing is only to ensure that bug +fixes previously merged in have not created new major problems. This is not +the time to solve additional and unrelated bugs! If no patches are merged in, +the release is determined to be ready and the release manager may move onto the +next stage.

+
+
+

Reporting Regressions

+

Every regression that is found during the tests (as per the criteria above) +should be filed as a bug in Bugzilla with the priority release blocker and +blocking a specific release.

+

To help manage all the bugs reported and track which ones are blockers, a new +“[meta]” bug should be created and all regressions marked as blocking that Meta. Once +all blockers are done, the Meta can be closed.

+

If a bug can’t be reproduced, or stops being a blocker, it should be removed +from the Meta and its priority decreased to normal. Debugging can continue, +but on trunk.

+
+
+

Merge Requests

+

You can use any of the following methods to request that a revision from trunk +be merged into a release branch:

+
    +
  1. Use the utils/release/merge-request.sh script which will automatically +file a bug requesting that the patch be merged. e.g. To request revision +12345 be merged into the branch for the 5.0.1 release: +llvm.src/utils/release/merge-request.sh -stable-version 5.0 -r 12345 -user bugzilla@example.com

  2. +
  3. Manually file a bug with the subject: “Merge r12345 into the X.Y branch”, +enter the commit(s) that you want merged in the “Fixed by Commit(s)” and mark +it as a blocker of the current release bug. Release bugs are given aliases +in the form of release-x.y.z, so to mark a bug as a blocker for the 5.0.1 +release, just enter release-5.0.1 in the “Blocks” field.

  4. +
  5. Reply to the commit email on llvm-commits for the revision to merge and cc +the release manager.

  6. +
+
+
+

Release Patch Rules

+

Below are the rules regarding patching the release branch:

+
    +
  1. Patches applied to the release branch may only be applied by the release +manager, the official release testers or the code owners with approval from +the release manager.

  2. +
  3. Release managers are encouraged, but not required, to get approval from code +owners before approving patches. If there is no code owner or the code owner +is unreachable then release managers can ask approval from patch reviewers or +other developers active in that area.

  4. +
  5. Before RC1 Patches should be limited to bug fixes, important optimization +improvements, or completion of features that were started before the branch +was created. As with all phases, release managers and code owners can reject +patches that are deemed too invasive.

  6. +
  7. Before RC2 Patches should be limited to bug fixes or backend specific +improvements that are determined to be very safe.

  8. +
  9. Before RC3/Final Major Release Patches should be limited to critical +bugs or regressions.

  10. +
  11. Bug fix releases Patches should be limited to bug fixes or very safe +and critical performance improvements. Patches must maintain both API and +ABI compatibility with the previous major release.

  12. +
+
+

Merging Patches

+

Use the git cherry-pick -x command to merge patches to the release branch:

+
    +
  1. git cherry-pick -x abcdef0

  2. +
  3. Run regression tests.

  4. +
+
+
+
+

Release Final Tasks

+

The final stages of the release process involve tagging the “final” release +branch, updating documentation that refers to the release, and updating the +demo page.

+
+

Update Documentation

+

Review the documentation in the release branch and ensure that it is up +to date. The “Release Notes” must be updated to reflect new features, bug +fixes, new known issues, and changes in the list of supported platforms. +The “Getting Started Guide” should be updated to reflect the new release +version number tag available from Subversion and changes in basic system +requirements.

+
+
+

Tag the LLVM Final Release

+

Tag the final release sources:

+
$ git tag -a llvmorg-X.Y.Z
+$ git push https://github.com/llvm/llvm-project.git llvmorg-X.Y.Z
+
+
+
+
+

Update the LLVM Website

+

The website must be updated before the release announcement is sent out. Here +is what to do:

+
    +
  1. Check out the www-releases module from GitHub.

  2. +
  3. Create a new sub-directory X.Y.Z in the releases directory.

  4. +
  5. Copy and commit the llvm/docs and LICENSE.txt files into this new +directory.

  6. +
  7. Update the releases/download.html file with links to the release +binaries on GitHub.

  8. +
  9. Update the releases/index.html with the new release and link to release +documentation.

  10. +
  11. Finally checkout the llvm-www repo and update the main page +(index.html and sidebar) to point to the new release and release +announcement.

  12. +
+
+
+

Announce the Release

+

Send an email to the list announcing the release, pointing people to all the +relevant documentation, download pages and bugs fixed.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToSetUpLLVMStyleRTTI.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToSetUpLLVMStyleRTTI.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToSetUpLLVMStyleRTTI.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToSetUpLLVMStyleRTTI.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,568 @@ + + + + + + + + + How to set up LLVM-style RTTI for your class hierarchy — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How to set up LLVM-style RTTI for your class hierarchy

+ +
+

Background

+

LLVM avoids using C++’s built in RTTI. Instead, it pervasively uses its +own hand-rolled form of RTTI which is much more efficient and flexible, +although it requires a bit more work from you as a class author.

+

A description of how to use LLVM-style RTTI from a client’s perspective is +given in the Programmer’s Manual. This +document, in contrast, discusses the steps you need to take as a class +hierarchy author to make LLVM-style RTTI available to your clients.

+

Before diving in, make sure that you are familiar with the Object Oriented +Programming concept of “is-a”.

+
+
+

Basic Setup

+

This section describes how to set up the most basic form of LLVM-style RTTI +(which is sufficient for 99.9% of the cases). We will set up LLVM-style +RTTI for this class hierarchy:

+
class Shape {
+public:
+  Shape() {}
+  virtual double computeArea() = 0;
+};
+
+class Square : public Shape {
+  double SideLength;
+public:
+  Square(double S) : SideLength(S) {}
+  double computeArea() override;
+};
+
+class Circle : public Shape {
+  double Radius;
+public:
+  Circle(double R) : Radius(R) {}
+  double computeArea() override;
+};
+
+
+

The most basic working setup for LLVM-style RTTI requires the following +steps:

+
    +
  1. In the header where you declare Shape, you will want to #include +"llvm/Support/Casting.h", which declares LLVM’s RTTI templates. That +way your clients don’t even have to think about it.

    +
    #include "llvm/Support/Casting.h"
    +
    +
    +
  2. +
  3. In the base class, introduce an enum which discriminates all of the +different concrete classes in the hierarchy, and stash the enum value +somewhere in the base class.

    +

    Here is the code after introducing this change:

    +
     class Shape {
    + public:
    ++  /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
    ++  enum ShapeKind {
    ++    SK_Square,
    ++    SK_Circle
    ++  };
    ++private:
    ++  const ShapeKind Kind;
    ++public:
    ++  ShapeKind getKind() const { return Kind; }
    ++
    +   Shape() {}
    +   virtual double computeArea() = 0;
    + };
    +
    +
    +

    You will usually want to keep the Kind member encapsulated and +private, but let the enum ShapeKind be public along with providing a +getKind() method. This is convenient for clients so that they can do +a switch over the enum.

    +

    A common naming convention is that these enums are “kind”s, to avoid +ambiguity with the words “type” or “class” which have overloaded meanings +in many contexts within LLVM. Sometimes there will be a natural name for +it, like “opcode”. Don’t bikeshed over this; when in doubt use Kind.

    +

    You might wonder why the Kind enum doesn’t have an entry for +Shape. The reason for this is that since Shape is abstract +(computeArea() = 0;), you will never actually have non-derived +instances of exactly that class (only subclasses). See Concrete Bases +and Deeper Hierarchies for information on how to deal with +non-abstract bases. It’s worth mentioning here that unlike +dynamic_cast<>, LLVM-style RTTI can be used (and is often used) for +classes that don’t have v-tables.

    +
  4. +
  5. Next, you need to make sure that the Kind gets initialized to the +value corresponding to the dynamic type of the class. Typically, you will +want to have it be an argument to the constructor of the base class, and +then pass in the respective XXXKind from subclass constructors.

    +

    Here is the code after that change:

    +
     class Shape {
    + public:
    +   /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
    +   enum ShapeKind {
    +     SK_Square,
    +     SK_Circle
    +   };
    + private:
    +   const ShapeKind Kind;
    + public:
    +   ShapeKind getKind() const { return Kind; }
    +
    +-  Shape() {}
    ++  Shape(ShapeKind K) : Kind(K) {}
    +   virtual double computeArea() = 0;
    + };
    +
    + class Square : public Shape {
    +   double SideLength;
    + public:
    +-  Square(double S) : SideLength(S) {}
    ++  Square(double S) : Shape(SK_Square), SideLength(S) {}
    +   double computeArea() override;
    + };
    +
    + class Circle : public Shape {
    +   double Radius;
    + public:
    +-  Circle(double R) : Radius(R) {}
    ++  Circle(double R) : Shape(SK_Circle), Radius(R) {}
    +   double computeArea() override;
    + };
    +
    +
    +
  6. +
  7. Finally, you need to inform LLVM’s RTTI templates how to dynamically +determine the type of a class (i.e. whether the isa<>/dyn_cast<> +should succeed). The default “99.9% of use cases” way to accomplish this +is through a small static member function classof. In order to have +proper context for an explanation, we will display this code first, and +then below describe each part:

    +
     class Shape {
    + public:
    +   /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
    +   enum ShapeKind {
    +     SK_Square,
    +     SK_Circle
    +   };
    + private:
    +   const ShapeKind Kind;
    + public:
    +   ShapeKind getKind() const { return Kind; }
    +
    +   Shape(ShapeKind K) : Kind(K) {}
    +   virtual double computeArea() = 0;
    + };
    +
    + class Square : public Shape {
    +   double SideLength;
    + public:
    +   Square(double S) : Shape(SK_Square), SideLength(S) {}
    +   double computeArea() override;
    ++
    ++  static bool classof(const Shape *S) {
    ++    return S->getKind() == SK_Square;
    ++  }
    + };
    +
    + class Circle : public Shape {
    +   double Radius;
    + public:
    +   Circle(double R) : Shape(SK_Circle), Radius(R) {}
    +   double computeArea() override;
    ++
    ++  static bool classof(const Shape *S) {
    ++    return S->getKind() == SK_Circle;
    ++  }
    + };
    +
    +
    +

    The job of classof is to dynamically determine whether an object of +a base class is in fact of a particular derived class. In order to +downcast a type Base to a type Derived, there needs to be a +classof in Derived which will accept an object of type Base.

    +

    To be concrete, consider the following code:

    +
    Shape *S = ...;
    +if (isa<Circle>(S)) {
    +  /* do something ... */
    +}
    +
    +
    +

    The code of the isa<> test in this code will eventually boil +down—after template instantiation and some other machinery—to a +check roughly like Circle::classof(S). For more information, see +The Contract of classof.

    +

    The argument to classof should always be an ancestor class because +the implementation has logic to allow and optimize away +upcasts/up-isa<>’s automatically. It is as though every class +Foo automatically has a classof like:

    +
    class Foo {
    +  [...]
    +  template <class T>
    +  static bool classof(const T *,
    +                      ::std::enable_if<
    +                        ::std::is_base_of<Foo, T>::value
    +                      >::type* = 0) { return true; }
    +  [...]
    +};
    +
    +
    +

    Note that this is the reason that we did not need to introduce a +classof into Shape: all relevant classes derive from Shape, +and Shape itself is abstract (has no entry in the Kind enum), +so this notional inferred classof is all we need. See Concrete +Bases and Deeper Hierarchies for more information about how to extend +this example to more general hierarchies.

    +
  8. +
+

Although for this small example setting up LLVM-style RTTI seems like a lot +of “boilerplate”, if your classes are doing anything interesting then this +will end up being a tiny fraction of the code.

+
+
+

Concrete Bases and Deeper Hierarchies

+

For concrete bases (i.e. non-abstract interior nodes of the inheritance +tree), the Kind check inside classof needs to be a bit more +complicated. The situation differs from the example above in that

+
    +
  • Since the class is concrete, it must itself have an entry in the Kind +enum because it is possible to have objects with this class as a dynamic +type.

  • +
  • Since the class has children, the check inside classof must take them +into account.

  • +
+

Say that SpecialSquare and OtherSpecialSquare derive +from Square, and so ShapeKind becomes:

+
 enum ShapeKind {
+   SK_Square,
++  SK_SpecialSquare,
++  SK_OtherSpecialSquare,
+   SK_Circle
+ }
+
+
+

Then in Square, we would need to modify the classof like so:

+
-  static bool classof(const Shape *S) {
+-    return S->getKind() == SK_Square;
+-  }
++  static bool classof(const Shape *S) {
++    return S->getKind() >= SK_Square &&
++           S->getKind() <= SK_OtherSpecialSquare;
++  }
+
+
+

The reason that we need to test a range like this instead of just equality +is that both SpecialSquare and OtherSpecialSquare “is-a” +Square, and so classof needs to return true for them.

+

This approach can be made to scale to arbitrarily deep hierarchies. The +trick is that you arrange the enum values so that they correspond to a +preorder traversal of the class hierarchy tree. With that arrangement, all +subclass tests can be done with two comparisons as shown above. If you just +list the class hierarchy like a list of bullet points, you’ll get the +ordering right:

+
| Shape
+  | Square
+    | SpecialSquare
+    | OtherSpecialSquare
+  | Circle
+
+
+
+

A Bug to be Aware Of

+

The example just given opens the door to bugs where the classofs are +not updated to match the Kind enum when adding (or removing) classes to +(from) the hierarchy.

+

Continuing the example above, suppose we add a SomewhatSpecialSquare as +a subclass of Square, and update the ShapeKind enum like so:

+
 enum ShapeKind {
+   SK_Square,
+   SK_SpecialSquare,
+   SK_OtherSpecialSquare,
++  SK_SomewhatSpecialSquare,
+   SK_Circle
+ }
+
+
+

Now, suppose that we forget to update Square::classof(), so it still +looks like:

+
static bool classof(const Shape *S) {
+  // BUG: Returns false when S->getKind() == SK_SomewhatSpecialSquare,
+  // even though SomewhatSpecialSquare "is a" Square.
+  return S->getKind() >= SK_Square &&
+         S->getKind() <= SK_OtherSpecialSquare;
+}
+
+
+

As the comment indicates, this code contains a bug. A straightforward and +non-clever way to avoid this is to introduce an explicit SK_LastSquare +entry in the enum when adding the first subclass(es). For example, we could +rewrite the example at the beginning of Concrete Bases and Deeper +Hierarchies as:

+
 enum ShapeKind {
+   SK_Square,
++  SK_SpecialSquare,
++  SK_OtherSpecialSquare,
++  SK_LastSquare,
+   SK_Circle
+ }
+...
+// Square::classof()
+-  static bool classof(const Shape *S) {
+-    return S->getKind() == SK_Square;
+-  }
++  static bool classof(const Shape *S) {
++    return S->getKind() >= SK_Square &&
++           S->getKind() <= SK_LastSquare;
++  }
+
+
+

Then, adding new subclasses is easy:

+
 enum ShapeKind {
+   SK_Square,
+   SK_SpecialSquare,
+   SK_OtherSpecialSquare,
++  SK_SomewhatSpecialSquare,
+   SK_LastSquare,
+   SK_Circle
+ }
+
+
+

Notice that Square::classof does not need to be changed.

+
+
+

The Contract of classof

+

To be more precise, let classof be inside a class C. Then the +contract for classof is “return true if the dynamic type of the +argument is-a C”. As long as your implementation fulfills this +contract, you can tweak and optimize it as much as you want.

+

For example, LLVM-style RTTI can work fine in the presence of +multiple-inheritance by defining an appropriate classof. +An example of this in practice is +Decl vs. +DeclContext +inside Clang. +The Decl hierarchy is done very similarly to the example setup +demonstrated in this tutorial. +The key part is how to then incorporate DeclContext: all that is needed +is in bool DeclContext::classof(const Decl *), which asks the question +“Given a Decl, how can I determine if it is-a DeclContext?”. +It answers this with a simple switch over the set of Decl “kinds”, and +returning true for ones that are known to be DeclContext’s.

+
+
+
+

Rules of Thumb

+
    +
  1. The Kind enum should have one entry per concrete class, ordered +according to a preorder traversal of the inheritance tree.

  2. +
  3. The argument to classof should be a const Base *, where Base +is some ancestor in the inheritance hierarchy. The argument should +never be a derived class or the class itself: the template machinery +for isa<> already handles this case and optimizes it.

  4. +
  5. For each class in the hierarchy that has no children, implement a +classof that checks only against its Kind.

  6. +
  7. For each class in the hierarchy that has children, implement a +classof that checks a range of the first child’s Kind and the +last child’s Kind.

  8. +
+
+
+

RTTI for Open Class Hierarchies

+

Sometimes it is not possible to know all types in a hierarchy ahead of time. +For example, in the shapes hierarchy described above the authors may have +wanted their code to work for user defined shapes too. To support use cases +that require open hierarchies LLVM provides the RTTIRoot and +RTTIExtends utilities.

+

The RTTIRoot class describes an interface for performing RTTI checks. The +RTTIExtends class template provides an implementation of this interface +for classes derived from RTTIRoot. RTTIExtends uses the “Curiously +Recurring Template Idiom”, taking the class being defined as its first +template argument and the parent class as the second argument. Any class that +uses RTTIExtends must define a static char ID member, the address of +which will be used to identify the type.

+

This open-hierarchy RTTI support should only be used if your use case requires +it. Otherwise the standard LLVM RTTI system should be preferred.

+

E.g.

+
class Shape : public RTTIExtends<Shape, RTTIRoot> {
+public:
+  static char ID;
+  virtual double computeArea() = 0;
+};
+
+class Square : public RTTIExtends<Square, Shape> {
+  double SideLength;
+public:
+  static char ID;
+
+  Square(double S) : SideLength(S) {}
+  double computeArea() override;
+};
+
+class Circle : public RTTIExtends<Circle, Shape> {
+  double Radius;
+public:
+  static char ID;
+
+  Circle(double R) : Radius(R) {}
+  double computeArea() override;
+};
+
+char Shape::ID = 0;
+char Square::ID = 0;
+char Circle::ID = 0;
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToSubmitABug.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToSubmitABug.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToSubmitABug.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToSubmitABug.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,342 @@ + + + + + + + + + How to submit an LLVM bug report — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How to submit an LLVM bug report

+
+

Introduction - Got bugs?

+

If you’re working with LLVM and run into a bug, we definitely want to know +about it. This document describes what you can do to increase the odds of +getting it fixed quickly.

+

🔒 If you believe that the bug is security related, please follow How to report a security issue?. 🔒

+

Basically you have to do two things at a minimum. First, decide whether the +bug crashes the compiler or if the compiler is miscompiling the program +(i.e., the compiler successfully produces an executable, but it doesn’t run +right). Based on what type of bug it is, follow the instructions in the +linked section to narrow down the bug so that the person who fixes it will be +able to find the problem more easily.

+

Once you have a reduced test-case, go to the LLVM Bug Tracking System and fill out the form with the +necessary details (note that you don’t need to pick a category, just use +the “new-bugs” category if you’re not sure). The bug description should +contain the following information:

+
    +
  • All information necessary to reproduce the problem.

  • +
  • The reduced test-case that triggers the bug.

  • +
  • The location where you obtained LLVM (if not from our Git +repository).

  • +
+

Thanks for helping us make LLVM better!

+
+
+

Crashing Bugs

+

More often than not, bugs in the compiler cause it to crash—often due to +an assertion failure of some sort. The most important piece of the puzzle +is to figure out if it is crashing in the Clang front-end or if it is one of +the LLVM libraries (e.g. the optimizer or code generator) that has +problems.

+

To figure out which component is crashing (the front-end, middle-end +optimizer, or backend code generator), run the clang command line as you +were when the crash occurred, but with the following extra command line +options:

+
    +
  • -emit-llvm -Xclang -disable-llvm-passes: If clang still crashes when +passed these options (which disable the optimizer and code generator), then +the crash is in the front-end. Jump ahead to front-end bugs.

  • +
  • -emit-llvm: If clang crashes with this option (which disables +the code generator), you found a middle-end optimizer bug. Jump ahead to +middle-end bugs.

  • +
  • Otherwise, you have a backend code generator crash. Jump ahead to code +generator bugs.

  • +
+
+

Front-end bugs

+

On a clang crash, the compiler will dump a preprocessed file and a script +to replay the clang command. For example, you should see something like

+
PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:
+Preprocessed source(s) and associated run script(s) are located at:
+clang: note: diagnostic msg: /tmp/foo-xxxxxx.c
+clang: note: diagnostic msg: /tmp/foo-xxxxxx.sh
+
+
+

The creduce tool helps to +reduce the preprocessed file down to the smallest amount of code that still +replicates the problem. You’re encouraged to use creduce to reduce the code +to make the developers’ lives easier. The +clang/utils/creduce-clang-crash.py script can be used on the files +that clang dumps to help with automating creating a test to check for the +compiler crash.

+

cvise is an alternative to creduce.

+
+
+

Middle-end optimization bugs

+

If you find that a bug crashes in the optimizer, compile your test-case to a +.bc file by passing “-emit-llvm -O1 -Xclang -disable-llvm-passes -c -o +foo.bc”. The -O1 is important because -O0 adds the optnone +function attribute to all functions and many passes don’t run on optnone +functions. Then run:

+
opt -O3 foo.bc -disable-output
+
+
+

If this doesn’t crash, please follow the instructions for a front-end +bug.

+

If this does crash, then you should be able to debug this with the following +bugpoint command:

+
bugpoint foo.bc -O3
+
+
+

Run this, then file a bug with the instructions and reduced .bc +files that bugpoint emits.

+

If bugpoint doesn’t reproduce the crash, llvm-reduce is an alternative +way to reduce LLVM IR. Create a script that repros the crash and run:

+
llvm-reduce --test=path/to/script foo.bc
+
+
+

which should produce reduced IR that reproduces the crash. Be warned that +llvm-reduce is still fairly immature and may crash.

+

If none of the above work, you can get the IR before a crash by running the +opt command with the --print-before-all --print-module-scope flags to +dump the IR before every pass. Be warned that this is very verbose.

+
+
+

Backend code generator bugs

+

If you find a bug that crashes clang in the code generator, compile your +source file to a .bc file by passing “-emit-llvm -c -o foo.bc” to +clang (in addition to the options you already pass). Once you have +foo.bc, one of the following commands should fail:

+
    +
  1. llc foo.bc

  2. +
  3. llc foo.bc -relocation-model=pic

  4. +
  5. llc foo.bc -relocation-model=static

  6. +
+

If none of these crash, please follow the instructions for a front-end +bug. If one of these does crash, you should be able to reduce +this with one of the following bugpoint command lines (use +the one corresponding to the command above that failed):

+
    +
  1. bugpoint -run-llc foo.bc

  2. +
  3. bugpoint -run-llc foo.bc --tool-args -relocation-model=pic

  4. +
  5. bugpoint -run-llc foo.bc --tool-args -relocation-model=static

  6. +
+

Please run this, then file a bug with the instructions and reduced .bc file +that bugpoint emits. If something goes wrong with bugpoint, please submit +the “foo.bc” file and the option that llc crashes with.

+
+
+
+

Miscompilations

+

If clang successfully produces an executable, but that executable doesn’t run +right, this is either a bug in the code or a bug in the compiler. The first +thing to check is to make sure it is not using undefined behavior (e.g. +reading a variable before it is defined). In particular, check to see if the +program is clean under various sanitizers (e.g. clang +-fsanitize=undefined,address) and valgrind. Many +“LLVM bugs” that we have chased down ended up being bugs in the program being +compiled, not LLVM.

+

Once you determine that the program itself is not buggy, you should choose +which code generator you wish to compile the program with (e.g. LLC or the JIT) +and optionally a series of LLVM passes to run. For example:

+
bugpoint -run-llc [... optzn passes ...] file-to-test.bc --args -- [program arguments]
+
+
+

bugpoint will try to narrow down your list of passes to the one pass that +causes an error, and simplify the bitcode file as much as it can to assist +you. It will print a message letting you know how to reproduce the +resulting error.

+

The OptBisect page shows an alternative method for finding +incorrect optimization passes.

+
+
+

Incorrect code generation

+

Similarly to debugging incorrect compilation by mis-behaving passes, you +can debug incorrect code generation by either LLC or the JIT, using +bugpoint. The process bugpoint follows in this case is to try to +narrow the code down to a function that is miscompiled by one or the other +method, but since for correctness, the entire program must be run, +bugpoint will compile the code it deems to not be affected with the C +Backend, and then link in the shared object it generates.

+

To debug the JIT:

+
bugpoint -run-jit -output=[correct output file] [bitcode file]  \
+         --tool-args -- [arguments to pass to lli]              \
+         --args -- [program arguments]
+
+
+

Similarly, to debug the LLC, one would run:

+
bugpoint -run-llc -output=[correct output file] [bitcode file]  \
+         --tool-args -- [arguments to pass to llc]              \
+         --args -- [program arguments]
+
+
+

Special note: if you are debugging MultiSource or SPEC tests that +already exist in the llvm/test hierarchy, there is an easier way to +debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which +will pass the program options specified in the Makefiles:

+
cd llvm/test/../../program
+make bugpoint-jit
+
+
+

At the end of a successful bugpoint run, you will be presented +with two bitcode files: a safe file which can be compiled with the C +backend and the test file which either LLC or the JIT +mis-codegenerates, and thus causes the error.

+

To reproduce the error that bugpoint found, it is sufficient to do +the following:

+
    +
  1. Regenerate the shared object from the safe bitcode file:

    +
    llc -march=c safe.bc -o safe.c
    +gcc -shared safe.c -o safe.so
    +
    +
    +
  2. +
  3. If debugging LLC, compile test bitcode native and link with the shared +object:

    +
    llc test.bc -o test.s
    +gcc test.s safe.so -o test.llc
    +./test.llc [program options]
    +
    +
    +
  4. +
  5. If debugging the JIT, load the shared object and supply the test +bitcode:

    +
    lli -load=safe.so test.bc [program options]
    +
    +
    +
  6. +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToUpdateDebugInfo.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToUpdateDebugInfo.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToUpdateDebugInfo.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToUpdateDebugInfo.html 2021-09-19 16:16:35.000000000 +0000 @@ -0,0 +1,576 @@ + + + + + + + + + How to Update Debug Info: A Guide for LLVM Pass Authors — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How to Update Debug Info: A Guide for LLVM Pass Authors

+ +
+

Introduction

+

Certain kinds of code transformations can inadvertently result in a loss of +debug info, or worse, make debug info misrepresent the state of a program.

+

This document specifies how to correctly update debug info in various kinds of +code transformations, and offers suggestions for how to create targeted debug +info tests for arbitrary transformations.

+

For more on the philosophy behind LLVM debugging information, see +Source Level Debugging with LLVM.

+
+
+

Rules for updating debug locations

+
+

When to preserve an instruction location

+

A transformation should preserve the debug location of an instruction if the +instruction either remains in its basic block, or if its basic block is folded +into a predecessor that branches unconditionally. The APIs to use are +IRBuilder, or Instruction::setDebugLoc.

+

The purpose of this rule is to ensure that common block-local optimizations +preserve the ability to set breakpoints on source locations corresponding to +the instructions they touch. Debugging, crash logs, and SamplePGO accuracy +would be severely impacted if that ability were lost.

+

Examples of transformations that should follow this rule include:

+
    +
  • Instruction scheduling. Block-local instruction reordering should not drop +source locations, even though this may lead to jumpy single-stepping +behavior.

  • +
  • Simple jump threading. For example, if block B1 unconditionally jumps to +B2, and is its unique predecessor, instructions from B2 can be +hoisted into B1. Source locations from B2 should be preserved.

  • +
  • Peephole optimizations that replace or expand an instruction, like (add X +X) => (shl X 1). The location of the shl instruction should be the same +as the location of the add instruction.

  • +
  • Tail duplication. For example, if blocks B1 and B2 both +unconditionally branch to B3 and B3 can be folded into its +predecessors, source locations from B3 should be preserved.

  • +
+

Examples of transformations for which this rule does not apply include:

+
    +
  • LICM. E.g., if an instruction is moved from the loop body to the preheader, +the rule for dropping locations applies.

  • +
+

In addition to the rule above, a transformation should also preserve the debug +location of an instruction that is moved between basic blocks, if the +destination block already contains an instruction with an identical debug +location.

+

Examples of transformations that should follow this rule include:

+
    +
  • Moving instructions between basic blocks. For example, if instruction I1 +in BB1 is moved before I2 in BB2, the source location of I1 +can be preserved if it has the same source location as I2.

  • +
+
+
+

When to merge instruction locations

+

A transformation should merge instruction locations if it replaces multiple +instructions with a single merged instruction, and that merged instruction +does not correspond to any of the original instructions’ locations. The API to +use is Instruction::applyMergedLocation.

+

The purpose of this rule is a) to ensure that the single merged instruction +has a location with an accurate scope attached, and b) to prevent misleading +single-stepping (or breakpoint) behavior. Often, merged instructions are memory +accesses which can trap: having an accurate scope attached greatly assists in +crash triage by identifying the (possibly inlined) function where the bad +memory access occurred. This rule is also meant to assist SamplePGO by banning +scenarios in which a sample of a block containing a merged instruction is +misattributed to a block containing one of the instructions-to-be-merged.

+

Examples of transformations that should follow this rule include:

+
    +
  • Merging identical loads/stores which occur on both sides of a CFG diamond +(see the MergedLoadStoreMotion pass).

  • +
  • Merging identical loop-invariant stores (see the LICM utility +llvm::promoteLoopAccessesToScalars).

  • +
  • Peephole optimizations which combine multiple instructions together, like +(add (mul A B) C) => llvm.fma.f32(A, B, C). Note that the location of +the fma does not exactly correspond to the locations of either the +mul or the add instructions.

  • +
+

Examples of transformations for which this rule does not apply include:

+
    +
  • Block-local peepholes which delete redundant instructions, like +(sext (zext i8 %x to i16) to i32) => (zext i8 %x to i32). The inner +zext is modified but remains in its block, so the rule for +preserving locations should apply.

  • +
  • Converting an if-then-else CFG diamond into a select. Preserving the +debug locations of speculated instructions can make it seem like a condition +is true when it’s not (or vice versa), which leads to a confusing +single-stepping experience. The rule for +dropping locations should apply here.

  • +
  • Hoisting identical instructions which appear in several successor blocks into +a predecessor block (see BranchFolder::HoistCommonCodeInSuccs). In this +case there is no single merged instruction. The rule for +dropping locations applies.

  • +
+
+
+

When to drop an instruction location

+

A transformation should drop debug locations if the rules for +preserving and +merging debug locations do not apply. The API to +use is Instruction::dropLocation().

+

The purpose of this rule is to prevent erratic or misleading single-stepping +behavior in situations in which an instruction has no clear, unambiguous +relationship to a source location.

+

To handle an instruction without a location, the DWARF generator +defaults to allowing the last-set location after a label to cascade forward, or +to setting a line 0 location with viable scope information if no previous +location is available.

+

See the discussion in the section about +merging locations for examples of when the rule for +dropping locations applies.

+
+
+
+

Rules for updating debug values

+
+

Deleting an IR-level Instruction

+

When an Instruction is deleted, its debug uses change to undef. This is +a loss of debug info: the value of one or more source variables becomes +unavailable, starting with the llvm.dbg.value(undef, ...). When there is no +way to reconstitute the value of the lost instruction, this is the best +possible outcome. However, it’s often possible to do better:

+
    +
  • If the dying instruction can be RAUW’d, do so. The +Value::replaceAllUsesWith API transparently updates debug uses of the +dying instruction to point to the replacement value.

  • +
  • If the dying instruction cannot be RAUW’d, call llvm::salvageDebugInfo on +it. This makes a best-effort attempt to rewrite debug uses of the dying +instruction by describing its effect as a DIExpression.

  • +
  • If one of the operands of a dying instruction would become trivially +dead, use llvm::replaceAllDbgUsesWith to rewrite the debug uses of that +operand. Consider the following example function:

  • +
+
define i16 @foo(i16 %a) {
+  %b = sext i16 %a to i32
+  %c = and i32 %b, 15
+  call void @llvm.dbg.value(metadata i32 %c, ...)
+  %d = trunc i32 %c to i16
+  ret i16 %d
+}
+
+
+

Now, here’s what happens after the unnecessary truncation instruction %d is +replaced with a simplified instruction:

+
define i16 @foo(i16 %a) {
+  call void @llvm.dbg.value(metadata i32 undef, ...)
+  %simplified = and i16 %a, 15
+  ret i16 %simplified
+}
+
+
+

Note that after deleting %d, all uses of its operand %c become +trivially dead. The debug use which used to point to %c is now undef, +and debug info is needlessly lost.

+

To solve this problem, do:

+
llvm::replaceAllDbgUsesWith(%c, theSimplifiedAndInstruction, ...)
+
+
+

This results in better debug info because the debug use of %c is preserved:

+
define i16 @foo(i16 %a) {
+  %simplified = and i16 %a, 15
+  call void @llvm.dbg.value(metadata i16 %simplified, ...)
+  ret i16 %simplified
+}
+
+
+

You may have noticed that %simplified is narrower than %c: this is not +a problem, because llvm::replaceAllDbgUsesWith takes care of inserting the +necessary conversion operations into the DIExpressions of updated debug uses.

+
+ +
+
+

How to automatically convert tests into debug info tests

+
+

Mutation testing for IR-level transformations

+

An IR test case for a transformation can, in many cases, be automatically +mutated to test debug info handling within that transformation. This is a +simple way to test for proper debug info handling.

+
+

The debugify utility pass

+

The debugify testing utility is just a pair of passes: debugify and +check-debugify.

+

The first applies synthetic debug information to every instruction of the +module, and the second checks that this DI is still available after an +optimization has occurred, reporting any errors/warnings while doing so.

+

The instructions are assigned sequentially increasing line locations, and are +immediately used by debug value intrinsics everywhere possible.

+

For example, here is a module before:

+
define void @f(i32* %x) {
+entry:
+  %x.addr = alloca i32*, align 8
+  store i32* %x, i32** %x.addr, align 8
+  %0 = load i32*, i32** %x.addr, align 8
+  store i32 10, i32* %0, align 4
+  ret void
+}
+
+
+

and after running opt -debugify:

+
define void @f(i32* %x) !dbg !6 {
+entry:
+  %x.addr = alloca i32*, align 8, !dbg !12
+  call void @llvm.dbg.value(metadata i32** %x.addr, metadata !9, metadata !DIExpression()), !dbg !12
+  store i32* %x, i32** %x.addr, align 8, !dbg !13
+  %0 = load i32*, i32** %x.addr, align 8, !dbg !14
+  call void @llvm.dbg.value(metadata i32* %0, metadata !11, metadata !DIExpression()), !dbg !14
+  store i32 10, i32* %0, align 4, !dbg !15
+  ret void, !dbg !16
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!3, !4}
+!llvm.module.flags = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "debugify-sample.ll", directory: "/")
+!2 = !{}
+!3 = !{i32 5}
+!4 = !{i32 2}
+!5 = !{i32 2, !"Debug Info Version", i32 3}
+!6 = distinct !DISubprogram(name: "f", linkageName: "f", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
+!7 = !DISubroutineType(types: !2)
+!8 = !{!9, !11}
+!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+!10 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned)
+!11 = !DILocalVariable(name: "2", scope: !6, file: !1, line: 3, type: !10)
+!12 = !DILocation(line: 1, column: 1, scope: !6)
+!13 = !DILocation(line: 2, column: 1, scope: !6)
+!14 = !DILocation(line: 3, column: 1, scope: !6)
+!15 = !DILocation(line: 4, column: 1, scope: !6)
+!16 = !DILocation(line: 5, column: 1, scope: !6)
+
+
+
+
+

Using debugify

+

A simple way to use debugify is as follows:

+
$ opt -debugify -pass-to-test -check-debugify sample.ll
+
+
+

This will inject synthetic DI into sample.ll, run the pass-to-test, and +then check for missing DI. The -check-debugify step can of course be +omitted in favor of more customizable FileCheck directives.

+

Some other ways to run debugify are available:

+
# Same as the above example.
+$ opt -enable-debugify -pass-to-test sample.ll
+
+# Suppresses verbose debugify output.
+$ opt -enable-debugify -debugify-quiet -pass-to-test sample.ll
+
+# Prepend -debugify before and append -check-debugify -strip after
+# each pass on the pipeline (similar to -verify-each).
+$ opt -debugify-each -O2 sample.ll
+
+
+

In order for check-debugify to work, the DI must be coming from +debugify. Thus, modules with existing DI will be skipped.

+

debugify can be used to test a backend, e.g.:

+
$ opt -debugify < sample.ll | llc -o -
+
+
+

There is also a MIR-level debugify pass that can be run before each backend +pass, see: +Mutation testing for MIR-level transformations.

+
+
+

debugify in regression tests

+

The output of the debugify pass must be stable enough to use in regression +tests. Changes to this pass are not allowed to break existing tests.

+
+

Note

+

Regression tests must be robust. Avoid hardcoding line/variable numbers in +check lines. In cases where this can’t be avoided (say, if a test wouldn’t +be precise enough), moving the test to its own file is preferred.

+
+
+
+
+

Test original debug info preservation in optimizations

+

In addition to automatically generating debug info, the checks provided by +the debugify utility pass can also be used to test the preservation of +pre-existing debug info metadata. It could be run as follows:

+
# Run the pass by checking original Debug Info preservation.
+$ opt -verify-debuginfo-preserve -pass-to-test sample.ll
+
+# Check the preservation of original Debug Info after each pass.
+$ opt -verify-each-debuginfo-preserve -O2 sample.ll
+
+
+

Furthermore, there is a way to export the issues that have been found into +a JSON file as follows:

+
$ opt -verify-debuginfo-preserve -verify-di-preserve-export=sample.json -pass-to-test sample.ll
+
+
+

and then use the llvm/utils/llvm-original-di-preservation.py script +to generate an HTML page with the issues reported in a more human readable form +as follows:

+
$ llvm-original-di-preservation.py sample.json sample.html
+
+
+

Testing of original debug info preservation can be invoked from front-end level +as follows:

+
# Test each pass.
+$ clang -Xclang -fverify-debuginfo-preserve -g -O2 sample.c
+
+# Test each pass and export the issues report into the JSON file.
+$ clang -Xclang -fverify-debuginfo-preserve -Xclang -fverify-debuginfo-preserve-export=sample.json -g -O2 sample.c
+
+
+

Please note that there are some known false positives for source locations +and debug intrinsic checking; these will be addressed in future work.

+
+
+

Mutation testing for MIR-level transformations

+

A variant of the debugify utility described in +Mutation testing for IR-level transformations can be used +for MIR-level transformations as well: much like the IR-level pass, +mir-debugify assigns sequentially increasing line locations to each +MachineInstr in a Module. The MIR-level mir-check-debugify pass is +similar to the IR-level check-debugify pass.

+

For example, here is a snippet before:

+
name:            test
+body:             |
+  bb.1 (%ir-block.0):
+    %0:_(s32) = IMPLICIT_DEF
+    %1:_(s32) = IMPLICIT_DEF
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_ADD %0, %2
+    %4:_(s32) = G_SUB %3, %1
+
+
+

and after running llc -run-pass=mir-debugify:

+
name:            test
+body:             |
+  bb.0 (%ir-block.0):
+    %0:_(s32) = IMPLICIT_DEF debug-location !12
+    DBG_VALUE %0(s32), $noreg, !9, !DIExpression(), debug-location !12
+    %1:_(s32) = IMPLICIT_DEF debug-location !13
+    DBG_VALUE %1(s32), $noreg, !11, !DIExpression(), debug-location !13
+    %2:_(s32) = G_CONSTANT i32 2, debug-location !14
+    DBG_VALUE %2(s32), $noreg, !9, !DIExpression(), debug-location !14
+    %3:_(s32) = G_ADD %0, %2, debug-location !DILocation(line: 4, column: 1, scope: !6)
+    DBG_VALUE %3(s32), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 4, column: 1, scope: !6)
+    %4:_(s32) = G_SUB %3, %1, debug-location !DILocation(line: 5, column: 1, scope: !6)
+    DBG_VALUE %4(s32), $noreg, !9, !DIExpression(), debug-location !DILocation(line: 5, column: 1, scope: !6)
+
+
+

By default, mir-debugify inserts DBG_VALUE instructions everywhere +it is legal to do so. In particular, every (non-PHI) machine instruction that +defines a register must be followed by a DBG_VALUE use of that def. If +an instruction does not define a register, but can be followed by a debug inst, +MIRDebugify inserts a DBG_VALUE that references a constant. Insertion of +DBG_VALUE’s can be disabled by setting -debugify-level=locations.

+

To run MIRDebugify once, simply insert mir-debugify into your llc +invocation, like:

+
# Before some other pass.
+$ llc -run-pass=mir-debugify,other-pass ...
+
+# After some other pass.
+$ llc -run-pass=other-pass,mir-debugify ...
+
+
+

To run MIRDebugify before each pass in a pipeline, use +-debugify-and-strip-all-safe. This can be combined with -start-before +and -start-after. For example:

+
$ llc -debugify-and-strip-all-safe -run-pass=... <other llc args>
+$ llc -debugify-and-strip-all-safe -O1 <other llc args>
+
+
+

If you want to check it after each pass in a pipeline, use +-debugify-check-and-strip-all-safe. This can also be combined with +-start-before and -start-after. For example:

+
$ llc -debugify-check-and-strip-all-safe -run-pass=... <other llc args>
+$ llc -debugify-check-and-strip-all-safe -O1 <other llc args>
+
+
+

To check all debug info from a test, use mir-check-debugify, like:

+
$ llc -run-pass=mir-debugify,other-pass,mir-check-debugify
+
+
+

To strip out all debug info from a test, use mir-strip-debug, like:

+
$ llc -run-pass=mir-debugify,other-pass,mir-strip-debug
+
+
+

It can be useful to combine mir-debugify, mir-check-debugify and/or +mir-strip-debug to identify backend transformations which break in +the presence of debug info. For example, to run the AArch64 backend tests +with all normal passes “sandwiched” in between MIRDebugify and +MIRStripDebugify mutation passes, run:

+
$ llvm-lit test/CodeGen/AArch64 -Dllc="llc -debugify-and-strip-all-safe"
+
+
+
+ +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToUseAttributes.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToUseAttributes.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToUseAttributes.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToUseAttributes.html 2021-09-19 16:16:36.000000000 +0000 @@ -0,0 +1,212 @@ + + + + + + + + + How To Use Attributes — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Use Attributes

+ +
+

Introduction

+

Attributes in LLVM have changed in some fundamental ways. It was necessary to +do this to support expanding the attributes to encompass more than a handful of +attributes — e.g. command line options. The old way of handling attributes +consisted of representing them as a bit mask of values. This bit mask was +stored in a “list” structure that was reference counted. The advantage of this +was that attributes could be manipulated with ‘or’s and ‘and’s. The +disadvantage of this was that there was limited room for expansion, and +virtually no support for attribute-value pairs other than alignment.

+

In the new scheme, an Attribute object represents a single attribute that’s +uniqued. You use the Attribute::get methods to create a new Attribute +object. An attribute can be a single “enum” value (the enum being the +Attribute::AttrKind enum), a string representing a target-dependent +attribute, or an attribute-value pair. Some examples:

+
    +
  • Target-independent: noinline, zext

  • +
  • Target-dependent: "no-sse", "thumb2"

  • +
  • Attribute-value pair: "cpu" = "cortex-a8", align = 4

  • +
+

Note: for an attribute value pair, we expect a target-dependent attribute to +have a string for the value.

+
+
+

Attribute

+

An Attribute object is designed to be passed around by value.

+

Because attributes are no longer represented as a bit mask, you will need to +convert any code which does treat them as a bit mask to use the new query +methods on the Attribute class.

+
+
+

AttributeList

+

The AttributeList stores a collection of Attribute objects for each kind of +object that may have an attribute associated with it: the function as a whole, +the return type, or the function’s parameters. A function’s attributes are at +index AttributeList::FunctionIndex; the return type’s attributes are at +index AttributeList::ReturnIndex; and the function’s parameters’ attributes +are at indices 1, …, n (where ‘n’ is the number of parameters). Most methods +on the AttributeList class take an index parameter.

+

An AttributeList is also a uniqued and immutable object. You create an +AttributeList through the AttributeList::get methods. You can add and +remove attributes, which result in the creation of a new AttributeList.

+

An AttributeList object is designed to be passed around by value.

+

Note: It is advised that you do not use the AttributeList “introspection” +methods (e.g. Raw, getRawPointer, etc.). These methods break +encapsulation, and may be removed in a future release.

+
+
+

AttrBuilder

+

Lastly, we have a “builder” class to help create the AttributeList object +without having to create several different intermediate uniqued +AttributeList objects. The AttrBuilder class allows you to add and +remove attributes at will. The attributes won’t be uniqued until you call the +appropriate AttributeList::get method.

+

An AttrBuilder object is not designed to be passed around by value. It +should be passed by reference.

+

Note: It is advised that you do not use the AttrBuilder::addRawValue() +method or the AttrBuilder(uint64_t Val) constructor. These are for +backwards compatibility and may be removed in a future release.

+

And that’s basically it! A lot of functionality is hidden behind these classes, +but the interfaces are pretty straightforward.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToUseInstrMappings.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToUseInstrMappings.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/HowToUseInstrMappings.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/HowToUseInstrMappings.html 2021-09-19 16:16:36.000000000 +0000 @@ -0,0 +1,313 @@ + + + + + + + + + How To Use Instruction Mappings — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Use Instruction Mappings

+ +
+

Introduction

+

This document contains information about adding instruction mapping support +for a target. The motivation behind this feature comes from the need to switch +between different instruction formats during various optimizations. One approach +could be to use switch cases which list all the instructions along with formats +they can transition to. However, this approach has a large maintenance overhead +because of the hardcoded instruction names. Also, whenever a new instruction is +added in the .td files, all the relevant switch cases should be modified +accordingly. Instead, the same functionality could be achieved with TableGen and +some support from the .td files for a fraction of the maintenance cost.

+
+
+

InstrMapping Class Overview

+

TableGen uses relationship models to map instructions with each other. These +models are described using InstrMapping class as a base. Each model sets +various fields of the InstrMapping class such that they can uniquely +describe all the instructions using that model. TableGen parses all the relation +models and uses the information to construct relation tables which relate +instructions with each other. These tables are emitted in the +XXXInstrInfo.inc file along with the functions to query them. Following +is the definition of InstrMapping class defined in Target.td file:

+
class InstrMapping {
+  // Used to reduce search space only to the instructions using this
+  // relation model.
+  string FilterClass;
+
+  // List of fields/attributes that should be same for all the instructions in
+  // a row of the relation table. Think of this as a set of properties shared
+  // by all the instructions related by this relationship.
+  list<string> RowFields = [];
+
+  // List of fields/attributes that are same for all the instructions
+  // in a column of the relation table.
+  list<string> ColFields = [];
+
+  // Values for the fields/attributes listed in 'ColFields' corresponding to
+  // the key instruction. This is the instruction that will be transformed
+  // using this relation model.
+  list<string> KeyCol = [];
+
+  // List of values for the fields/attributes listed in 'ColFields', one for
+  // each column in the relation table. These are the instructions a key
+  // instruction will be transformed into.
+  list<list<string> > ValueCols = [];
+}
+
+
+
+

Sample Example

+

Let’s say that we want to have a function +int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense) which +takes a non-predicated instruction and returns its predicated true or false form +depending on some input flag, inPredSense. The first step in the process is +to define a relationship model that relates predicated instructions to their +non-predicated form by assigning appropriate values to the InstrMapping +fields. For this relationship, non-predicated instructions are treated as key +instructions since they are the ones used to query the interface function.

+
def getPredOpcode : InstrMapping {
+  // Choose a FilterClass that is used as a base class for all the
+  // instructions modeling this relationship. This is done to reduce the
+  // search space only to this set of instructions.
+  let FilterClass = "PredRel";
+
+  // Instructions with same values for all the fields in RowFields form a
+  // row in the resulting relation table.
+  // For example, if we want to relate 'ADD' (non-predicated) with 'ADD_Pt'
+  // (predicated true) and 'ADD_Pf' (predicated false), then all 3
+  // instructions need to have same value for BaseOpcode field. It can be any
+  // unique value (Ex: XYZ) and should not be shared with any other
+  // instruction not related to 'add'.
+  let RowFields = ["BaseOpcode"];
+
+  // List of attributes that can be used to define key and column instructions
+  // for a relation. Key instruction is passed as an argument
+  // to the function used for querying relation tables. Column instructions
+  // are the instructions they (key) can transform into.
+  //
+  // Here, we choose 'PredSense' as ColFields since this is the unique
+  // attribute of the key (non-predicated) and column (true/false)
+  // instructions involved in this relationship model.
+  let ColFields = ["PredSense"];
+
+  // The key column contains non-predicated instructions.
+  let KeyCol = ["none"];
+
+  // Two value columns - first column contains instructions with
+  // PredSense=true while second column has instructions with PredSense=false.
+  let ValueCols = [["true"], ["false"]];
+}
+
+
+

TableGen uses the above relationship model to emit a relation table that maps +non-predicated instructions with their predicated forms. It also outputs the +interface function +int getPredOpcode(uint16_t Opcode, enum PredSense inPredSense) to query +the table. Here, the function getPredOpcode takes two arguments, the opcode of the +current instruction and the PredSense of the desired instruction, and returns +the predicated form of the instruction, if found in the relation table. +In order for an instruction to be added into the relation table, it needs +to include relevant information in its definition. For example, consider +the following to be the current definitions of the ADD, ADD_Pt (true) and ADD_Pf (false) +instructions:

+
def ADD : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
+            "$dst = add($a, $b)",
+            [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$a),
+                                           (i32 IntRegs:$b)))]>;
+
+def ADD_Pt : ALU32_rr<(outs IntRegs:$dst),
+                       (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+            "if ($p) $dst = add($a, $b)",
+            []>;
+
+def ADD_Pf : ALU32_rr<(outs IntRegs:$dst),
+                       (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+            "if (!$p) $dst = add($a, $b)",
+            []>;
+
+
+

In this step, we modify these instructions to include the information +required by the relationship model, getPredOpcode, so that they can +be related.

+
def ADD : PredRel, ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$a, IntRegs:$b),
+            "$dst = add($a, $b)",
+            [(set (i32 IntRegs:$dst), (add (i32 IntRegs:$a),
+                                           (i32 IntRegs:$b)))]> {
+  let BaseOpcode = "ADD";
+  let PredSense = "none";
+}
+
+def ADD_Pt : PredRel, ALU32_rr<(outs IntRegs:$dst),
+                       (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+            "if ($p) $dst = add($a, $b)",
+            []> {
+  let BaseOpcode = "ADD";
+  let PredSense = "true";
+}
+
+def ADD_Pf : PredRel, ALU32_rr<(outs IntRegs:$dst),
+                       (ins PredRegs:$p, IntRegs:$a, IntRegs:$b),
+            "if (!$p) $dst = add($a, $b)",
+            []> {
+  let BaseOpcode = "ADD";
+  let PredSense = "false";
+}
+
+
+

Please note that all the above instructions use PredRel as a base class. +This is extremely important since TableGen uses it as a filter for selecting +instructions for the getPredOpcode model. Any instruction not derived from +PredRel is excluded from the analysis. BaseOpcode is another important +field. Since it’s selected as a RowFields of the model, it is required +to have the same value for all 3 instructions in order to be related. Next, +PredSense is used to determine their column positions by comparing its value +with KeyCol and ValueCols. If an instruction sets its PredSense +value to something not used in the relation model, it will not be assigned +a column in the relation table.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/ARM-BE-bitcastfail.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/ARM-BE-bitcastfail.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/ARM-BE-bitcastsuccess.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/ARM-BE-bitcastsuccess.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/ARM-BE-ld1.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/ARM-BE-ld1.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/ARM-BE-ldr.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/ARM-BE-ldr.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/block-extract.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/block-extract.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/buildbot_worker_contact.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/buildbot_worker_contact.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/cycle_pi.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/cycle_pi.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/cycle.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/cycle.png differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/epilogue-vectorization-cfg.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/epilogue-vectorization-cfg.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/gcc-loops.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/gcc-loops.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/LangImpl05-cfg.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/LangImpl05-cfg.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/linpack-pc.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/linpack-pc.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/llvm-exegesis-analysis.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/llvm-exegesis-analysis.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/locstats-compare.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/locstats-compare.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/locstats-draw-plot.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/locstats-draw-plot.png differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-guard.svg llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-guard.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-guard.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-guard.svg 
2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,1079 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-irreducible.svg llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-irreducible.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-irreducible.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-irreducible.svg 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,772 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-merge.svg llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-merge.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-merge.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-merge.svg 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,660 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-nested.svg 
llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-nested.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-nested.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-nested.svg 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,874 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-nonmaximal.svg llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-nonmaximal.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-nonmaximal.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-nonmaximal.svg 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,1280 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-separate.svg llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-separate.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-separate.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-separate.svg 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,690 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-single.svg llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-single.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-single.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-single.svg 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,338 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-terminology-guarded-loop.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-terminology-guarded-loop.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-terminology-initial-loop.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-terminology-initial-loop.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-terminology-rotated-loop.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-terminology-rotated-loop.png differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-terminology.svg llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-terminology.svg --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/loop-terminology.svg 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/loop-terminology.svg 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,2111 @@ + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/MCJIT-creation.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/MCJIT-creation.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/MCJIT-dyld-load.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/MCJIT-dyld-load.png differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/MCJIT-engine-builder.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/MCJIT-engine-builder.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/MCJIT-load-object.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/MCJIT-load-object.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/MCJIT-load.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/MCJIT-load.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/MCJIT-resolve-relocations.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/MCJIT-resolve-relocations.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/Phabricator_premerge_results.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/Phabricator_premerge_results.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/Phabricator_premerge_unit_tests.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/Phabricator_premerge_unit_tests.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/pipeline-overview-customized.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/pipeline-overview-customized.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/pipeline-overview.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/pipeline-overview.png differ Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/pipeline-overview-with-combiners.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/pipeline-overview-with-combiners.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/speculative_load_hardening_microbenchmarks.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/speculative_load_hardening_microbenchmarks.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/testing-pass-level.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/testing-pass-level.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/testing-unit-level.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/testing-unit-level.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/uml_builder_pattern.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/uml_builder_pattern.png differ Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_images/uml_nodes_and_edges.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_images/uml_nodes_and_edges.png differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/InAlloca.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/InAlloca.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/InAlloca.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/InAlloca.html 2021-09-19 16:16:36.000000000 +0000 @@ -0,0 +1,284 @@ + + + + + + + + + Design and Usage of the InAlloca Attribute — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Design and Usage of the InAlloca Attribute

+
+

Introduction

+

The inalloca attribute is designed to allow +taking the address of an aggregate argument that is being passed by +value through memory. Primarily, this feature is required for +compatibility with the Microsoft C++ ABI. Under that ABI, class +instances that are passed by value are constructed directly into +argument stack memory. Prior to the addition of inalloca, calls in LLVM +were indivisible instructions. There was no way to perform intermediate +work, such as object construction, between the first stack adjustment +and the final control transfer. With inalloca, all arguments passed in +memory are modelled as a single alloca, which can be stored to prior to +the call. Unfortunately, this complicated feature comes with a large +set of restrictions designed to bound the lifetime of the argument +memory around the call.

+

For now, it is recommended that frontends and optimizers avoid producing +this construct, primarily because it forces the use of a base pointer. +This feature may grow in the future to allow general mid-level +optimization, but for now, it should be regarded as less efficient than +passing by value with a copy.

+
+
+

Intended Usage

+

The example below is the intended LLVM IR lowering for some C++ code +that passes two default-constructed Foo objects to g in the +32-bit Microsoft C++ ABI.

+
// Foo is non-trivial.
+struct Foo { int a, b; Foo(); ~Foo(); Foo(const Foo &); };
+void g(Foo a, Foo b);
+void f() {
+  g(Foo(), Foo());
+}
+
+
+
%struct.Foo = type { i32, i32 }
+declare void @Foo_ctor(%struct.Foo* %this)
+declare void @Foo_dtor(%struct.Foo* %this)
+declare void @g(<{ %struct.Foo, %struct.Foo }>* inalloca %memargs)
+
+define void @f() {
+entry:
+  %base = call i8* @llvm.stacksave()
+  %memargs = alloca <{ %struct.Foo, %struct.Foo }>
+  %b = getelementptr <{ %struct.Foo, %struct.Foo }>* %memargs, i32 1
+  call void @Foo_ctor(%struct.Foo* %b)
+
+  ; If a's ctor throws, we must destruct b.
+  %a = getelementptr <{ %struct.Foo, %struct.Foo }>* %memargs, i32 0
+  invoke void @Foo_ctor(%struct.Foo* %a)
+      to label %invoke.cont unwind %invoke.unwind
+
+invoke.cont:
+  call void @g(<{ %struct.Foo, %struct.Foo }>* inalloca %memargs)
+  call void @llvm.stackrestore(i8* %base)
+  ...
+
+invoke.unwind:
+  call void @Foo_dtor(%struct.Foo* %b)
+  call void @llvm.stackrestore(i8* %base)
+  ...
+}
+
+
+

To avoid stack leaks, the frontend saves the current stack pointer with +a call to llvm.stacksave. Then, it allocates the +argument stack space with alloca and calls the default constructor. The +default constructor could throw an exception, so the frontend has to +create a landing pad. The frontend has to destroy the already +constructed argument b before restoring the stack pointer. If the +constructor does not unwind, g is called. In the Microsoft C++ ABI, +g will destroy its arguments, and then the stack is restored in +f.

+
+
+

Design Considerations

+
+

Lifetime

+

The biggest design consideration for this feature is object lifetime. +We cannot model the arguments as static allocas in the entry block, +because all calls need to use the memory at the top of the stack to pass +arguments. We cannot vend pointers to that memory at function entry +because after code generation they will alias.

+

The rule against allocas between argument allocations and the call site +avoids this problem, but it creates a cleanup problem. Cleanup and +lifetime are handled explicitly with stack save and restore calls. In +the future, we may want to introduce a new construct such as freea +or afree to make it clear that this stack-adjusting cleanup is less +powerful than a full stack save and restore.

+
+
+

Nested Calls and Copy Elision

+

We also want to be able to support copy elision into these argument +slots. This means we have to support multiple live argument +allocations.

+

Consider the evaluation of:

+
// Foo is non-trivial.
+struct Foo { int a; Foo(); Foo(const Foo &); ~Foo(); };
+Foo bar(Foo b);
+int main() {
+  bar(bar(Foo()));
+}
+
+
+

In this case, we want to be able to elide copies into bar’s argument +slots. That means we need to have more than one set of argument frames +active at the same time. First, we need to allocate the frame for the +outer call so we can pass it in as the hidden struct return pointer to +the middle call. Then we do the same for the middle call, allocating a +frame and passing its address to Foo’s default constructor. By +wrapping the evaluation of the inner bar with stack save and +restore, we can have multiple overlapping active call frames.

+
+
+

Callee-cleanup Calling Conventions

+

Another wrinkle is the existence of callee-cleanup conventions. On +Windows, all methods and many other functions adjust the stack to clear +the memory used to pass their arguments. In some sense, this means that +the allocas are automatically cleared by the call. However, LLVM +instead models this as a write of undef to all of the inalloca values +passed to the call instead of a stack adjustment. Frontends should +still restore the stack pointer to avoid a stack leak.

+
+
+

Exceptions

+

There is also the possibility of an exception. If argument evaluation +or copy construction throws an exception, the landing pad must do +cleanup, which includes adjusting the stack pointer to avoid a stack +leak. This means the cleanup of the stack memory cannot be tied to the +call itself. There needs to be a separate IR-level instruction that can +perform independent cleanup of arguments.

+
+
+

Efficiency

+

Eventually, it should be possible to generate efficient code for this +construct. In particular, using inalloca should not require a base +pointer. If the backend can prove that all points in the CFG only have +one possible stack level, then it can address the stack directly from +the stack pointer. While this is not yet implemented, the plan is that +the inalloca attribute should not change much, but the frontend IR +generation recommendations may change.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/index.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,196 @@ + + + + + + + + + About — LLVM 13 documentation + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

About

+
+

Warning

+

If you are using a released version of LLVM, see the download page to find your documentation.

+
+

The LLVM compiler infrastructure supports a wide range of projects, from +industrial strength compilers to specialized JIT applications to small +research projects.

+

Similarly, documentation is broken down into several high-level groupings +targeted at different audiences:

+
+
+

LLVM Design & Overview

+

Several introductory papers and presentations.

+
+
+
+
Introduction to the LLVM Compiler

Presentation providing a user’s introduction to LLVM.

+
+
Intro to LLVM

A chapter from the book “The Architecture of Open Source Applications” that +describes high-level design decisions that shaped LLVM.

+
+
LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation

Design overview.

+
+
LLVM: An Infrastructure for Multi-Stage Optimization

More details (quite old now).

+
+
+
+
+

Documentation

+

Getting Started, How-tos, Developer Guides, and Tutorials.

+
+
+
+
Getting Started/Tutorials

For those new to the LLVM system.

+
+
User Guides

User guides and How-tos.

+
+
Reference

LLVM and API reference documentation.

+
+
+
+
+

Community

+

LLVM welcomes contributions of all kinds. To learn more, see the following articles:

+
+
+ +
+
+

Indices and tables

+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/JITLink.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/JITLink.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/JITLink.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/JITLink.html 2021-09-19 16:16:36.000000000 +0000 @@ -0,0 +1,1177 @@ + + + + + + + + + JITLink and ORC’s ObjectLinkingLayer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ + + + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LangRef.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LangRef.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LangRef.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LangRef.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,23565 @@ + + + + + + + + + LLVM Language Reference Manual — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Language Reference Manual

+
+ +
+
+

Abstract

+

This document is a reference manual for the LLVM assembly language. LLVM +is a Static Single Assignment (SSA) based representation that provides +type safety, low-level operations, flexibility, and the capability of +representing ‘all’ high-level languages cleanly. It is the common code +representation used throughout all phases of the LLVM compilation +strategy.

+
+
+

Introduction

+

The LLVM code representation is designed to be used in three different +forms: as an in-memory compiler IR, as an on-disk bitcode representation +(suitable for fast loading by a Just-In-Time compiler), and as a human +readable assembly language representation. This allows LLVM to provide a +powerful intermediate representation for efficient compiler +transformations and analysis, while providing a natural means to debug +and visualize the transformations. The three different forms of LLVM are +all equivalent. This document describes the human readable +representation and notation.

+

The LLVM representation aims to be light-weight and low-level while +being expressive, typed, and extensible at the same time. It aims to be +a “universal IR” of sorts, by being at a low enough level that +high-level ideas may be cleanly mapped to it (similar to how +microprocessors are “universal IR’s”, allowing many source languages to +be mapped to them). By providing type information, LLVM can be used as +the target of optimizations: for example, through pointer analysis, it +can be proven that a C automatic variable is never accessed outside of +the current function, allowing it to be promoted to a simple SSA value +instead of a memory location.

+
+

Well-Formedness

+

It is important to note that this document describes ‘well formed’ LLVM +assembly language. There is a difference between what the parser accepts +and what is considered ‘well formed’. For example, the following +instruction is syntactically okay, but not well formed:

+
%x = add i32 1, %x
+
+
+

because the definition of %x does not dominate all of its uses. The +LLVM infrastructure provides a verification pass that may be used to +verify that an LLVM module is well formed. This pass is automatically +run by the parser after parsing input assembly and by the optimizer +before it outputs bitcode. The violations pointed out by the verifier +pass indicate bugs in transformation passes or input to the parser.

+
+
+
+

Identifiers

+

LLVM identifiers come in two basic types: global and local. Global +identifiers (functions, global variables) begin with the '@' +character. Local identifiers (register names, types) begin with the +'%' character. Additionally, there are three different formats for +identifiers, for different purposes:

+
    +
  1. Named values are represented as a string of characters with their +prefix. For example, %foo, @DivisionByZero, +%a.really.long.identifier. The actual regular expression used is +‘[%@][-a-zA-Z$._][-a-zA-Z$._0-9]*’. Identifiers that require other +characters in their names can be surrounded with quotes. Special +characters may be escaped using "\xx" where xx is the ASCII +code for the character in hexadecimal. In this way, any character can +be used in a name value, even quotes themselves. The "\01" prefix +can be used on global values to suppress mangling.

  2. +
  3. Unnamed values are represented as an unsigned numeric value with +their prefix. For example, %12, @2, %44.

  4. +
  5. Constants, which are described in the section Constants below.

  6. +
+

LLVM requires that values start with a prefix for two reasons: Compilers +don’t need to worry about name clashes with reserved words, and the set +of reserved words may be expanded in the future without penalty. +Additionally, unnamed identifiers allow a compiler to quickly come up +with a temporary variable without having to avoid symbol table +conflicts.

+

Reserved words in LLVM are very similar to reserved words in other +languages. There are keywords for different opcodes (‘add’, +‘bitcast’, ‘ret’, etc…), for primitive type names (‘void’, +‘i32’, etc…), and others. These reserved words cannot conflict +with variable names, because none of them start with a prefix character +('%' or '@').

+

Here is an example of LLVM code to multiply the integer variable +‘%X’ by 8:

+

The easy way:

+
%result = mul i32 %X, 8
+
+
+

After strength reduction:

+
%result = shl i32 %X, 3
+
+
+

And the hard way:

+
%0 = add i32 %X, %X           ; yields i32:%0
+%1 = add i32 %0, %0           ; yields i32:%1
+%result = add i32 %1, %1
+
+
+

This last way of multiplying %X by 8 illustrates several important +lexical features of LLVM:

+
    +
  1. Comments are delimited with a ‘;’ and go until the end of line.

  2. +
  3. Unnamed temporaries are created when the result of a computation is +not assigned to a named value.

  4. +
  5. Unnamed temporaries are numbered sequentially (using a per-function +incrementing counter, starting with 0). Note that basic blocks and unnamed +function parameters are included in this numbering. For example, if the +entry basic block is not given a label name and all function parameters are +named, then it will get number 0.

  6. +
+

It also shows a convention that we follow in this document. When +demonstrating instructions, we will follow an instruction with a comment +that defines the type and name of value produced.

+
+
+

High Level Structure

+
+

Module Structure

+

LLVM programs are composed of Module’s, each of which is a +translation unit of the input programs. Each module consists of +functions, global variables, and symbol table entries. Modules may be +combined together with the LLVM linker, which merges function (and +global variable) definitions, resolves forward declarations, and merges +symbol table entries. Here is an example of the “hello world” module:

+
; Declare the string constant as a global constant.
+@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00"
+
+; External declaration of the puts function
+declare i32 @puts(i8* nocapture) nounwind
+
+; Definition of main function
+define i32 @main() {   ; i32()*
+  ; Convert [13 x i8]* to i8*...
+  %cast210 = getelementptr [13 x i8], [13 x i8]* @.str, i64 0, i64 0
+
+  ; Call puts function to write out the string to stdout.
+  call i32 @puts(i8* %cast210)
+  ret i32 0
+}
+
+; Named metadata
+!0 = !{i32 42, null, !"string"}
+!foo = !{!0}
+
+
+

This example is made up of a global variable named +“.str”, an external declaration of the “puts” function, a +function definition for “main” and +named metadata “foo”.

+

In general, a module is made up of a list of global values (where both +functions and global variables are global values). Global values are +represented by a pointer to a memory location (in this case, a pointer +to an array of char, and a pointer to a function), and have one of the +following linkage types.

+
+
+

Linkage Types

+

All Global Variables and Functions have one of the following types of +linkage:

+
+
private

Global values with “private” linkage are only directly +accessible by objects in the current module. In particular, linking +code into a module with a private global value may cause the +private to be renamed as necessary to avoid collisions. Because the +symbol is private to the module, all references can be updated. This +doesn’t show up in any symbol table in the object file.

+
+
internal

Similar to private, but the value shows as a local symbol +(STB_LOCAL in the case of ELF) in the object file. This +corresponds to the notion of the ‘static’ keyword in C.

+
+
available_externally

Globals with “available_externally” linkage are never emitted into +the object file corresponding to the LLVM module. From the linker’s +perspective, an available_externally global is equivalent to +an external declaration. They exist to allow inlining and other +optimizations to take place given knowledge of the definition of the +global, which is known to be somewhere outside the module. Globals +with available_externally linkage are allowed to be discarded at +will, and allow inlining and other optimizations. This linkage type is +only allowed on definitions, not declarations.

+
+
linkonce

Globals with “linkonce” linkage are merged with other globals of +the same name when linkage occurs. This can be used to implement +some forms of inline functions, templates, or other code which must +be generated in each translation unit that uses it, but where the +body may be overridden with a more definitive definition later. +Unreferenced linkonce globals are allowed to be discarded. Note +that linkonce linkage does not actually allow the optimizer to +inline the body of this function into callers because it doesn’t +know if this definition of the function is the definitive definition +within the program or whether it will be overridden by a stronger +definition. To enable inlining and other optimizations, use +“linkonce_odr” linkage.

+
+
weak

“weak” linkage has the same merging semantics as linkonce +linkage, except that unreferenced globals with weak linkage may +not be discarded. This is used for globals that are declared “weak” +in C source code.

+
+
common

“common” linkage is most similar to “weak” linkage, but it +is used for tentative definitions in C, such as “int X;” at +global scope. Symbols with “common” linkage are merged in the +same way as weak symbols, and they may not be deleted if +unreferenced. common symbols may not have an explicit section, +must have a zero initializer, and may not be marked +‘constant’. Functions and aliases may not have +common linkage.

+
+
+
+
appending

“appending” linkage may only be applied to global variables of +pointer to array type. When two global variables with appending +linkage are linked together, the two global arrays are appended +together. This is the LLVM, typesafe, equivalent of having the +system linker append together “sections” with identical names when +.o files are linked.

+

Unfortunately this doesn’t correspond to any feature in .o files, so it +can only be used for variables like llvm.global_ctors which llvm +interprets specially.

+
+
extern_weak

The semantics of this linkage follow the ELF object file model: the +symbol is weak until linked; if not linked, the symbol becomes null +instead of being an undefined reference.

+
+
linkonce_odr, weak_odr

Some languages allow differing globals to be merged, such as two +functions with different semantics. Other languages, such as +C++, ensure that only equivalent globals are ever merged (the +“one definition rule” — “ODR”). Such languages can use the +linkonce_odr and weak_odr linkage types to indicate that the +global will only be merged with equivalent globals. These linkage +types are otherwise the same as their non-odr versions.

+
+
external

If none of the above identifiers are used, the global is externally +visible, meaning that it participates in linkage and can be used to +resolve external symbol references.

+
+
+

It is illegal for a global variable or function declaration to have any +linkage type other than external or extern_weak.

+
+
+

Calling Conventions

+

LLVM functions, calls and +invokes can all have an optional calling convention +specified for the call. The calling convention of any pair of dynamic +caller/callee must match, or the behavior of the program is undefined. +The following calling conventions are supported by LLVM, and more may be +added in the future:

+
+
“ccc” - The C calling convention

This calling convention (the default if no other calling convention +is specified) matches the target C calling conventions. This calling +convention supports varargs function calls and tolerates some +mismatch in the declared prototype and implemented declaration of +the function (as does normal C).

+
+
“fastcc” - The fast calling convention

This calling convention attempts to make calls as fast as possible +(e.g. by passing things in registers). This calling convention +allows the target to use whatever tricks it wants to produce fast +code for the target, without having to conform to an externally +specified ABI (Application Binary Interface). Tail calls can only +be optimized when this, the tailcc, the GHC or the HiPE convention is +used. This calling convention does not +support varargs and requires the prototype of all callees to exactly +match the prototype of the function definition.

+
+
“coldcc” - The cold calling convention

This calling convention attempts to make code in the caller as +efficient as possible under the assumption that the call is not +commonly executed. As such, these calls often preserve all registers +so that the call does not break any live ranges in the caller side. +This calling convention does not support varargs and requires the +prototype of all callees to exactly match the prototype of the +function definition. Furthermore the inliner doesn’t consider such function +calls for inlining.

+
+
“cc 10” - GHC convention

This calling convention has been implemented specifically for use by +the Glasgow Haskell Compiler (GHC). +It passes everything in registers, going to extremes to achieve this +by disabling callee save registers. This calling convention should +not be used lightly but only for specific situations such as an +alternative to the register pinning performance technique often +used when implementing functional programming languages. At the +moment only X86 supports this convention and it has the following +limitations:

+
    +
  • On X86-32 only supports up to 4 bit type parameters. No +floating-point types are supported.

  • +
  • On X86-64 only supports up to 10 bit type parameters and 6 +floating-point parameters.

  • +
+

This calling convention supports tail call +optimization but requires that both the +caller and callee use it.

+
+
“cc 11” - The HiPE calling convention

This calling convention has been implemented specifically for use by +the High-Performance Erlang +(HiPE) compiler, the +native code compiler of Ericsson’s Open Source Erlang/OTP +system. It uses more +registers for argument passing than the ordinary C calling +convention and defines no callee-saved registers. The calling +convention properly supports tail call +optimization but requires that both the +caller and the callee use it. It uses a register pinning +mechanism, similar to GHC’s convention, for keeping frequently +accessed runtime components pinned to specific hardware registers. +At the moment only X86 supports this convention (both 32 and 64 +bit).

+
+
“webkit_jscc” - WebKit’s JavaScript calling convention

This calling convention has been implemented for WebKit FTL JIT. It passes arguments on the +stack right to left (as cdecl does), and returns a value in the +platform’s customary return register.

+
+
“anyregcc” - Dynamic calling convention for code patching

This is a special convention that supports patching an arbitrary code +sequence in place of a call site. This convention forces the call +arguments into registers but allows them to be dynamically +allocated. This can currently only be used with calls to +llvm.experimental.patchpoint because only this intrinsic records +the location of its arguments in a side table. See Stack maps and patch points in LLVM.

+
+
“preserve_mostcc” - The PreserveMost calling convention

This calling convention attempts to make the code in the caller as +unintrusive as possible. This convention behaves identically to the C +calling convention on how arguments and return values are passed, but it +uses a different set of caller/callee-saved registers. This alleviates the +burden of saving and recovering a large register set before and after the +call in the caller. If the arguments are passed in callee-saved registers, +then they will be preserved by the callee across the call. This doesn’t +apply for values returned in callee-saved registers.

+
    +
  • On X86-64 the callee preserves all general purpose registers, except for +R11. R11 can be used as a scratch register. Floating-point registers +(XMMs/YMMs) are not preserved and need to be saved by the caller.

  • +
+

The idea behind this convention is to support calls to runtime functions +that have a hot path and a cold path. The hot path is usually a small piece +of code that doesn’t use many registers. The cold path might need to call out to +another function and therefore only needs to preserve the caller-saved +registers, which haven’t already been saved by the caller. The +PreserveMost calling convention is very similar to the cold calling +convention in terms of caller/callee-saved registers, but they are used for +different types of function calls. coldcc is for function calls that are +rarely executed, whereas preserve_mostcc function calls are intended to be +on the hot path and definitely executed a lot. Furthermore preserve_mostcc +doesn’t prevent the inliner from inlining the function call.

+

This calling convention will be used by a future version of the ObjectiveC +runtime and should therefore still be considered experimental at this time. +Although this convention was created to optimize certain runtime calls to +the ObjectiveC runtime, it is not limited to this runtime and might be used +by other runtimes in the future too. The current implementation only +supports X86-64, but the intention is to support more architectures in the +future.

+
+
“preserve_allcc” - The PreserveAll calling convention

This calling convention attempts to make the code in the caller even less +intrusive than the PreserveMost calling convention. This calling +convention also behaves identically to the C calling convention on how +arguments and return values are passed, but it uses a different set of +caller/callee-saved registers. This removes the burden of saving and +recovering a large register set before and after the call in the caller. If +the arguments are passed in callee-saved registers, then they will be +preserved by the callee across the call. This doesn’t apply for values +returned in callee-saved registers.

+
    +
  • On X86-64 the callee preserves all general purpose registers, except for +R11. R11 can be used as a scratch register. Furthermore it also preserves +all floating-point registers (XMMs/YMMs).

  • +
+

The idea behind this convention is to support calls to runtime functions +that don’t need to call out to any other functions.

+

This calling convention, like the PreserveMost calling convention, will be +used by a future version of the ObjectiveC runtime and should be considered +experimental at this time.

+
+
“cxx_fast_tlscc” - The CXX_FAST_TLS calling convention for access functions

Clang generates an access function to access C++-style TLS. The access +function generally has an entry block, an exit block and an initialization +block that is run at the first time. The entry and exit blocks can access +a few TLS IR variables, each access will be lowered to a platform-specific +sequence.

+

This calling convention aims to minimize overhead in the caller by +preserving as many registers as possible (all the registers that are +preserved on the fast path, composed of the entry and exit blocks).

+

This calling convention behaves identically to the C calling convention on +how arguments and return values are passed, but it uses a different set of +caller/callee-saved registers.

+

Given that each platform has its own lowering sequence, hence its own set +of preserved registers, we can’t use the existing PreserveMost.

+
    +
  • On X86-64 the callee preserves all general purpose registers, except for +RDI and RAX.

  • +
+
+
“tailcc” - Tail callable calling convention

This calling convention ensures that calls in tail position will always be +tail call optimized. This calling convention is equivalent to fastcc, +except for an additional guarantee that tail calls will be produced +whenever possible. Tail calls can only be optimized when this, the fastcc, +the GHC or the HiPE convention is used. This +calling convention does not support varargs and requires the prototype of +all callees to exactly match the prototype of the function definition.

+
+
“swiftcc” - This calling convention is used for the Swift language.
    +
  • On X86-64 RCX and R8 are available for additional integer returns, and +XMM2 and XMM3 are available for additional FP/vector returns.

  • +
  • On iOS platforms, we use AAPCS-VFP calling convention.

  • +
+
+
swifttailcc

This calling convention is like swiftcc in most respects, but also the +callee pops the argument area of the stack so that mandatory tail calls are +possible as in tailcc.

+
+
“cfguard_checkcc” - Windows Control Flow Guard (Check mechanism)

This calling convention is used for the Control Flow Guard check function, +calls to which can be inserted before indirect calls to check that the call +target is a valid function address. The check function has no return value, +but it will trigger an OS-level error if the address is not a valid target. +The set of registers preserved by the check function, and the register +containing the target address are architecture-specific.

+
    +
  • On X86 the target address is passed in ECX.

  • +
  • On ARM the target address is passed in R0.

  • +
  • On AArch64 the target address is passed in X15.

  • +
+
+
“cc <n>” - Numbered convention

Any calling convention may be specified by number, allowing +target-specific calling conventions to be used. Target specific +calling conventions start at 64.

+
+
+

More calling conventions can be added/defined on an as-needed basis, to +support Pascal conventions or any other well-known target-independent +convention.

+
+
+

Visibility Styles

+

All Global Variables and Functions have one of the following visibility +styles:

+
+
“default” - Default style

On targets that use the ELF object file format, default visibility +means that the declaration is visible to other modules and, in +shared libraries, means that the declared entity may be overridden. +On Darwin, default visibility means that the declaration is visible +to other modules. Default visibility corresponds to “external +linkage” in the language.

+
+
“hidden” - Hidden style

Two declarations of an object with hidden visibility refer to the +same object if they are in the same shared object. Usually, hidden +visibility indicates that the symbol will not be placed into the +dynamic symbol table, so no other module (executable or shared +library) can reference it directly.

+
+
“protected” - Protected style

On ELF, protected visibility indicates that the symbol will be +placed in the dynamic symbol table, but that references within the +defining module will bind to the local symbol. That is, the symbol +cannot be overridden by another module.

+
+
+

A symbol with internal or private linkage must have default +visibility.

+
+
+

DLL Storage Classes

+

All Global Variables, Functions and Aliases can have one of the following +DLL storage classes:

+
+
dllimport

“dllimport” causes the compiler to reference a function or variable via +a global pointer to a pointer that is set up by the DLL exporting the +symbol. On Microsoft Windows targets, the pointer name is formed by +combining __imp_ and the function or variable name.

+
+
dllexport

“dllexport” causes the compiler to provide a global pointer to a pointer +in a DLL, so that it can be referenced with the dllimport attribute. On +Microsoft Windows targets, the pointer name is formed by combining +__imp_ and the function or variable name. Since this storage class +exists for defining a DLL interface, the compiler, assembler and linker know +it is externally referenced and must refrain from deleting the symbol.

+
+
+
+
+

Thread Local Storage Models

+

A variable may be defined as thread_local, which means that it will +not be shared by threads (each thread will have a separate copy of the +variable). Not all targets support thread-local variables. Optionally, a +TLS model may be specified:

+
+
localdynamic

For variables that are only used within the current shared library.

+
+
initialexec

For variables in modules that will not be loaded dynamically.

+
+
localexec

For variables defined in the executable and only used within it.

+
+
+

If no explicit model is given, the “general dynamic” model is used.

+

The models correspond to the ELF TLS models; see ELF Handling For +Thread-Local Storage for +more information on the circumstances under which the different models may +be used. The target may choose a different TLS model if the specified +model is not supported, or if a better choice of model can be made.

+

A model can also be specified in an alias, but then it only governs how +the alias is accessed. It will not have any effect in the aliasee.

+

For platforms without linker support of ELF TLS model, the -femulated-tls +flag can be used to generate GCC compatible emulated TLS code.

+
+
+

Runtime Preemption Specifiers

+

Global variables, functions and aliases may have an optional runtime preemption +specifier. If a preemption specifier isn’t given explicitly, then a +symbol is assumed to be dso_preemptable.

+
+
dso_preemptable

Indicates that the function or variable may be replaced by a symbol from +outside the linkage unit at runtime.

+
+
dso_local

The compiler may assume that a function or variable marked as dso_local +will resolve to a symbol within the same linkage unit. Direct access will +be generated even if the definition is not within this compilation unit.

+
+
+
+
+

Structure Types

+

LLVM IR allows you to specify both “identified” and “literal” structure +types. Literal types are uniqued structurally, but identified types +are never uniqued. An opaque structural type can also be used +to forward declare a type that is not yet available.

+

An example of an identified structure specification is:

+
%mytype = type { %mytype*, i32 }
+
+
+

Prior to the LLVM 3.0 release, identified types were structurally uniqued. Only +literal types are uniqued in recent versions of LLVM.

+
+
+

Non-Integral Pointer Type

+

Note: non-integral pointer types are a work in progress, and they should be +considered experimental at this time.

+

LLVM IR optionally allows the frontend to denote pointers in certain address +spaces as “non-integral” via the datalayout string. +Non-integral pointer types represent pointers that have an unspecified bitwise +representation; that is, the integral representation may be target dependent or +unstable (not backed by a fixed integer).

+

inttoptr and ptrtoint instructions have the same semantics as for +integral (i.e. normal) pointers in that they convert integers to and from +corresponding pointer types, but there are additional implications to be +aware of. Because the bit-representation of a non-integral pointer may +not be stable, two identical casts of the same operand may or may not +return the same value. Said differently, the conversion to or from the +non-integral type depends on environmental state in an implementation +defined manner.

+

If the frontend wishes to observe a particular value following a cast, the +generated IR must fence with the underlying environment in an implementation +defined manner. (In practice, this tends to require noinline routines for +such operations.)

+

From the perspective of the optimizer, inttoptr and ptrtoint for +non-integral types are analogous to ones on integral types with one +key exception: the optimizer may not, in general, insert new dynamic +occurrences of such casts. If a new cast is inserted, the optimizer would +need to either ensure that a) all possible values are valid, or b) +appropriate fencing is inserted. Since the appropriate fencing is +implementation defined, the optimizer can’t do the latter. The former is +challenging as many commonly expected properties, such as +ptrtoint(v)-ptrtoint(v) == 0, don’t hold for non-integral types.

+
+
+

Global Variables

+

Global variables define regions of memory allocated at compilation time +instead of run-time.

+

Global variable definitions must be initialized.

+

Global variables in other translation units can also be declared, in which +case they don’t have an initializer.

+

Global variables can optionally specify a linkage type.

+

Either global variable definitions or declarations may have an explicit section +to be placed in and may have an optional explicit alignment specified. If there +is a mismatch between the explicit or inferred section information for the +variable declaration and its definition the resulting behavior is undefined.

+

A variable may be defined as a global constant, which indicates that +the contents of the variable will never be modified (enabling better +optimization, allowing the global data to be placed in the read-only +section of an executable, etc). Note that variables that need runtime +initialization cannot be marked constant as there is a store to the +variable.

+

LLVM explicitly allows declarations of global variables to be marked +constant, even if the final definition of the global is not. This +capability can be used to enable slightly better optimization of the +program, but requires the language definition to guarantee that +optimizations based on the ‘constantness’ are valid for the translation +units that do not include the definition.

+

As SSA values, global variables define pointer values that are in scope +(i.e. they dominate) all basic blocks in the program. Global variables +always define a pointer to their “content” type because they describe a +region of memory, and all memory objects in LLVM are accessed through +pointers.

+

Global variables can be marked with unnamed_addr which indicates +that the address is not significant, only the content. Constants marked +like this can be merged with other constants if they have the same +initializer. Note that a constant with significant address can be +merged with an unnamed_addr constant, the result being a constant +whose address is significant.

+

If the local_unnamed_addr attribute is given, the address is known to +not be significant within the module.

+

A global variable may be declared to reside in a target-specific +numbered address space. For targets that support them, address spaces +may affect how optimizations are performed and/or what target +instructions are used to access the variable. The default address space +is zero. The address space qualifier must precede any other attributes.

+

LLVM allows an explicit section to be specified for globals. If the +target supports it, it will emit globals to the section specified. +Additionally, the global can be placed in a comdat if the target has the necessary +support.

+

External declarations may have an explicit section specified. Section +information is retained in LLVM IR for targets that make use of this +information. Attaching section information to an external declaration is an +assertion that its definition is located in the specified section. If the +definition is located in a different section, the behavior is undefined.

+

By default, global initializers are optimized by assuming that global +variables defined within the module are not modified from their +initial values before the start of the global initializer. This is +true even for variables potentially accessible from outside the +module, including those with external linkage or appearing in +@llvm.used or dllexported variables. This assumption may be suppressed +by marking the variable with externally_initialized.

+

An explicit alignment may be specified for a global, which must be a +power of 2. If not present, or if the alignment is set to zero, the +alignment of the global is set by the target to whatever it feels +convenient. If an explicit alignment is specified, the global is forced +to have exactly that alignment. Targets and optimizers are not allowed +to over-align the global if the global has an assigned section. In this +case, the extra alignment could be observable: for example, code could +assume that the globals are densely packed in their section and try to +iterate over them as an array, alignment padding would break this +iteration. The maximum alignment is 1 << 29.

+

For global variable declarations, as well as definitions that may be +replaced at link time (linkonce, weak, extern_weak and common +linkage types), LLVM makes no assumptions about the allocation size of the +variables, except that they may not overlap. The alignment of a global variable +declaration or replaceable definition must not be greater than the alignment of +the definition it resolves to.

+

Globals can also have a DLL storage class, +an optional runtime preemption specifier, +optional global attributes and +an optional list of attached metadata.

+

Variables and aliases can have a +Thread Local Storage Model.

+

Scalable vectors cannot be global variables or members of +arrays because their size is unknown at compile time. They are allowed in +structs to facilitate intrinsics returning multiple values. Structs containing +scalable vectors cannot be used in loads, stores, allocas, or GEPs.

+

Syntax:

+
@<GlobalVarName> = [Linkage] [PreemptionSpecifier] [Visibility]
+                   [DLLStorageClass] [ThreadLocal]
+                   [(unnamed_addr|local_unnamed_addr)] [AddrSpace]
+                   [ExternallyInitialized]
+                   <global | constant> <Type> [<InitializerConstant>]
+                   [, section "name"] [, comdat [($name)]]
+                   [, align <Alignment>] (, !name !N)*
+
+
+

For example, the following defines a global in a numbered address space +with an initializer, section, and alignment:

+
@G = addrspace(5) constant float 1.0, section "foo", align 4
+
+
+

The following example just declares a global variable

+
@G = external global i32
+
+
+

The following example defines a thread-local global with the +initialexec TLS model:

+
@G = thread_local(initialexec) global i32 0, align 4
+
+
+
+
+

Functions

+

LLVM function definitions consist of the “define” keyword, an +optional linkage type, an optional runtime preemption +specifier, an optional visibility +style, an optional DLL storage class, +an optional calling convention, +an optional unnamed_addr attribute, a return type, an optional +parameter attribute for the return type, a function +name, a (possibly empty) argument list (each with optional parameter +attributes), optional function attributes, +an optional address space, an optional section, an optional alignment, +an optional comdat, +an optional garbage collector name, an optional prefix, +an optional prologue, +an optional personality, +an optional list of attached metadata, +an opening curly brace, a list of basic blocks, and a closing curly brace.

+

LLVM function declarations consist of the “declare” keyword, an +optional linkage type, an optional visibility style, an optional DLL storage class, an +optional calling convention, an optional unnamed_addr +or local_unnamed_addr attribute, an optional address space, a return type, +an optional parameter attribute for the return type, a function name, a possibly +empty list of arguments, an optional alignment, an optional garbage +collector name, an optional prefix, and an optional +prologue.

+

A function definition contains a list of basic blocks, forming the CFG (Control +Flow Graph) for the function. Each basic block may optionally start with a label +(giving the basic block a symbol table entry), contains a list of instructions, +and ends with a terminator instruction (such as a branch or +function return). If an explicit label name is not provided, a block is assigned +an implicit numbered label, using the next value from the same counter as used +for unnamed temporaries (see above). For example, if a +function entry block does not have an explicit label, it will be assigned label +“%0”, then the first unnamed temporary in that block will be “%1”, etc. If a +numeric label is explicitly specified, it must match the numeric label that +would be used implicitly.

+

The first basic block in a function is special in two ways: it is +immediately executed on entrance to the function, and it is not allowed +to have predecessor basic blocks (i.e. there can not be any branches to +the entry block of a function). Because the block can have no +predecessors, it also cannot have any PHI nodes.

+

LLVM allows an explicit section to be specified for functions. If the +target supports it, it will emit functions to the section specified. +Additionally, the function can be placed in a COMDAT.

+

An explicit alignment may be specified for a function. If not present, +or if the alignment is set to zero, the alignment of the function is set +by the target to whatever it feels convenient. If an explicit alignment +is specified, the function is forced to have at least that much +alignment. All alignments must be a power of 2.

+

If the unnamed_addr attribute is given, the address is known to not +be significant and two identical functions can be merged.

+

If the local_unnamed_addr attribute is given, the address is known to +not be significant within the module.

+

If an explicit address space is not given, it will default to the program +address space from the datalayout string.

+

Syntax:

+
define [linkage] [PreemptionSpecifier] [visibility] [DLLStorageClass]
+       [cconv] [ret attrs]
+       <ResultType> @<FunctionName> ([argument list])
+       [(unnamed_addr|local_unnamed_addr)] [AddrSpace] [fn Attrs]
+       [section "name"] [comdat [($name)]] [align N] [gc] [prefix Constant]
+       [prologue Constant] [personality Constant] (!name !N)* { ... }
+
+
+

The argument list is a comma separated sequence of arguments where each +argument is of the following form:

+

Syntax:

+
<type> [parameter Attrs] [name]
+
+
+
+
+

Aliases

+

Aliases, unlike functions or variables, don’t create any new data. They +are just a new symbol and metadata for an existing position.

+

Aliases have a name and an aliasee that is either a global value or a +constant expression.

+

Aliases may have an optional linkage type, an optional +runtime preemption specifier, an optional +visibility style, an optional DLL storage class and an optional tls model.

+

Syntax:

+
@<Name> = [Linkage] [PreemptionSpecifier] [Visibility] [DLLStorageClass] [ThreadLocal] [(unnamed_addr|local_unnamed_addr)] alias <AliaseeTy>, <AliaseeTy>* @<Aliasee>
+
+
+

The linkage must be one of private, internal, linkonce, weak, +linkonce_odr, weak_odr, external. Note that some system linkers +might not correctly handle dropping a weak symbol that is aliased.

+

Aliases that are not unnamed_addr are guaranteed to have the same address as +the aliasee expression. unnamed_addr ones are only guaranteed to point +to the same content.

+

If the local_unnamed_addr attribute is given, the address is known to +not be significant within the module.

+

Since aliases are only a second name, some restrictions apply, of which +some can only be checked when producing an object file:

+
    +
  • The expression defining the aliasee must be computable at assembly +time. Since it is just a name, no relocations can be used.

  • +
  • No alias in the expression can be weak as the possibility of the +intermediate alias being overridden cannot be represented in an +object file.

  • +
  • No global value in the expression can be a declaration, since that +would require a relocation, which is not possible.

  • +
+
+
+

IFuncs

+

IFuncs, like aliases, don’t create any new data or functions. They are just a new +symbol that the dynamic linker resolves at runtime by calling a resolver function.

+

IFuncs have a name and a resolver that is a function called by the dynamic linker +that returns the address of another function associated with the name.

+

IFunc may have an optional linkage type and an optional +visibility style.

+

Syntax:

+
@<Name> = [Linkage] [Visibility] ifunc <IFuncTy>, <ResolverTy>* @<Resolver>
+
+
+
+
+

Comdats

+

Comdat IR provides access to object file COMDAT/section group functionality +which represents interrelated sections.

+

Comdats have a name which represents the COMDAT key and a selection kind to +provide input on how the linker deduplicates comdats with the same key in two +different object files. A comdat must be included or omitted as a unit. +Discarding the whole comdat is allowed but discarding a subset is not.

+

A global object may be a member of at most one comdat. Aliases are placed in the +same COMDAT that their aliasee computes to, if any.

+

Syntax:

+
$<Name> = comdat SelectionKind
+
+
+

For selection kinds other than nodeduplicate, only one of the duplicate +comdats may be retained by the linker and the members of the remaining comdats +must be discarded. The following selection kinds are supported:

+
+
any

The linker may choose any COMDAT key, the choice is arbitrary.

+
+
exactmatch

The linker may choose any COMDAT key but the sections must contain the +same data.

+
+
largest

The linker will choose the section containing the largest COMDAT key.

+
+
nodeduplicate

No deduplication is performed.

+
+
samesize

The linker may choose any COMDAT key but the sections must contain the +same amount of data.

+
+
+
    +
  • XCOFF and Mach-O don’t support COMDATs.

  • +
  • COFF supports all selection kinds. Non-nodeduplicate selection kinds need +a non-local linkage COMDAT symbol.

  • +
  • ELF supports any and nodeduplicate.

  • +
  • WebAssembly only supports any.

  • +
+

Here is an example of a COFF COMDAT where a function will only be selected if +the COMDAT key’s section is the largest:

+
$foo = comdat largest
+@foo = global i32 2, comdat($foo)
+
+define void @bar() comdat($foo) {
+  ret void
+}
+
+
+

In a COFF object file, this will create a COMDAT section with selection kind +IMAGE_COMDAT_SELECT_LARGEST containing the contents of the @foo symbol +and another COMDAT section with selection kind +IMAGE_COMDAT_SELECT_ASSOCIATIVE which is associated with the first COMDAT +section and contains the contents of the @bar symbol.

+

As a syntactic sugar the $name can be omitted if the name is the same as +the global name:

+
$foo = comdat any
+@foo = global i32 2, comdat
+@bar = global i32 3, comdat($foo)
+
+
+

There are some restrictions on the properties of the global object. +It, or an alias to it, must have the same name as the COMDAT group when +targeting COFF. +The contents and size of this object may be used during link-time to determine +which COMDAT groups get selected depending on the selection kind. +Because the name of the object must match the name of the COMDAT group, the +linkage of the global object must not be local; local symbols can get renamed +if a collision occurs in the symbol table.

+

The combined use of COMDATS and section attributes may yield surprising results. +For example:

+
$foo = comdat any
+$bar = comdat any
+@g1 = global i32 42, section "sec", comdat($foo)
+@g2 = global i32 42, section "sec", comdat($bar)
+
+
+

From the object file perspective, this requires the creation of two sections +with the same name. This is necessary because both globals belong to different +COMDAT groups and COMDATs, at the object file level, are represented by +sections.

+

Note that certain IR constructs like global variables and functions may +create COMDATs in the object file in addition to any which are specified using +COMDAT IR. This arises when the code generator is configured to emit globals +in individual sections (e.g. when -data-sections or -function-sections +is supplied to llc).

+
+
+

Named Metadata

+

Named metadata is a collection of metadata. Metadata +nodes (but not metadata strings) are the only valid +operands for a named metadata.

+
    +
  1. Named metadata are represented as a string of characters with the +metadata prefix. The rules for metadata names are the same as for +identifiers, but quoted names are not allowed. "\xx" type escapes +are still valid, which allows any character to be part of a name.

  2. +
+

Syntax:

+
; Some unnamed metadata nodes, which are referenced by the named metadata.
+!0 = !{!"zero"}
+!1 = !{!"one"}
+!2 = !{!"two"}
+; A named metadata.
+!name = !{!0, !1, !2}
+
+
+
+
+

Parameter Attributes

+

The return type and each parameter of a function type may have a set of +parameter attributes associated with them. Parameter attributes are +used to communicate additional information about the result or +parameters of a function. Parameter attributes are considered to be part +of the function, not of the function type, so functions with different +parameter attributes can have the same function type.

+

Parameter attributes are simple keywords that follow the type specified. +If multiple parameter attributes are needed, they are space separated. +For example:

+
declare i32 @printf(i8* noalias nocapture, ...)
+declare i32 @atoi(i8 zeroext)
+declare signext i8 @returns_signed_char()
+
+
+

Note that any attributes for the function result (nounwind, +readonly) come immediately after the argument list.

+

Currently, only the following parameter attributes are defined:

+
+
zeroext

This indicates to the code generator that the parameter or return +value should be zero-extended to the extent required by the target’s +ABI by the caller (for a parameter) or the callee (for a return value).

+
+
signext

This indicates to the code generator that the parameter or return +value should be sign-extended to the extent required by the target’s +ABI (which is usually 32-bits) by the caller (for a parameter) or +the callee (for a return value).

+
+
inreg

This indicates that this parameter or return value should be treated +in a special target-dependent fashion while emitting code for +a function call or return (usually, by putting it in a register as +opposed to memory, though some targets use it to distinguish between +two different kinds of registers). Use of this attribute is +target-specific.

+
+
byval(<ty>)

This indicates that the pointer parameter should really be passed by +value to the function. The attribute implies that a hidden copy of +the pointee is made between the caller and the callee, so the callee +is unable to modify the value in the caller. This attribute is only +valid on LLVM pointer arguments. It is generally used to pass +structs and arrays by value, but is also valid on pointers to +scalars. The copy is considered to belong to the caller not the +callee (for example, readonly functions should not write to +byval parameters). This is not a valid attribute for return +values.

+

The byval type argument indicates the in-memory value type, and +must be the same as the pointee type of the argument.

+

The byval attribute also supports specifying an alignment with the +align attribute. It indicates the alignment of the stack slot to +form and the known alignment of the pointer specified to the call +site. If the alignment is not specified, then the code generator +makes a target-specific assumption.

+
+
+

byref(<ty>)

+
+

The byref argument attribute allows specifying the pointee +memory type of an argument. This is similar to byval, but does +not imply a copy is made anywhere, or that the argument is passed +on the stack. This implies the pointer is dereferenceable up to +the storage size of the type.

+

It is not generally permissible to introduce a write to a +byref pointer. The pointer may have any address space and may +be read only.

+

This is not a valid attribute for return values.

+

The alignment for a byref parameter can be explicitly +specified by combining it with the align attribute, similar to +byval. If the alignment is not specified, then the code generator +makes a target-specific assumption.

+

This is intended for representing ABI constraints, and is not +intended to be inferred for optimization use.

+
+
+
preallocated(<ty>)

This indicates that the pointer parameter should really be passed by +value to the function, and that the pointer parameter’s pointee has +already been initialized before the call instruction. This attribute +is only valid on LLVM pointer arguments. The argument must be the value +returned by the appropriate +llvm.call.preallocated.arg on non +musttail calls, or the corresponding caller parameter in musttail +calls, although it is ignored during codegen.

+

A non musttail function call with a preallocated attribute in +any parameter must have a "preallocated" operand bundle. A musttail +function call cannot have a "preallocated" operand bundle.

+

The preallocated attribute requires a type argument, which must be +the same as the pointee type of the argument.

+

The preallocated attribute also supports specifying an alignment with the +align attribute. It indicates the alignment of the stack slot to +form and the known alignment of the pointer specified to the call +site. If the alignment is not specified, then the code generator +makes a target-specific assumption.

+
+
+

inalloca(<ty>)

+
+

The inalloca argument attribute allows the caller to take the +address of outgoing stack arguments. An inalloca argument must +be a pointer to stack memory produced by an alloca instruction. +The alloca, or argument allocation, must also be tagged with the +inalloca keyword. Only the last argument may have the inalloca +attribute, and that argument is guaranteed to be passed in memory.

+

An argument allocation may be used by a call at most once because +the call may deallocate it. The inalloca attribute cannot be +used in conjunction with other attributes that affect argument +storage, like inreg, nest, sret, or byval. The +inalloca attribute also disables LLVM’s implicit lowering of +large aggregate return values, which means that frontend authors +must lower them with sret pointers.

+

When the call site is reached, the argument allocation must have +been the most recent stack allocation that is still live, or the +behavior is undefined. It is possible to allocate additional stack +space after an argument allocation and before its call site, but it +must be cleared off with llvm.stackrestore.

+

The inalloca attribute requires a type argument, which must be the +same as the pointee type of the argument.

+

See Design and Usage of the InAlloca Attribute for more information on how to use this +attribute.

+
+
+
sret(<ty>)

This indicates that the pointer parameter specifies the address of a +structure that is the return value of the function in the source +program. This pointer must be guaranteed by the caller to be valid: +loads and stores to the structure may be assumed by the callee not +to trap and to be properly aligned. This is not a valid attribute +for return values.

+

The sret type argument specifies the in memory type, which must be +the same as the pointee type of the argument.

+
+
+

elementtype(<ty>)

+
+

The elementtype argument attribute can be used to specify a pointer +element type in a way that is compatible with opaque pointers +(see OpaquePointers.html).

+

The elementtype attribute by itself does not carry any specific +semantics. However, certain intrinsics may require this attribute to be +present and assign it particular semantics. This will be documented on +individual intrinsics.

+

The attribute may only be applied to pointer typed arguments of intrinsic +calls. It cannot be applied to non-intrinsic calls, and cannot be applied +to parameters on function declarations. For non-opaque pointers, the type +passed to elementtype must match the pointer element type.

+
+
+
align <n> or align(<n>)

This indicates that the pointer value has the specified alignment. +If the pointer value does not have the specified alignment, +poison value is returned or passed instead. The +align attribute should be combined with the noundef attribute to +ensure a pointer is aligned, or otherwise the behavior is undefined. Note +that align 1 has no effect on non-byval, non-preallocated arguments.

+

Note that this attribute has additional semantics when combined with the +byval or preallocated attribute, which are documented there.

+
+
+
+
noalias

This indicates that memory locations accessed via pointer values +based on the argument or return value are not also +accessed, during the execution of the function, via pointer values not +based on the argument or return value. This guarantee only holds for +memory locations that are modified, by any means, during the execution of +the function. The attribute on a return value also has additional semantics +described below. The caller shares the responsibility with the callee for +ensuring that these requirements are met. For further details, please see +the discussion of the NoAlias response in alias analysis.

+

Note that this definition of noalias is intentionally similar +to the definition of restrict in C99 for function arguments.

+

For function return values, C99’s restrict is not meaningful, +while LLVM’s noalias is. Furthermore, the semantics of the noalias +attribute on return values are stronger than the semantics of the attribute +when used on function arguments. On function return values, the noalias +attribute indicates that the function acts like a system memory allocation +function, returning a pointer to allocated storage disjoint from the +storage for any other object accessible to the caller.

+
+
+
+
nocapture

This indicates that the callee does not capture the +pointer. This is not a valid attribute for return values. +This attribute applies only to the particular copy of the pointer passed in +this argument. A caller could pass two copies of the same pointer with one +being annotated nocapture and the other not, and the callee could validly +capture through the non annotated parameter.

+
+
+
define void @f(i8* nocapture %a, i8* %b) {
+  ; (capture %b)
+}
+
+call void @f(i8* @glb, i8* @glb) ; well-defined
+
+
+
+
nofree

This indicates that the callee does not free the pointer argument. This is not +a valid attribute for return values.

+
+
+
+
nest

This indicates that the pointer parameter can be excised using the +trampoline intrinsics. This is not a valid +attribute for return values and can only be applied to one parameter.

+
+
returned

This indicates that the function always returns the argument as its return +value. This is a hint to the optimizer and code generator used when +generating the caller, allowing value propagation, tail call optimization, +and omission of register saves and restores in some cases; it is not +checked or enforced when generating the callee. The parameter and the +function return type must be valid operands for the +bitcast instruction. This is not a valid attribute for +return values and can only be applied to one parameter.

+
+
nonnull

This indicates that the parameter or return pointer is not null. This +attribute may only be applied to pointer typed parameters. This is not +checked or enforced by LLVM; if the parameter or return pointer is null, +poison value is returned or passed instead. +The nonnull attribute should be combined with the noundef attribute +to ensure a pointer is not null or otherwise the behavior is undefined.

+
+
dereferenceable(<n>)

This indicates that the parameter or return pointer is dereferenceable. This +attribute may only be applied to pointer typed parameters. A pointer that +is dereferenceable can be loaded from speculatively without a risk of +trapping. The number of bytes known to be dereferenceable must be provided +in parentheses. It is legal for the number of bytes to be less than the +size of the pointee type. The nonnull attribute does not imply +dereferenceability (consider a pointer to one element past the end of an +array), however dereferenceable(<n>) does imply nonnull in +addrspace(0) (which is the default address space), except if the +null_pointer_is_valid function attribute is present. +n should be a positive number. The pointer should be well defined, +otherwise it is undefined behavior. This means dereferenceable(<n>) +implies noundef.

+
+
dereferenceable_or_null(<n>)

This indicates that the parameter or return value isn’t both +non-null and non-dereferenceable (up to <n> bytes) at the same +time. All non-null pointers tagged with +dereferenceable_or_null(<n>) are dereferenceable(<n>). +For address space 0 dereferenceable_or_null(<n>) implies that +a pointer is exactly one of dereferenceable(<n>) or null, +and in other address spaces dereferenceable_or_null(<n>) +implies that a pointer is at least one of dereferenceable(<n>) +or null (i.e. it may be both null and +dereferenceable(<n>)). This attribute may only be applied to +pointer typed parameters.

+
+
swiftself

This indicates that the parameter is the self/context parameter. This is not +a valid attribute for return values and can only be applied to one +parameter.

+
+
swiftasync

This indicates that the parameter is the asynchronous context parameter and +triggers the creation of a target-specific extended frame record to store +this pointer. This is not a valid attribute for return values and can only +be applied to one parameter.

+
+
swifterror

This attribute is motivated to model and optimize Swift error handling. It +can be applied to a parameter with pointer to pointer type or a +pointer-sized alloca. At the call site, the actual argument that corresponds +to a swifterror parameter has to come from a swifterror alloca or +the swifterror parameter of the caller. A swifterror value (either +the parameter or the alloca) can only be loaded and stored from, or used as +a swifterror argument. This is not a valid attribute for return values +and can only be applied to one parameter.

+

These constraints allow the calling convention to optimize access to +swifterror variables by associating them with a specific register at +call boundaries rather than placing them in memory. Since this does change +the calling convention, a function which uses the swifterror attribute +on a parameter is not ABI-compatible with one which does not.

+

These constraints also allow LLVM to assume that a swifterror argument +does not alias any other memory visible within a function and that a +swifterror alloca passed as an argument does not escape.

+
+
immarg

This indicates the parameter is required to be an immediate +value. This must be a trivial immediate integer or floating-point +constant. Undef or constant expressions are not valid. This is +only valid on intrinsic declarations and cannot be applied to a +call site or arbitrary function.

+
+
noundef

This attribute applies to parameters and return values. If the value +representation contains any undefined or poison bits, the behavior is +undefined. Note that this does not refer to padding introduced by the +type’s storage representation.

+
+
alignstack(<n>)

This indicates the alignment that should be considered by the backend when +assigning this parameter to a stack slot during calling convention +lowering. The enforcement of the specified alignment is target-dependent, +as target-specific calling convention rules may override this value. This +attribute serves the purpose of carrying language specific alignment +information that is not mapped to base types in the backend (for example, +over-alignment specification through language attributes).

+
+
+
+
+

Garbage Collector Strategy Names

+

Each function may specify a garbage collector strategy name, which is simply a +string:

+
define void @f() gc "name" { ... }
+
+
+

The supported values of name include those built in to LLVM and any provided by loaded plugins. Specifying a GC +strategy will cause the compiler to alter its output in order to support the +named garbage collection algorithm. Note that LLVM itself does not contain a +garbage collector; this functionality is restricted to generating machine code +which can interoperate with a collector provided externally.

+
+
+

Prefix Data

+

Prefix data is data associated with a function which the code +generator will emit immediately before the function’s entrypoint. +The purpose of this feature is to allow frontends to associate +language-specific runtime metadata with specific functions and make it +available through the function pointer while still allowing the +function pointer to be called.

+

To access the data for a given function, a program may bitcast the +function pointer to a pointer to the constant’s type and dereference +index -1. This implies that the IR symbol points just past the end of +the prefix data. For instance, take the example of a function annotated +with a single i32,

+
define void @f() prefix i32 123 { ... }
+
+
+

The prefix data can be referenced as,

+
%0 = bitcast void ()* @f to i32*
+%a = getelementptr inbounds i32, i32* %0, i32 -1
+%b = load i32, i32* %a
+
+
+

Prefix data is laid out as if it were an initializer for a global variable +of the prefix data’s type. The function will be placed such that the +beginning of the prefix data is aligned. This means that if the size +of the prefix data is not a multiple of the alignment size, the +function’s entrypoint will not be aligned. If alignment of the +function’s entrypoint is desired, padding must be added to the prefix +data.

+

A function may have prefix data but no body. This has similar semantics +to the available_externally linkage in that the data may be used by the +optimizers but will not be emitted in the object file.

+
+
+

Prologue Data

+

The prologue attribute allows arbitrary code (encoded as bytes) to +be inserted prior to the function body. This can be used for enabling +function hot-patching and instrumentation.

+

To maintain the semantics of ordinary function calls, the prologue data must +have a particular format. Specifically, it must begin with a sequence of +bytes which decode to a sequence of machine instructions, valid for the +module’s target, which transfer control to the point immediately succeeding +the prologue data, without performing any other visible action. This allows +the inliner and other passes to reason about the semantics of the function +definition without needing to reason about the prologue data. Obviously this +makes the format of the prologue data highly target dependent.

+

A trivial example of valid prologue data for the x86 architecture is i8 144, +which encodes the nop instruction:

+
define void @f() prologue i8 144 { ... }
+
+
+

Generally prologue data can be formed by encoding a relative branch instruction +which skips the metadata, as in this example of valid prologue data for the +x86_64 architecture, where the first two bytes encode jmp .+10:

+
%0 = type <{ i8, i8, i8* }>
+
+define void @f() prologue %0 <{ i8 235, i8 8, i8* @md}> { ... }
+
+
+

A function may have prologue data but no body. This has similar semantics +to the available_externally linkage in that the data may be used by the +optimizers but will not be emitted in the object file.

+
+
+

Personality Function

+

The personality attribute permits functions to specify what function +to use for exception handling.

+
+
+

Attribute Groups

+

Attribute groups are groups of attributes that are referenced by objects within +the IR. They are important for keeping .ll files readable, because a lot of +functions will use the same set of attributes. In the degenerate case of a +.ll file that corresponds to a single .c file, the single attribute +group will capture the important command line flags used to build that file.

+

An attribute group is a module-level object. To use an attribute group, an +object references the attribute group’s ID (e.g. #37). An object may refer +to more than one attribute group. In that situation, the attributes from the +different groups are merged.

+

Here is an example of attribute groups for a function that should always be +inlined, has a stack alignment of 4, and which shouldn’t use SSE instructions:

+
; Target-independent attributes:
+attributes #0 = { alwaysinline alignstack=4 }
+
+; Target-dependent attributes:
+attributes #1 = { "no-sse" }
+
+; Function @f has attributes: alwaysinline, alignstack=4, and "no-sse".
+define void @f() #0 #1 { ... }
+
+
+
+
+

Function Attributes

+

Function attributes are set to communicate additional information about +a function. Function attributes are considered to be part of the +function, not of the function type, so functions with different function +attributes can have the same function type.

+

Function attributes are simple keywords that follow the type specified. +If multiple attributes are needed, they are space separated. For +example:

+
define void @f() noinline { ... }
+define void @f() alwaysinline { ... }
+define void @f() alwaysinline optsize { ... }
+define void @f() optsize { ... }
+
+
+
+
alignstack(<n>)

This attribute indicates that, when emitting the prologue and +epilogue, the backend should forcibly align the stack pointer. +Specify the desired alignment, which must be a power of two, in +parentheses.

+
+
allocsize(<EltSizeParam>[, <NumEltsParam>])

This attribute indicates that the annotated function will always return at +least a given number of bytes (or null). Its arguments are zero-indexed +parameter numbers; if one argument is provided, then it’s assumed that at +least CallSite.Args[EltSizeParam] bytes will be available at the +returned pointer. If two are provided, then it’s assumed that +CallSite.Args[EltSizeParam] * CallSite.Args[NumEltsParam] bytes are +available. The referenced parameters must be integer types. No assumptions +are made about the contents of the returned block of memory.

+
+
alwaysinline

This attribute indicates that the inliner should attempt to inline +this function into callers whenever possible, ignoring any active +inlining size threshold for this caller.

+
+
builtin

This indicates that the callee function at a call site should be +recognized as a built-in function, even though the function’s declaration +uses the nobuiltin attribute. This is only valid at call sites for +direct calls to functions that are declared with the nobuiltin +attribute.

+
+
cold

This attribute indicates that this function is rarely called. When +computing edge weights, basic blocks post-dominated by a cold +function call are also considered to be cold; and, thus, given low +weight.

+
+
convergent

In some parallel execution models, there exist operations that cannot be +made control-dependent on any additional values. We call such operations +convergent, and mark them with this attribute.

+

The convergent attribute may appear on functions or call/invoke +instructions. When it appears on a function, it indicates that calls to +this function should not be made control-dependent on additional values. +For example, the intrinsic llvm.nvvm.barrier0 is convergent, so +calls to this intrinsic cannot be made control-dependent on additional +values.

+

When it appears on a call/invoke, the convergent attribute indicates +that we should treat the call as though we’re calling a convergent +function. This is particularly useful on indirect calls; without this we +may treat such calls as though the target is non-convergent.

+

The optimizer may remove the convergent attribute on functions when it +can prove that the function does not execute any convergent operations. +Similarly, the optimizer may remove convergent on calls/invokes when it +can prove that the call/invoke cannot call a convergent function.

+
+
disable_sanitizer_instrumentation

When instrumenting code with sanitizers, it can be important to skip certain +functions to ensure no instrumentation is applied to them.

+

This attribute is not always equivalent to omitting the sanitize_<name> +attributes: depending on the specific sanitizer, code can be inserted into +functions regardless of the sanitize_<name> attribute to prevent false +positive reports.

+

disable_sanitizer_instrumentation disables all kinds of instrumentation, +taking precedence over the sanitize_<name> attributes and other compiler +flags.

+
+
"dontcall"

This attribute denotes that a diagnostic should be emitted when a call of a +function with this attribute is not eliminated via optimization. Front ends +can provide optional srcloc metadata nodes on call sites of such +callees to attach information about where in the source language such a +call came from.

+
+
"frame-pointer"

This attribute tells the code generator whether the function +should keep the frame pointer. The code generator may emit the frame pointer +even if this attribute says the frame pointer can be eliminated. +The allowed string values are:

+
+
    +
  • "none" (default) - the frame pointer can be eliminated.

  • +
  • "non-leaf" - the frame pointer should be kept if the function calls +other functions.

  • +
  • "all" - the frame pointer should be kept.

  • +
+
+
+
hot

This attribute indicates that this function is a hot spot of the program +execution. The function will be optimized more aggressively and will be +placed into a special subsection of the text section to improve locality.

+

When profile feedback is enabled, this attribute takes precedence over +the profile information. By marking a function hot, users can work +around the cases where the training input does not have good coverage +of all the hot functions.

+
+
inaccessiblememonly

This attribute indicates that the function may only access memory that +is not accessible by the module being compiled. This is a weaker form +of readnone. If the function reads or writes other memory, the +behavior is undefined.

+
+
inaccessiblemem_or_argmemonly

This attribute indicates that the function may only access memory that is +either not accessible by the module being compiled, or is pointed to +by its pointer arguments. This is a weaker form of argmemonly. If the +function reads or writes other memory, the behavior is undefined.

+
+
inlinehint

This attribute indicates that the source code contained a hint that +inlining this function is desirable (such as the “inline” keyword in +C/C++). It is just a hint; it imposes no requirements on the +inliner.

+
+
jumptable

This attribute indicates that the function should be added to a +jump-instruction table at code-generation time, and that all address-taken +references to this function should be replaced with a reference to the +appropriate jump-instruction-table function pointer. Note that this creates +a new pointer for the original function, which means that code that depends +on function-pointer identity can break. So, any function annotated with +jumptable must also be unnamed_addr.

+
+
minsize

This attribute suggests that optimization passes and code generator +passes make choices that keep the code size of this function as small +as possible and perform optimizations that may sacrifice runtime +performance in order to minimize the size of the generated code.

+
+
naked

This attribute disables prologue / epilogue emission for the +function. This can have very system-specific consequences.

+
+
"no-inline-line-tables"

When this attribute is set to true, the inliner discards source locations +when inlining code and instead uses the source location of the call site. +Breakpoints set on code that was inlined into the current function will +not fire during the execution of the inlined call sites. If the debugger +stops inside an inlined call site, it will appear to be stopped at the +outermost inlined call site.

+
+
no-jump-tables

When this attribute is set to true, the jump tables and lookup tables that +can be generated from a switch case lowering are disabled.

+
+
nobuiltin

This indicates that the callee function at a call site is not recognized as +a built-in function. LLVM will retain the original call and not replace it +with equivalent code based on the semantics of the built-in function, unless +the call site uses the builtin attribute. This is valid at call sites +and on function declarations and definitions.

+
+
noduplicate

This attribute indicates that calls to the function cannot be +duplicated. A call to a noduplicate function may be moved +within its parent function, but may not be duplicated within +its parent function.

+

A function containing a noduplicate call may still +be an inlining candidate, provided that the call is not +duplicated by inlining. That implies that the function has +internal linkage and only has one call site, so the original +call is dead after inlining.

+
+
nofree

This function attribute indicates that the function does not, directly or +transitively, call a memory-deallocation function (free, for example) +on a memory allocation which existed before the call.

+

As a result, uncaptured pointers that are known to be dereferenceable +prior to a call to a function with the nofree attribute are still +known to be dereferenceable after the call. The capturing condition is +necessary in environments where the function might communicate the +pointer to another thread which then deallocates the memory. Alternatively, +nosync would ensure such communication cannot happen and even captured +pointers cannot be freed by the function.

+

A nofree function is explicitly allowed to free memory which it +allocated or (if not nosync) arrange for another thread to free +memory on its behalf. As a result, perhaps surprisingly, a nofree +function can return a pointer to a previously deallocated memory object.

+
+
noimplicitfloat

Disallows implicit floating-point code. This inhibits optimizations that +use floating-point code and floating-point/SIMD/vector registers for +operations that are not nominally floating-point. LLVM instructions that +perform floating-point operations or require access to floating-point +registers may still cause floating-point code to be generated.

+
+
noinline

This attribute indicates that the inliner should never inline this +function in any situation. This attribute may not be used together +with the alwaysinline attribute.

+
+
nomerge

This attribute indicates that calls to this function should never be merged +during optimization. For example, it will prevent tail merging otherwise +identical code sequences that raise an exception or terminate the program. +Tail merging normally reduces the precision of source location information, +making stack traces less useful for debugging. This attribute gives the +user control over the tradeoff between code size and debug information +precision.

+
+
nonlazybind

This attribute suppresses lazy symbol binding for the function. This +may make calls to the function faster, at the cost of extra program +startup time if the function is not called during program startup.

+
+
noprofile

This function attribute prevents instrumentation based profiling, used for +coverage or profile based optimization, from being added to a function, +even when inlined.

+
+
noredzone

This attribute indicates that the code generator should not use a +red zone, even if the target-specific ABI normally permits it.

+
+
indirect-tls-seg-refs

This attribute indicates that the code generator should not use +direct TLS access through segment registers, even if the +target-specific ABI normally permits it.

+
+
noreturn

This function attribute indicates that the function never returns +normally, that is, through a return instruction. This produces undefined +behavior at runtime if the function ever does dynamically return. Annotated +functions may still raise an exception, i.e., nounwind is not implied.

+
+
norecurse

This function attribute indicates that the function does not call itself +either directly or indirectly down any possible call path. This produces +undefined behavior at runtime if the function ever does recurse.

+
+
willreturn

This function attribute indicates that a call of this function will +either exhibit undefined behavior or come back and continue execution +at a point in the existing call stack that includes the current invocation. +Annotated functions may still raise an exception, i.e., nounwind is not implied. +If an invocation of an annotated function does not return control back +to a point in the call stack, the behavior is undefined.

+
+
nosync

This function attribute indicates that the function does not communicate +(synchronize) with another thread through memory or other well-defined means. +Synchronization is considered possible in the presence of atomic accesses +that enforce an order, thus not “unordered” and “monotonic”, volatile accesses, +as well as convergent function calls. Note that through convergent function calls +non-memory communication, e.g., cross-lane operations, are possible and are also +considered synchronization. However convergent does not contradict nosync. +If an annotated function does ever synchronize with another thread, +the behavior is undefined.

+
+
nounwind

This function attribute indicates that the function never raises an +exception. If the function does raise an exception, its runtime +behavior is undefined. However, functions marked nounwind may still +trap or generate asynchronous exceptions. Exception handling schemes +that are recognized by LLVM to handle asynchronous exceptions, such +as SEH, will still provide their implementation defined semantics.

+
+
nosanitize_coverage

This attribute indicates that SanitizerCoverage instrumentation is disabled +for this function.

+
+
null_pointer_is_valid

If null_pointer_is_valid is set, then the null address +in address-space 0 is considered to be a valid address for memory loads and +stores. Any analysis or optimization should not treat dereferencing a +pointer to null as undefined behavior in this function. +Note: Comparing address of a global variable to null may still +evaluate to false because of a limitation in querying this attribute inside +constant expressions.

+
+
optforfuzzing

This attribute indicates that this function should be optimized +for maximum fuzzing signal.

+
+
optnone

This function attribute indicates that most optimization passes will skip +this function, with the exception of interprocedural optimization passes. +Code generation defaults to the “fast” instruction selector. +This attribute cannot be used together with the alwaysinline +attribute; this attribute is also incompatible +with the minsize attribute and the optsize attribute.

+

This attribute requires the noinline attribute to be specified on +the function as well, so the function is never inlined into any caller. +Only functions with the alwaysinline attribute are valid +candidates for inlining into the body of this function.

+
+
optsize

This attribute suggests that optimization passes and code generator +passes make choices that keep the code size of this function low, +and otherwise do optimizations specifically to reduce code size as +long as they do not significantly impact runtime performance.

+
+
"patchable-function"

This attribute tells the code generator that the code +generated for this function needs to follow certain conventions that +make it possible for a runtime function to patch over it later. +The exact effect of this attribute depends on its string value, +for which there currently is one legal possibility:

+
+
    +
  • "prologue-short-redirect" - This style of patchable +function is intended to support patching a function prologue to +redirect control away from the function in a thread safe +manner. It guarantees that the first instruction of the +function will be large enough to accommodate a short jump +instruction, and will be sufficiently aligned to allow being +fully changed via an atomic compare-and-swap instruction. +While the first requirement can be satisfied by inserting large +enough NOP, LLVM can and will try to re-purpose an existing +instruction (i.e. one that would have to be emitted anyway) as +the patchable instruction larger than a short jump.

    +

    "prologue-short-redirect" is currently only supported on +x86-64.

    +
  • +
+
+

This attribute by itself does not imply restrictions on +inter-procedural optimizations. All of the semantic effects the +patching may have must be separately conveyed via the linkage type.

+
+
"probe-stack"

This attribute indicates that the function will trigger a guard region +at the end of the stack. It ensures that accesses to the stack must be +no further apart than the size of the guard region to a previous +access of the stack. It takes one required string value, the name of +the stack probing function that will be called.

+

If a function that has a "probe-stack" attribute is inlined into +a function with another "probe-stack" attribute, the resulting +function has the "probe-stack" attribute of the caller. If a +function that has a "probe-stack" attribute is inlined into a +function that has no "probe-stack" attribute at all, the resulting +function has the "probe-stack" attribute of the callee.

+
+
readnone

On a function, this attribute indicates that the function computes its +result (or decides to unwind an exception) based strictly on its arguments, +without dereferencing any pointer arguments or otherwise accessing +any mutable state (e.g. memory, control registers, etc) visible to +caller functions. It does not write through any pointer arguments +(including byval arguments) and never changes any state visible +to callers. This means while it cannot unwind exceptions by calling +the C++ exception throwing methods (since they write to memory), there may +be non-C++ mechanisms that throw exceptions without writing to LLVM +visible memory.

+

On an argument, this attribute indicates that the function does not +dereference that pointer argument, even though it may read or write the +memory that the pointer points to if accessed through other pointers.

+

If a readnone function reads or writes memory visible to the program, or +has other side-effects, the behavior is undefined. If a function reads from +or writes to a readnone pointer argument, the behavior is undefined.

+
+
readonly

On a function, this attribute indicates that the function does not write +through any pointer arguments (including byval arguments) or otherwise +modify any state (e.g. memory, control registers, etc) visible to +caller functions. It may dereference pointer arguments and read +state that may be set in the caller. A readonly function always +returns the same value (or unwinds an exception identically) when +called with the same set of arguments and global state. This means while it +cannot unwind exceptions by calling the C++ exception throwing methods +(since they write to memory), there may be non-C++ mechanisms that throw +exceptions without writing to LLVM visible memory.

+

On an argument, this attribute indicates that the function does not write +through this pointer argument, even though it may write to the memory that +the pointer points to.

+

If a readonly function writes memory visible to the program, or +has other side-effects, the behavior is undefined. If a function writes to +a readonly pointer argument, the behavior is undefined.

+
+
"stack-probe-size"

This attribute controls the behavior of stack probes: either +the "probe-stack" attribute, or ABI-required stack probes, if any. +It defines the size of the guard region. It ensures that if the function +may use more stack space than the size of the guard region, a stack probing +sequence will be emitted. It takes one required integer value, which +is 4096 by default.

+

If a function that has a "stack-probe-size" attribute is inlined into +a function with another "stack-probe-size" attribute, the resulting +function has the "stack-probe-size" attribute that has the lower +numeric value. If a function that has a "stack-probe-size" attribute is +inlined into a function that has no "stack-probe-size" attribute +at all, the resulting function has the "stack-probe-size" attribute +of the callee.

+
+
"no-stack-arg-probe"

This attribute disables ABI-required stack probes, if any.

+
+
writeonly

On a function, this attribute indicates that the function may write to but +does not read from memory.

+

On an argument, this attribute indicates that the function may write to but +does not read through this pointer argument (even though it may read from +the memory that the pointer points to).

+

If a writeonly function reads memory visible to the program, or +has other side-effects, the behavior is undefined. If a function reads +from a writeonly pointer argument, the behavior is undefined.

+
+
argmemonly

This attribute indicates that the only memory accesses inside the function are +loads and stores from objects pointed to by its pointer-typed arguments, +with arbitrary offsets. In other words, all memory operations in the +function can refer to memory only using pointers based on its function +arguments.

+

Note that argmemonly can be used together with the readonly attribute +in order to specify that the function reads only from its arguments.

+

If an argmemonly function reads or writes memory other than the pointer +arguments, or has other side-effects, the behavior is undefined.

+
+
returns_twice

This attribute indicates that this function can return twice. The C +setjmp is an example of such a function. The compiler disables +some optimizations (like tail calls) in the caller of these +functions.

+
+
safestack

This attribute indicates that +SafeStack +protection is enabled for this function.

+

If a function that has a safestack attribute is inlined into a +function that doesn’t have a safestack attribute or which has an +ssp, sspstrong or sspreq attribute, then the resulting +function will have a safestack attribute.

+
+
sanitize_address

This attribute indicates that AddressSanitizer checks +(dynamic address safety analysis) are enabled for this function.

+
+
sanitize_memory

This attribute indicates that MemorySanitizer checks (dynamic detection +of accesses to uninitialized memory) are enabled for this function.

+
+
sanitize_thread

This attribute indicates that ThreadSanitizer checks +(dynamic thread safety analysis) are enabled for this function.

+
+
sanitize_hwaddress

This attribute indicates that HWAddressSanitizer checks +(dynamic address safety analysis based on tagged pointers) are enabled for +this function.

+
+
sanitize_memtag

This attribute indicates that MemTagSanitizer checks +(dynamic address safety analysis based on Armv8 MTE) are enabled for +this function.

+
+
speculative_load_hardening

This attribute indicates that +Speculative Load Hardening +should be enabled for the function body.

+

Speculative Load Hardening is a best-effort mitigation against +information leak attacks that make use of control flow +mis-speculation - specifically mis-speculation of whether a branch +is taken or not. Typically vulnerabilities enabling such attacks are +classified as “Spectre variant #1”. Notably, this does not attempt to +mitigate against mis-speculation of the branch target, classified as +“Spectre variant #2” vulnerabilities.

+

When inlining, the attribute is sticky. Inlining a function that carries +this attribute will cause the caller to gain the attribute. This is intended +to provide a maximally conservative model where the code in a function +annotated with this attribute will always (even after inlining) end up +hardened.

+
+
speculatable

This function attribute indicates that the function does not have any +effects besides calculating its result and does not have undefined behavior. +Note that speculatable is not enough to conclude that along any +particular execution path the number of calls to this function will not be +externally observable. This attribute is only valid on functions +and declarations, not on individual call sites. If a function is +incorrectly marked as speculatable and really does exhibit +undefined behavior, the undefined behavior may be observed even +if the call site is dead code.

+
+
ssp

This attribute indicates that the function should emit a stack +smashing protector. It is in the form of a “canary” — a random value +placed on the stack before the local variables that’s checked upon +return from the function to see if it has been overwritten. A +heuristic is used to determine if a function needs stack protectors +or not. The heuristic used will enable protectors for functions with:

+
    +
  • Character arrays larger than ssp-buffer-size (default 8).

  • +
  • Aggregates containing character arrays larger than ssp-buffer-size.

  • +
  • Calls to alloca() with variable sizes or constant sizes greater than +ssp-buffer-size.

  • +
+

Variables that are identified as requiring a protector will be arranged +on the stack such that they are adjacent to the stack protector guard.

+

A function with the ssp attribute but without the alwaysinline +attribute cannot be inlined into a function without a +ssp/sspreq/sspstrong attribute. If inlined, the caller will get the +ssp attribute. call, invoke, and callbr instructions with +the alwaysinline attribute force inlining.

+
+
sspstrong

This attribute indicates that the function should emit a stack smashing +protector. This attribute causes a strong heuristic to be used when +determining if a function needs stack protectors. The strong heuristic +will enable protectors for functions with:

+
    +
  • Arrays of any size and type

  • +
  • Aggregates containing an array of any size and type.

  • +
  • Calls to alloca().

  • +
  • Local variables that have had their address taken.

  • +
+

Variables that are identified as requiring a protector will be arranged +on the stack such that they are adjacent to the stack protector guard. +The specific layout rules are:

+
    +
  1. Large arrays and structures containing large arrays +(>= ssp-buffer-size) are closest to the stack protector.

  2. +
  3. Small arrays and structures containing small arrays +(< ssp-buffer-size) are 2nd closest to the protector.

  4. +
  5. Variables that have had their address taken are 3rd closest to the +protector.

  6. +
+

This overrides the ssp function attribute.

+

A function with the sspstrong attribute but without the +alwaysinline attribute cannot be inlined into a function without a +ssp/sspstrong/sspreq attribute. If inlined, the caller will get the +sspstrong attribute unless the sspreq attribute exists. call, +invoke, and callbr instructions with the alwaysinline attribute +force inlining.

+
+
sspreq

This attribute indicates that the function should always emit a stack +smashing protector. This overrides the ssp and sspstrong function +attributes.

+

Variables that are identified as requiring a protector will be arranged +on the stack such that they are adjacent to the stack protector guard. +The specific layout rules are:

+
    +
  1. Large arrays and structures containing large arrays +(>= ssp-buffer-size) are closest to the stack protector.

  2. +
  3. Small arrays and structures containing small arrays +(< ssp-buffer-size) are 2nd closest to the protector.

  4. +
  5. Variables that have had their address taken are 3rd closest to the +protector.

  6. +
+

A function with the sspreq attribute but without the alwaysinline +attribute cannot be inlined into a function without a +ssp/sspstrong/sspreq attribute. If inlined, the caller will get the +sspreq attribute. call, invoke, and callbr instructions +with the alwaysinline attribute force inlining.

+
+
strictfp

This attribute indicates that the function was called from a scope that +requires strict floating-point semantics. LLVM will not attempt any +optimizations that require assumptions about the floating-point rounding +mode or that might alter the state of floating-point status flags that +might otherwise be set or cleared by calling this function. LLVM will +not introduce any new floating-point instructions that may trap.

+
+
"denormal-fp-math"
+

This indicates the denormal (subnormal) handling that may be +assumed for the default floating-point environment. This is a +comma separated pair. The elements may be one of "ieee", +"preserve-sign", or "positive-zero". The first entry +indicates the flushing mode for the result of floating point +operations. The second indicates the handling of denormal inputs +to floating point instructions. For compatibility with older +bitcode, if the second value is omitted, both input and output +modes will assume the same mode.

+

If this attribute is not specified, the default is +"ieee,ieee".

+

If the output mode is "preserve-sign", or "positive-zero", +denormal outputs may be flushed to zero by standard floating-point +operations. It is not mandated that flushing to zero occurs, but if +a denormal output is flushed to zero, it must respect the sign +mode. Not all targets support all modes. While this indicates the +expected floating point mode the function will be executed with, +this does not make any attempt to ensure the mode is +consistent. User or platform code is expected to set the floating +point mode appropriately before function entry.

+
+

If the input mode is "preserve-sign", or "positive-zero", a +floating-point operation must treat any input denormal value as +zero. In some situations, if an instruction does not respect this +mode, the input may need to be converted to 0 as if by +@llvm.canonicalize during lowering for correctness.

+
+
"denormal-fp-math-f32"

Same as "denormal-fp-math", but only controls the behavior of +the 32-bit float type (or vectors of 32-bit floats). If both are +present, this overrides "denormal-fp-math". Not all targets +support separately setting the denormal mode per type, and no +attempt is made to diagnose unsupported uses. Currently this +attribute is respected by the AMDGPU and NVPTX backends.

+
+
"thunk"

This attribute indicates that the function will delegate to some other +function with a tail call. The prototype of a thunk should not be used for +optimization purposes. The caller is expected to cast the thunk prototype to +match the thunk target prototype.

+
+
uwtable

This attribute indicates that the ABI being targeted requires that +an unwind table entry be produced for this function even if we can +show that no exceptions pass by it. This is normally the case for +the ELF x86-64 ABI, but it can be disabled for some compilation +units.

+
+
nocf_check

This attribute indicates that no control-flow check will be performed on +the attributed entity. It disables -fcf-protection=<> for a specific +entity to fine grain the HW control flow protection mechanism. The flag +is target independent and currently appertains to a function or function +pointer.

+
+
shadowcallstack

This attribute indicates that the ShadowCallStack checks are enabled for +the function. The instrumentation checks that the return address for the +function has not changed between the function prolog and epilog. It is +currently x86_64-specific.

+
+
mustprogress

This attribute indicates that the function is required to return, unwind, +or interact with the environment in an observable way e.g. via a volatile +memory access, I/O, or other synchronization. The mustprogress +attribute is intended to model the requirements of the first section of +[intro.progress] of the C++ Standard. As a consequence, a loop in a +function with the mustprogress attribute can be assumed to terminate if +it does not interact with the environment in an observable way, and +terminating loops without side-effects can be removed. If a mustprogress +function does not satisfy this contract, the behavior is undefined. This +attribute does not apply transitively to callees, but does apply to call +sites within the function. Note that willreturn implies mustprogress.

+
+
"warn-stack-size"="<threshold>"

This attribute sets a threshold to emit diagnostics once the frame size is +known should the frame size exceed the specified value. It takes one +required integer value, which should be a non-negative integer, and less +than UINT_MAX. It’s unspecified which threshold will be used when +duplicate definitions are linked together with differing values.

+
+
vscale_range(<min>[, <max>])

This attribute indicates the minimum and maximum vscale value for the given +function. A value of 0 means unbounded. If the optional max value is omitted +then max is set to the value of min. If the attribute is not present, no +assumptions are made about the range of vscale.

+
+
+
+
+

Call Site Attributes

+

In addition to function attributes the following call site only +attributes are supported:

+
+
vector-function-abi-variant

This attribute can be attached to a call to list +the vector functions associated to the function. Notice that the +attribute cannot be attached to an invoke or a +callbr instruction. The attribute consists of a +comma separated list of mangled names. The order of the list does +not imply preference (it is logically a set). The compiler is free +to pick any listed vector function of its choosing.

+

The syntax for the mangled names is as follows:

+
_ZGV<isa><mask><vlen><parameters>_<scalar_name>[(<vector_redirection>)]
+
+
+

When present, the attribute informs the compiler that the function +<scalar_name> has a corresponding vector variant that can be +used to perform the concurrent invocation of <scalar_name> on +vectors. The shape of the vector function is described by the +tokens between the prefix _ZGV and the <scalar_name> +token. The standard name of the vector function is +_ZGV<isa><mask><vlen><parameters>_<scalar_name>. When present, +the optional token (<vector_redirection>) informs the compiler +that a custom name is provided in addition to the standard one +(custom names can be provided for example via the use of declare +variant in OpenMP 5.0). The declaration of the variant must be +present in the IR Module. The signature of the vector variant is +determined by the rules of the Vector Function ABI (VFABI) +specifications of the target. For Arm and X86, the VFABI can be +found at https://github.com/ARM-software/abi-aa and +https://software.intel.com/content/www/us/en/develop/download/vector-simd-function-abi.html, +respectively.

+

For X86 and Arm targets, the values of the tokens in the standard +name are those that are defined in the VFABI. LLVM has an internal +<isa> token that can be used to create scalar-to-vector +mappings for functions that are not directly associated to any of +the target ISAs (for example, some of the mappings stored in the +TargetLibraryInfo). Valid values for the <isa> token are:

+
<isa>:= b | c | d | e  -> X86 SSE, AVX, AVX2, AVX512
+      | n | s          -> Armv8 Advanced SIMD, SVE
+      | __LLVM__       -> Internal LLVM Vector ISA
+
+
+

For all targets currently supported (x86, Arm and Internal LLVM), +the remaining tokens can have the following values:

+
<mask>:= M | N         -> mask | no mask
+
+<vlen>:= number        -> number of lanes
+       | x             -> VLA (Vector Length Agnostic)
+
+<parameters>:= v              -> vector
+             | l | l <number> -> linear
+             | R | R <number> -> linear with ref modifier
+             | L | L <number> -> linear with val modifier
+             | U | U <number> -> linear with uval modifier
+             | ls <pos>       -> runtime linear
+             | Rs <pos>       -> runtime linear with ref modifier
+             | Ls <pos>       -> runtime linear with val modifier
+             | Us <pos>       -> runtime linear with uval modifier
+             | u              -> uniform
+
+<scalar_name>:= name of the scalar function
+
+<vector_redirection>:= optional, custom name of the vector function
+
+
+
+
preallocated(<ty>)

This attribute is required on calls to llvm.call.preallocated.arg +and cannot be used on any other call. See +llvm.call.preallocated.arg for more +details.

+
+
+
+
+

Global Attributes

+

Attributes may be set to communicate additional information about a global variable. +Unlike function attributes, attributes on a global variable +are grouped into a single attribute group.

+
+
+

Operand Bundles

+

Operand bundles are tagged sets of SSA values that can be associated +with certain LLVM instructions (currently only calls and +invokes). In a way they are like metadata, but dropping them is +incorrect and will change program semantics.

+

Syntax:

+
operand bundle set ::= '[' operand bundle (, operand bundle )* ']'
+operand bundle ::= tag '(' [ bundle operand ] (, bundle operand )* ')'
+bundle operand ::= SSA value
+tag ::= string constant
+
+
+

Operand bundles are not part of a function’s signature, and a +given function may be called from multiple places with different kinds +of operand bundles. This reflects the fact that the operand bundles +are conceptually a part of the call (or invoke), not the +callee being dispatched to.

+

Operand bundles are a generic mechanism intended to support +runtime-introspection-like functionality for managed languages. While +the exact semantics of an operand bundle depend on the bundle tag, +there are certain limitations to how much the presence of an operand +bundle can influence the semantics of a program. These restrictions +are described as the semantics of an “unknown” operand bundle. As +long as the behavior of an operand bundle is describable within these +restrictions, LLVM does not need to have special knowledge of the +operand bundle to not miscompile programs containing it.

+
    +
  • The bundle operands for an unknown operand bundle escape in unknown +ways before control is transferred to the callee or invokee.

  • +
  • Calls and invokes with operand bundles have unknown read / write +effect on the heap on entry and exit (even if the call target is +readnone or readonly), unless they’re overridden with +callsite specific attributes.

  • +
  • An operand bundle at a call site cannot change the implementation +of the called function. Inter-procedural optimizations work as +usual as long as they take into account the first two properties.

  • +
+

More specific types of operand bundles are described below.

+
+

Deoptimization Operand Bundles

+

Deoptimization operand bundles are characterized by the "deopt" +operand bundle tag. These operand bundles represent an alternate +“safe” continuation for the call site they’re attached to, and can be +used by a suitable runtime to deoptimize the compiled frame at the +specified call site. There can be at most one "deopt" operand +bundle attached to a call site. Exact details of deoptimization are +out of scope for the language reference, but it usually involves +rewriting a compiled frame into a set of interpreted frames.

+

From the compiler’s perspective, deoptimization operand bundles make +the call sites they’re attached to at least readonly. They read +through all of their pointer typed operands (even if they’re not +otherwise escaped) and the entire visible heap. Deoptimization +operand bundles do not capture their operands except during +deoptimization, in which case control will not be returned to the +compiled frame.

+

The inliner knows how to inline through calls that have deoptimization +operand bundles. Just like inlining through a normal call site +involves composing the normal and exceptional continuations, inlining +through a call site with a deoptimization operand bundle needs to +appropriately compose the “safe” deoptimization continuation. The +inliner does this by prepending the parent’s deoptimization +continuation to every deoptimization continuation in the inlined body. +E.g. inlining @f into @g in the following example

+
define void @f() {
+  call void @x()  ;; no deopt state
+  call void @y() [ "deopt"(i32 10) ]
+  call void @y() [ "deopt"(i32 10), "unknown"(i8* null) ]
+  ret void
+}
+
+define void @g() {
+  call void @f() [ "deopt"(i32 20) ]
+  ret void
+}
+
+
+

will result in

+
define void @g() {
+  call void @x()  ;; still no deopt state
+  call void @y() [ "deopt"(i32 20, i32 10) ]
+  call void @y() [ "deopt"(i32 20, i32 10), "unknown"(i8* null) ]
+  ret void
+}
+
+
+

It is the frontend’s responsibility to structure or encode the +deoptimization state in a way that syntactically prepending the +caller’s deoptimization state to the callee’s deoptimization state is +semantically equivalent to composing the caller’s deoptimization +continuation after the callee’s deoptimization continuation.

+
+
+

Funclet Operand Bundles

+

Funclet operand bundles are characterized by the "funclet" +operand bundle tag. These operand bundles indicate that a call site +is within a particular funclet. There can be at most one +"funclet" operand bundle attached to a call site and it must have +exactly one bundle operand.

+

If any funclet EH pads have been “entered” but not “exited” (per the +description in the EH doc), +it is undefined behavior to execute a call or invoke which:

+
    +
  • does not have a "funclet" bundle and is not a call to a nounwind +intrinsic, or

  • +
  • has a "funclet" bundle whose operand is not the most-recently-entered +not-yet-exited funclet EH pad.

  • +
+

Similarly, if no funclet EH pads have been entered-but-not-yet-exited, +executing a call or invoke with a "funclet" bundle is undefined behavior.

+
+
+

GC Transition Operand Bundles

+

GC transition operand bundles are characterized by the +"gc-transition" operand bundle tag. These operand bundles mark a +call as a transition between a function with one GC strategy to a +function with a different GC strategy. If coordinating the transition +between GC strategies requires additional code generation at the call +site, these bundles may contain any values that are needed by the +generated code. For more details, see GC Transitions.

+

The bundle contains an arbitrary list of Values which need to be passed +to GC transition code. They will be lowered and passed as operands to +the appropriate GC_TRANSITION nodes in the selection DAG. It is assumed +that these arguments must be available before and after (but not +necessarily during) the execution of the callee.

+
+
+

Assume Operand Bundles

+

Operand bundles on an llvm.assume allow representing +assumptions that a parameter attribute or a +function attribute holds for a certain value at a certain +location. Operand bundles enable assumptions that are either hard or impossible +to represent as a boolean argument of an llvm.assume.

+

An assume operand bundle has the form:

+
"<tag>"([ <holds for value> [, <attribute argument>] ])
+
+
+
    +
  • The tag of the operand bundle is usually the name of the attribute that can be +assumed to hold. It can also be ignore; this tag doesn’t contain any +information and should be ignored.

  • +
  • The first argument, if present, is the value for which the attribute holds.

  • +
  • The second argument if present is an argument of the attribute.

  • +
+

If there are no arguments the attribute is a property of the call location.

+

If the represented attribute expects a constant argument, the argument provided +to the operand bundle should be a constant as well.

+

For example:

+
call void @llvm.assume(i1 true) ["align"(i32* %val, i32 8)]
+
+
+

allows the optimizer to assume that at the location of the call to +llvm.assume, %val has an alignment of at least 8.

+
call void @llvm.assume(i1 %cond) ["cold"(), "nonnull"(i64* %val)]
+
+
+

allows the optimizer to assume that the llvm.assume +call location is cold and that %val may not be null.

+

Just like for the argument of llvm.assume, if any of the +provided guarantees are violated at runtime the behavior is undefined.

+

Even if the assumed property can be encoded as a boolean value, like +nonnull, using operand bundles to express the property can still have +benefits:

+
    +
  • Attributes that can be expressed via operand bundles are directly the +property that the optimizer uses and cares about. Encoding attributes as +operand bundles removes the need for an instruction sequence that represents +the property (e.g., icmp ne i32* %p, null for nonnull) and for the +optimizer to deduce the property from that instruction sequence.

  • +
  • Expressing the property using operand bundles makes it easy to identify the +use of the value as a use in an llvm.assume. This then +simplifies and improves heuristics, e.g., for “use-sensitive” +optimizations.

  • +
+
+
+

Preallocated Operand Bundles

+

Preallocated operand bundles are characterized by the "preallocated" +operand bundle tag. These operand bundles allow separation of the allocation +of the call argument memory from the call site. This is necessary to pass +non-trivially copyable objects by value in a way that is compatible with MSVC +on some targets. There can be at most one "preallocated" operand bundle +attached to a call site and it must have exactly one bundle operand, which is +a token generated by @llvm.call.preallocated.setup. A call with this +operand bundle should not adjust the stack before entering the function, as +that will have been done by one of the @llvm.call.preallocated.* intrinsics.

+
%foo = type { i64, i32 }
+
+...
+
+%t = call token @llvm.call.preallocated.setup(i32 1)
+%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%foo)
+%b = bitcast i8* %a to %foo*
+; initialize %b
+call void @bar(i32 42, %foo* preallocated(%foo) %b) ["preallocated"(token %t)]
+
+
+
+
+

GC Live Operand Bundles

+

A “gc-live” operand bundle is only valid on a gc.statepoint +intrinsic. The operand bundle must contain every pointer to a garbage collected +object which potentially needs to be updated by the garbage collector.

+

When lowered, any relocated value will be recorded in the corresponding +stackmap entry. See the intrinsic description +for further details.

+
+
+

ObjC ARC Attached Call Operand Bundles

+

A "clang.arc.attachedcall" operand bundle on a call indicates the call is +implicitly followed by a marker instruction and a call to an ObjC runtime +function that uses the result of the call. The operand bundle takes either the +pointer to the runtime function (@objc_retainAutoreleasedReturnValue or +@objc_unsafeClaimAutoreleasedReturnValue) or no arguments. If the bundle +doesn’t take any arguments, only the marker instruction has to be emitted after +the call; the runtime function calls don’t have to be emitted since they already +have been emitted. The return value of a call with this bundle is used by a call +to @llvm.objc.clang.arc.noop.use unless the called function’s return type is +void, in which case the operand bundle is ignored.

+
; The marker instruction and a runtime function call are inserted after the call
+; to @foo.
+call i8* @foo() [ "clang.arc.attachedcall"(i8* (i8*)* @objc_retainAutoreleasedReturnValue) ]
+call i8* @foo() [ "clang.arc.attachedcall"(i8* (i8*)* @objc_unsafeClaimAutoreleasedReturnValue) ]
+
+; Only the marker instruction is inserted after the call to @foo.
+call i8* @foo() [ "clang.arc.attachedcall"() ]
+
+
+

The operand bundle is needed to ensure the call is immediately followed by the +marker instruction or the ObjC runtime call in the final output.

+
+
+
+

Module-Level Inline Assembly

+

Modules may contain “module-level inline asm” blocks, which correspond +to the GCC “file scope inline asm” blocks. These blocks are internally +concatenated by LLVM and treated as a single unit, but may be separated +in the .ll file if desired. The syntax is very simple:

+
module asm "inline asm code goes here"
+module asm "more can go here"
+
+
+

The strings can contain any character by escaping non-printable +characters. The escape sequence used is simply “\xx” where “xx” is the +two digit hex code for the number.

+

Note that the assembly string must be parseable by LLVM’s integrated assembler +(unless it is disabled), even when emitting a .s file.

+
+
+

Data Layout

+

A module may specify a target specific data layout string that specifies +how data is to be laid out in memory. The syntax for the data layout is +simply:

+
target datalayout = "layout specification"
+
+
+

The layout specification consists of a list of specifications +separated by the minus sign character (‘-’). Each specification starts +with a letter and may include other information after the letter to +define some aspect of the data layout. The specifications accepted are +as follows:

+
+
E

Specifies that the target lays out data in big-endian form. That is, +the bits with the most significance have the lowest address +location.

+
+
e

Specifies that the target lays out data in little-endian form. That +is, the bits with the least significance have the lowest address +location.

+
+
S<size>

Specifies the natural alignment of the stack in bits. Alignment +promotion of stack variables is limited to the natural stack +alignment to avoid dynamic stack realignment. The stack alignment +must be a multiple of 8-bits. If omitted, the natural stack +alignment defaults to “unspecified”, which does not prevent any +alignment promotions.

+
+
P<address space>

Specifies the address space that corresponds to program memory. +Harvard architectures can use this to specify what space LLVM +should place things such as functions into. If omitted, the +program memory space defaults to the default address space of 0, +which corresponds to a Von Neumann architecture that has code +and data in the same space.

+
+
G<address space>

Specifies the address space to be used by default when creating global +variables. If omitted, the globals address space defaults to the default +address space 0. +Note: variable declarations without an address space are always created in +address space 0, this property only affects the default value to be used +when creating globals without additional contextual information (e.g. in +LLVM passes).

+
+
A<address space>

Specifies the address space of objects created by ‘alloca’. +Defaults to the default address space of 0.

+
+
p[n]:<size>:<abi>:<pref>:<idx>

This specifies the size of a pointer and its <abi> and +<pref>erred alignments for address space n. The fourth parameter +<idx> is the size of the index that is used for address calculation. If not +specified, the default index size is equal to the pointer size. All sizes +are in bits. The address space, n, is optional, and if not specified, +denotes the default address space 0. The value of n must be +in the range [1,2^23).

+
+
i<size>:<abi>:<pref>

This specifies the alignment for an integer type of a given bit +<size>. The value of <size> must be in the range [1,2^23).

+
+
v<size>:<abi>:<pref>

This specifies the alignment for a vector type of a given bit +<size>.

+
+
f<size>:<abi>:<pref>

This specifies the alignment for a floating-point type of a given bit +<size>. Only values of <size> that are supported by the target +will work. 32 (float) and 64 (double) are supported on all targets; 80 +or 128 (different flavors of long double) are also supported on some +targets.

+
+
a:<abi>:<pref>

This specifies the alignment for an object of aggregate type.

+
+
F<type><abi>

This specifies the alignment for function pointers. +The options for <type> are:

+
    +
  • i: The alignment of function pointers is independent of the alignment +of functions, and is a multiple of <abi>.

  • +
  • n: The alignment of function pointers is a multiple of the explicit +alignment specified on the function, and is a multiple of <abi>.

  • +
+
+
m:<mangling>

If present, specifies that llvm names are mangled in the output. Symbols +prefixed with the mangling escape character \01 are passed through +directly to the assembler without the escape character. The mangling style +options are

+
    +
  • e: ELF mangling: Private symbols get a .L prefix.

  • +
  • m: Mips mangling: Private symbols get a $ prefix.

  • +
  • o: Mach-O mangling: Private symbols get an L prefix. Other +symbols get a _ prefix.

  • +
  • x: Windows x86 COFF mangling: Private symbols get the usual prefix. +Regular C symbols get a _ prefix. Functions with __stdcall, +__fastcall, and __vectorcall have custom mangling that appends +@N where N is the number of bytes used to pass parameters. C++ symbols +starting with ? are not mangled in any way.

  • +
  • w: Windows COFF mangling: Similar to x, except that normal C +symbols do not receive a _ prefix.

  • +
  • a: XCOFF mangling: Private symbols get an L.. prefix.

  • +
+
+
n<size1>:<size2>:<size3>...

This specifies a set of native integer widths for the target CPU in +bits. For example, it might contain n32 for 32-bit PowerPC, +n32:64 for PowerPC 64, or n8:16:32:64 for X86-64. Elements of +this set are considered to support most general arithmetic operations +efficiently.

+
+
ni:<address space0>:<address space1>:<address space2>...

This specifies pointer types with the specified address spaces +as Non-Integral Pointer Types. The 0 +address space cannot be specified as non-integral.

+
+
+

On every specification that takes a <abi>:<pref>, specifying the +<pref> alignment is optional. If omitted, the preceding : +should be omitted too and <pref> will be equal to <abi>.

+

When constructing the data layout for a given target, LLVM starts with a +default set of specifications which are then (possibly) overridden by +the specifications in the datalayout keyword. The default +specifications are given in this list:

+
    +
  • E - big endian

  • +
  • p:64:64:64 - 64-bit pointers with 64-bit alignment.

  • +
  • p[n]:64:64:64 - Other address spaces are assumed to be the +same as the default address space.

  • +
  • S0 - natural stack alignment is unspecified

  • +
  • i1:8:8 - i1 is 8-bit (byte) aligned

  • +
  • i8:8:8 - i8 is 8-bit (byte) aligned

  • +
  • i16:16:16 - i16 is 16-bit aligned

  • +
  • i32:32:32 - i32 is 32-bit aligned

  • +
  • i64:32:64 - i64 has ABI alignment of 32-bits but preferred +alignment of 64-bits

  • +
  • f16:16:16 - half is 16-bit aligned

  • +
  • f32:32:32 - float is 32-bit aligned

  • +
  • f64:64:64 - double is 64-bit aligned

  • +
  • f128:128:128 - quad is 128-bit aligned

  • +
  • v64:64:64 - 64-bit vector is 64-bit aligned

  • +
  • v128:128:128 - 128-bit vector is 128-bit aligned

  • +
  • a:0:64 - aggregates are 64-bit aligned

  • +
+

When LLVM is determining the alignment for a given type, it uses the +following rules:

+
    +
  1. If the type sought is an exact match for one of the specifications, +that specification is used.

  2. +
  3. If no match is found, and the type sought is an integer type, then +the smallest integer type that is larger than the bitwidth of the +sought type is used. If none of the specifications are larger than +the bitwidth then the largest integer type is used. For example, +given the default specifications above, the i7 type will use the +alignment of i8 (next largest) while both i65 and i256 will use the +alignment of i64 (largest specified).

  4. +
  5. If no match is found, and the type sought is a vector type, then the +largest vector type that is smaller than the sought vector type will +be used as a fall back. This happens because <128 x double> can be +implemented in terms of 64 <2 x double>, for example.

  6. +
+

The function of the data layout string may not be what you expect. +Notably, this is not a specification from the frontend of what alignment +the code generator should use.

+

Instead, if specified, the target data layout is required to match what +the ultimate code generator expects. This string is used by the +mid-level optimizers to improve code, and this only works if it matches +what the ultimate code generator uses. There is no way to generate IR +that does not embed this target-specific detail into the IR. If you +don’t specify the string, the default specifications will be used to +generate a Data Layout and the optimization phases will operate +accordingly and introduce target specificity into the IR with respect to +these default specifications.

+
+
+

Target Triple

+

A module may specify a target triple string that describes the target +host. The syntax for the target triple is simply:

+
target triple = "x86_64-apple-macosx10.7.0"
+
+
+

The target triple string consists of a series of identifiers delimited +by the minus sign character (‘-’). The canonical forms are:

+
ARCHITECTURE-VENDOR-OPERATING_SYSTEM
+ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT
+
+
+

This information is passed along to the backend so that it generates +code for the proper architecture. It’s possible to override this on the +command line with the -mtriple command line option.

+
+
+

Object Lifetime

+

A memory object, or simply object, is a region of a memory space that is +reserved by a memory allocation such as alloca, heap +allocation calls, and global variable definitions. +Once it is allocated, the bytes stored in the region can only be read or written +through a pointer that is based on the allocation +value. +If a pointer that is not based on the object tries to read or write to the +object, it is undefined behavior.

+

A lifetime of a memory object is a property that decides its accessibility. +Unless stated otherwise, a memory object is alive since its allocation, and +dead after its deallocation. +It is undefined behavior to access a memory object that isn’t alive, but +operations that don’t dereference it such as +getelementptr, ptrtoint and +icmp return a valid result. +This explains code motion of these instructions across operations that +impact the object’s lifetime. +A stack object’s lifetime can be explicitly specified using +llvm.lifetime.start and +llvm.lifetime.end intrinsic function calls.

+
+
+

Pointer Aliasing Rules

+

Any memory access must be done through a pointer value associated with +an address range of the memory access, otherwise the behavior is +undefined. Pointer values are associated with address ranges according +to the following rules:

+
    +
  • A pointer value is associated with the addresses associated with any +value it is based on.

  • +
  • An address of a global variable is associated with the address range +of the variable’s storage.

  • +
  • The result value of an allocation instruction is associated with the +address range of the allocated storage.

  • +
  • A null pointer in the default address-space is associated with no +address.

  • +
  • An undef value in any address-space is +associated with no address.

  • +
  • An integer constant other than zero or a pointer value returned from +a function not defined within LLVM may be associated with address +ranges allocated through mechanisms other than those provided by +LLVM. Such ranges shall not overlap with any ranges of addresses +allocated by mechanisms provided by LLVM.

  • +
+

A pointer value is based on another pointer value according to the +following rules:

+
    +
  • A pointer value formed from a scalar getelementptr operation is based on +the pointer-typed operand of the getelementptr.

  • +
  • The pointer in lane l of the result of a vector getelementptr operation +is based on the pointer in lane l of the vector-of-pointers-typed operand +of the getelementptr.

  • +
  • The result value of a bitcast is based on the operand of the +bitcast.

  • +
  • A pointer value formed by an inttoptr is based on all pointer +values that contribute (directly or indirectly) to the computation of +the pointer’s value.

  • +
  • The “based on” relationship is transitive.

  • +
+

Note that this definition of “based” is intentionally similar to the +definition of “based” in C99, though it is slightly weaker.

+

LLVM IR does not associate types with memory. The result type of a +load merely indicates the size and alignment of the memory from +which to load, as well as the interpretation of the value. The first +operand type of a store similarly only indicates the size and +alignment of the store.

+

Consequently, type-based alias analysis, aka TBAA, aka +-fstrict-aliasing, is not applicable to general unadorned LLVM IR. +Metadata may be used to encode additional information +which specialized optimization passes may use to implement type-based +alias analysis.

+
+
+

Pointer Capture

+

Given a function call and a pointer that is passed as an argument or stored in +the memory before the call, a pointer is captured by the call if it makes a +copy of any part of the pointer that outlives the call. +To be precise, a pointer is captured if one or more of the following conditions +hold:

+
    +
  1. The call stores any bit of the pointer carrying information into a place, +and the stored bits can be read from the place by the caller after this call +exits.

  2. +
+
@glb  = global i8* null
+@glb2 = global i8* null
+@glb3 = global i8* null
+@glbi = global i32 0
+
+define i8* @f(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e) {
+  store i8* %a, i8** @glb ; %a is captured by this call
+
+  store i8* %b,   i8** @glb2 ; %b isn't captured because the stored value is overwritten by the store below
+  store i8* null, i8** @glb2
+
+  store i8* %c,   i8** @glb3
+  call void @g() ; If @g makes a copy of %c that outlives this call (@f), %c is captured
+  store i8* null, i8** @glb3
+
+  %i = ptrtoint i8* %d to i64
+  %j = trunc i64 %i to i32
+  store i32 %j, i32* @glbi ; %d is captured
+
+  ret i8* %e ; %e is captured
+}
+
+
+
    +
  1. The call stores any bit of the pointer carrying information into a place, +and the stored bits can be safely read from the place by another thread via +synchronization.

  2. +
+
@lock = global i1 true
+
+define void @f(i8* %a) {
+  store i8* %a, i8** @glb
+  store atomic i1 false, i1* @lock release ; %a is captured because another thread can safely read @glb
+  store i8* null, i8** @glb
+  ret void
+}
+
+
+
    +
  1. The call’s behavior depends on any bit of the pointer carrying information.

  2. +
+
@glb = global i8 0
+
+define void @f(i8* %a) {
+  %c = icmp eq i8* %a, @glb
+  br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a
+BB_EXIT:
+  call void @exit()
+  unreachable
+BB_CONTINUE:
+  ret void
+}
+
+
+
    +
  1. The pointer is used in a volatile access as its address.

  2. +
+
+
+

Volatile Memory Accesses

+

Certain memory accesses, such as load’s, +store’s, and llvm.memcpy’s may be +marked volatile. The optimizers must not change the number of +volatile operations or change their order of execution relative to other +volatile operations. The optimizers may change the order of volatile +operations relative to non-volatile operations. This is not Java’s +“volatile” and has no cross-thread synchronization behavior.

+

A volatile load or store may have additional target-specific semantics. +Any volatile operation can have side effects, and any volatile operation +can read and/or modify state which is not accessible via a regular load +or store in this module. Volatile operations may use addresses which do +not point to memory (like MMIO registers). This means the compiler may +not use a volatile operation to prove a non-volatile access to that +address has defined behavior.

+

The allowed side-effects for volatile accesses are limited. If a +non-volatile store to a given address would be legal, a volatile +operation may modify the memory at that address. A volatile operation +may not modify any other memory accessible by the module being compiled. +A volatile operation may not call any code in the current module.

+

The compiler may assume execution will continue after a volatile operation, +so operations which modify memory or may have undefined behavior can be +hoisted past a volatile operation.

+

As an exception to the preceding rule, the compiler may not assume execution +will continue after a volatile store operation. This restriction is necessary +to support the somewhat common pattern in C of intentionally storing to an +invalid pointer to crash the program. In the future, it might make sense to +allow frontends to control this behavior.

+

IR-level volatile loads and stores cannot safely be optimized into llvm.memcpy +or llvm.memmove intrinsics even when those intrinsics are flagged volatile. +Likewise, the backend should never split or merge target-legal volatile +load/store instructions. Similarly, IR-level volatile loads and stores cannot +change from integer to floating-point or vice versa.

+
+

Rationale

+

Platforms may rely on volatile loads and stores of natively supported +data width to be executed as a single instruction. For example, in C +this holds for an l-value of volatile primitive type with native +hardware support, but not necessarily for aggregate types. The +frontend upholds these expectations, which are intentionally +unspecified in the IR. The rules above ensure that IR transformations +do not violate the frontend’s contract with the language.

+
+
+
+

Memory Model for Concurrent Operations

+

The LLVM IR does not define any way to start parallel threads of +execution or to register signal handlers. Nonetheless, there are +platform-specific ways to create them, and we define LLVM IR’s behavior +in their presence. This model is inspired by the C++0x memory model.

+

For a more informal introduction to this model, see the LLVM Atomic Instructions and Concurrency Guide.

+

We define a happens-before partial order as the least partial order +that

+
    +
  • Is a superset of single-thread program order, and

  • +
  • When a synchronizes-with b, includes an edge from a to +b. Synchronizes-with pairs are introduced by platform-specific +techniques, like pthread locks, thread creation, thread joining, +etc., and by atomic instructions. (See also Atomic Memory Ordering +Constraints).

  • +
+

Note that program order does not introduce happens-before edges +between a thread and signals executing inside that thread.

+

Every (defined) read operation (load instructions, memcpy, atomic +loads/read-modify-writes, etc.) R reads a series of bytes written by +(defined) write operations (store instructions, atomic +stores/read-modify-writes, memcpy, etc.). For the purposes of this +section, initialized globals are considered to have a write of the +initializer which is atomic and happens before any other read or write +of the memory in question. For each byte of a read R, R_byte +may see any write to the same byte, except:

+
    +
  • If write_1 happens before write_2, and +write_2 happens before R_byte, then +R_byte does not see write_1.

  • +
  • If R_byte happens before write_3, then +R_byte does not see write_3.

  • +
+

Given that definition, R_byte is defined as follows:

+
    +
  • If R is volatile, the result is target-dependent. (Volatile is +supposed to give guarantees which can support sig_atomic_t in +C/C++, and may be used for accesses to addresses that do not behave +like normal memory. It does not generally provide cross-thread +synchronization.)

  • +
  • Otherwise, if there is no write to the same byte that happens before +R_byte, R_byte returns undef for that byte.

  • +
  • Otherwise, if R_byte may see exactly one write, +R_byte returns the value written by that write.

  • +
  • Otherwise, if R is atomic, and all the writes R_byte may +see are atomic, it chooses one of the values written. See the Atomic +Memory Ordering Constraints section for additional +constraints on how the choice is made.

  • +
  • Otherwise R_byte returns undef.

  • +
+

R returns the value composed of the series of bytes it read. This +implies that some bytes within the value may be undef without +the entire value being undef. Note that this only defines the +semantics of the operation; it doesn’t mean that targets will emit more +than one instruction to read the series of bytes.

+

Note that in cases where none of the atomic intrinsics are used, this +model places only one restriction on IR transformations on top of what +is required for single-threaded execution: introducing a store to a byte +which might not otherwise be stored is not allowed in general. +(Specifically, in the case where another thread might write to and read +from an address, introducing a store can change a load that may see +exactly one write into a load that may see multiple writes.)

+
+
+

Atomic Memory Ordering Constraints

+

Atomic instructions (cmpxchg, +atomicrmw, fence, +atomic load, and atomic store) take +ordering parameters that determine which other atomic instructions on +the same address they synchronize with. These semantics are borrowed +from Java and C++0x, but are somewhat more colloquial. If these +descriptions aren’t precise enough, check those specs (see spec +references in the atomics guide). +fence instructions treat these orderings somewhat +differently since they don’t take an address. See that instruction’s +documentation for details.

+

For a simpler introduction to the ordering constraints, see the +LLVM Atomic Instructions and Concurrency Guide.

+
+
unordered

The set of values that can be read is governed by the happens-before +partial order. A value cannot be read unless some operation wrote +it. This is intended to provide a guarantee strong enough to model +Java’s non-volatile shared variables. This ordering cannot be +specified for read-modify-write operations; it is not strong enough +to make them atomic in any interesting way.

+
+
monotonic

In addition to the guarantees of unordered, there is a single +total order for modifications by monotonic operations on each +address. All modification orders must be compatible with the +happens-before order. There is no guarantee that the modification +orders can be combined to a global total order for the whole program +(and this often will not be possible). The read in an atomic +read-modify-write operation (cmpxchg and +atomicrmw) reads the value in the modification +order immediately before the value it writes. If one atomic read +happens before another atomic read of the same address, the later +read must see the same value or a later value in the address’s +modification order. This disallows reordering of monotonic (or +stronger) operations on the same address. If an address is written +monotonic-ally by one thread, and other threads monotonic-ally +read that address repeatedly, the other threads must eventually see +the write. This corresponds to the C++0x/C1x +memory_order_relaxed.

+
+
acquire

In addition to the guarantees of monotonic, a +synchronizes-with edge may be formed with a release operation. +This is intended to model C++’s memory_order_acquire.

+
+
release

In addition to the guarantees of monotonic, if this operation +writes a value which is subsequently read by an acquire +operation, it synchronizes-with that operation. (This isn’t a +complete description; see the C++0x definition of a release +sequence.) This corresponds to the C++0x/C1x +memory_order_release.

+
+
acq_rel (acquire+release)

Acts as both an acquire and release operation on its +address. This corresponds to the C++0x/C1x memory_order_acq_rel.

+
+
seq_cst (sequentially consistent)

In addition to the guarantees of acq_rel (acquire for an +operation that only reads, release for an operation that only +writes), there is a global total order on all +sequentially-consistent operations on all addresses, which is +consistent with the happens-before partial order and with the +modification orders of all the affected addresses. Each +sequentially-consistent read sees the last preceding write to the +same address in this global order. This corresponds to the C++0x/C1x +memory_order_seq_cst and Java volatile.

+
+
+

If an atomic operation is marked syncscope("singlethread"), it only +synchronizes with and only participates in the seq_cst total orderings of +other operations running in the same thread (for example, in signal handlers).

+

If an atomic operation is marked syncscope("<target-scope>"), where +<target-scope> is a target specific synchronization scope, then it is target +dependent if it synchronizes with and participates in the seq_cst total +orderings of other operations.

+

Otherwise, an atomic operation that is not marked syncscope("singlethread") +or syncscope("<target-scope>") synchronizes with and participates in the +seq_cst total orderings of other operations that are not marked +syncscope("singlethread") or syncscope("<target-scope>").

+
+
+

Floating-Point Environment

+

The default LLVM floating-point environment assumes that floating-point +instructions do not have side effects. Results assume the round-to-nearest +rounding mode. No floating-point exception state is maintained in this +environment. Therefore, there is no attempt to create or preserve invalid +operation (SNaN) or division-by-zero exceptions.

+

The benefit of this exception-free assumption is that floating-point +operations may be speculated freely without any other fast-math relaxations +to the floating-point model.

+

Code that requires different behavior than this should use the +Constrained Floating-Point Intrinsics.

+
+
+

Fast-Math Flags

+

LLVM IR floating-point operations (fneg, fadd, +fsub, fmul, fdiv, +frem, fcmp), phi, +select and call +may use the following flags to enable otherwise unsafe +floating-point transformations.

+
+
nnan

No NaNs - Allow optimizations to assume the arguments and result are not +NaN. If an argument is a nan, or the result would be a nan, it produces +a poison value instead.

+
+
ninf

No Infs - Allow optimizations to assume the arguments and result are not ++/-Inf. If an argument is +/-Inf, or the result would be +/-Inf, it +produces a poison value instead.

+
+
nsz

No Signed Zeros - Allow optimizations to treat the sign of a zero +argument or result as insignificant. This does not imply that -0.0 +is poison and/or guaranteed to not exist in the operation.

+
+
arcp

Allow Reciprocal - Allow optimizations to use the reciprocal of an +argument rather than perform division.

+
+
contract

Allow floating-point contraction (e.g. fusing a multiply followed by an +addition into a fused multiply-and-add). This does not enable reassociating +to form arbitrary contractions. For example, (a*b) + (c*d) + e cannot +be transformed into (a*b) + ((c*d) + e) to create two fma operations.

+
+
afn

Approximate functions - Allow substitution of approximate calculations for +functions (sin, log, sqrt, etc). See floating-point intrinsic definitions +for places where this can apply to LLVM’s intrinsic math functions.

+
+
reassoc

Allow reassociation transformations for floating-point instructions. +This may dramatically change results in floating-point.

+
+
fast

This flag implies all of the others.

+
+
+
+
+

Use-list Order Directives

+

Use-list directives encode the in-memory order of each use-list, allowing the +order to be recreated. <order-indexes> is a comma-separated list of +indexes that are assigned to the referenced value’s uses. The referenced +value’s use-list is immediately sorted by these indexes.

+

Use-list directives may appear at function scope or global scope. They are not +instructions, and have no effect on the semantics of the IR. When they’re at +function scope, they must appear after the terminator of the final basic block.

+

If basic blocks have their address taken via blockaddress() expressions, +uselistorder_bb can be used to reorder their use-lists from outside their +function’s scope.

+
+
Syntax
+

+
+
uselistorder <ty> <value>, { <order-indexes> }
+uselistorder_bb @function, %block { <order-indexes> }
+
+
+
+
Examples
+

+
+
define void @foo(i32 %arg1, i32 %arg2) {
+entry:
+  ; ... instructions ...
+bb:
+  ; ... instructions ...
+
+  ; At function scope.
+  uselistorder i32 %arg1, { 1, 0, 2 }
+  uselistorder label %bb, { 1, 0 }
+}
+
+; At global scope.
+uselistorder i32* @global, { 1, 2, 0 }
+uselistorder i32 7, { 1, 0 }
+uselistorder i32 (i32) @bar, { 1, 0 }
+uselistorder_bb @foo, %bb, { 5, 1, 3, 2, 0, 4 }
+
+
+
+
+

Source Filename

+

The source filename string is set to the original module identifier, +which will be the name of the compiled source file when compiling from +source through the clang front end, for example. It is then preserved through +the IR and bitcode.

+

This is currently necessary to generate a consistent unique global +identifier for local functions used in profile data, which prepends the +source file name to the local function name.

+

The syntax for the source file name is simply:

+
source_filename = "/path/to/source.c"
+
+
+
+
+
+

Type System

+

The LLVM type system is one of the most important features of the +intermediate representation. Being typed enables a number of +optimizations to be performed on the intermediate representation +directly, without having to do extra analyses on the side before the +transformation. A strong type system makes it easier to read the +generated code and enables novel analyses and transformations that are +not feasible to perform on normal three address code representations.

+
+

Void Type

+
+
Overview
+

+
+

The void type does not represent any value and has no size.

+
+
Syntax
+

+
+
void
+
+
+
+
+

Function Type

+
+
Overview
+

+
+

The function type can be thought of as a function signature. It consists of a +return type and a list of formal parameter types. The return type of a function +type is a void type or first class type — except for label +and metadata types.

+
+
Syntax
+

+
+
<returntype> (<parameter list>)
+
+
+

…where ‘<parameter list>’ is a comma-separated list of type +specifiers. Optionally, the parameter list may include a type ..., which +indicates that the function takes a variable number of arguments. Variable +argument functions can access their arguments with the variable argument +handling intrinsic functions. ‘<returntype>’ is any type +except label and metadata.

+
+
Examples
+

+
+ ++++ + + + + + + + + + + + + + + +

i32 (i32)

function taking an i32, returning an i32

float (i16, i32 *) *

Pointer to a function that takes an i16 and a pointer to i32, returning float.

i32 (i8*, ...)

A vararg function that takes at least one pointer to i8 (char in C), which returns an integer. This is the signature for printf in LLVM.

{i32, i32} (i32)

A function taking an i32, returning a structure containing two i32 values

+
+
+

First Class Types

+

The first class types are perhaps the most important. +Values of these types are the only ones which can be produced by +instructions.

+
+

Single Value Types

+

These are the types that are valid in registers from CodeGen’s perspective.

+
+
Integer Type
+
+
Overview
+

+
+

The integer type is a very simple type that simply specifies an +arbitrary bit width for the integer type desired. Any bit width from 1 +bit to 2^23 (about 8 million) can be specified.

+
+
Syntax
+

+
+
iN
+
+
+

The number of bits the integer will occupy is specified by the N +value.

+
+
Examples:
+ ++++ + + + + + + + + + + + +

i1

a single-bit integer.

i32

a 32-bit integer.

i1942652

a really big integer of over 1 million bits.

+
+
+
+
Floating-Point Types
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Type

Description

half

16-bit floating-point value

bfloat

16-bit “brain” floating-point value (7-bit significand). Provides the +same number of exponent bits as float, so that it matches its dynamic +range, but with greatly reduced precision. Used in Intel’s AVX-512 BF16 +extensions and Arm’s ARMv8.6-A extensions, among others.

float

32-bit floating-point value

double

64-bit floating-point value

fp128

128-bit floating-point value (113-bit significand)

x86_fp80

80-bit floating-point value (X87)

ppc_fp128

128-bit floating-point value (two 64-bits)

+

The binary formats of half, float, double, and fp128 correspond to the +IEEE-754-2008 specifications for binary16, binary32, binary64, and binary128 +respectively.

+
+
+
X86_amx Type
+
+
Overview
+

+
+

The x86_amx type represents a value held in an AMX tile register on an x86 +machine. The operations allowed on it are quite limited. Only a few intrinsics +are allowed: stride load and store, zero and dot product. No instruction is +allowed for this type. There are no arguments, arrays, pointers, vectors +or constants of this type.

+
+
Syntax
+

+
+
x86_amx
+
+
+
+
+
X86_mmx Type
+
+
Overview
+

+
+

The x86_mmx type represents a value held in an MMX register on an x86 +machine. The operations allowed on it are quite limited: parameters and +return values, load and store, and bitcast. User-specified MMX +instructions are represented as intrinsic or asm calls with arguments +and/or results of this type. There are no arrays, vectors or constants +of this type.

+
+
Syntax
+

+
+
x86_mmx
+
+
+
+
+
Pointer Type
+
+
Overview
+

+
+

The pointer type is used to specify memory locations. Pointers are +commonly used to reference objects in memory.

+

Pointer types may have an optional address space attribute defining the +numbered address space where the pointed-to object resides. The default +address space is number zero. The semantics of non-zero address spaces +are target-specific.

+

Note that LLVM does not permit pointers to void (void*) nor does it +permit pointers to labels (label*). Use i8* instead.

+

LLVM is in the process of transitioning to +opaque pointers. +Opaque pointers do not have a pointee type. Rather, instructions +interacting through pointers specify the type of the underlying memory +they are interacting with. Opaque pointers are still in the process of +being worked on and are not complete.

+
+
Syntax
+

+
+
<type> *
+ptr
+
+
+
+
Examples
+

+
+ ++++ + + + + + + + + + + + + + + + + + +

[4 x i32]*

A pointer to array of four i32 values.

i32 (i32*) *

A pointer to a function that takes an i32*, returning an i32.

i32 addrspace(5)*

A pointer to an i32 value that resides in address space 5.

ptr

An opaque pointer type to a value that resides in address space 0.

ptr addrspace(5)

An opaque pointer type to a value that resides in address space 5.

+
+
+
Vector Type
+
+
Overview
+

+
+

A vector type is a simple derived type that represents a vector of +elements. Vector types are used when multiple primitive data are +operated on in parallel using a single instruction (SIMD). A vector type +requires a size (number of elements), an underlying primitive data type, +and a scalable property to represent vectors where the exact hardware +vector length is unknown at compile time. Vector types are considered +first class.

+
+
Memory Layout
+

+
+

In general vector elements are laid out in memory in the same way as +array types. Such an analogy works fine as long as the vector +elements are byte sized. However, when the elements of the vector aren’t byte +sized it gets a bit more complicated. One way to describe the layout is by +describing what happens when a vector such as <N x iM> is bitcast to an +integer type with N*M bits, and then following the rules for storing such an +integer to memory.

+

A bitcast from a vector type to a scalar integer type will see the elements +being packed together (without padding). The order in which elements are +inserted in the integer depends on endianness. For little endian element zero +is put in the least significant bits of the integer, and for big endian +element zero is put in the most significant bits.

+

Using a vector such as <i4 1, i4 2, i4 3, i4 5> as an example, together +with the analogy that we can replace a vector store by a bitcast followed by +an integer store, we get this for big endian:

+
%val = bitcast <4 x i4> <i4 1, i4 2, i4 3, i4 5> to i16
+
+; Bitcasting from a vector to an integral type can be seen as
+; concatenating the values:
+;   %val now has the hexadecimal value 0x1235.
+
+store i16 %val, i16* %ptr
+
+; In memory the content will be (8-bit addressing):
+;
+;    [%ptr + 0]: 00010010  (0x12)
+;    [%ptr + 1]: 00110101  (0x35)
+
+
+

The same example for little endian:

+
%val = bitcast <4 x i4> <i4 1, i4 2, i4 3, i4 5> to i16
+
+; Bitcasting from a vector to an integral type can be seen as
+; concatenating the values:
+;   %val now has the hexadecimal value 0x5321.
+
+store i16 %val, i16* %ptr
+
+; In memory the content will be (8-bit addressing):
+;
+;    [%ptr + 0]: 01010011  (0x53)
+;    [%ptr + 1]: 00100001  (0x21)
+
+
+

When <N*M> isn’t evenly divisible by the byte size the exact memory layout +is unspecified (just like it is for an integral type of the same size). This +is because different targets could put the padding at different positions when +the type size is smaller than the type’s store size.

+
+
Syntax
+

+
+
< <# elements> x <elementtype> >          ; Fixed-length vector
+< vscale x <# elements> x <elementtype> > ; Scalable vector
+
+
+

The number of elements is a constant integer value larger than 0; +elementtype may be any integer, floating-point or pointer type. Vectors +of size zero are not allowed. For scalable vectors, the total number of +elements is a constant multiple (called vscale) of the specified number +of elements; vscale is a positive integer that is unknown at compile time +and the same hardware-dependent constant for all scalable vectors at run +time. The size of a specific scalable vector type is thus constant within +IR, even if the exact size in bytes cannot be determined until run time.

+
+
Examples
+

+
+ ++++ + + + + + + + + + + + + + + + + + +

<4 x i32>

Vector of 4 32-bit integer values.

<8 x float>

Vector of 8 32-bit floating-point values.

<2 x i64>

Vector of 2 64-bit integer values.

<4 x i64*>

Vector of 4 pointers to 64-bit integer values.

<vscale x 4 x i32>

Vector with a multiple of 4 32-bit integer values.

+
+
+
+

Label Type

+
+
Overview
+

+
+

The label type represents code labels.

+
+
Syntax
+

+
+
label
+
+
+
+
+

Token Type

+
+
Overview
+

+
+

The token type is used when a value is associated with an instruction +but all uses of the value must not attempt to introspect or obscure it. +As such, it is not appropriate to have a phi or +select of type token.

+
+
Syntax
+

+
+
token
+
+
+
+
+

Metadata Type

+
+
Overview
+

+
+

The metadata type represents embedded metadata. No derived types may be +created from metadata except for function arguments.

+
+
Syntax
+

+
+
metadata
+
+
+
+
+

Aggregate Types

+

Aggregate Types are a subset of derived types that can contain multiple +member types. Arrays and structs are +aggregate types. Vectors are not considered to be +aggregate types.

+
+
Array Type
+
+
Overview
+

+
+

The array type is a very simple derived type that arranges elements +sequentially in memory. The array type requires a size (number of +elements) and an underlying data type.

+
+
Syntax
+

+
+
[<# elements> x <elementtype>]
+
+
+

The number of elements is a constant integer value; elementtype may +be any type with a size.

+
+
Examples
+

+
+ ++++ + + + + + + + + + + + +

[40 x i32]

Array of 40 32-bit integer values.

[41 x i32]

Array of 41 32-bit integer values.

[4 x i8]

Array of 4 8-bit integer values.

+

Here are some examples of multidimensional arrays:

+ ++++ + + + + + + + + + + + +

[3 x [4 x i32]]

3x4 array of 32-bit integer values.

[12 x [10 x float]]

12x10 array of single precision floating-point values.

[2 x [3 x [4 x i16]]]

2x3x4 array of 16-bit integer values.

+

There is no restriction on indexing beyond the end of the array implied +by a static type (though there are restrictions on indexing beyond the +bounds of an allocated object in some cases). This means that +single-dimension ‘variable sized array’ addressing can be implemented in +LLVM with a zero length array type. An implementation of ‘pascal style +arrays’ in LLVM could use the type “{ i32, [0 x float]}”, for +example.

+
+
+
Structure Type
+
+
Overview
+

+
+

The structure type is used to represent a collection of data members +together in memory. The elements of a structure may be any type that has +a size.

+

Structures in memory are accessed using ‘load’ and ‘store’ by +getting a pointer to a field with the ‘getelementptr’ instruction. +Structures in registers are accessed using the ‘extractvalue’ and +‘insertvalue’ instructions.

+

Structures may optionally be “packed” structures, which indicate that +the alignment of the struct is one byte, and that there is no padding +between the elements. In non-packed structs, padding between field types +is inserted as defined by the DataLayout string in the module, which is +required to match what the underlying code generator expects.

+

Structures can either be “literal” or “identified”. A literal structure +is defined inline with other types (e.g. {i32, i32}*) whereas +identified types are always defined at the top level with a name. +Literal types are uniqued by their contents and can never be recursive +or opaque since there is no way to write one. Identified types can be +recursive, can be opaqued, and are never uniqued.

+
+
Syntax
+

+
+
%T1 = type { <type list> }     ; Identified normal struct type
+%T2 = type <{ <type list> }>   ; Identified packed struct type
+
+
+
+
Examples
+

+
+ ++++ + + + + + + + + + + + +

{ i32, i32, i32 }

A triple of three i32 values

{ float, i32 (i32) * }

A pair, where the first element is a float and the second element is a pointer to a function that takes an i32, returning an i32.

<{ i8, i32 }>

A packed struct known to be 5 bytes in size.

+
+
+
Opaque Structure Types
+
+
Overview
+

+
+

Opaque structure types are used to represent structure types that +do not have a body specified. This corresponds (for example) to the C +notion of a forward declared structure. They can be named (%X) or +unnamed (%52).

+
+
Syntax
+

+
+
%X = type opaque
+%52 = type opaque
+
+
+
+
Examples
+

+
+ ++++ + + + + + +

opaque

An opaque type.

+
+
+
+
+
+

Constants

+

LLVM has several different basic types of constants. This section +describes them all and their syntax.

+
+

Simple Constants

+
+
Boolean constants

The two strings ‘true’ and ‘false’ are both valid constants +of the i1 type.

+
+
Integer constants

Standard integers (such as ‘4’) are constants of the +integer type. Negative numbers may be used with +integer types.

+
+
Floating-point constants

Floating-point constants use standard decimal notation (e.g. +123.421), exponential notation (e.g. 1.23421e+2), or a more precise +hexadecimal notation (see below). The assembler requires the exact +decimal value of a floating-point constant. For example, the +assembler accepts 1.25 but rejects 1.3 because 1.3 is a repeating +decimal in binary. Floating-point constants must have a +floating-point type.

+
+
Null pointer constants

The identifier ‘null’ is recognized as a null pointer constant +and must be of pointer type.

+
+
Token constants

The identifier ‘none’ is recognized as an empty token constant +and must be of token type.

+
+
+

The one non-intuitive notation for constants is the hexadecimal form of +floating-point constants. For example, the form +‘double    0x432ff973cafa8000’ is equivalent to (but harder to read +than) ‘double 4.5e+15’. The only time hexadecimal floating-point +constants are required (and the only time that they are generated by the +disassembler) is when a floating-point constant must be emitted but it +cannot be represented as a decimal floating-point number in a reasonable +number of digits. For example, NaN’s, infinities, and other special +values are represented in their IEEE hexadecimal format so that assembly +and disassembly do not cause any bits to change in the constants.

+

When using the hexadecimal form, constants of types bfloat, half, float, and +double are represented using the 16-digit form shown above (which matches the +IEEE754 representation for double); bfloat, half and float values must, however, +be exactly representable as bfloat, IEEE 754 half, and IEEE 754 single +precision respectively. Hexadecimal format is always used for long double, and +there are three forms of long double. The 80-bit format used by x86 is +represented as 0xK followed by 20 hexadecimal digits. The 128-bit format +used by PowerPC (two adjacent doubles) is represented by 0xM followed by 32 +hexadecimal digits. The IEEE 128-bit format is represented by 0xL followed +by 32 hexadecimal digits. Long doubles will only work if they match the long +double format on your target. The IEEE 16-bit format (half precision) is +represented by 0xH followed by 4 hexadecimal digits. The bfloat 16-bit +format is represented by 0xR followed by 4 hexadecimal digits. All +hexadecimal formats are big-endian (sign bit at the left).

+

There are no constants of type x86_mmx and x86_amx.

+
+
+

Complex Constants

+

Complex constants are a (potentially recursive) combination of simple +constants and smaller complex constants.

+
+
Structure constants

Structure constants are represented with notation similar to +structure type definitions (a comma separated list of elements, +surrounded by braces ({})). For example: +“{ i32 4, float 17.0, i32* @G }”, where “@G” is declared as +“@G = external global i32”. Structure constants must have +structure type, and the number and types of elements +must match those specified by the type.

+
+
Array constants

Array constants are represented with notation similar to array type +definitions (a comma separated list of elements, surrounded by +square brackets ([])). For example: +“[ i32 42, i32 11, i32 74 ]”. Array constants must have +array type, and the number and types of elements must +match those specified by the type. As a special case, character array +constants may also be represented as a double-quoted string using the c +prefix. For example: “c"Hello World\0A\00"”.

+
+
Vector constants

Vector constants are represented with notation similar to vector +type definitions (a comma separated list of elements, surrounded by +less-than/greater-than’s (<>)). For example: +“< i32 42, i32 11, i32 74, i32 100 >”. Vector constants +must have vector type, and the number and types of +elements must match those specified by the type.

+
+
Zero initialization

The string ‘zeroinitializer’ can be used to zero initialize a +value of any type, including scalar and +aggregate types. This is often used to avoid +having to print large zero initializers (e.g. for large arrays) and +is always exactly equivalent to using explicit zero initializers.

+
+
Metadata node

A metadata node is a constant tuple without types. For example: +“!{!0, !{!2, !0}, !"test"}”. Metadata can reference constant values, +for example: “!{!0, i32 0, i8* @global, i64 (i64)* @function, !"str"}”. +Unlike other typed constants that are meant to be interpreted as part of +the instruction stream, metadata is a place to attach additional +information such as debug info.

+
+
+
+
+

Global Variable and Function Addresses

+

The addresses of global variables and +functions are always implicitly valid +(link-time) constants. These constants are explicitly referenced when +the identifier for the global is used and always have +pointer type. For example, the following is a legal LLVM +file:

+
@X = global i32 17
+@Y = global i32 42
+@Z = global [2 x i32*] [ i32* @X, i32* @Y ]
+
+
+
+
+

Undefined Values

+

The string ‘undef’ can be used anywhere a constant is expected, and +indicates that the user of the value may receive an unspecified +bit-pattern. Undefined values may be of any type (other than ‘label’ +or ‘void’) and be used anywhere a constant is permitted.

+

Undefined values are useful because they indicate to the compiler that +the program is well defined no matter what value is used. This gives the +compiler more freedom to optimize. Here are some examples of +(potentially surprising) transformations that are valid (in pseudo IR):

+
  %A = add %X, undef
+  %B = sub %X, undef
+  %C = xor %X, undef
+Safe:
+  %A = undef
+  %B = undef
+  %C = undef
+
+
+

This is safe because all of the output bits are affected by the undef +bits. Any output bit can have a zero or one depending on the input bits.

+
  %A = or %X, undef
+  %B = and %X, undef
+Safe:
+  %A = -1
+  %B = 0
+Safe:
+  %A = %X  ;; By choosing undef as 0
+  %B = %X  ;; By choosing undef as -1
+Unsafe:
+  %A = undef
+  %B = undef
+
+
+

These logical operations have bits that are not always affected by the +input. For example, if %X has a zero bit, then the output of the +‘and’ operation will always be a zero for that bit, no matter what +the corresponding bit from the ‘undef’ is. As such, it is unsafe to +optimize or assume that the result of the ‘and’ is ‘undef’. +However, it is safe to assume that all bits of the ‘undef’ could be +0, and optimize the ‘and’ to 0. Likewise, it is safe to assume that +all the bits of the ‘undef’ operand to the ‘or’ could be set, +allowing the ‘or’ to be folded to -1.

+
  %A = select undef, %X, %Y
+  %B = select undef, 42, %Y
+  %C = select %X, %Y, undef
+Safe:
+  %A = %X     (or %Y)
+  %B = 42     (or %Y)
+  %C = %Y
+Unsafe:
+  %A = undef
+  %B = undef
+  %C = undef
+
+
+

This set of examples shows that undefined ‘select’ (and conditional +branch) conditions can go either way, but they have to come from one +of the two operands. In the %A example, if %X and %Y were +both known to have a clear low bit, then %A would have to have a +cleared low bit. However, in the %C example, the optimizer is +allowed to assume that the ‘undef’ operand could be the same as +%Y, allowing the whole ‘select’ to be eliminated.

+
  %A = xor undef, undef
+
+  %B = undef
+  %C = xor %B, %B
+
+  %D = undef
+  %E = icmp slt %D, 4
+  %F = icmp sge %D, 4
+
+Safe:
+  %A = undef
+  %B = undef
+  %C = undef
+  %D = undef
+  %E = undef
+  %F = undef
+
+
+

This example points out that two ‘undef’ operands are not +necessarily the same. This can be surprising to people (though it also matches +C semantics) who assume that “X^X” is always zero, even if +X is undefined. That assumption isn’t true for a number of reasons, but the +short answer is that an ‘undef’ “variable” can arbitrarily change +its value over its “live range”. This is true because the variable +doesn’t actually have a live range. Instead, the value is logically +read from arbitrary registers that happen to be around when needed, so +the value is not necessarily consistent over time. In fact, %A and +%C need to have the same semantics or the core LLVM “replace all +uses with” concept would not hold.

+

To ensure all uses of a given register observe the same value (even if +‘undef’), the freeze instruction can be used.

+
  %A = sdiv undef, %X
+  %B = sdiv %X, undef
+Safe:
+  %A = 0
+b: unreachable
+
+
+

These examples show the crucial difference between an undefined value +and undefined behavior. An undefined value (like ‘undef’) is +allowed to have an arbitrary bit-pattern. This means that the %A +operation can be constant folded to ‘0’, because the ‘undef’ +could be zero, and zero divided by any value is zero. +However, in the second example, we can make a more aggressive +assumption: because the undef is allowed to be an arbitrary value, +we are allowed to assume that it could be zero. Since a divide by zero +has undefined behavior, we are allowed to assume that the operation +does not execute at all. This allows us to delete the divide and all +code after it. Because the undefined operation “can’t happen”, the +optimizer can assume that it occurs in dead code.

+
a:  store undef -> %X
+b:  store %X -> undef
+Safe:
+a: <deleted>
+b: unreachable
+
+
+

A store of an undefined value can be assumed to not have any effect; +we can assume that the value is overwritten with bits that happen to +match what was already there. However, a store to an undefined +location could clobber arbitrary memory, therefore, it has undefined +behavior.

+

Branching on an undefined value is undefined behavior. +This explains optimizations that depend on branch conditions to construct +predicates, such as Correlated Value Propagation and Global Value Numbering. +In the case of a switch instruction, the branch condition should be frozen; otherwise, +it is undefined behavior.

+
Unsafe:
+  br undef, BB1, BB2 ; UB
+
+  %X = and i32 undef, 255
+  switch %X, label %ret [ .. ] ; UB
+
+  store undef, i8* %ptr
+  %X = load i8* %ptr ; %X is undef
+  switch i8 %X, label %ret [ .. ] ; UB
+
+Safe:
+  %X = or i8 undef, 255 ; always 255
+  switch i8 %X, label %ret [ .. ] ; Well-defined
+
+  %X = freeze i1 undef
+  br %X, BB1, BB2 ; Well-defined (non-deterministic jump)
+
+
+

This is also consistent with the behavior of MemorySanitizer. +MemorySanitizer, a detector of uses of uninitialized memory, +defines a branch with a condition that depends on an undef value (or +certain other values, e.g. a result of a load from heap-allocated +memory that has never been stored to) to have an externally visible +side effect. For this reason, functions with the sanitize_memory +attribute are not allowed to produce such branches “out of thin +air”. More strictly, an optimization that inserts a conditional branch +is only valid if in all executions where the branch condition has at +least one undefined bit, the same branch condition is evaluated in the +input IR as well.

+
+
+

Poison Values

+

A poison value is a result of an erroneous operation. +In order to facilitate speculative execution, many instructions do not +invoke immediate undefined behavior when provided with illegal operands, +and return a poison value instead. +The string ‘poison’ can be used anywhere a constant is expected, and +operations such as add with the nsw flag can produce +a poison value.

+

Poison value behavior is defined in terms of value dependence:

+
    +
  • Values other than phi nodes, select, and +freeze instructions depend on their operands.

  • +
  • Phi nodes depend on the operand corresponding to +their dynamic predecessor basic block.

  • +
  • Select instructions depend on their condition operand and +their selected operand.

  • +
  • Function arguments depend on the corresponding actual argument values +in the dynamic callers of their functions.

  • +
  • Call instructions depend on the ret +instructions that dynamically transfer control back to them.

  • +
  • Invoke instructions depend on the +ret, resume, or exception-throwing +call instructions that dynamically transfer control back to them.

  • +
  • Non-volatile loads and stores depend on the most recent stores to all +of the referenced memory addresses, following the order in the IR +(including loads and stores implied by intrinsics such as +@llvm.memcpy.)

  • +
  • An instruction with externally visible side effects depends on the +most recent preceding instruction with externally visible side +effects, following the order in the IR. (This includes volatile +operations.)

  • +
  • An instruction control-depends on a terminator +instruction if the terminator instruction has +multiple successors and the instruction is always executed when +control transfers to one of the successors, and may not be executed +when control is transferred to another.

  • +
  • Additionally, an instruction also control-depends on a terminator +instruction if the set of instructions it otherwise depends on would +be different if the terminator had transferred control to a different +successor.

  • +
  • Dependence is transitive.

  • +
  • Vector elements may be independently poisoned. Therefore, transforms +on instructions such as shufflevector must be careful to propagate +poison across values or elements only as allowed by the original code.

  • +
+

An instruction that depends on a poison value produces a poison value +itself. A poison value may be relaxed into an +undef value, which takes an arbitrary bit-pattern. +Propagation of poison can be stopped with the +freeze instruction.

+

This means that immediate undefined behavior occurs if a poison value is +used as an instruction operand that has any values that trigger undefined +behavior. Notably this includes (but is not limited to):

+
    +
  • The pointer operand of a load, store or +any other pointer dereferencing instruction (independent of address +space).

  • +
  • The divisor operand of a udiv, sdiv, urem or srem +instruction.

  • +
  • The condition operand of a br instruction.

  • +
  • The callee operand of a call or invoke +instruction.

  • +
  • The parameter operand of a call or invoke +instruction, when the function or invoking call site has a noundef +attribute in the corresponding position.

  • +
  • The operand of a ret instruction if the function or invoking +call site has a noundef attribute in the return value position.

  • +
+

Here are some examples:

+
entry:
+  %poison = sub nuw i32 0, 1           ; Results in a poison value.
+  %poison2 = sub i32 poison, 1         ; Also results in a poison value.
+  %still_poison = and i32 %poison, 0   ; 0, but also poison.
+  %poison_yet_again = getelementptr i32, i32* @h, i32 %still_poison
+  store i32 0, i32* %poison_yet_again  ; Undefined behavior due to
+                                       ; store to poison.
+
+  store i32 %poison, i32* @g           ; Poison value stored to memory.
+  %poison3 = load i32, i32* @g         ; Poison value loaded back from memory.
+
+  %narrowaddr = bitcast i32* @g to i16*
+  %wideaddr = bitcast i32* @g to i64*
+  %poison4 = load i16, i16* %narrowaddr ; Returns a poison value.
+  %poison5 = load i64, i64* %wideaddr   ; Returns a poison value.
+
+  %cmp = icmp slt i32 %poison, 0       ; Returns a poison value.
+  br i1 %cmp, label %end, label %end   ; undefined behavior
+
+end:
+
+
+
+
+

Well-Defined Values

+

Given a program execution, a value is well defined if the value does not +have an undef bit and is not poison in the execution. +An aggregate value or vector is well defined if its elements are well defined. +The padding of an aggregate isn’t considered, since it isn’t visible +without storing it into memory and loading it with a different type.

+

A constant of a single value, non-vector type is well +defined if it is neither an ‘undef’ constant nor a ‘poison’ constant. +The result of a freeze instruction is well defined regardless +of its operand.

+
+
+

Addresses of Basic Blocks

+

blockaddress(@function, %block)

+

The ‘blockaddress’ constant computes the address of the specified +basic block in the specified function.

+

It always has an i8 addrspace(P)* type, where P is the address space +of the function containing %block (usually addrspace(0)).

+

Taking the address of the entry block is illegal.

+

This value only has defined behavior when used as an operand to the +‘indirectbr’ or ‘callbr’ instruction, or +for comparisons against null. Pointer equality tests between label addresses +result in undefined behavior — though, again, comparison against null is ok, +and no label is equal to the null pointer. This may be passed around as an +opaque pointer sized value as long as the bits are not inspected. This +allows ptrtoint and arithmetic to be performed on these values so +long as the original value is reconstituted before the indirectbr or +callbr instruction.

+

Finally, some targets may provide defined semantics when using the value +as the operand to an inline assembly, but that is target specific.

+
+
+

DSO Local Equivalent

+

dso_local_equivalent @func

+

A ‘dso_local_equivalent’ constant represents a function which is +functionally equivalent to a given function, but is always defined in the +current linkage unit. The resulting pointer has the same type as the underlying +function. The resulting pointer is permitted, but not required, to be different +from a pointer to the function, and it may have different values in different +translation units.

+

The target function may not have extern_weak linkage.

+

dso_local_equivalent can be implemented as such:

+
    +
  • If the function has local linkage, hidden visibility, or is +dso_local, dso_local_equivalent can be implemented as simply a pointer +to the function.

  • +
  • dso_local_equivalent can be implemented with a stub that tail-calls the +function. Many targets support relocations that resolve at link time to either +a function or a stub for it, depending on if the function is defined within the +linkage unit; LLVM will use this when available. (This is commonly called a +“PLT stub”.) On other targets, the stub may need to be emitted explicitly.

  • +
+

This can be used wherever a dso_local instance of a function is needed without +needing to explicitly make the original function dso_local. An instance where +this can be used is for static offset calculations between a function and some other +dso_local symbol. This is especially useful for the Relative VTables C++ ABI, +where dynamic relocations for function pointers in VTables can be replaced with +static relocations for offsets between the VTable and virtual functions which +may not be dso_local.

+

This is currently only supported for ELF binary formats.

+
+
+

Constant Expressions

+

Constant expressions are used to allow expressions involving other +constants to be used as constants. Constant expressions may be of any +first class type and may involve any LLVM operation +that does not have side effects (e.g. load and call are not supported). +The following is the syntax for constant expressions:

+
+
trunc (CST to TYPE)

Perform the trunc operation on constants.

+
+
zext (CST to TYPE)

Perform the zext operation on constants.

+
+
sext (CST to TYPE)

Perform the sext operation on constants.

+
+
fptrunc (CST to TYPE)

Truncate a floating-point constant to another floating-point type. +The size of CST must be larger than the size of TYPE. Both types +must be floating-point.

+
+
fpext (CST to TYPE)

Floating-point extend a constant to another type. The size of CST +must be smaller than or equal to the size of TYPE. Both types must be +floating-point.

+
+
fptoui (CST to TYPE)

Convert a floating-point constant to the corresponding unsigned +integer constant. TYPE must be a scalar or vector integer type. CST +must be of scalar or vector floating-point type. Both CST and TYPE +must be scalars, or vectors of the same number of elements. If the +value won’t fit in the integer type, the result is a +poison value.

+
+
fptosi (CST to TYPE)

Convert a floating-point constant to the corresponding signed +integer constant. TYPE must be a scalar or vector integer type. CST +must be of scalar or vector floating-point type. Both CST and TYPE +must be scalars, or vectors of the same number of elements. If the +value won’t fit in the integer type, the result is a +poison value.

+
+
uitofp (CST to TYPE)

Convert an unsigned integer constant to the corresponding +floating-point constant. TYPE must be a scalar or vector floating-point +type. CST must be of scalar or vector integer type. Both CST and TYPE must +be scalars, or vectors of the same number of elements.

+
+
sitofp (CST to TYPE)

Convert a signed integer constant to the corresponding floating-point +constant. TYPE must be a scalar or vector floating-point type. +CST must be of scalar or vector integer type. Both CST and TYPE must +be scalars, or vectors of the same number of elements.

+
+
ptrtoint (CST to TYPE)

Perform the ptrtoint operation on constants.

+
+
inttoptr (CST to TYPE)

Perform the inttoptr operation on constants. +This one is really dangerous!

+
+
bitcast (CST to TYPE)

Convert a constant, CST, to another TYPE. +The constraints of the operands are the same as those for the +bitcast instruction.

+
+
addrspacecast (CST to TYPE)

Convert a constant pointer or constant vector of pointers, CST, to another +TYPE in a different address space. The constraints of the operands are the +same as those for the addrspacecast instruction.

+
+
getelementptr (TY, CSTPTR, IDX0, IDX1, ...), getelementptr inbounds (TY, CSTPTR, IDX0, IDX1, ...)

Perform the getelementptr operation on +constants. As with the getelementptr +instruction, the index list may have one or more indexes, which are +required to make sense for the type of “pointer to TY”.

+
+
select (COND, VAL1, VAL2)

Perform the select operation on constants.

+
+
icmp COND (VAL1, VAL2)

Perform the icmp operation on constants.

+
+
fcmp COND (VAL1, VAL2)

Perform the fcmp operation on constants.

+
+
extractelement (VAL, IDX)

Perform the extractelement operation on +constants.

+
+
insertelement (VAL, ELT, IDX)

Perform the insertelement operation on +constants.

+
+
shufflevector (VEC1, VEC2, IDXMASK)

Perform the shufflevector operation on +constants.

+
+
extractvalue (VAL, IDX0, IDX1, ...)

Perform the extractvalue operation on +constants. The index list is interpreted in a similar manner as +indices in a ‘getelementptr’ operation. At +least one index value must be specified.

+
+
insertvalue (VAL, ELT, IDX0, IDX1, ...)

Perform the insertvalue operation on constants. +The index list is interpreted in a similar manner as indices in a +‘getelementptr’ operation. At least one index +value must be specified.

+
+
OPCODE (LHS, RHS)

Perform the specified operation on the LHS and RHS constants. OPCODE +may be any of the binary or bitwise +binary operations. The constraints on operands are +the same as those for the corresponding instruction (e.g. no bitwise +operations on floating-point values are allowed).

+
+
+
+
+
+

Other Values

+
+

Inline Assembler Expressions

+

LLVM supports inline assembler expressions (as opposed to Module-Level +Inline Assembly) through the use of a special value. This value +represents the inline assembler as a template string (containing the +instructions to emit), a list of operand constraints (stored as a string), a +flag that indicates whether or not the inline asm expression has side effects, +and a flag indicating whether the function containing the asm needs to align its +stack conservatively.

+

The template string supports argument substitution of the operands using “$” +followed by a number, to indicate substitution of the given register/memory +location, as specified by the constraint string. “${NUM:MODIFIER}” may also +be used, where MODIFIER is a target-specific annotation for how to print the +operand (See Asm template argument modifiers).

+

A literal “$” may be included by using “$$” in the template. To include +other special characters into the output, the usual “\XX” escapes may be +used, just as in other strings. Note that after template substitution, the +resulting assembly string is parsed by LLVM’s integrated assembler unless it is +disabled – even when emitting a .s file – and thus must contain assembly +syntax known to LLVM.

+

LLVM also supports a few more substitutions useful for writing inline assembly:

+
    +
  • ${:uid}: Expands to a decimal integer unique to this inline assembly blob. +This substitution is useful when declaring a local label. Many standard +compiler optimizations, such as inlining, may duplicate an inline asm blob. +Adding a blob-unique identifier ensures that the two labels will not conflict +during assembly. This is used to implement GCC’s %= special format +string.

  • +
  • ${:comment}: Expands to the comment character of the current target’s +assembly dialect. This is usually #, but many targets use other strings, +such as ;, //, or !.

  • +
  • ${:private}: Expands to the assembler private label prefix. Labels with +this prefix will not appear in the symbol table of the assembled object. +Typically the prefix is L, but targets may use other strings. .L is +relatively popular.

  • +
+

LLVM’s support for inline asm is modeled closely on the requirements of Clang’s +GCC-compatible inline-asm support. Thus, the feature-set and the constraint and +modifier codes listed here are similar or identical to those in GCC’s inline asm +support. However, to be clear, the syntax of the template and constraint strings +described here is not the same as the syntax accepted by GCC and Clang, and, +while most constraint letters are passed through as-is by Clang, some get +translated to other codes when converting from the C source to the LLVM +assembly.

+

An example inline assembler expression is:

+
i32 (i32) asm "bswap $0", "=r,r"
+
+
+

Inline assembler expressions may only be used as the callee operand +of a call or an invoke instruction. +Thus, typically we have:

+
%X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
+
+
+

Inline asms with side effects not visible in the constraint list must be +marked as having side effects. This is done through the use of the +‘sideeffect’ keyword, like so:

+
call void asm sideeffect "eieio", ""()
+
+
+

In some cases inline asms will contain code that will not work unless +the stack is aligned in some way, such as calls or SSE instructions on +x86, yet will not contain code that does that alignment within the asm. +The compiler should make conservative assumptions about what the asm +might contain and should generate its usual stack alignment code in the +prologue if the ‘alignstack’ keyword is present:

+
call void asm alignstack "eieio", ""()
+
+
+

Inline asms also support using non-standard assembly dialects. The +assumed dialect is ATT. When the ‘inteldialect’ keyword is present, +the inline asm is using the Intel dialect. Currently, ATT and Intel are +the only supported dialects. An example is:

+
call void asm inteldialect "eieio", ""()
+
+
+

In the case that the inline asm might unwind the stack, +the ‘unwind’ keyword must be used, so that the compiler emits +unwinding information:

+
call void asm unwind "call func", ""()
+
+
+

If the inline asm unwinds the stack and isn’t marked with +the ‘unwind’ keyword, the behavior is undefined.

+

If multiple keywords appear, the ‘sideeffect’ keyword must come +first, the ‘alignstack’ keyword second, the ‘inteldialect’ keyword +third and the ‘unwind’ keyword last.

+
+

Inline Asm Constraint String

+

The constraint list is a comma-separated string, each element containing one or +more constraint codes.

+

For each element in the constraint list an appropriate register or memory +operand will be chosen, and it will be made available to assembly template +string expansion as $0 for the first constraint in the list, $1 for the +second, etc.

+

There are three different types of constraints, which are distinguished by a +prefix symbol in front of the constraint code: Output, Input, and Clobber. The +constraints must always be given in that order: outputs first, then inputs, then +clobbers. They cannot be intermingled.

+

There are also three different categories of constraint codes:

+
    +
  • Register constraint. This is either a register class, or a fixed physical +register. This kind of constraint will allocate a register, and if necessary, +bitcast the argument or result to the appropriate type.

  • +
  • Memory constraint. This kind of constraint is for use with an instruction +taking a memory operand. Different constraints allow for different addressing +modes used by the target.

  • +
  • Immediate value constraint. This kind of constraint is for an integer or other +immediate value which can be rendered directly into an instruction. The +various target-specific constraints allow the selection of a value in the +proper range for the instruction you wish to use it with.

  • +
+
+
Output constraints
+

Output constraints are specified by an “=” prefix (e.g. “=r”). This +indicates that the assembly will write to this operand, and the operand will +then be made available as a return value of the asm expression. Output +constraints do not consume an argument from the call instruction. (Except, see +below about indirect outputs).

+

Normally, it is expected that no output locations are written to by the assembly +expression until all of the inputs have been read. As such, LLVM may assign +the same register to an output and an input. If this is not safe (e.g. if the +assembly contains two instructions, where the first writes to one output, and +the second reads an input and writes to a second output), then the “&” +modifier must be used (e.g. “=&r”) to specify that the output is an +“early-clobber” output. Marking an output as “early-clobber” ensures that LLVM +will not use the same register for any inputs (other than an input tied to this +output).

+
+
+
Input constraints
+

Input constraints do not have a prefix – just the constraint codes. Each input +constraint will consume one argument from the call instruction. It is not +permitted for the asm to write to any input register or memory location (unless +that input is tied to an output). Note also that multiple inputs may all be +assigned to the same register, if LLVM can determine that they necessarily all +contain the same value.

+

Instead of providing a Constraint Code, input constraints may also “tie” +themselves to an output constraint, by providing an integer as the constraint +string. Tied inputs still consume an argument from the call instruction, and +take up a position in the asm template numbering as is usual – they will simply +be constrained to always use the same register as the output they’ve been tied +to. For example, a constraint string of “=r,0” says to assign a register for +output, and use that register as an input as well (it being the 0’th +constraint).

+

It is permitted to tie an input to an “early-clobber” output. In that case, no +other input may share the same register as the input tied to the early-clobber +(even when the other input has the same value).

+

You may only tie an input to an output which has a register constraint, not a +memory constraint. Only a single input may be tied to an output.

+

There is also an “interesting” feature which deserves a bit of explanation: if a +register class constraint allocates a register which is too small for the value +type operand provided as input, the input value will be split into multiple +registers, and all of them passed to the inline asm.

+

However, this feature is often not as useful as you might think.

+

Firstly, the registers are not guaranteed to be consecutive. So, on those +architectures that have instructions which operate on multiple consecutive +registers, this is not an appropriate way to support them. (e.g. the 32-bit +SparcV8 has a 64-bit load instruction which takes a single 32-bit register. The +hardware then loads into both the named register, and the next register. This +feature of inline asm would not be useful to support that.)

+

A few of the targets provide a template string modifier allowing explicit access +to the second register of a two-register operand (e.g. MIPS L, M, and +D). On such an architecture, you can actually access the second allocated +register (yet, still, not any subsequent ones). But, in that case, you’re still +probably better off simply splitting the value into two separate operands, for +clarity. (e.g. see the description of the A constraint on X86, which, +despite existing only for use with this feature, is not really a good idea to +use)

+
+
+
Indirect inputs and outputs
+

Indirect output or input constraints can be specified by the “*” modifier +(which goes after the “=” in case of an output). This indicates that the asm +will write to or read from the contents of an address provided as an input +argument. (Note that in this way, indirect outputs act more like an input than +an output: just like an input, they consume an argument of the call expression, +rather than producing a return value. An indirect output constraint is an +“output” only in that the asm is expected to write to the contents of the input +memory location, instead of just read from it).

+

This is most typically used for a memory constraint, e.g. “=*m”, to pass the +address of a variable as a value.

+

It is also possible to use an indirect register constraint, but only on output +(e.g. “=*r”). This will cause LLVM to allocate a register for an output +value normally, and then, separately emit a store to the address provided as +input, after the provided inline asm. (It’s not clear what value this +functionality provides, compared to writing the store explicitly after the asm +statement, and it can only produce worse code, since it bypasses many +optimization passes. I would recommend not using it.)

+
+
+
Clobber constraints
+

A clobber constraint is indicated by a “~” prefix. A clobber does not +consume an input operand, nor generate an output. Clobbers cannot use any of the +general constraint code letters – they may use only explicit register +constraints, e.g. “~{eax}”. The one exception is that a clobber string of +“~{memory}” indicates that the assembly writes to arbitrary undeclared +memory locations – not only the memory pointed to by a declared indirect +output.

+

Note that clobbering named registers that are also present in output +constraints is not legal.

+
+
+
Constraint Codes
+

After a potential prefix comes the constraint code, or codes.

+

A Constraint Code is either a single letter (e.g. “r”), a “^” character +followed by two letters (e.g. “^wc”), or “{” register-name “}” +(e.g. “{eax}”).

+

The one and two letter constraint codes are typically chosen to be the same as +GCC’s constraint codes.

+

A single constraint may include one or more constraint codes in it, leaving +it up to LLVM to choose which one to use. This is included mainly for +compatibility with the translation of GCC inline asm coming from clang.

+

There are two ways to specify alternatives, and either or both may be used in an +inline asm constraint list:

+
    +
  1. Append the codes to each other, making a constraint code set. E.g. “im” +or “{eax}m”. This means “choose any of the options in the set”. The +choice of constraint is made independently for each constraint in the +constraint list.

  2. +
  3. Use “|” between constraint code sets, creating alternatives. Every +constraint in the constraint list must have the same number of alternative +sets. With this syntax, the same alternative in all of the items in the +constraint list will be chosen together.

  4. +
+

Putting those together, you might have a two operand constraint string like +"rm|r,ri|rm". This indicates that if operand 0 is r or m, then +operand 1 may be one of r or i. If operand 0 is r, then operand 1 +may be one of r or m. But, operand 0 and 1 cannot both be of type m.

+

However, the use of either of the alternative features is NOT recommended, as +LLVM is not able to make an intelligent choice about which one to use. (At the +point it currently needs to choose, not enough information is available to do so +in a smart way.) Thus, it simply tries to make a choice that’s most likely to +compile, not one that will give optimal performance. (e.g., given “rm”, it’ll +always choose to use memory, not registers). And, if given multiple registers, +or multiple register classes, it will simply choose the first one. (In fact, it +doesn’t currently even ensure explicitly specified physical registers are +unique, so specifying multiple physical registers as alternatives, like +{r11}{r12},{r11}{r12}, will assign r11 to both operands, not at all what was +intended.)

+
+
+
Supported Constraint Code List
+

The constraint codes are, in general, expected to behave the same way they do in +GCC. LLVM’s support is often implemented on an ‘as-needed’ basis, to support C +inline asm code which was supported by GCC. A mismatch in behavior between LLVM +and GCC likely indicates a bug in LLVM.

+

Some constraint codes are typically supported by all targets:

+
    +
  • r: A register in the target’s general purpose register class.

  • +
  • m: A memory address operand. It is target-specific what addressing modes +are supported, typical examples are register, or register + register offset, +or register + immediate offset (of some target-specific size).

  • +
  • i: An integer constant (of target-specific width). Allows either a simple +immediate, or a relocatable value.

  • +
  • n: An integer constant – not including relocatable values.

  • +
  • s: An integer constant, but allowing only relocatable values.

  • +
  • X: Allows an operand of any kind, no constraint whatsoever. Typically +useful to pass a label for an asm branch or call.

    +
  • +
  • {register-name}: Requires exactly the named physical register.

  • +
+

Other constraints are target-specific:

+

AArch64:

+
    +
  • z: An immediate integer 0. Outputs WZR or XZR, as appropriate.

  • +
  • I: An immediate integer valid for an ADD or SUB instruction, +i.e. 0 to 4095 with optional shift by 12.

  • +
  • J: An immediate integer that, when negated, is valid for an ADD or +SUB instruction, i.e. -1 to -4095 with optional left shift by 12.

  • +
  • K: An immediate integer that is valid for the ‘bitmask immediate 32’ of a +logical instruction like AND, EOR, or ORR with a 32-bit register.

  • +
  • L: An immediate integer that is valid for the ‘bitmask immediate 64’ of a +logical instruction like AND, EOR, or ORR with a 64-bit register.

  • +
  • M: An immediate integer for use with the MOV assembly alias on a +32-bit register. This is a superset of K: in addition to the bitmask +immediate, also allows immediate integers which can be loaded with a single +MOVZ or MOVN instruction.

  • +
  • N: An immediate integer for use with the MOV assembly alias on a +64-bit register. This is a superset of L.

  • +
  • Q: Memory address operand must be in a single register (no +offsets). (However, LLVM currently does this for the m constraint as +well.)

  • +
  • r: A 32 or 64-bit integer register (W* or X*).

  • +
  • w: A 32, 64, or 128-bit floating-point, SIMD or SVE vector register.

  • +
  • x: Like w, but restricted to registers 0 to 15 inclusive.

  • +
  • y: Like w, but restricted to SVE vector registers Z0 to Z7 inclusive.

  • +
  • Upl: One of the low eight SVE predicate registers (P0 to P7)

  • +
  • Upa: Any of the SVE predicate registers (P0 to P15)

  • +
+

AMDGPU:

+
    +
  • r: A 32 or 64-bit integer register.

  • +
  • [0-9]v: The 32-bit VGPR register, number 0-9.

  • +
  • [0-9]s: The 32-bit SGPR register, number 0-9.

  • +
  • [0-9]a: The 32-bit AGPR register, number 0-9.

  • +
  • I: An integer inline constant in the range from -16 to 64.

  • +
  • J: A 16-bit signed integer constant.

  • +
  • A: An integer or a floating-point inline constant.

  • +
  • B: A 32-bit signed integer constant.

  • +
  • C: A 32-bit unsigned integer constant or an integer inline constant in the range from -16 to 64.

  • +
  • DA: A 64-bit constant that can be split into two “A” constants.

  • +
  • DB: A 64-bit constant that can be split into two “B” constants.

  • +
+

All ARM modes:

+
    +
  • Q, Um, Un, Uq, Us, Ut, Uv, Uy: Memory address +operand. Treated the same as operand m, at the moment.

  • +
  • Te: An even general-purpose 32-bit integer register: r0,r2,...,r12,r14

  • +
  • To: An odd general-purpose 32-bit integer register: r1,r3,...,r11

  • +
+

ARM and ARM’s Thumb2 mode:

+
    +
  • j: An immediate integer between 0 and 65535 (valid for MOVW)

  • +
  • I: An immediate integer valid for a data-processing instruction.

  • +
  • J: An immediate integer between -4095 and 4095.

  • +
  • K: An immediate integer whose bitwise inverse is valid for a +data-processing instruction. (Can be used with template modifier “B” to +print the inverted value).

  • +
  • L: An immediate integer whose negation is valid for a data-processing +instruction. (Can be used with template modifier “n” to print the negated +value).

  • +
  • M: A power of two or an integer between 0 and 32.

  • +
  • N: Invalid immediate constraint.

  • +
  • O: Invalid immediate constraint.

  • +
  • r: A general-purpose 32-bit integer register (r0-r15).

  • +
  • l: In Thumb2 mode, low 32-bit GPR registers (r0-r7). In ARM mode, same +as r.

  • +
  • h: In Thumb2 mode, a high 32-bit GPR register (r8-r15). In ARM mode, +invalid.

  • +
  • w: A 32, 64, or 128-bit floating-point/SIMD register in the ranges +s0-s31, d0-d31, or q0-q15, respectively.

  • +
  • t: A 32, 64, or 128-bit floating-point/SIMD register in the ranges +s0-s31, d0-d15, or q0-q7, respectively.

  • +
  • x: A 32, 64, or 128-bit floating-point/SIMD register in the ranges +s0-s15, d0-d7, or q0-q3, respectively.

  • +
+

ARM’s Thumb1 mode:

+
    +
  • I: An immediate integer between 0 and 255.

  • +
  • J: An immediate integer between -255 and -1.

  • +
  • K: An immediate integer between 0 and 255, with optional left-shift by +some amount.

  • +
  • L: An immediate integer between -7 and 7.

  • +
  • M: An immediate integer which is a multiple of 4 between 0 and 1020.

  • +
  • N: An immediate integer between 0 and 31.

  • +
  • O: An immediate integer which is a multiple of 4 between -508 and 508.

  • +
  • r: A low 32-bit GPR register (r0-r7).

  • +
  • l: A low 32-bit GPR register (r0-r7).

  • +
  • h: A high GPR register (r8-r15).

  • +
  • w: A 32, 64, or 128-bit floating-point/SIMD register in the ranges +s0-s31, d0-d31, or q0-q15, respectively.

  • +
  • t: A 32, 64, or 128-bit floating-point/SIMD register in the ranges +s0-s31, d0-d15, or q0-q7, respectively.

  • +
  • x: A 32, 64, or 128-bit floating-point/SIMD register in the ranges +s0-s15, d0-d7, or q0-q3, respectively.

  • +
+

Hexagon:

+
    +
  • o, v: A memory address operand, treated the same as constraint m, +at the moment.

  • +
  • r: A 32 or 64-bit register.

  • +
+

MSP430:

+
    +
  • r: An 8 or 16-bit register.

  • +
+

MIPS:

+
    +
  • I: An immediate signed 16-bit integer.

  • +
  • J: An immediate integer zero.

  • +
  • K: An immediate unsigned 16-bit integer.

  • +
  • L: An immediate 32-bit integer, where the lower 16 bits are 0.

  • +
  • N: An immediate integer between -65535 and -1.

  • +
  • O: An immediate signed 15-bit integer.

  • +
  • P: An immediate integer between 1 and 65535.

  • +
  • m: A memory address operand. In MIPS-SE mode, allows a base address +register plus 16-bit immediate offset. In MIPS mode, just a base register.

  • +
  • R: A memory address operand. In MIPS-SE mode, allows a base address +register plus a 9-bit signed offset. In MIPS mode, the same as constraint +m.

  • +
  • ZC: A memory address operand, suitable for use in a pref, ll, or +sc instruction on the given subtarget (details vary).

  • +
  • r, d, y: A 32 or 64-bit GPR register.

  • +
  • f: A 32 or 64-bit FPU register (F0-F31), or a 128-bit MSA register +(W0-W31). In the case of MSA registers, it is recommended to use the w +argument modifier for compatibility with GCC.

  • +
  • c: A 32-bit or 64-bit GPR register suitable for indirect jump (always +25).

  • +
  • l: The lo register, 32 or 64-bit.

  • +
  • x: Invalid.

  • +
+

NVPTX:

+
    +
  • b: A 1-bit integer register.

  • +
  • c or h: A 16-bit integer register.

  • +
  • r: A 32-bit integer register.

  • +
  • l or N: A 64-bit integer register.

  • +
  • f: A 32-bit float register.

  • +
  • d: A 64-bit float register.

  • +
+

PowerPC:

+
    +
  • I: An immediate signed 16-bit integer.

  • +
  • J: An immediate unsigned 16-bit integer, shifted left 16 bits.

  • +
  • K: An immediate unsigned 16-bit integer.

  • +
  • L: An immediate signed 16-bit integer, shifted left 16 bits.

  • +
  • M: An immediate integer greater than 31.

  • +
  • N: An immediate integer that is an exact power of 2.

  • +
  • O: The immediate integer constant 0.

  • +
  • P: An immediate integer constant whose negation is a signed 16-bit +constant.

  • +
  • es, o, Q, Z, Zy: A memory address operand, currently +treated the same as m.

  • +
  • r: A 32 or 64-bit integer register.

  • +
  • b: A 32 or 64-bit integer register, excluding R0 (that is: +R1-R31).

  • +
  • f: A 32 or 64-bit float register (F0-F31).

  • +
  • +
    v: For 4 x f32 or 4 x f64 types, a 128-bit altivec vector

    register (V0-V31).

    +
    +
    +
  • +
  • y: Condition register (CR0-CR7).

  • +
  • wc: An individual CR bit in a CR register.

  • +
  • wa, wd, wf: Any 128-bit VSX vector register, from the full VSX +register set (overlapping both the floating-point and vector register files).

  • +
  • ws: A 32 or 64-bit floating-point register, from the full VSX register +set.

  • +
+

RISC-V:

+
    +
  • A: An address operand (using a general-purpose register, without an +offset).

  • +
  • I: A 12-bit signed integer immediate operand.

  • +
  • J: A zero integer immediate operand.

  • +
  • K: A 5-bit unsigned integer immediate operand.

  • +
  • f: A 32- or 64-bit floating-point register (requires F or D extension).

  • +
  • r: A 32- or 64-bit general-purpose register (depending on the platform +XLEN).

  • +
  • vr: A vector register. (requires V extension).

  • +
  • vm: A vector mask register. (requires V extension).

  • +
+

Sparc:

+
    +
  • I: An immediate 13-bit signed integer.

  • +
  • r: A 32-bit integer register.

  • +
  • f: Any floating-point register on SparcV8, or a floating-point +register in the “low” half of the registers on SparcV9.

  • +
  • e: Any floating-point register. (Same as f on SparcV8.)

  • +
+

SystemZ:

+
    +
  • I: An immediate unsigned 8-bit integer.

  • +
  • J: An immediate unsigned 12-bit integer.

  • +
  • K: An immediate signed 16-bit integer.

  • +
  • L: An immediate signed 20-bit integer.

  • +
  • M: An immediate integer 0x7fffffff.

  • +
  • Q: A memory address operand with a base address and a 12-bit immediate +unsigned displacement.

  • +
  • R: A memory address operand with a base address, a 12-bit immediate +unsigned displacement, and an index register.

  • +
  • S: A memory address operand with a base address and a 20-bit immediate +signed displacement.

  • +
  • T: A memory address operand with a base address, a 20-bit immediate +signed displacement, and an index register.

  • +
  • r or d: A 32, 64, or 128-bit integer register.

  • +
  • a: A 32, 64, or 128-bit integer address register (excludes R0, which in an +address context evaluates as zero).

  • +
  • h: A 32-bit value in the high part of a 64-bit data register +(LLVM-specific).

  • +
  • f: A 32, 64, or 128-bit floating-point register.

  • +
+

X86:

+
    +
  • I: An immediate integer between 0 and 31.

  • +
  • J: An immediate integer between 0 and 63.

  • +
  • K: An immediate signed 8-bit integer.

  • +
  • L: An immediate integer, 0xff or 0xffff or (in 64-bit mode only) +0xffffffff.

  • +
  • M: An immediate integer between 0 and 3.

  • +
  • N: An immediate unsigned 8-bit integer.

  • +
  • O: An immediate integer between 0 and 127.

  • +
  • e: An immediate 32-bit signed integer.

  • +
  • Z: An immediate 32-bit unsigned integer.

  • +
  • o, v: Treated the same as m, at the moment.

  • +
  • q: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit +l integer register. On X86-32, this is the a, b, c, and d +registers, and on X86-64, it is all of the integer registers.

  • +
  • Q: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit +h integer register. This is the a, b, c, and d registers.

  • +
  • r or l: An 8, 16, 32, or 64-bit integer register.

  • +
  • R: An 8, 16, 32, or 64-bit “legacy” integer register – one which has +existed since i386, and can be accessed without the REX prefix.

  • +
  • f: A 32, 64, or 80-bit ‘387 FPU stack pseudo-register.

  • +
  • y: A 64-bit MMX register, if MMX is enabled.

  • +
  • x: If SSE is enabled: a 32 or 64-bit scalar operand, or 128-bit vector +operand in a SSE register. If AVX is also enabled, can also be a 256-bit +vector operand in an AVX register. If AVX-512 is also enabled, can also be a +512-bit vector operand in an AVX512 register. Otherwise, an error.

  • +
  • Y: The same as x, if SSE2 is enabled, otherwise an error.

  • +
  • A: Special case: allocates EAX first, then EDX, for a single operand (in +32-bit mode, a 64-bit integer operand will get split into two registers). It +is not recommended to use this constraint, as in 64-bit mode, the 64-bit +operand will get allocated only to RAX – if two 32-bit operands are needed, +you’re better off splitting it yourself, before passing it to the asm +statement.

  • +
+

XCore:

+
    +
  • r: A 32-bit integer register.

  • +
+
+
+
+

Asm template argument modifiers

+

In the asm template string, modifiers can be used on the operand reference, like +“${0:n}”.

+

The modifiers are, in general, expected to behave the same way they do in +GCC. LLVM’s support is often implemented on an ‘as-needed’ basis, to support C +inline asm code which was supported by GCC. A mismatch in behavior between LLVM +and GCC likely indicates a bug in LLVM.

+

Target-independent:

+
    +
  • c: Print an immediate integer constant unadorned, without +the target-specific immediate punctuation (e.g. no $ prefix).

  • +
  • n: Negate and print immediate integer constant unadorned, without the +target-specific immediate punctuation (e.g. no $ prefix).

  • +
  • l: Print as an unadorned label, without the target-specific label +punctuation (e.g. no $ prefix).

  • +
+

AArch64:

+
    +
  • w: Print a GPR register with a w* name instead of x* name. E.g., +instead of x30, print w30.

  • +
  • x: Print a GPR register with an x* name. (This is the default, anyhow.)

  • +
  • b, h, s, d, q: Print a floating-point/SIMD register with a +b*, h*, s*, d*, or q* name, rather than the default of +v*.

  • +
+

AMDGPU:

+
    +
  • r: No effect.

  • +
+

ARM:

+
    +
  • a: Print an operand as an address (with [ and ] surrounding a +register).

  • +
  • P: No effect.

  • +
  • q: No effect.

  • +
  • y: Print a VFP single-precision register as an indexed double (e.g. print +as d4[1] instead of s9)

  • +
  • B: Bitwise invert and print an immediate integer constant without # +prefix.

  • +
  • L: Print the low 16-bits of an immediate integer constant.

  • +
  • M: Print as a register set suitable for ldm/stm. Also prints all +register operands subsequent to the specified one (!), so use carefully.

  • +
  • Q: Print the low-order register of a register-pair, or the low-order +register of a two-register operand.

  • +
  • R: Print the high-order register of a register-pair, or the high-order +register of a two-register operand.

  • +
  • H: Print the second register of a register-pair. (On a big-endian system, +H is equivalent to Q, and on a little-endian system, H is equivalent +to R.)

    +
  • +
  • e: Print the low doubleword register of a NEON quad register.

  • +
  • f: Print the high doubleword register of a NEON quad register.

  • +
  • m: Print the base register of a memory operand without the [ and ] +adornment.

  • +
+

Hexagon:

+
    +
  • L: Print the second register of a two-register operand. Requires that it +has been allocated consecutively to the first.

    +
  • +
  • I: Print the letter ‘i’ if the operand is an integer constant, otherwise +nothing. Used to print ‘addi’ vs ‘add’ instructions.

  • +
+

MSP430:

+

No additional modifiers.

+

MIPS:

+
    +
  • X: Print an immediate integer as hexadecimal

  • +
  • x: Print the low 16 bits of an immediate integer as hexadecimal.

  • +
  • d: Print an immediate integer as decimal.

  • +
  • m: Subtract one and print an immediate integer as decimal.

  • +
  • z: Print $0 if an immediate zero, otherwise print normally.

  • +
  • L: Print the low-order register of a two-register operand, or prints the +address of the low-order word of a double-word memory operand.

    +
  • +
  • M: Print the high-order register of a two-register operand, or prints the +address of the high-order word of a double-word memory operand.

    +
  • +
  • D: Print the second register of a two-register operand, or prints the +second word of a double-word memory operand. (On a big-endian system, D is +equivalent to L, and on a little-endian system, D is equivalent to +M.)

  • +
  • w: No effect. Provided for compatibility with GCC which requires this +modifier in order to print MSA registers (W0-W31) with the f +constraint.

  • +
+

NVPTX:

+
    +
  • r: No effect.

  • +
+

PowerPC:

+
    +
  • L: Print the second register of a two-register operand. Requires that it +has been allocated consecutively to the first.

    +
  • +
  • I: Print the letter ‘i’ if the operand is an integer constant, otherwise +nothing. Used to print ‘addi’ vs ‘add’ instructions.

  • +
  • y: For a memory operand, prints formatter for a two-register X-form +instruction. (Currently always prints r0,OPERAND).

  • +
  • U: Prints ‘u’ if the memory operand is an update form, and nothing +otherwise. (NOTE: LLVM does not support update form, so this will currently +always print nothing)

  • +
  • X: Prints ‘x’ if the memory operand is an indexed form. (NOTE: LLVM does +not support indexed form, so this will currently always print nothing)

  • +
+

RISC-V:

+
    +
  • i: Print the letter ‘i’ if the operand is not a register, otherwise print +nothing. Used to print ‘addi’ vs ‘add’ instructions, etc.

  • +
  • z: Print the register zero if an immediate zero, otherwise print +normally.

  • +
+

Sparc:

+
    +
  • r: No effect.

  • +
+

SystemZ:

+

SystemZ implements only n, and does not support any of the other +target-independent modifiers.

+

X86:

+
    +
  • c: Print an unadorned integer or symbol name. (The latter is +target-specific behavior for this typically target-independent modifier).

  • +
  • A: Print a register name with a ‘*’ before it.

  • +
  • b: Print an 8-bit register name (e.g. al); do nothing on a memory +operand.

  • +
  • h: Print the upper 8-bit register name (e.g. ah); do nothing on a +memory operand.

  • +
  • w: Print the 16-bit register name (e.g. ax); do nothing on a memory +operand.

  • +
  • k: Print the 32-bit register name (e.g. eax); do nothing on a memory +operand.

  • +
  • q: Print the 64-bit register name (e.g. rax), if 64-bit registers are +available, otherwise the 32-bit register name; do nothing on a memory operand.

  • +
  • n: Negate and print an unadorned integer, or, for operands other than an +immediate integer (e.g. a relocatable symbol expression), print a ‘-‘ before +the operand. (The behavior for relocatable symbol expressions is a +target-specific behavior for this typically target-independent modifier)

  • +
  • H: Print a memory reference with additional offset +8.

  • +
  • P: Print a memory reference or operand for use as the argument of a call +instruction. (E.g. omit (rip), even though it’s PC-relative.)

  • +
+

XCore:

+

No additional modifiers.

+
+
+

Inline Asm Metadata

+

The call instructions that wrap inline asm nodes may have a +“!srcloc” MDNode attached to them that contains a list of constant +integers. If present, the code generator will use the integer as the +location cookie value when reporting errors through the LLVMContext +error reporting mechanisms. This allows a front-end to correlate backend +errors that occur with inline asm back to the source code that produced +it. For example:

+
call void asm sideeffect "something bad", ""(), !srcloc !42
+...
+!42 = !{ i32 1234567 }
+
+
+

It is up to the front-end to make sense of the magic numbers it places +in the IR. If the MDNode contains multiple constants, the code generator +will use the one that corresponds to the line of the asm that the error +occurs on.

+
+
+
+
+

Metadata

+

LLVM IR allows metadata to be attached to instructions and global objects in the +program that can convey extra information about the code to the optimizers and +code generator. One example application of metadata is source-level +debug information. There are two metadata primitives: strings and nodes.

+

Metadata does not have a type, and is not a value. If referenced from a +call instruction, it uses the metadata type.

+

All metadata are identified in syntax by an exclamation point (‘!’).

+
+

Metadata Nodes and Metadata Strings

+

A metadata string is a string surrounded by double quotes. It can +contain any character by escaping non-printable characters with +“\xx” where “xx” is the two digit hex code. For example: +“!"test\00"”.

+

Metadata nodes are represented with notation similar to structure +constants (a comma separated list of elements, surrounded by braces and +preceded by an exclamation point). Metadata nodes can have any values as +their operand. For example:

+
!{ !"test\00", i32 10}
+
+
+

Metadata nodes that aren’t uniqued use the distinct keyword. For example:

+
!0 = distinct !{!"test\00", i32 10}
+
+
+

distinct nodes are useful when nodes shouldn’t be merged based on their +content. They can also occur when transformations cause uniquing collisions +when metadata operands change.

+

A named metadata is a collection of +metadata nodes, which can be looked up in the module symbol table. For +example:

+
!foo = !{!4, !3}
+
+
+

Metadata can be used as function arguments. Here the llvm.dbg.value +intrinsic is using three metadata arguments:

+
call void @llvm.dbg.value(metadata !24, metadata !25, metadata !26)
+
+
+

Metadata can be attached to an instruction. Here metadata !21 is attached +to the add instruction using the !dbg identifier:

+
%indvar.next = add i64 %indvar, 1, !dbg !21
+
+
+

Instructions may not have multiple metadata attachments with the same +identifier.

+

Metadata can also be attached to a function or a global variable. Here metadata +!22 is attached to the f1 and f2 functions, and the globals g1 +and g2 using the !dbg identifier:

+
declare !dbg !22 void @f1()
+define void @f2() !dbg !22 {
+  ret void
+}
+
+@g1 = global i32 0, !dbg !22
+@g2 = external global i32, !dbg !22
+
+
+

Unlike instructions, global objects (functions and global variables) may have +multiple metadata attachments with the same identifier.

+

A transformation is required to drop any metadata attachment that it does not +know or knows it can’t preserve. Currently there is an exception for metadata +attachment to globals for !type and !absolute_symbol which can’t be +unconditionally dropped unless the global is itself deleted.

+

Metadata attached to a module using named metadata may not be dropped, with +the exception of debug metadata (named metadata with the name !llvm.dbg.*).

+

More information about specific metadata nodes recognized by the +optimizers and code generator is found below.

+
+

Specialized Metadata Nodes

+

Specialized metadata nodes are custom data structures in metadata (as opposed +to generic tuples). Their fields are labelled, and can be specified in any +order.

+

These aren’t inherently debug info centric, but currently all the specialized +metadata nodes are related to debug info.

+
+
DICompileUnit
+

DICompileUnit nodes represent a compile unit. The enums:, +retainedTypes:, globals:, imports: and macros: fields are tuples +containing the debug info to be emitted along with the compile unit, regardless +of code optimizations (some nodes are only emitted if there are references to +them from instructions). The debugInfoForProfiling: field is a boolean +indicating whether or not line-table discriminators are updated to provide +more-accurate debug info for profiling results.

+
!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang",
+                    isOptimized: true, flags: "-O2", runtimeVersion: 2,
+                    splitDebugFilename: "abc.debug", emissionKind: FullDebug,
+                    enums: !2, retainedTypes: !3, globals: !4, imports: !5,
+                    macros: !6, dwoId: 0x0abcd)
+
+
+

Compile unit descriptors provide the root scope for objects declared in a +specific compilation unit. File descriptors are defined using this scope. These +descriptors are collected by a named metadata node !llvm.dbg.cu. They keep +track of global variables, type information, and imported entities (declarations +and namespaces).

+
+
+
DIFile
+

DIFile nodes represent files. The filename: can include slashes.

+
!0 = !DIFile(filename: "path/to/file", directory: "/path/to/dir",
+             checksumkind: CSK_MD5,
+             checksum: "000102030405060708090a0b0c0d0e0f")
+
+
+

Files are sometimes used in scope: fields, and are the only valid target +for file: fields. +Valid values for the checksumkind: field are: {CSK_None, CSK_MD5, CSK_SHA1, CSK_SHA256}.

+
+
+
DIBasicType
+

DIBasicType nodes represent primitive types, such as int, bool and +float. tag: defaults to DW_TAG_base_type.

+
!0 = !DIBasicType(name: "unsigned char", size: 8, align: 8,
+                  encoding: DW_ATE_unsigned_char)
+!1 = !DIBasicType(tag: DW_TAG_unspecified_type, name: "decltype(nullptr)")
+
+
+

The encoding: describes the details of the type. Usually it’s one of the +following:

+
DW_ATE_address       = 1
+DW_ATE_boolean       = 2
+DW_ATE_float         = 4
+DW_ATE_signed        = 5
+DW_ATE_signed_char   = 6
+DW_ATE_unsigned      = 7
+DW_ATE_unsigned_char = 8
+
+
+
+
+
DISubroutineType
+

DISubroutineType nodes represent subroutine types. Their types: field +refers to a tuple; the first operand is the return type, while the rest are the +types of the formal arguments in order. If the first operand is null, that +represents a function with no return value (such as void foo() {} in C++).

+
!0 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!1 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!2 = !DISubroutineType(types: !{null, !0, !1}) ; void (int, char)
+
+
+
+
+
DIDerivedType
+

DIDerivedType nodes represent types derived from other types, such as +qualified types.

+
!0 = !DIBasicType(name: "unsigned char", size: 8, align: 8,
+                  encoding: DW_ATE_unsigned_char)
+!1 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !0, size: 32,
+                    align: 32)
+
+
+

The following tag: values are valid:

+
DW_TAG_member             = 13
+DW_TAG_pointer_type       = 15
+DW_TAG_reference_type     = 16
+DW_TAG_typedef            = 22
+DW_TAG_inheritance        = 28
+DW_TAG_ptr_to_member_type = 31
+DW_TAG_const_type         = 38
+DW_TAG_friend             = 42
+DW_TAG_volatile_type      = 53
+DW_TAG_restrict_type      = 55
+DW_TAG_atomic_type        = 71
+
+
+

DW_TAG_member is used to define a member of a composite type. The type of the member is the baseType:. The +offset: is the member’s bit offset. If the composite type has an ODR +identifier: and does not set flags: DIFlagFwdDecl, then the member is +uniqued based only on its name: and scope:.

+

DW_TAG_inheritance and DW_TAG_friend are used in the elements: +field of composite types to describe parents and +friends.

+

DW_TAG_typedef is used to provide a name for the baseType:.

+

DW_TAG_pointer_type, DW_TAG_reference_type, DW_TAG_const_type, +DW_TAG_volatile_type, DW_TAG_restrict_type and DW_TAG_atomic_type +are used to qualify the baseType:.

+

Note that the void * type is expressed as a type derived from NULL.

+
+
+
DICompositeType
+

DICompositeType nodes represent types composed of other types, like +structures and unions. elements: points to a tuple of the composed types.

+

If the source language supports ODR, the identifier: field gives the unique +identifier used for type merging between modules. When specified, +subprogram declarations and member +derived types that reference the ODR-type in their +scope: change uniquing rules.

+

For a given identifier:, there should only be a single composite type that +does not have flags: DIFlagFwdDecl set. LLVM tools that link modules +together will unique such definitions at parse time via the identifier: +field, even if the nodes are distinct.

+
!0 = !DIEnumerator(name: "SixKind", value: 6)
+!1 = !DIEnumerator(name: "SevenKind", value: 7)
+!2 = !DIEnumerator(name: "NegEightKind", value: -8)
+!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum", file: !12,
+                      line: 2, size: 32, align: 32, identifier: "_M4Enum",
+                      elements: !{!0, !1, !2})
+
+
+

The following tag: values are valid:

+
DW_TAG_array_type       = 1
+DW_TAG_class_type       = 2
+DW_TAG_enumeration_type = 4
+DW_TAG_structure_type   = 19
+DW_TAG_union_type       = 23
+
+
+

For DW_TAG_array_type, the elements: should be subrange +descriptors, each representing the range of subscripts at that +level of indexing. The DIFlagVector flag to flags: indicates that an +array type is a native packed vector. The optional dataLocation is a +DIExpression that describes how to get from an object’s address to the actual +raw data, if they aren’t equivalent. This is only supported for array types, +particularly to describe Fortran arrays, which have an array descriptor in +addition to the array data. Alternatively it can also be DIVariable which +has the address of the actual raw data. The Fortran language supports pointer +arrays which can be attached to actual arrays; this attachment between pointer +and pointee is called association. The optional associated is a +DIExpression that describes whether the pointer array is currently associated. +The optional allocated is a DIExpression that describes whether the +allocatable array is currently allocated. The optional rank is a +DIExpression that describes the rank (number of dimensions) of a Fortran +assumed-rank array (rank is known at runtime).

+

For DW_TAG_enumeration_type, the elements: should be enumerator +descriptors, each representing the definition of an enumeration +value for the set. All enumeration type descriptors are collected in the +enums: field of the compile unit.

+

For DW_TAG_structure_type, DW_TAG_class_type, and +DW_TAG_union_type, the elements: should be derived types with tag: DW_TAG_member, tag: DW_TAG_inheritance, or +tag: DW_TAG_friend; or subprograms with +isDefinition: false.

+
+
+
DISubrange
+

DISubrange nodes are the elements for DW_TAG_array_type variants of +DICompositeType.

+ +
!0 = !DISubrange(count: 5, lowerBound: 0) ; array counting from 0
+!1 = !DISubrange(count: 5, lowerBound: 1) ; array counting from 1
+!2 = !DISubrange(count: -1) ; empty array.
+
+; Scopes used in rest of example
+!6 = !DIFile(filename: "vla.c", directory: "/path/to/file")
+!7 = distinct !DICompileUnit(language: DW_LANG_C99, file: !6)
+!8 = distinct !DISubprogram(name: "foo", scope: !7, file: !6, line: 5)
+
+; Use of local variable as count value
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !DILocalVariable(name: "count", scope: !8, file: !6, line: 42, type: !9)
+!11 = !DISubrange(count: !10, lowerBound: 0)
+
+; Use of global variable as count value
+!12 = !DIGlobalVariable(name: "count", scope: !8, file: !6, line: 22, type: !9)
+!13 = !DISubrange(count: !12, lowerBound: 0)
+
+
+
+
+
DIEnumerator
+

DIEnumerator nodes are the elements for DW_TAG_enumeration_type +variants of DICompositeType.

+
!0 = !DIEnumerator(name: "SixKind", value: 6)
+!1 = !DIEnumerator(name: "SevenKind", value: 7)
+!2 = !DIEnumerator(name: "NegEightKind", value: -8)
+
+
+
+
+
DITemplateTypeParameter
+

DITemplateTypeParameter nodes represent type parameters to generic source +language constructs. They are used (optionally) in DICompositeType and +DISubprogram templateParams: fields.

+
!0 = !DITemplateTypeParameter(name: "Ty", type: !1)
+
+
+
+
+
DITemplateValueParameter
+

DITemplateValueParameter nodes represent value parameters to generic source +language constructs. tag: defaults to DW_TAG_template_value_parameter, +but if specified can also be set to DW_TAG_GNU_template_template_param or +DW_TAG_GNU_template_param_pack. They are used (optionally) in +DICompositeType and DISubprogram templateParams: fields.

+
!0 = !DITemplateValueParameter(name: "Ty", type: !1, value: i32 7)
+
+
+
+
+
DINamespace
+

DINamespace nodes represent namespaces in the source language.

+
!0 = !DINamespace(name: "myawesomeproject", scope: !1, file: !2, line: 7)
+
+
+
+
+
DIGlobalVariable
+

DIGlobalVariable nodes represent global variables in the source language.

+
@foo = global i32, !dbg !0
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = !DIGlobalVariable(name: "foo", linkageName: "foo", scope: !2,
+                       file: !3, line: 7, type: !4, isLocal: true,
+                       isDefinition: false, declaration: !5)
+
+
+
+
+
DIGlobalVariableExpression
+

DIGlobalVariableExpression nodes tie a DIGlobalVariable together +with a DIExpression.

+
@lower = global i32, !dbg !0
+@upper = global i32, !dbg !1
+!0 = !DIGlobalVariableExpression(
+         var: !2,
+         expr: !DIExpression(DW_OP_LLVM_fragment, 0, 32)
+         )
+!1 = !DIGlobalVariableExpression(
+         var: !2,
+         expr: !DIExpression(DW_OP_LLVM_fragment, 32, 32)
+         )
+!2 = !DIGlobalVariable(name: "split64", linkageName: "split64", scope: !3,
+                       file: !4, line: 8, type: !5, declaration: !6)
+
+
+

All global variable expressions should be referenced by the globals: field of +a compile unit.

+
+
+
DISubprogram
+

DISubprogram nodes represent functions from the source language. A distinct +DISubprogram may be attached to a function definition using !dbg +metadata. A unique DISubprogram may be attached to a function declaration +used for call site debug info. The retainedNodes: field is a list of +variables and labels that must be +retained, even if their IR counterparts are optimized out of the IR. The +type: field must point at a DISubroutineType.

+

When isDefinition: false, subprograms describe a declaration in the type +tree as opposed to a definition of a function. If the scope is a composite +type with an ODR identifier: and that does not set flags: DIFwdDecl, +then the subprogram declaration is uniqued based only on its linkageName: +and scope:.

+
define void @_Z3foov() !dbg !0 {
+  ...
+}
+
+!0 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1,
+                            file: !2, line: 7, type: !3, isLocal: true,
+                            isDefinition: true, scopeLine: 8,
+                            containingType: !4,
+                            virtuality: DW_VIRTUALITY_pure_virtual,
+                            virtualIndex: 10, flags: DIFlagPrototyped,
+                            isOptimized: true, unit: !5, templateParams: !6,
+                            declaration: !7, retainedNodes: !8,
+                            thrownTypes: !9)
+
+
+
+
+
DILexicalBlock
+

DILexicalBlock nodes describe nested blocks within a subprogram. The line number and column numbers are used to distinguish +two lexical blocks at the same depth. They are valid targets for scope: +fields.

+
!0 = distinct !DILexicalBlock(scope: !1, file: !2, line: 7, column: 35)
+
+
+

Usually lexical blocks are distinct to prevent node merging based on +operands.

+
+
+
DILexicalBlockFile
+

DILexicalBlockFile nodes are used to discriminate between sections of a +lexical block. The file: field can be changed to +indicate textual inclusion, or the discriminator: field can be used to +discriminate between control flow within a single block in the source language.

+
!0 = !DILexicalBlock(scope: !3, file: !4, line: 7, column: 35)
+!1 = !DILexicalBlockFile(scope: !0, file: !4, discriminator: 0)
+!2 = !DILexicalBlockFile(scope: !0, file: !4, discriminator: 1)
+
+
+
+
+
DILocation
+

DILocation nodes represent source debug locations. The scope: field is +mandatory, and points at a DILexicalBlockFile, a +DILexicalBlock, or a DISubprogram.

+
!0 = !DILocation(line: 2900, column: 42, scope: !1, inlinedAt: !2)
+
+
+
+
+
DILocalVariable
+

DILocalVariable nodes represent local variables in the source language. If +the arg: field is set to non-zero, then this variable is a subprogram +parameter, and it will be included in the retainedNodes: field of its +DISubprogram.

+
!0 = !DILocalVariable(name: "this", arg: 1, scope: !3, file: !2, line: 7,
+                      type: !3, flags: DIFlagArtificial)
+!1 = !DILocalVariable(name: "x", arg: 2, scope: !4, file: !2, line: 7,
+                      type: !3)
+!2 = !DILocalVariable(name: "y", scope: !5, file: !2, line: 7, type: !3)
+
+
+
+
+
DIExpression
+

DIExpression nodes represent expressions that are inspired by the DWARF +expression language. They are used in debug intrinsics +(such as llvm.dbg.declare and llvm.dbg.value) to describe how the +referenced LLVM variable relates to the source language variable. Debug +intrinsics are interpreted left-to-right: start by pushing the value/address +operand of the intrinsic onto a stack, then repeatedly push and evaluate +opcodes from the DIExpression until the final variable description is produced.

+

The current supported opcode vocabulary is limited:

+
    +
  • DW_OP_deref dereferences the top of the expression stack.

  • +
  • DW_OP_plus pops the last two entries from the expression stack, adds +them together and appends the result to the expression stack.

  • +
  • DW_OP_minus pops the last two entries from the expression stack, subtracts +the last entry from the second last entry and appends the result to the +expression stack.

  • +
  • DW_OP_plus_uconst, 93 adds 93 to the working expression.

  • +
  • DW_OP_LLVM_fragment, 16, 8 specifies the offset and size (16 and 8 +here, respectively) of the variable fragment from the working expression. Note +that contrary to DW_OP_bit_piece, the offset is describing the location +within the described source variable.

  • +
  • DW_OP_LLVM_convert, 16, DW_ATE_signed specifies a bit size and encoding +(16 and DW_ATE_signed here, respectively) to which the top of the +expression stack is to be converted. Maps into a DW_OP_convert operation +that references a base type constructed from the supplied values.

  • +
  • DW_OP_LLVM_tag_offset, tag_offset specifies that a memory tag should be +optionally applied to the pointer. The memory tag is derived from the +given tag offset in an implementation-defined manner.

  • +
  • DW_OP_swap swaps top two stack entries.

  • +
  • DW_OP_xderef provides extended dereference mechanism. The entry at the top +of the stack is treated as an address. The second stack entry is treated as an +address space identifier.

  • +
  • DW_OP_stack_value marks a constant value.

  • +
  • DW_OP_LLVM_entry_value, N may only appear in MIR and at the +beginning of a DIExpression. In DWARF a DBG_VALUE +instruction binding a DIExpression(DW_OP_LLVM_entry_value to a +register is lowered to a DW_OP_entry_value [reg], pushing the +value the register had upon function entry onto the stack. The next +(N - 1) operations will be part of the DW_OP_entry_value +block argument. For example, !DIExpression(DW_OP_LLVM_entry_value, +1, DW_OP_plus_uconst, 123, DW_OP_stack_value) specifies an +expression where the entry value of the debug value instruction’s +value/address operand is pushed to the stack, and is added +with 123. Due to framework limitations N can currently only +be 1.

    +

    The operation is introduced by the LiveDebugValues pass, which +applies it only to function parameters that are unmodified +throughout the function. Support is limited to simple register +location descriptions, or as indirect locations (e.g., when a struct +is passed-by-value to a callee via a pointer to a temporary copy +made in the caller). The entry value op is also introduced by the +AsmPrinter pass when a call site parameter value +(DW_AT_call_site_parameter_value) is represented as entry value +of the parameter.

    +
  • +
  • DW_OP_LLVM_arg, N is used in debug intrinsics that refer to more than one +value, such as one that calculates the sum of two registers. This is always +used in combination with an ordered list of values, such that +DW_OP_LLVM_arg, N refers to the Nth element in that list. For +example, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, +DW_OP_stack_value) used with the list (%reg1, %reg2) would evaluate to +%reg1 - %reg2. This list of values should be provided by the containing +intrinsic/instruction.

  • +
  • DW_OP_breg (or DW_OP_bregx) represents the content at the provided +signed offset from the specified register. The opcode is only generated by the +AsmPrinter pass to describe a call site parameter value which requires an +expression over two registers.

  • +
  • DW_OP_push_object_address pushes the address of the object which can then +serve as a descriptor in subsequent calculation. This opcode can be used to +calculate bounds of fortran allocatable array which has array descriptors.

  • +
  • DW_OP_over duplicates the entry currently second in the stack at the top +of the stack. This opcode can be used to calculate bounds of fortran assumed +rank array which has rank known at run time and current dimension number is +implicitly first element of the stack.

  • +
  • DW_OP_LLVM_implicit_pointer specifies the dereferenced value. It can +be used to represent pointer variables which are optimized out but whose pointed-to value +is known. This operator is required because it differs from the DWARF +operator DW_OP_implicit_pointer in representation and specification (number +and types of operands), and the latter cannot be used at multiple levels.

  • +
+
IR for "*ptr = 4;"
+--------------
+call void @llvm.dbg.value(metadata i32 4, metadata !17, metadata !20)
+!17 = !DILocalVariable(name: "ptr1", scope: !12, file: !3, line: 5,
+                       type: !18)
+!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64)
+!19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!20 = !DIExpression(DW_OP_LLVM_implicit_pointer)
+
+IR for "**ptr = 4;"
+--------------
+call void @llvm.dbg.value(metadata i32 4, metadata !17, metadata !21)
+!17 = !DILocalVariable(name: "ptr1", scope: !12, file: !3, line: 5,
+                       type: !18)
+!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64)
+!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64)
+!20 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!21 = !DIExpression(DW_OP_LLVM_implicit_pointer,
+                    DW_OP_LLVM_implicit_pointer)
+
+
+

DWARF specifies three kinds of simple location descriptions: Register, memory, +and implicit location descriptions. Note that a location description is +defined over certain ranges of a program, i.e., the location of a variable may +change over the course of the program. Register and memory location +descriptions describe the concrete location of a source variable (in the +sense that a debugger might modify its value), whereas implicit locations +describe merely the actual value of a source variable which might not exist +in registers or in memory (see DW_OP_stack_value).

+

A llvm.dbg.addr or llvm.dbg.declare intrinsic describes an indirect +value (the address) of a source variable. The first operand of the intrinsic +must be an address of some kind. A DIExpression attached to the intrinsic +refines this address to produce a concrete location for the source variable.

+

A llvm.dbg.value intrinsic describes the direct value of a source variable. +The first operand of the intrinsic may be a direct or indirect value. A +DIExpression attached to the intrinsic refines the first operand to produce a +direct value. For example, if the first operand is an indirect value, it may be +necessary to insert DW_OP_deref into the DIExpression in order to produce a +valid debug intrinsic.

+
+

Note

+

A DIExpression is interpreted in the same way regardless of which kind of +debug intrinsic it’s attached to.

+
+
!0 = !DIExpression(DW_OP_deref)
+!1 = !DIExpression(DW_OP_plus_uconst, 3)
+!1 = !DIExpression(DW_OP_constu, 3, DW_OP_plus)
+!2 = !DIExpression(DW_OP_bit_piece, 3, 7)
+!3 = !DIExpression(DW_OP_deref, DW_OP_constu, 3, DW_OP_plus, DW_OP_LLVM_fragment, 3, 7)
+!4 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef)
+!5 = !DIExpression(DW_OP_constu, 42, DW_OP_stack_value)
+
+
+
+
+
DIArgList
+

DIArgList nodes hold a list of constant or SSA value references. These are +used in debug intrinsics (currently only in +llvm.dbg.value) in combination with a DIExpression that uses the +DW_OP_LLVM_arg operator. Because a DIArgList may refer to local values +within a function, it must only be used as a function argument, must always be +inlined, and cannot appear in named metadata.

+
llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %b),
+               metadata !16,
+               metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus))
+
+
+
+
+
DIFlags
+

These flags encode various properties of DINodes.

+

The ExportSymbols flag marks a class, struct or union whose members +may be referenced as if they were defined in the containing class or +union. This flag is used to decide whether the DW_AT_export_symbols can +be used for the structure type.

+
+
+
DIObjCProperty
+

DIObjCProperty nodes represent Objective-C property nodes.

+
!3 = !DIObjCProperty(name: "foo", file: !1, line: 7, setter: "setFoo",
+                     getter: "getFoo", attributes: 7, type: !2)
+
+
+
+
+
DIImportedEntity
+

DIImportedEntity nodes represent entities (such as modules) imported into a +compile unit.

+
!2 = !DIImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0,
+                       entity: !1, line: 7)
+
+
+
+
+
DIMacro
+

DIMacro nodes represent definition or undefinition of macro identifiers. +The name: field is the macro identifier, followed by macro parameters when +defining a function-like macro, and the value: field is the token-string +used to expand the macro identifier.

+
!2 = !DIMacro(macinfo: DW_MACINFO_define, line: 7, name: "foo(x)",
+              value: "((x) + 1)")
+!3 = !DIMacro(macinfo: DW_MACINFO_undef, line: 30, name: "foo")
+
+
+
+
+
DIMacroFile
+

DIMacroFile nodes represent inclusion of source files. +The nodes: field is a list of DIMacro and DIMacroFile nodes that +appear in the included source file.

+
!2 = !DIMacroFile(macinfo: DW_MACINFO_start_file, line: 7, file: !2,
+                  nodes: !3)
+
+
+
+
+
DILabel
+

DILabel nodes represent labels within a DISubprogram. All fields of +a DILabel are mandatory. The scope: field must be one of either a +DILexicalBlockFile, a DILexicalBlock, or a DISubprogram. +The name: field is the label identifier. The file: field is the +DIFile the label is present in. The line: field is the source line +within the file where the label is declared.

+
!2 = !DILabel(scope: !0, name: "foo", file: !1, line: 7)
+
+
+
+
+
+

‘tbaa’ Metadata

+

In LLVM IR, memory does not have types, so LLVM’s own type system is not +suitable for doing type based alias analysis (TBAA). Instead, metadata is +added to the IR to describe a type system of a higher level language. This +can be used to implement C/C++ strict type aliasing rules, but it can also +be used to implement custom alias analysis behavior for other languages.

+

This description of LLVM’s TBAA system is broken into two parts: +Semantics talks about high level issues, and +Representation talks about the metadata +encoding of various entities.

+

It is always possible to trace any TBAA node to a “root” TBAA node (details +in the Representation section). TBAA +nodes with different roots have an unknown aliasing relationship, and LLVM +conservatively infers MayAlias between them. The rules mentioned in +this section only pertain to TBAA nodes living under the same root.

+
+
Semantics
+

The TBAA metadata system, referred to as “struct path TBAA” (not to be +confused with tbaa.struct), consists of the following high level +concepts: Type Descriptors, further subdivided into scalar type +descriptors and struct type descriptors; and Access Tags.

+

Type descriptors describe the type system of the higher level language +being compiled. Scalar type descriptors describe types that do not +contain other types. Each scalar type has a parent type, which must also +be a scalar type or the TBAA root. Via this parent relation, scalar types +within a TBAA root form a tree. Struct type descriptors denote types +that contain a sequence of other type descriptors, at known offsets. These +contained type descriptors can either be struct type descriptors themselves +or scalar type descriptors.

+

Access tags are metadata nodes attached to load and store instructions. +Access tags use type descriptors to describe the location being accessed +in terms of the type system of the higher level language. Access tags are +tuples consisting of a base type, an access type and an offset. The base +type is a scalar type descriptor or a struct type descriptor, the access +type is a scalar type descriptor, and the offset is a constant integer.

+

The access tag (BaseTy, AccessTy, Offset) can describe one of two +things:

+
+
    +
  • If BaseTy is a struct type, the tag describes a memory access (load +or store) of a value of type AccessTy contained in the struct type +BaseTy at offset Offset.

  • +
  • If BaseTy is a scalar type, Offset must be 0 and BaseTy and +AccessTy must be the same; and the access tag describes a scalar +access with scalar type AccessTy.

  • +
+
+

We first define an ImmediateParent relation on (BaseTy, Offset) +tuples this way:

+
+
    +
  • If BaseTy is a scalar type then ImmediateParent(BaseTy, 0) is +(ParentTy, 0) where ParentTy is the parent of the scalar type as +described in the TBAA metadata. ImmediateParent(BaseTy, Offset) is +undefined if Offset is non-zero.

  • +
  • If BaseTy is a struct type then ImmediateParent(BaseTy, Offset) +is (NewTy, NewOffset) where NewTy is the type contained in +BaseTy at offset Offset and NewOffset is Offset adjusted +to be relative within that inner type.

  • +
+
+

A memory access with an access tag (BaseTy1, AccessTy1, Offset1) +aliases a memory access with an access tag (BaseTy2, AccessTy2, +Offset2) if either (BaseTy1, Offset1) is reachable from (BaseTy2, +Offset2) via the ImmediateParent relation or vice versa.

+

As a concrete example, the type descriptor graph for the following program

+
struct Inner {
+  int i;    // offset 0
+  float f;  // offset 4
+};
+
+struct Outer {
+  float f;  // offset 0
+  double d; // offset 4
+  struct Inner inner_a;  // offset 12
+};
+
+void f(struct Outer* outer, struct Inner* inner, float* f, int* i, char* c) {
+  outer->f = 0;            // tag0: (OuterStructTy, FloatScalarTy, 0)
+  outer->inner_a.i = 0;    // tag1: (OuterStructTy, IntScalarTy, 12)
+  outer->inner_a.f = 0.0;  // tag2: (OuterStructTy, FloatScalarTy, 16)
+  *f = 0.0;                // tag3: (FloatScalarTy, FloatScalarTy, 0)
+}
+
+
+

is (note that in C and C++, char can be used to access any arbitrary +type):

+
Root = "TBAA Root"
+CharScalarTy = ("char", Root, 0)
+FloatScalarTy = ("float", CharScalarTy, 0)
+DoubleScalarTy = ("double", CharScalarTy, 0)
+IntScalarTy = ("int", CharScalarTy, 0)
+InnerStructTy = {"Inner", (IntScalarTy, 0), (FloatScalarTy, 4)}
+OuterStructTy = {"Outer", (FloatScalarTy, 0), (DoubleScalarTy, 4),
+                 (InnerStructTy, 12)}
+
+
+

with (e.g.) ImmediateParent(OuterStructTy, 12) = (InnerStructTy, +0), ImmediateParent(InnerStructTy, 0) = (IntScalarTy, 0), and +ImmediateParent(IntScalarTy, 0) = (CharScalarTy, 0).

+
+
+
Representation
+

The root node of a TBAA type hierarchy is an MDNode with 0 operands or +with exactly one MDString operand.

+

Scalar type descriptors are represented as MDNode s with two +operands. The first operand is an MDString denoting the name of the +scalar type. LLVM does not assign meaning to the value of this operand, it +only cares about it being an MDString. The second operand is an +MDNode which points to the parent for said scalar type descriptor, +which is either another scalar type descriptor or the TBAA root. Scalar +type descriptors can have an optional third argument, but that must be the +constant integer zero.

+

Struct type descriptors are represented as MDNode s with an odd number +of operands greater than 1. The first operand is an MDString denoting +the name of the struct type. Like in scalar type descriptors the actual +value of this name operand is irrelevant to LLVM. After the name operand, +the struct type descriptors have a sequence of alternating MDNode and +ConstantInt operands. With N starting from 1, the 2N - 1 th operand, +an MDNode, denotes a contained field, and the 2N th operand, a +ConstantInt, is the offset of the said contained field. The offsets +must be in non-decreasing order.

+

Access tags are represented as MDNode s with either 3 or 4 operands. +The first operand is an MDNode pointing to the node representing the +base type. The second operand is an MDNode pointing to the node +representing the access type. The third operand is a ConstantInt that +states the offset of the access. If a fourth field is present, it must be +a ConstantInt valued at 0 or 1. If it is 1 then the access tag states +that the location being accessed is “constant” (meaning +pointsToConstantMemory should return true; see other useful +AliasAnalysis methods). The TBAA root of +the access type and the base type of an access tag must be the same, and +that is the TBAA root of the access tag.

+
+
+
+

‘tbaa.struct’ Metadata

+

The llvm.memcpy is often used to implement +aggregate assignment operations in C and similar languages, however it +is defined to copy a contiguous region of memory, which is more than +strictly necessary for aggregate types which contain holes due to +padding. Also, it doesn’t contain any TBAA information about the fields +of the aggregate.

+

!tbaa.struct metadata can describe which memory subregions in a +memcpy are padding and what the TBAA tags of the struct are.

+

The current metadata format is very simple. !tbaa.struct metadata +nodes are a list of operands which are in conceptual groups of three. +For each group of three, the first operand gives the offset of a +field in bytes, the second gives its size in bytes, and the third gives +its tbaa tag. e.g.:

+
!4 = !{ i64 0, i64 4, !1, i64 8, i64 4, !2 }
+
+
+

This describes a struct with two fields. The first is at offset 0 bytes +with size 4 bytes, and has tbaa tag !1. The second is at offset 8 bytes +and has size 4 bytes and has tbaa tag !2.

+

Note that the fields need not be contiguous. In this example, there is a +4 byte gap between the two fields. This gap represents padding which +does not carry useful data and need not be preserved.

+
+
+

‘noalias’ and ‘alias.scope’ Metadata

+

noalias and alias.scope metadata provide the ability to specify generic +noalias memory-access sets. This means that some collection of memory access +instructions (loads, stores, memory-accessing calls, etc.) that carry +noalias metadata can specifically be specified not to alias with some other +collection of memory access instructions that carry alias.scope metadata. +Each type of metadata specifies a list of scopes where each scope has an id and +a domain.

+

When evaluating an aliasing query, if for some domain, the set +of scopes with that domain in one instruction’s alias.scope list is a +subset of (or equal to) the set of scopes for that domain in another +instruction’s noalias list, then the two memory accesses are assumed not to +alias.

+

Because scopes in one domain don’t affect scopes in other domains, separate +domains can be used to compose multiple independent noalias sets. This is +used for example during inlining. As the noalias function parameters are +turned into noalias scope metadata, a new domain is used every time the +function is inlined.

+

The metadata identifying each domain is itself a list containing one or two +entries. The first entry is the name of the domain. Note that if the name is a +string then it can be combined across functions and translation units. A +self-reference can be used to create globally unique domain names. A +descriptive string may optionally be provided as a second list entry.

+

The metadata identifying each scope is also itself a list containing two or +three entries. The first entry is the name of the scope. Note that if the name +is a string then it can be combined across functions and translation units. A +self-reference can be used to create globally unique scope names. A metadata +reference to the scope’s domain is the second entry. A descriptive string may +optionally be provided as a third list entry.

+

For example,

+
; Two scope domains:
+!0 = !{!0}
+!1 = !{!1}
+
+; Some scopes in these domains:
+!2 = !{!2, !0}
+!3 = !{!3, !0}
+!4 = !{!4, !1}
+
+; Some scope lists:
+!5 = !{!4} ; A list containing only scope !4
+!6 = !{!4, !3, !2}
+!7 = !{!3}
+
+; These two instructions don't alias:
+%0 = load float, float* %c, align 4, !alias.scope !5
+store float %0, float* %arrayidx.i, align 4, !noalias !5
+
+; These two instructions also don't alias (for domain !1, the set of scopes
+; in the !alias.scope equals that in the !noalias list):
+%2 = load float, float* %c, align 4, !alias.scope !5
+store float %2, float* %arrayidx.i2, align 4, !noalias !6
+
+; These two instructions may alias (for domain !0, the set of scopes in
+; the !noalias list is not a superset of, or equal to, the scopes in the
+; !alias.scope list):
+%2 = load float, float* %c, align 4, !alias.scope !6
+store float %0, float* %arrayidx.i, align 4, !noalias !7
+
+
+
+
+

‘fpmath’ Metadata

+

fpmath metadata may be attached to any instruction of floating-point +type. It can be used to express the maximum acceptable error in the +result of that instruction, in ULPs, thus potentially allowing the +compiler to use a more efficient but less accurate method of computing +it. ULP is defined as follows:

+
+

If x is a real number that lies between two finite consecutive +floating-point numbers a and b, without being equal to one +of them, then ulp(x) = |b - a|, otherwise ulp(x) is the +distance between the two non-equal finite floating-point numbers +nearest x. Moreover, ulp(NaN) is NaN.

+
+

The metadata node shall consist of a single positive float type number +representing the maximum relative error, for example:

+
!0 = !{ float 2.5 } ; maximum acceptable inaccuracy is 2.5 ULPs
+
+
+
+
+

‘range’ Metadata

+

range metadata may be attached only to load, call and invoke of +integer types. It expresses the possible ranges the loaded value or the value +returned by the called function at this call site is in. If the loaded or +returned value is not in the specified range, the behavior is undefined. The +ranges are represented with a flattened list of integers. The loaded value or +the value returned is known to be in the union of the ranges defined by each +consecutive pair. Each pair has the following properties:

+
    +
  • The type must match the type loaded by the instruction.

  • +
  • The pair a,b represents the range [a,b).

  • +
  • Both a and b are constants.

  • +
  • The range is allowed to wrap.

  • +
  • The range should not represent the full or empty set. That is, +a!=b.

  • +
+

In addition, the pairs must be in signed order of the lower bound and +they must be non-contiguous.

+

Examples:

+
  %a = load i8, i8* %x, align 1, !range !0 ; Can only be 0 or 1
+  %b = load i8, i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1
+  %c = call i8 @foo(),       !range !2 ; Can only be 0, 1, 3, 4 or 5
+  %d = invoke i8 @bar() to label %cont
+         unwind label %lpad, !range !3 ; Can only be -2, -1, 3, 4 or 5
+...
+!0 = !{ i8 0, i8 2 }
+!1 = !{ i8 255, i8 2 }
+!2 = !{ i8 0, i8 2, i8 3, i8 6 }
+!3 = !{ i8 -2, i8 0, i8 3, i8 6 }
+
+
+
+
+

‘absolute_symbol’ Metadata

+

absolute_symbol metadata may be attached to a global variable +declaration. It marks the declaration as a reference to an absolute symbol, +which causes the backend to use absolute relocations for the symbol even +in position independent code, and expresses the possible ranges that the +global variable’s address (not its value) is in, in the same format as +range metadata, with the extension that the pair all-ones,all-ones +may be used to represent the full set.

+

Example (assuming 64-bit pointers):

+
  @a = external global i8, !absolute_symbol !0 ; Absolute symbol in range [0,256)
+  @b = external global i8, !absolute_symbol !1 ; Absolute symbol in range [0,2^64)
+
+...
+!0 = !{ i64 0, i64 256 }
+!1 = !{ i64 -1, i64 -1 }
+
+
+
+
+

‘callees’ Metadata

+

callees metadata may be attached to indirect call sites. If callees +metadata is attached to a call site, and any callee is not among the set of +functions provided by the metadata, the behavior is undefined. The intent of +this metadata is to facilitate optimizations such as indirect-call promotion. +For example, in the code below, the call instruction may only target the +add or sub functions:

+
%result = call i64 %binop(i64 %x, i64 %y), !callees !0
+
+...
+!0 = !{i64 (i64, i64)* @add, i64 (i64, i64)* @sub}
+
+
+
+
+

‘callback’ Metadata

+

callback metadata may be attached to a function declaration, or definition. +(Call sites are excluded only due to the lack of a use case.) For ease of +exposition, we’ll refer to the function annotated w/ metadata as a broker +function. The metadata describes how the arguments of a call to the broker are +in turn passed to the callback function specified by the metadata. Thus, the +callback metadata provides a partial description of a call site inside the +broker function with regards to the arguments of a call to the broker. The only +semantic restriction on the broker function itself is that it is not allowed to +inspect or modify arguments referenced in the callback metadata as +pass-through to the callback function.

+

The broker is not required to actually invoke the callback function at runtime. +However, the assumptions about not inspecting or modifying arguments that would +be passed to the specified callback function still hold, even if the callback +function is not dynamically invoked. The broker is allowed to invoke the +callback function more than once per invocation of the broker. The broker is +also allowed to invoke (directly or indirectly) the function passed as a +callback through another use. Finally, the broker is also allowed to relay the +callback callee invocation to a different thread.

+

The metadata is structured as follows: At the outer level, callback +metadata is a list of callback encodings. Each encoding starts with a +constant i64 which describes the argument position of the callback function +in the call to the broker. The following elements, except the last, describe +what arguments are passed to the callback function. Each element is again an +i64 constant identifying the argument of the broker that is passed through, +or i64 -1 to indicate an unknown or inspected argument. The order in which +they are listed has to be the same in which they are passed to the callback +callee. The last element of the encoding is a boolean which specifies how +variadic arguments of the broker are handled. If it is true, all variadic +arguments of the broker are passed through to the callback function after the +arguments encoded explicitly before.

+

In the code below, the pthread_create function is marked as a broker +through the !callback !1 metadata. In the example, there is only one +callback encoding, namely !2, associated with the broker. This encoding +identifies the callback function as the broker argument at zero-based position 2 (i64 +2) and the sole argument of the callback function as the broker argument at +zero-based position 3 (i64 3).

+
declare !callback !1 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)
+
+...
+!2 = !{i64 2, i64 3, i1 false}
+!1 = !{!2}
+
+
+

Another example is shown below. The callback callee is the argument at zero-based position 2 of +the __kmpc_fork_call function (i64 2). The callee is given two unknown +values (each identified by a i64 -1) and afterwards all +variadic arguments that are passed to the __kmpc_fork_call call (due to the +final i1 true).

+
declare !callback !0 dso_local void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+
+...
+!1 = !{i64 2, i64 -1, i64 -1, i1 true}
+!0 = !{!1}
+
+
+
+
+

unpredictable’ Metadata

+

unpredictable metadata may be attached to any branch or switch +instruction. It can be used to express the unpredictability of control +flow. Similar to the llvm.expect intrinsic, it may be used to alter +optimizations related to compare and branch instructions. The metadata +is treated as a boolean value; if it exists, it signals that the branch +or switch that it is attached to is completely unpredictable.

+
+
+

dereferenceable’ Metadata

+

The existence of the !dereferenceable metadata on the instruction +tells the optimizer that the value loaded is known to be dereferenceable. +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ‘dereferenceable’ +attribute on parameters and return values.

+
+
+

dereferenceable_or_null’ Metadata

+

The existence of the !dereferenceable_or_null metadata on the +instruction tells the optimizer that the value loaded is known to be either +dereferenceable or null. +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ‘dereferenceable_or_null’ +attribute on parameters and return values.

+
+
+

llvm.loop

+

It is sometimes useful to attach information to loop constructs. Currently, +loop metadata is implemented as metadata attached to the branch instruction +in the loop latch block. The loop metadata node is a list of +other metadata nodes, each representing a property of the loop. Usually, +the first item of the property node is a string. For example, the +llvm.loop.unroll.count suggests an unroll factor to the loop +unroller:

+
  br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !0
+...
+!0 = !{!0, !1, !2}
+!1 = !{!"llvm.loop.unroll.enable"}
+!2 = !{!"llvm.loop.unroll.count", i32 4}
+
+
+

For legacy reasons, the first item of a loop metadata node must be a +reference to itself. Before the advent of the ‘distinct’ keyword, this +forced the preservation of otherwise identical metadata nodes. Since +the loop-metadata node can be attached to multiple nodes, the ‘distinct’ +keyword has become unnecessary.

+

Prior to the property nodes, one or two DILocation (debug location) +nodes can be present in the list. The first, if present, identifies the +source-code location where the loop begins. The second, if present, +identifies the source-code location where the loop ends.

+

Loop metadata nodes cannot be used as unique identifiers. They are +neither persistent for the same loop through transformations nor +necessarily unique to just one loop.

+
+
+

llvm.loop.disable_nonforced

+

This metadata disables all optional loop transformations unless +explicitly instructed using other transformation metadata such as +llvm.loop.unroll.enable. That is, no heuristic will try to determine +whether a transformation is profitable. The purpose is to prevent the +loop from being transformed into a different loop before an explicitly requested +(forced) transformation is applied. For instance, loop fusion can make +other transformations impossible. Mandatory loop canonicalizations such +as loop rotation are still applied.

+

It is recommended to use this metadata in addition to any llvm.loop.* +transformation directive. Also, any loop should have at most one +directive applied to it (and a sequence of transformations built using +followup-attributes). Otherwise, which transformation will be applied +depends on implementation details such as the pass pipeline order.

+

See Code Transformation Metadata for details.

+
+
+

‘llvm.loop.vectorize’ and ‘llvm.loop.interleave’

+

Metadata prefixed with llvm.loop.vectorize or llvm.loop.interleave are +used to control per-loop vectorization and interleaving parameters such as +vectorization width and interleave count. These metadata should be used in +conjunction with llvm.loop loop identification metadata. The +llvm.loop.vectorize and llvm.loop.interleave metadata are only +optimization hints and the optimizer will only interleave and vectorize loops if +it believes it is safe to do so. The llvm.loop.parallel_accesses metadata +which contains information about loop-carried memory dependencies can be helpful +in determining the safety of these transformations.

+
+
+

llvm.loop.interleave.count’ Metadata

+

This metadata suggests an interleave count to the loop interleaver. +The first operand is the string llvm.loop.interleave.count and the +second operand is an integer specifying the interleave count. For +example:

+
!0 = !{!"llvm.loop.interleave.count", i32 4}
+
+
+

Note that setting llvm.loop.interleave.count to 1 disables interleaving +multiple iterations of the loop. If llvm.loop.interleave.count is set to 0 +then the interleave count will be determined automatically.

+
+
+

llvm.loop.vectorize.enable’ Metadata

+

This metadata selectively enables or disables vectorization for the loop. The +first operand is the string llvm.loop.vectorize.enable and the second operand +is a bit. If the bit operand value is 1 vectorization is enabled. A value of +0 disables vectorization:

+
!0 = !{!"llvm.loop.vectorize.enable", i1 0}
+!1 = !{!"llvm.loop.vectorize.enable", i1 1}
+
+
+
+
+

llvm.loop.vectorize.predicate.enable’ Metadata

+

This metadata selectively enables or disables creating predicated instructions +for the loop, which can enable folding of the scalar epilogue loop into the +main loop. The first operand is the string +llvm.loop.vectorize.predicate.enable and the second operand is a bit. If +the bit operand value is 1 predication is enabled. A value of 0 disables +predication:

+
!0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0}
+!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1}
+
+
+
+
+

llvm.loop.vectorize.scalable.enable’ Metadata

+

This metadata selectively enables or disables scalable vectorization for the +loop, and only has any effect if vectorization for the loop is already enabled. +The first operand is the string llvm.loop.vectorize.scalable.enable +and the second operand is a bit. If the bit operand value is 1 scalable +vectorization is enabled, whereas a value of 0 reverts to the default fixed +width vectorization:

+
!0 = !{!"llvm.loop.vectorize.scalable.enable", i1 0}
+!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 1}
+
+
+
+
+

llvm.loop.vectorize.width’ Metadata

+

This metadata sets the target width of the vectorizer. The first +operand is the string llvm.loop.vectorize.width and the second +operand is an integer specifying the width. For example:

+
!0 = !{!"llvm.loop.vectorize.width", i32 4}
+
+
+

Note that setting llvm.loop.vectorize.width to 1 disables +vectorization of the loop. If llvm.loop.vectorize.width is set to +0 or if the loop does not have this metadata the width will be +determined automatically.

+
+
+

llvm.loop.vectorize.followup_vectorized’ Metadata

+

This metadata defines which loop attributes the vectorized loop will +have. See Code Transformation Metadata for details.

+
+
+

llvm.loop.vectorize.followup_epilogue’ Metadata

+

This metadata defines which loop attributes the epilogue will have. The +epilogue is not vectorized and is executed when either the vectorized +loop is not known to preserve semantics (because e.g., it processes two +arrays that are found to alias by a runtime check) or for the last +iterations that do not fill a complete set of vector lanes. See +Transformation Metadata for details.

+
+
+

llvm.loop.vectorize.followup_all’ Metadata

+

Attributes in the metadata will be added to both the vectorized and +epilogue loop. +See Transformation Metadata for details.

+
+
+

llvm.loop.unroll

+

Metadata prefixed with llvm.loop.unroll are loop unrolling +optimization hints such as the unroll factor. llvm.loop.unroll +metadata should be used in conjunction with llvm.loop loop +identification metadata. The llvm.loop.unroll metadata are only +optimization hints and the unrolling will only be performed if the +optimizer believes it is safe to do so.

+
+
+

llvm.loop.unroll.count’ Metadata

+

This metadata suggests an unroll factor to the loop unroller. The +first operand is the string llvm.loop.unroll.count and the second +operand is a positive integer specifying the unroll factor. For +example:

+
!0 = !{!"llvm.loop.unroll.count", i32 4}
+
+
+

If the trip count of the loop is less than the unroll count the loop +will be partially unrolled.

+
+
+

llvm.loop.unroll.disable’ Metadata

+

This metadata disables loop unrolling. The metadata has a single operand +which is the string llvm.loop.unroll.disable. For example:

+
!0 = !{!"llvm.loop.unroll.disable"}
+
+
+
+
+

llvm.loop.unroll.runtime.disable’ Metadata

+

This metadata disables runtime loop unrolling. The metadata has a single +operand which is the string llvm.loop.unroll.runtime.disable. For example:

+
!0 = !{!"llvm.loop.unroll.runtime.disable"}
+
+
+
+
+

llvm.loop.unroll.enable’ Metadata

+

This metadata suggests that the loop should be fully unrolled if the trip count +is known at compile time and partially unrolled if the trip count is not known +at compile time. The metadata has a single operand which is the string +llvm.loop.unroll.enable. For example:

+
!0 = !{!"llvm.loop.unroll.enable"}
+
+
+
+
+

llvm.loop.unroll.full’ Metadata

+

This metadata suggests that the loop should be unrolled fully. The +metadata has a single operand which is the string llvm.loop.unroll.full. +For example:

+
!0 = !{!"llvm.loop.unroll.full"}
+
+
+
+
+

llvm.loop.unroll.followup’ Metadata

+

This metadata defines which loop attributes the unrolled loop will have. +See Transformation Metadata for details.

+
+
+

llvm.loop.unroll.followup_remainder’ Metadata

+

This metadata defines which loop attributes the remainder loop after +partial/runtime unrolling will have. See +Transformation Metadata for details.

+
+
+

llvm.loop.unroll_and_jam

+

This metadata is treated very similarly to the llvm.loop.unroll metadata +above, but affects the unroll and jam pass. In addition, any loop with +llvm.loop.unroll metadata but no llvm.loop.unroll_and_jam metadata will +disable unroll and jam (so llvm.loop.unroll metadata will be left to the +unroller, plus llvm.loop.unroll.disable metadata will disable unroll and jam +too.)

+

The metadata for unroll and jam otherwise is the same as for unroll. +llvm.loop.unroll_and_jam.enable, llvm.loop.unroll_and_jam.disable and +llvm.loop.unroll_and_jam.count do the same as for unroll. +llvm.loop.unroll_and_jam.full is not supported. Again these are only hints +and the normal safety checks will still be performed.

+
+
+

llvm.loop.unroll_and_jam.count’ Metadata

+

This metadata suggests an unroll and jam factor to use, similarly to +llvm.loop.unroll.count. The first operand is the string +llvm.loop.unroll_and_jam.count and the second operand is a positive integer +specifying the unroll factor. For example:

+
!0 = !{!"llvm.loop.unroll_and_jam.count", i32 4}
+
+
+

If the trip count of the loop is less than the unroll count the loop +will be partially unrolled and jammed.

+
+
+

llvm.loop.unroll_and_jam.disable’ Metadata

+

This metadata disables loop unroll and jamming. The metadata has a single +operand which is the string llvm.loop.unroll_and_jam.disable. For example:

+
!0 = !{!"llvm.loop.unroll_and_jam.disable"}
+
+
+
+
+

llvm.loop.unroll_and_jam.enable’ Metadata

+

This metadata suggests that the loop should be fully unrolled and jammed if the +trip count is known at compile time and partially unrolled if the trip count is +not known at compile time. The metadata has a single operand which is the +string llvm.loop.unroll_and_jam.enable. For example:

+
!0 = !{!"llvm.loop.unroll_and_jam.enable"}
+
+
+
+
+

llvm.loop.unroll_and_jam.followup_outer’ Metadata

+

This metadata defines which loop attributes the outer unrolled loop will +have. See Transformation Metadata for +details.

+
+
+

llvm.loop.unroll_and_jam.followup_inner’ Metadata

+

This metadata defines which loop attributes the inner jammed loop will +have. See Transformation Metadata for +details.

+
+
+

llvm.loop.unroll_and_jam.followup_remainder_outer’ Metadata

+

This metadata defines which attributes the epilogue of the outer loop +will have. This loop is usually unrolled, meaning there is no such +loop. This attribute will be ignored in this case. See +Transformation Metadata for details.

+
+
+

llvm.loop.unroll_and_jam.followup_remainder_inner’ Metadata

+

This metadata defines which attributes the inner loop of the epilogue +will have. The outer epilogue will usually be unrolled, meaning there +can be multiple inner remainder loops. See +Transformation Metadata for details.

+
+
+

llvm.loop.unroll_and_jam.followup_all’ Metadata

+

Attributes specified in the metadata are added to all +llvm.loop.unroll_and_jam.* loops. See +Transformation Metadata for details.

+
+
+

llvm.loop.licm_versioning.disable’ Metadata

+

This metadata indicates that the loop should not be versioned for the purpose +of enabling loop-invariant code motion (LICM). The metadata has a single operand +which is the string llvm.loop.licm_versioning.disable. For example:

+
!0 = !{!"llvm.loop.licm_versioning.disable"}
+
+
+
+
+

llvm.loop.distribute.enable’ Metadata

+

Loop distribution allows splitting a loop into multiple loops. Currently, +this is only performed if the entire loop cannot be vectorized due to unsafe +memory dependencies. The transformation will attempt to isolate the unsafe +dependencies into their own loop.

+

This metadata can be used to selectively enable or disable distribution of the +loop. The first operand is the string llvm.loop.distribute.enable and the +second operand is a bit. If the bit operand value is 1 distribution is +enabled. A value of 0 disables distribution:

+
!0 = !{!"llvm.loop.distribute.enable", i1 0}
+!1 = !{!"llvm.loop.distribute.enable", i1 1}
+
+
+

This metadata should be used in conjunction with llvm.loop loop +identification metadata.

+
+
+

llvm.loop.distribute.followup_coincident’ Metadata

+

This metadata defines which attributes extracted loops with no cyclic +dependencies will have (i.e. can be vectorized). See +Transformation Metadata for details.

+
+
+

llvm.loop.distribute.followup_sequential’ Metadata

+

This metadata defines which attributes the isolated loops with unsafe +memory dependencies will have. See +Transformation Metadata for details.

+
+
+

llvm.loop.distribute.followup_fallback’ Metadata

+

If loop versioning is necessary, this metadata defines the attributes +the non-distributed fallback version will have. See +Transformation Metadata for details.

+
+
+

llvm.loop.distribute.followup_all’ Metadata

+

The attributes in this metadata are added to all followup loops of the +loop distribution pass. See +Transformation Metadata for details.

+
+
+

llvm.licm.disable’ Metadata

+

This metadata indicates that loop-invariant code motion (LICM) should not be +performed on this loop. The metadata has a single operand which is the string +llvm.licm.disable. For example:

+
!0 = !{!"llvm.licm.disable"}
+
+
+

Note that although it operates per loop it isn’t given the llvm.loop prefix +as it is not affected by the llvm.loop.disable_nonforced metadata.

+
+
+

llvm.access.group’ Metadata

+

llvm.access.group metadata can be attached to any instruction that +potentially accesses memory. It can point to a single distinct metadata +node, which we call access group. This node represents all memory access +instructions referring to it via llvm.access.group. When an +instruction belongs to multiple access groups, it can also point to a +list of access groups, illustrated by the following example.

+
%val = load i32, i32* %arrayidx, !llvm.access.group !0
+...
+!0 = !{!1, !2}
+!1 = distinct !{}
+!2 = distinct !{}
+
+
+

It is illegal for the list node to be empty since it might be confused +with an access group.

+

The access group metadata node must be ‘distinct’ to avoid collapsing +multiple access groups by content. An access group metadata node must +always be empty which can be used to distinguish an access group +metadata node from a list of access groups. Being empty avoids the +situation that the content must be updated which, because metadata is +immutable by design, would require finding and updating all references +to the access group node.

+

The access group can be used to refer to a memory access instruction +without pointing to it directly (which is not possible in global +metadata). Currently, the only metadata making use of it is +llvm.loop.parallel_accesses.

+
+
+

llvm.loop.parallel_accesses’ Metadata

+

The llvm.loop.parallel_accesses metadata refers to one or more +access group metadata nodes (see llvm.access.group). It denotes that +no loop-carried memory dependence exists between it and other instructions +in the loop with this metadata.

+

Let m1 and m2 be two instructions that both have the +llvm.access.group metadata to the access group g1, respectively +g2 (which might be identical). If a loop contains both access groups +in its llvm.loop.parallel_accesses metadata, then the compiler can +assume that there is no dependency between m1 and m2 carried by +this loop. Instructions that belong to multiple access groups are +considered having this property if at least one of the access groups +matches the llvm.loop.parallel_accesses list.

+

If all memory-accessing instructions in a loop have +llvm.access.group metadata that each refer to one of the access +groups of a loop’s llvm.loop.parallel_accesses metadata, then the +loop has no loop carried memory dependences and is considered to be a +parallel loop.

+

Note that if not all memory access instructions belong to an access +group referred to by llvm.loop.parallel_accesses, then the loop must +not be considered trivially parallel. Additional +memory dependence analysis is required to make that determination. As a fail +safe mechanism, this causes loops that were originally parallel to be considered +sequential (if optimization passes that are unaware of the parallel semantics +insert new memory instructions into the loop body).

+

Example of a loop that is considered parallel due to its correct use of +both llvm.access.group and llvm.loop.parallel_accesses +metadata types.

+
for.body:
+  ...
+  %val0 = load i32, i32* %arrayidx, !llvm.access.group !1
+  ...
+  store i32 %val0, i32* %arrayidx1, !llvm.access.group !1
+  ...
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+...
+!0 = distinct !{!0, !{!"llvm.loop.parallel_accesses", !1}}
+!1 = distinct !{}
+
+
+

It is also possible to have nested parallel loops:

+
outer.for.body:
+  ...
+  %val1 = load i32, i32* %arrayidx3, !llvm.access.group !4
+  ...
+  br label %inner.for.body
+
+inner.for.body:
+  ...
+  %val0 = load i32, i32* %arrayidx1, !llvm.access.group !3
+  ...
+  store i32 %val0, i32* %arrayidx2, !llvm.access.group !3
+  ...
+  br i1 %exitcond, label %inner.for.end, label %inner.for.body, !llvm.loop !1
+
+inner.for.end:
+  ...
+  store i32 %val1, i32* %arrayidx4, !llvm.access.group !4
+  ...
+  br i1 %exitcond, label %outer.for.end, label %outer.for.body, !llvm.loop !2
+
+outer.for.end:                                          ; preds = %for.body
+...
+!1 = distinct !{!1, !{!"llvm.loop.parallel_accesses", !3}}     ; metadata for the inner loop
+!2 = distinct !{!2, !{!"llvm.loop.parallel_accesses", !3, !4}} ; metadata for the outer loop
+!3 = distinct !{} ; access group for instructions in the inner loop (which are implicitly contained in outer loop as well)
+!4 = distinct !{} ; access group for instructions in the outer, but not the inner loop
+
+
+
+
+

llvm.loop.mustprogress’ Metadata

+

The llvm.loop.mustprogress metadata indicates that this loop is required to +terminate, unwind, or interact with the environment in an observable way e.g. +via a volatile memory access, I/O, or other synchronization. If such a loop is +not found to interact with the environment in an observable way, the loop may +be removed. This corresponds to the mustprogress function attribute.

+
+
+

irr_loop’ Metadata

+

irr_loop metadata may be attached to the terminator instruction of a basic +block that’s an irreducible loop header (note that an irreducible loop has more +than one header basic block.) If irr_loop metadata is attached to the +terminator instruction of a basic block that is not really an irreducible loop +header, the behavior is undefined. The intent of this metadata is to improve the +accuracy of the block frequency propagation. For example, in the code below, the +block header0 may have a loop header weight (relative to the other headers of +the irreducible loop) of 100:

+
header0:
+...
+br i1 %cmp, label %t1, label %t2, !irr_loop !0
+
+...
+!0 = !{!"loop_header_weight", i64 100}
+
+
+

Irreducible loop header weights are typically based on profile data.

+
+
+

invariant.group’ Metadata

+

The experimental invariant.group metadata may be attached to +load/store instructions referencing a single metadata with no entries. +The existence of the invariant.group metadata on the instruction tells +the optimizer that every load and store to the same pointer operand +can be assumed to load or store the same +value (but see the llvm.launder.invariant.group intrinsic which affects +when two pointers are considered the same). Pointers returned by bitcast or +getelementptr with only zero indices are considered the same.

+

Examples:

+
@unknownPtr = external global i8
+...
+%ptr = alloca i8
+store i8 42, i8* %ptr, !invariant.group !0
+call void @foo(i8* %ptr)
+
+%a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change
+call void @foo(i8* %ptr)
+
+%newPtr = call i8* @getPointer(i8* %ptr)
+%c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr
+
+%unknownValue = load i8, i8* @unknownPtr
+store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42
+
+call void @foo(i8* %ptr)
+%newPtr2 = call i8* @llvm.launder.invariant.group(i8* %ptr)
+%d = load i8, i8* %newPtr2, !invariant.group !0  ; Can't step through launder.invariant.group to get value of %ptr
+
+...
+declare void @foo(i8*)
+declare i8* @getPointer(i8*)
+declare i8* @llvm.launder.invariant.group(i8*)
+
+!0 = !{}
+
+
+

The invariant.group metadata must be dropped when replacing one pointer by +another based on aliasing information. This is because invariant.group is tied +to the SSA value of the pointer operand.

+
%v = load i8, i8* %x, !invariant.group !0
+; if %x mustalias %y then we can replace the above instruction with
+%v = load i8, i8* %y
+
+
+

Note that this is an experimental feature, which means that its semantics might +change in the future.

+
+ +
+

associated’ Metadata

+

The associated metadata may be attached to a global variable definition with +a single argument that references a global object (optionally through an alias).

+

This metadata lowers to the ELF section flag SHF_LINK_ORDER which prevents +discarding of the global variable in linker GC unless the referenced object is +also discarded. The linker support for this feature is spotty. For best +compatibility, globals carrying this metadata should:

+
    +
  • Be in @llvm.compiler.used.

  • +
  • If the referenced global variable is in a comdat, be in the same comdat.

  • +
+

!associated cannot express a many-to-one relationship. A global variable with +the metadata should generally not be referenced by a function: the function may +be inlined into other functions, leading to more references to the metadata. +Ideally we would want to keep metadata alive as long as any inline location is +alive, but this many-to-one relationship is not representable. Moreover, if the +metadata is retained while the function is discarded, the linker will report an +error of a relocation referencing a discarded section.

+

The metadata is often used with an explicit section consisting of valid C +identifiers so that the runtime can find the metadata section with +linker-defined encapsulation symbols __start_<section_name> and +__stop_<section_name>.

+

It does not have any effect on non-ELF targets.

+

Example:

+
$a = comdat any
+@a = global i32 1, comdat $a
+@b = internal global i32 2, comdat $a, section "abc", !associated !0
+!0 = !{i32* @a}
+
+
+
+
+

prof’ Metadata

+

The prof metadata is used to record profile data in the IR. +The first operand of the metadata node indicates the profile metadata +type. There are currently 3 types: +branch_weights, +function_entry_count, and +VP.

+
+
branch_weights
+

Branch weight metadata attached to a branch, select, switch or call instruction +represents the likeliness of the associated branch being taken. +For more information, see LLVM Branch Weight Metadata.

+
+
+
function_entry_count
+

Function entry count metadata can be attached to function definitions +to record the number of times the function is called. Used with BFI +information, it is also used to derive the basic block profile count. +For more information, see LLVM Branch Weight Metadata.

+
+
+
VP
+

VP (value profile) metadata can be attached to instructions that have +value profile information. Currently this is indirect calls (where it +records the hottest callees) and calls to memory intrinsics such as memcpy, +memmove, and memset (where it records the hottest byte lengths).

+

Each VP metadata node contains the “VP” string, then a uint32_t value for the value +profiling kind, a uint64_t value for the total number of times the instruction +is executed, followed by uint64_t value and execution count pairs. +The value profiling kind is 0 for indirect call targets and 1 for memory +operations. For indirect call targets, each profile value is a hash +of the callee function name, and for memory operations each value is the +byte length.

+

Note that the value counts do not need to add up to the total count +listed in the third operand (in practice only the top hottest values +are tracked and reported).

+

Indirect call example:

+
call void %f(), !prof !1
+!1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410}
+
+
+

Note that the VP type is 0 (the second operand), which indicates that this is +indirect call value profile data. The third operand indicates that the +indirect call executed 1600 times. The 4th and 6th operands give the +hashes of the 2 hottest target functions’ names (this is the same hash used +to represent function names in the profile database), and the 5th and 7th +operands give the number of times that each of the respective preceding target +functions was called.

+
+
+
+

annotation’ Metadata

+

The annotation metadata can be used to attach a tuple of annotation strings +to any instruction. This metadata does not impact the semantics of the program +and may only be used to provide additional insight about the program and +transformations to users.

+

Example:

+
%a.addr = alloca float*, align 8, !annotation !0
+!0 = !{!"auto-init"}
+
+
+
+
+
+
+

Module Flags Metadata

+

Information about the module as a whole is difficult to convey to LLVM’s +subsystems. The LLVM IR isn’t sufficient to transmit this information. +The llvm.module.flags named metadata exists in order to facilitate +this. These flags are in the form of key / value pairs — much like a +dictionary — making it easy for any subsystem that cares about a flag to +look it up.

+

The llvm.module.flags metadata contains a list of metadata triplets. +Each triplet has the following form:

+
    +
  • The first element is a behavior flag, which specifies the behavior +when two (or more) modules are merged together, and it encounters two +(or more) metadata with the same ID. The supported behaviors are +described below.

  • +
  • The second element is a metadata string that is a unique ID for the +metadata. Each module may only have one flag entry for each unique ID (not +including entries with the Require behavior).

  • +
  • The third element is the value of the flag.

  • +
+

When two (or more) modules are merged together, the resulting +llvm.module.flags metadata is the union of the modules’ flags. That is, for +each unique metadata ID string, there will be exactly one entry in the merged +module’s llvm.module.flags metadata table, and the value for that entry will +be determined by the merge behavior flag, as described below. The only exception +is that entries with the Require behavior are always preserved.

+

The following behaviors are supported:

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Value

Behavior

1

+
Error

Emits an error if two values disagree, otherwise the resulting value +is that of the operands.

+
+
+

2

+
Warning

Emits a warning if two values disagree. The result value will be the +operand for the flag from the first module being linked, or the max +if the other module uses Max (in which case the resulting flag +will be Max).

+
+
+

3

+
Require

Adds a requirement that another module flag be present and have a +specified value after linking is performed. The value must be a +metadata pair, where the first element of the pair is the ID of the +module flag to be restricted, and the second element of the pair is +the value the module flag should be restricted to. This behavior can +be used to restrict the allowable results (via triggering of an +error) of linking IDs with the Override behavior.

+
+
+

4

+
Override

Uses the specified value, regardless of the behavior or value of the +other module. If both modules specify Override, but the values +differ, an error will be emitted.

+
+
+

5

+
Append

Appends the two values, which are required to be metadata nodes.

+
+
+

6

+
AppendUnique

Appends the two values, which are required to be metadata +nodes. However, duplicate entries in the second list are dropped +during the append operation.

+
+
+

7

+
Max

Takes the max of the two values, which are required to be integers.

+
+
+
+

It is an error for a particular unique flag ID to have multiple behaviors, +except in the case of Require (which adds restrictions on another metadata +value) or Override.

+

An example of module flags:

+
!0 = !{ i32 1, !"foo", i32 1 }
+!1 = !{ i32 4, !"bar", i32 37 }
+!2 = !{ i32 2, !"qux", i32 42 }
+!3 = !{ i32 3, !"qux",
+  !{
+    !"foo", i32 1
+  }
+}
+!llvm.module.flags = !{ !0, !1, !2, !3 }
+
+
+
    +
  • Metadata !0 has the ID !"foo" and the value ‘1’. The behavior +if two or more !"foo" flags are seen is to emit an error if their +values are not equal.

  • +
  • Metadata !1 has the ID !"bar" and the value ‘37’. The +behavior if two or more !"bar" flags are seen is to use the value +‘37’.

  • +
  • Metadata !2 has the ID !"qux" and the value ‘42’. The +behavior if two or more !"qux" flags are seen is to emit a +warning if their values are not equal.

  • +
  • Metadata !3 has the ID !"qux" and the value:

    +
    !{ !"foo", i32 1 }
    +
    +
    +

    The behavior is to emit an error if the llvm.module.flags does not +contain a flag with the ID !"foo" that has the value ‘1’ after linking is +performed.

    +
  • +
+
+

Synthesized Functions Module Flags Metadata

+

These metadata specify the default attributes synthesized functions should have. +These metadata are currently respected by a few instrumentation passes, such as +sanitizers.

+

These metadata correspond to a few function attributes with significant code +generation behaviors. Function attributes with just optimization purposes +should not be listed because the performance impact of these synthesized +functions is small.

+
    +
  • “frame-pointer”: Max. The value can be 0, 1, or 2. A synthesized function +will get the “frame-pointer” function attribute, with value being “none”, +“non-leaf”, or “all”, respectively.

  • +
  • “uwtable”: Max. The value can be 0 or 1. If the value is 1, a synthesized +function will get the uwtable function attribute.

  • +
+
+
+

Objective-C Garbage Collection Module Flags Metadata

+

On the Mach-O platform, Objective-C stores metadata about garbage +collection in a special section called “image info”. The metadata +consists of a version number and a bitmask specifying what types of +garbage collection are supported (if any) by the file. If two or more +modules are linked together their garbage collection metadata needs to +be merged rather than appended together.

+

The Objective-C garbage collection module flags metadata consists of the +following key-value pairs:

+ ++++ + + + + + + + + + + + + + + + + + + + + + + +

Key

Value

Objective-C Version

[Required] — The Objective-C ABI version. Valid values are 1 and 2.

Objective-C Image Info Version

[Required] — The version of the image info section. Currently +always 0.

Objective-C Image Info Section

[Required] — The section to place the metadata. Valid values are +"__OBJC, __image_info, regular" for Objective-C ABI version 1, and +"__DATA,__objc_imageinfo, regular, no_dead_strip" for +Objective-C ABI version 2.

Objective-C Garbage Collection

[Required] — Specifies whether garbage collection is supported or +not. Valid values are 0, for no garbage collection, and 2, for garbage +collection supported.

Objective-C GC Only

[Optional] — Specifies that only garbage collection is supported. +If present, its value must be 6. This flag requires that the +Objective-C Garbage Collection flag have the value 2.

+

Some important flag interactions:

+
    +
  • If a module with Objective-C Garbage Collection set to 0 is +merged with a module with Objective-C Garbage Collection set to +2, then the resulting module has the +Objective-C Garbage Collection flag set to 0.

  • +
  • A module with Objective-C Garbage Collection set to 0 cannot be +merged with a module with Objective-C GC Only set to 6.

  • +
+
+
+

C type width Module Flags Metadata

+

The ARM backend emits a section into each generated object file describing the +options that it was compiled with (in a compiler-independent way) to prevent +linking incompatible objects, and to allow automatic library selection. Some +of these options are not visible at the IR level, namely wchar_t width and enum +width.

+

To pass this information to the backend, these options are encoded in module +flags metadata, using the following key-value pairs:

+ ++++ + + + + + + + + + + + + + +

Key

Value

short_wchar

    +
  • 0 — sizeof(wchar_t) == 4

  • +
  • 1 — sizeof(wchar_t) == 2

  • +
+

short_enum

    +
  • 0 — Enums are at least as large as an int.

  • +
  • 1 — Enums are stored in the smallest integer type which can +represent all of its values.

  • +
+
+

For example, the following metadata section specifies that the module was +compiled with a wchar_t width of 2 bytes, and the underlying type of an +enum is at least as large as an int:

+
!llvm.module.flags = !{!0, !1}
+!0 = !{i32 1, !"short_wchar", i32 1}
+!1 = !{i32 1, !"short_enum", i32 0}
+
+
+
+
+

LTO Post-Link Module Flags Metadata

+

Some optimisations are only possible when the entire LTO unit is present in the current +module. This is represented by the LTOPostLink module flags metadata, which +will be created with a value of 1 when LTO linking occurs.

+
+
+
+

Automatic Linker Flags Named Metadata

+

Some targets support embedding of flags to the linker inside individual object +files. Typically this is used in conjunction with language extensions which +allow source files to contain linker command line options, and have these +automatically be transmitted to the linker via object files.

+

These flags are encoded in the IR using named metadata with the name +!llvm.linker.options. Each operand is expected to be a metadata node +which should be a list of other metadata nodes, each of which should be a +list of metadata strings defining linker options.

+

For example, the following metadata section specifies two separate sets of +linker options, presumably to link against libz and the Cocoa +framework:

+
!0 = !{ !"-lz" }
+!1 = !{ !"-framework", !"Cocoa" }
+!llvm.linker.options = !{ !0, !1 }
+
+
+

The metadata encoding as lists of lists of options, as opposed to a collapsed +list of options, is chosen so that the IR encoding can use multiple option +strings to specify e.g., a single library, while still having that specifier be +preserved as an atomic element that can be recognized by a target specific +assembly writer or object file emitter.

+

Each individual option is required to be either a valid option for the target’s +linker, or an option that is reserved by the target specific assembly writer or +object file emitter. No other aspect of these options is defined by the IR.

+
+
+

Dependent Libs Named Metadata

+

Some targets support embedding of strings into object files to indicate +a set of libraries to add to the link. Typically this is used in conjunction +with language extensions which allow source files to explicitly declare the +libraries they depend on, and have these automatically be transmitted to the +linker via object files.

+

The list is encoded in the IR using named metadata with the name +!llvm.dependent-libraries. Each operand is expected to be a metadata node +which should contain a single string operand.

+

For example, the following metadata section contains two library specifiers:

+
!0 = !{!"a library specifier"}
+!1 = !{!"another library specifier"}
+!llvm.dependent-libraries = !{ !0, !1 }
+
+
+

Each library specifier will be handled independently by the consuming linker. +The effect of the library specifiers is defined by the consuming linker.

+
+
+

ThinLTO Summary

+

Compiling with ThinLTO +causes the building of a compact summary of the module that is emitted into +the bitcode. The summary is emitted into the LLVM assembly and identified +in syntax by a caret (‘^’).

+

The summary is parsed into a bitcode output, along with the Module +IR, via the “llvm-as” tool. Tools that parse the Module IR for the purposes +of optimization (e.g. “clang -x ir” and “opt”), will ignore the +summary entries (just as they currently ignore summary entries in a bitcode +input file).

+

Eventually, the summary will be parsed into a ModuleSummaryIndex object under +the same conditions where summary index is currently built from bitcode. +Specifically, tools that test the Thin Link portion of a ThinLTO compile +(i.e. llvm-lto and llvm-lto2), or when parsing a combined index +for a distributed ThinLTO backend via clang’s “-fthinlto-index=<>” flag +(this part is not yet implemented, use llvm-as to create a bitcode object +before feeding into thin link tools for now).

+

There are currently 3 types of summary entries in the LLVM assembly: +module paths, +global values, and +type identifiers.

+
+

Module Path Summary Entry

+

Each module path summary entry lists a module containing global values included +in the summary. For a single IR module there will be one such entry, but +in a combined summary index produced during the thin link, there will be +one module path entry per linked module with summary.

+

Example:

+
^0 = module: (path: "/path/to/file.o", hash: (2468601609, 1329373163, 1565878005, 638838075, 3148790418))
+
+
+

The path field is a string path to the bitcode file, and the hash +field is the 160-bit SHA-1 hash of the IR bitcode contents, used for +incremental builds and caching.

+
+
+

Global Value Summary Entry

+

Each global value summary entry corresponds to a global value defined or +referenced by a summarized module.

+

Example:

+
^4 = gv: (name: "f"[, summaries: (Summary)[, (Summary)]*]?) ; guid = 14740650423002898831
+
+
+

For declarations, there will not be a summary list. For definitions, a +global value will contain a list of summaries, one per module containing +a definition. There can be multiple entries in a combined summary index +for symbols with weak linkage.

+

Each Summary format will depend on whether the global value is a +function, variable, or +alias.

+
+

Function Summary

+

If the global value is a function, the Summary entry will look like:

+
function: (module: ^0, flags: (linkage: external, notEligibleToImport: 0, live: 0, dsoLocal: 0), insts: 2[, FuncFlags]?[, Calls]?[, TypeIdInfo]?[, Params]?[, Refs]?)
+
+
+

The module field includes the summary entry id for the module containing +this definition, and the flags field contains information such as +the linkage type, a flag indicating whether it is legal to import the +definition, whether it is globally live and whether the linker resolved it +to a local definition (the latter two are populated during the thin link). +The insts field contains the number of IR instructions in the function. +Finally, there are several optional fields: FuncFlags, +Calls, TypeIdInfo, +Params, Refs.

+
+
+

Global Variable Summary

+

If the global value is a variable, the Summary entry will look like:

+
variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 0, live: 0, dsoLocal: 0)[, Refs]?)
+
+
+

The variable entry contains a subset of the fields in a +function summary, see the descriptions there.

+
+
+

Alias Summary

+

If the global value is an alias, the Summary entry will look like:

+
alias: (module: ^0, flags: (linkage: external, notEligibleToImport: 0, live: 0, dsoLocal: 0), aliasee: ^2)
+
+
+

The module and flags fields are as described for a +function summary. The aliasee field +contains a reference to the global value summary entry of the aliasee.

+
+
+

Function Flags

+

The optional FuncFlags field looks like:

+
funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0)
+
+
+

If unspecified, flags are assumed to hold the conservative false value of +0.

+
+
+

Calls

+

The optional Calls field looks like:

+
calls: ((Callee)[, (Callee)]*)
+
+
+

where each Callee looks like:

+
callee: ^1[, hotness: None]?[, relbf: 0]?
+
+
+

The callee refers to the summary entry id of the callee. At most one +of hotness (which can take the values Unknown, Cold, None, +Hot, and Critical), and relbf (which holds the integer +branch frequency relative to the entry frequency, scaled down by 2^8) +may be specified. The defaults are Unknown and 0, respectively.

+
+
+

Params

+

The optional Params is used by StackSafety and looks like:

+
params: ((Param)[, (Param)]*)
+
+
+

where each Param describes pointer parameter access inside of the +function and looks like:

+
param: 4, offset: [0, 5][, calls: ((Callee)[, (Callee)]*)]?
+
+
+

where the first param is the number of the parameter it describes, and +offset is the inclusive range of offsets from the pointer parameter to bytes +which can be accessed by the function. This range does not include accesses by +function calls from the calls list.

+

where each Callee describes how the parameter is forwarded into other +functions and looks like:

+
callee: ^3, param: 5, offset: [-3, 3]
+
+
+

The callee refers to the summary entry id of the callee, and param is +the number of the callee parameter which points into the caller’s parameter +with an offset known to be inside of the offset range. calls will be +consumed and removed by the thin link stage to update Param::offset so it +covers all accesses possible by calls.

+

A pointer parameter without a corresponding Param is considered unsafe, and we +assume that access with any offset is possible.

+

Example:

+

If we have the following function:

+
define i64 @foo(i64* %0, i32* %1, i8* %2, i8 %3) {
+  store i32* %1, i32** @x
+  %5 = getelementptr inbounds i8, i8* %2, i64 5
+  %6 = load i8, i8* %5
+  %7 = getelementptr inbounds i8, i8* %2, i8 %3
+  tail call void @bar(i8 %3, i8* %7)
+  %8 = load i64, i64* %0
+  ret i64 %8
+}
+
+
+

We can expect a record like this:

+
params: ((param: 0, offset: [0, 7]),(param: 2, offset: [5, 5], calls: ((callee: ^3, param: 1, offset: [-128, 127]))))
+
+
+

The function may access just 8 bytes of the parameter %0. calls is empty, +so the parameter is either not used for function calls or offset already +covers all accesses from nested function calls. +Parameter %1 escapes, so access is unknown. +The function itself can access just a single byte of the parameter %2. Additional +access is possible inside of the @bar or ^3. The function adds signed +offset to the pointer and passes the result as the argument %1 into ^3. +This record itself does not tell us how ^3 will access the parameter. +Parameter %3 is not a pointer.

+
+
+

Refs

+

The optional Refs field looks like:

+
refs: ((Ref)[, (Ref)]*)
+
+
+

where each Ref contains a reference to the summary id of the referenced +value (e.g. ^1).

+
+
+

TypeIdInfo

+

The optional TypeIdInfo field, used for +Control Flow Integrity, +looks like:

+
typeIdInfo: [(TypeTests)]?[, (TypeTestAssumeVCalls)]?[, (TypeCheckedLoadVCalls)]?[, (TypeTestAssumeConstVCalls)]?[, (TypeCheckedLoadConstVCalls)]?
+
+
+

These optional fields have the following forms:

+
+
TypeTests
+
typeTests: (TypeIdRef[, TypeIdRef]*)
+
+
+

Where each TypeIdRef refers to a type id +by summary id or GUID.

+
+
+
TypeTestAssumeVCalls
+
typeTestAssumeVCalls: (VFuncId[, VFuncId]*)
+
+
+

Where each VFuncId has the format:

+
vFuncId: (TypeIdRef, offset: 16)
+
+
+

Where each TypeIdRef refers to a type id +by summary id or GUID preceded by a guid: tag.

+
+
+
TypeCheckedLoadVCalls
+
typeCheckedLoadVCalls: (VFuncId[, VFuncId]*)
+
+
+

Where each VFuncId has the format described for TypeTestAssumeVCalls.

+
+
+
TypeTestAssumeConstVCalls
+
typeTestAssumeConstVCalls: (ConstVCall[, ConstVCall]*)
+
+
+

Where each ConstVCall has the format:

+
(VFuncId, args: (Arg[, Arg]*))
+
+
+

and where each VFuncId has the format described for TypeTestAssumeVCalls, +and each Arg is an integer argument number.

+
+
+
TypeCheckedLoadConstVCalls
+
typeCheckedLoadConstVCalls: (ConstVCall[, ConstVCall]*)
+
+
+

Where each ConstVCall has the format described for +TypeTestAssumeConstVCalls.

+
+
+
+
+

Type ID Summary Entry

+

Each type id summary entry corresponds to a type identifier resolution +which is generated during the LTO link portion of the compile when building +with Control Flow Integrity, +so these are only present in a combined summary index.

+

Example:

+
^4 = typeid: (name: "_ZTS1A", summary: (typeTestRes: (kind: allOnes, sizeM1BitWidth: 7[, alignLog2: 0]?[, sizeM1: 0]?[, bitMask: 0]?[, inlineBits: 0]?)[, WpdResolutions]?)) ; guid = 7004155349499253778
+
+
+

The typeTestRes gives the type test resolution kind (which may +be unsat, byteArray, inline, single, or allOnes), and +the size-1 bit width. It is followed by optional flags, which default to 0, +and an optional WpdResolutions (whole program devirtualization resolution) +field that looks like:

+
wpdResolutions: ((offset: 0, WpdRes)[, (offset: 1, WpdRes)]*)
+
+
+

where each entry is a mapping from the given byte offset to the whole-program +devirtualization resolution WpdRes, that has one of the following formats:

+
wpdRes: (kind: branchFunnel)
+wpdRes: (kind: singleImpl, singleImplName: "_ZN1A1nEi")
+wpdRes: (kind: indir)
+
+
+

Additionally, each wpdRes has an optional resByArg field, which +describes the resolutions for calls with all constant integer arguments:

+
resByArg: (ResByArg[, ResByArg]*)
+
+
+

where ResByArg is:

+
args: (Arg[, Arg]*), byArg: (kind: UniformRetVal[, info: 0][, byte: 0][, bit: 0])
+
+
+

Where the kind can be Indir, UniformRetVal, UniqueRetVal +or VirtualConstProp. The info field is only used if the kind +is UniformRetVal (indicates the uniform return value), or +UniqueRetVal (holds the return value associated with the unique vtable +(0 or 1)). The byte and bit fields are only used if the target does +not support the use of absolute symbols to store constants.

+
+
+
+

Intrinsic Global Variables

+

LLVM has a number of “magic” global variables that contain data that +affect code generation or other IR semantics. These are documented here. +All globals of this sort should have a section specified as +“llvm.metadata”. This section and all globals that start with +“llvm.” are reserved for use by LLVM.

+
+

The ‘llvm.used’ Global Variable

+

The @llvm.used global is an array which has +appending linkage. This array contains a list of +pointers to named global variables, functions and aliases which may optionally +have a pointer cast formed of bitcast or getelementptr. For example, a legal +use of it is:

+
@X = global i8 4
+@Y = global i32 123
+
+@llvm.used = appending global [2 x i8*] [
+   i8* @X,
+   i8* bitcast (i32* @Y to i8*)
+], section "llvm.metadata"
+
+
+

If a symbol appears in the @llvm.used list, then the compiler, assembler, +and linker are required to treat the symbol as if there is a reference to the +symbol that it cannot see (which is why they have to be named). For example, if +a variable has internal linkage and no references other than that from the +@llvm.used list, it cannot be deleted. This is commonly used to represent +references from inline asms and other things the compiler cannot “see”, and +corresponds to “__attribute__((used))” in GNU C.

+

On some targets, the code generator must emit a directive to the +assembler or object file to prevent the assembler and linker from +removing the symbol.

+
+
+

The ‘llvm.compiler.used’ Global Variable

+

The @llvm.compiler.used directive is the same as the @llvm.used +directive, except that it only prevents the compiler from touching the +symbol. On targets that support it, this allows an intelligent linker to +optimize references to the symbol without being impeded as it would be +by @llvm.used.

+

This is a rare construct that should only be used in rare circumstances, +and should not be exposed to source languages.

+
+
+

The ‘llvm.global_ctors’ Global Variable

+
%0 = type { i32, void ()*, i8* }
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor, i8* @data }]
+
+
+

The @llvm.global_ctors array contains a list of constructor +functions, priorities, and an associated global or function. +The functions referenced by this array will be called in ascending order +of priority (i.e. lowest first) when the module is loaded. The order of +functions with the same priority is not defined.

+

If the third field is non-null, and points to a global variable +or function, the initializer function will only run if the associated +data from the current module is not discarded. +On ELF the referenced global variable or function must be in a comdat.

+
+
+

The ‘llvm.global_dtors’ Global Variable

+
%0 = type { i32, void ()*, i8* }
+@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor, i8* @data }]
+
+
+

The @llvm.global_dtors array contains a list of destructor +functions, priorities, and an associated global or function. +The functions referenced by this array will be called in descending +order of priority (i.e. highest first) when the module is unloaded. The +order of functions with the same priority is not defined.

+

If the third field is non-null, and points to a global variable +or function, the destructor function will only run if the associated +data from the current module is not discarded. +On ELF the referenced global variable or function must be in a comdat.

+
+
+
+

Instruction Reference

+

The LLVM instruction set consists of several different classifications +of instructions: terminator instructions, binary +instructions, bitwise binary +instructions, memory instructions, and +other instructions.

+
+

Terminator Instructions

+

As mentioned previously, every basic block in a +program ends with a “Terminator” instruction, which indicates which +block should be executed after the current block is finished. These +terminator instructions typically yield a ‘void’ value: they produce +control flow, not values (the one exception being the +‘invoke’ instruction).

+

The terminator instructions are: ‘ret’, +‘br’, ‘switch’, +‘indirectbr’, ‘invoke’, +‘callbr’, +‘resume’, ‘catchswitch’, +‘catchret’, +‘cleanupret’, +and ‘unreachable’.

+
+

‘ret’ Instruction

+
+
Syntax:
+
ret <type> <value>       ; Return a value from a non-void function
+ret void                 ; Return from void function
+
+
+
+
+
Overview:
+

The ‘ret’ instruction is used to return control flow (and optionally +a value) from a function back to the caller.

+

There are two forms of the ‘ret’ instruction: one that returns a +value and then causes control flow, and one that just causes control +flow to occur.

+
+
+
Arguments:
+

The ‘ret’ instruction optionally accepts a single argument, the +return value. The type of the return value must be a ‘first +class’ type.

+

A function is not well formed if it has a non-void +return type and contains a ‘ret’ instruction with no return value or +a return value with a type that does not match its type, or if it has a +void return type and contains a ‘ret’ instruction with a return +value.

+
+
+
Semantics:
+

When the ‘ret’ instruction is executed, control flow returns back to +the calling function’s context. If the caller is a +“call” instruction, execution continues at the +instruction after the call. If the caller was an +“invoke” instruction, execution continues at the +beginning of the “normal” destination block. If the instruction returns +a value, that value shall set the call or invoke instruction’s return +value.

+
+
+
Example:
+
ret i32 5                       ; Return an integer value of 5
+ret void                        ; Return from a void function
+ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2
+
+
+
+
+
+

‘br’ Instruction

+
+
Syntax:
+
br i1 <cond>, label <iftrue>, label <iffalse>
+br label <dest>          ; Unconditional branch
+
+
+
+
+
Overview:
+

The ‘br’ instruction is used to cause control flow to transfer to a +different basic block in the current function. There are two forms of +this instruction, corresponding to a conditional branch and an +unconditional branch.

+
+
+
Arguments:
+

The conditional branch form of the ‘br’ instruction takes a single +‘i1’ value and two ‘label’ values. The unconditional form of the +‘br’ instruction takes a single ‘label’ value as a target.

+
+
+
Semantics:
+

Upon execution of a conditional ‘br’ instruction, the ‘i1’ +argument is evaluated. If the value is true, control flows to the +‘iftrue’ label argument. If “cond” is false, control flows +to the ‘iffalse’ label argument. +If ‘cond’ is poison or undef, this instruction has undefined +behavior.

+
+
+
Example:
+
Test:
+  %cond = icmp eq i32 %a, %b
+  br i1 %cond, label %IfEqual, label %IfUnequal
+IfEqual:
+  ret i32 1
+IfUnequal:
+  ret i32 0
+
+
+
+
+
+

‘switch’ Instruction

+
+
Syntax:
+
switch <intty> <value>, label <defaultdest> [ <intty> <val>, label <dest> ... ]
+
+
+
+
+
Overview:
+

The ‘switch’ instruction is used to transfer control flow to one of +several different places. It is a generalization of the ‘br’ +instruction, allowing a branch to occur to one of many possible +destinations.

+
+
+
Arguments:
+

The ‘switch’ instruction uses three parameters: an integer +comparison value ‘value’, a default ‘label’ destination, and an +array of pairs of comparison value constants and ‘label’s. The table +is not allowed to contain duplicate constant entries.

+
+
+
Semantics:
+

The switch instruction specifies a table of values and destinations. +When the ‘switch’ instruction is executed, this table is searched +for the given value. If the value is found, control flow is transferred +to the corresponding destination; otherwise, control flow is transferred +to the default destination. +If ‘value’ is poison or undef, this instruction has undefined +behavior.

+
+
+
Implementation:
+

Depending on properties of the target machine and the particular +switch instruction, this instruction may be code generated in +different ways. For example, it could be generated as a series of +chained conditional branches or with a lookup table.

+
+
+
Example:
+
; Emulate a conditional br instruction
+%Val = zext i1 %value to i32
+switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
+
+; Emulate an unconditional br instruction
+switch i32 0, label %dest [ ]
+
+; Implement a jump table:
+switch i32 %val, label %otherwise [ i32 0, label %onzero
+                                    i32 1, label %onone
+                                    i32 2, label %ontwo ]
+
+
+
+
+
+

‘indirectbr’ Instruction

+
+
Syntax:
+
indirectbr <somety>* <address>, [ label <dest1>, label <dest2>, ... ]
+
+
+
+
+
Overview:
+

The ‘indirectbr’ instruction implements an indirect branch to a +label within the current function, whose address is specified by +“address”. Address must be derived from a +blockaddress constant.

+
+
+
Arguments:
+

The ‘address’ argument is the address of the label to jump to. The +rest of the arguments indicate the full set of possible destinations +that the address may point to. Blocks are allowed to occur multiple +times in the destination list, though this isn’t particularly useful.

+

This destination list is required so that dataflow analysis has an +accurate understanding of the CFG.

+
+
+
Semantics:
+

Control transfers to the block specified in the address argument. All +possible destination blocks must be listed in the label list, otherwise +this instruction has undefined behavior. This implies that jumps to +labels defined in other functions have undefined behavior as well. +If ‘address’ is poison or undef, this instruction has undefined +behavior.

+
+
+
Implementation:
+

This is typically implemented with a jump through a register.

+
+
+
Example:
+
indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
+
+
+
+
+
+

‘invoke’ Instruction

+
+
Syntax:
+
<result> = invoke [cconv] [ret attrs] [addrspace(<num>)] <ty>|<fnty> <fnptrval>(<function args>) [fn attrs]
+              [operand bundles] to label <normal label> unwind label <exception label>
+
+
+
+
+
Overview:
+

The ‘invoke’ instruction causes control to transfer to a specified +function, with the possibility of control flow transfer to either the +‘normal’ label or the ‘exception’ label. If the callee function +returns with the “ret” instruction, control flow will return to the +“normal” label. If the callee (or any indirect callees) returns via the +“resume” instruction or other exception handling +mechanism, control is interrupted and continued at the dynamically +nearest “exception” label.

+

The ‘exception’ label is a landing +pad for the exception. As such, +the ‘exception’ label is required to have the +“landingpad” instruction, which contains the +information about the behavior of the program after unwinding happens, +as its first non-PHI instruction. The restrictions on the +“landingpad” instruction tightly couple it to the “invoke” +instruction, so that the important information contained within the +“landingpad” instruction can’t be lost through normal code motion.

+
+
+
Arguments:
+

This instruction requires several arguments:

+
    +
  1. The optional “cconv” marker indicates which calling +convention the call should use. If none is +specified, the call defaults to using C calling conventions.

  2. +
  3. The optional Parameter Attributes list for return +values. Only ‘zeroext’, ‘signext’, and ‘inreg’ attributes +are valid here.

  4. +
  5. The optional addrspace attribute can be used to indicate the address space +of the called function. If it is not specified, the program address space +from the datalayout string will be used.

  6. +
  7. ‘ty’: the type of the call instruction itself which is also the +type of the return value. Functions that return no value are marked +void.

  8. +
  9. ‘fnty’: shall be the signature of the function being invoked. The +argument types must match the types implied by this signature. This +type can be omitted if the function is not varargs.

  10. +
  11. ‘fnptrval’: An LLVM value containing a pointer to a function to +be invoked. In most cases, this is a direct function invocation, but +indirect invoke’s are just as possible, calling an arbitrary pointer +to function value.

  12. +
  13. ‘function args’: argument list whose types match the function +signature argument types and parameter attributes. All arguments must +be of first class type. If the function signature +indicates the function accepts a variable number of arguments, the +extra arguments can be specified.

  14. +
  15. ‘normal label’: the label reached when the called function +executes a ‘ret’ instruction.

  16. +
  17. ‘exception label’: the label reached when a callee returns via +the resume instruction or other exception handling +mechanism.

  18. +
  19. The optional function attributes list.

  20. +
  21. The optional operand bundles list.

  22. +
+
+
+
Semantics:
+

This instruction is designed to operate as a standard ‘call’ +instruction in most regards. The primary difference is that it +establishes an association with a label, which is used by the runtime +library to unwind the stack.

+

This instruction is used in languages with destructors to ensure that +proper cleanup is performed in the case of either a longjmp or a +thrown exception. Additionally, this is important for implementation of +‘catch’ clauses in high-level languages that support them.

+

For the purposes of the SSA form, the definition of the value returned +by the ‘invoke’ instruction is deemed to occur on the edge from the +current block to the “normal” label. If the callee unwinds then no +return value is available.

+
+
+
Example:
+
%retval = invoke i32 @Test(i32 15) to label %Continue
+            unwind label %TestCleanup              ; i32:retval set
+%retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue
+            unwind label %TestCleanup              ; i32:retval set
+
+
+
+
+
+

‘callbr’ Instruction

+
+
Syntax:
+
<result> = callbr [cconv] [ret attrs] [addrspace(<num>)] <ty>|<fnty> <fnptrval>(<function args>) [fn attrs]
+              [operand bundles] to label <fallthrough label> [indirect labels]
+
+
+
+
+
Overview:
+

The ‘callbr’ instruction causes control to transfer to a specified +function, with the possibility of control flow transfer to either the +‘fallthrough’ label or one of the ‘indirect’ labels.

+

This instruction should only be used to implement the “goto” feature of gcc +style inline assembly. Any other usage is an error in the IR verifier.

+
+
+
Arguments:
+

This instruction requires several arguments:

+
    +
  1. The optional “cconv” marker indicates which calling +convention the call should use. If none is +specified, the call defaults to using C calling conventions.

  2. +
  3. The optional Parameter Attributes list for return +values. Only ‘zeroext’, ‘signext’, and ‘inreg’ attributes +are valid here.

  4. +
  5. The optional addrspace attribute can be used to indicate the address space +of the called function. If it is not specified, the program address space +from the datalayout string will be used.

  6. +
  7. ‘ty’: the type of the call instruction itself which is also the +type of the return value. Functions that return no value are marked +void.

  8. +
  9. ‘fnty’: shall be the signature of the function being called. The +argument types must match the types implied by this signature. This +type can be omitted if the function is not varargs.

  10. +
  11. ‘fnptrval’: An LLVM value containing a pointer to a function to +be called. In most cases, this is a direct function call, but +other callbr’s are just as possible, calling an arbitrary pointer +to function value.

  12. +
  13. ‘function args’: argument list whose types match the function +signature argument types and parameter attributes. All arguments must +be of first class type. If the function signature +indicates the function accepts a variable number of arguments, the +extra arguments can be specified.

  14. +
  15. fallthrough label’: the label reached when the inline assembly’s +execution exits the bottom.

  16. +
  17. indirect labels’: the labels reached when a callee transfers control +to a location other than the ‘fallthrough label’. The blockaddress +constant for these should also be in the list of ‘function args’.

  18. +
  19. The optional function attributes list.

  20. +
  21. The optional operand bundles list.

  22. +
+
+
+
Semantics:
+

This instruction is designed to operate as a standard ‘call’ +instruction in most regards. The primary difference is that it +establishes an association with additional labels to define where control +flow goes after the call.

+

The output values of a ‘callbr’ instruction are available only to +the ‘fallthrough’ block, not to any ‘indirect’ block(s).

+

The only use of this today is to implement the “goto” feature of gcc inline +assembly where additional labels can be provided as locations for the inline +assembly to jump to.

+
+
+
Example:
+
; "asm goto" without output constraints.
+callbr void asm "", "r,X"(i32 %x, i8 *blockaddress(@foo, %indirect))
+            to label %fallthrough [label %indirect]
+
+; "asm goto" with output constraints.
+<result> = callbr i32 asm "", "=r,r,X"(i32 %x, i8 *blockaddress(@foo, %indirect))
+            to label %fallthrough [label %indirect]
+
+
+
+
+
+

‘resume’ Instruction

+
+
Syntax:
+
resume <type> <value>
+
+
+
+
+
Overview:
+

The ‘resume’ instruction is a terminator instruction that has no +successors.

+
+
+
Arguments:
+

The ‘resume’ instruction requires one argument, which must have the +same type as the result of any ‘landingpad’ instruction in the same +function.

+
+
+
Semantics:
+

The ‘resume’ instruction resumes propagation of an existing +(in-flight) exception whose unwinding was interrupted with a +landingpad instruction.

+
+
+
Example:
+
resume { i8*, i32 } %exn
+
+
+
+
+
+

‘catchswitch’ Instruction

+
+
Syntax:
+
<resultval> = catchswitch within <parent> [ label <handler1>, label <handler2>, ... ] unwind to caller
+<resultval> = catchswitch within <parent> [ label <handler1>, label <handler2>, ... ] unwind label <default>
+
+
+
+
+
Overview:
+

The ‘catchswitch’ instruction is used by LLVM’s exception handling system to describe the set of possible catch handlers +that may be executed by the EH personality routine.

+
+
+
Arguments:
+

The parent argument is the token of the funclet that contains the +catchswitch instruction. If the catchswitch is not inside a funclet, +this operand may be the token none.

+

The default argument is the label of another basic block beginning with +either a cleanuppad or catchswitch instruction. This unwind destination +must be a legal target with respect to the parent links, as described in +the exception handling documentation.

+

The handlers are a nonempty list of successor blocks that each begin with a +catchpad instruction.

+
+
+
Semantics:
+

Executing this instruction transfers control to one of the successors in +handlers, if appropriate, or continues to unwind via the unwind label if +present.

+

The catchswitch is both a terminator and a “pad” instruction, meaning that +it must be both the first non-phi instruction and last instruction in the basic +block. Therefore, it must be the only non-phi instruction in the block.

+
+
+
Example:
+
dispatch1:
+  %cs1 = catchswitch within none [label %handler0, label %handler1] unwind to caller
+dispatch2:
+  %cs2 = catchswitch within %parenthandler [label %handler0] unwind label %cleanup
+
+
+
+
+
+

‘catchret’ Instruction

+
+
Syntax:
+
catchret from <token> to label <normal>
+
+
+
+
+
Overview:
+

The ‘catchret’ instruction is a terminator instruction that has a +single successor.

+
+
+
Arguments:
+

The first argument to a ‘catchret’ indicates which catchpad it +exits. It must be a catchpad. +The second argument to a ‘catchret’ specifies where control will +transfer to next.

+
+
+
Semantics:
+

The ‘catchret’ instruction ends an existing (in-flight) exception whose +unwinding was interrupted with a catchpad instruction. The +personality function gets a chance to execute arbitrary +code to, for example, destroy the active exception. Control then transfers to +normal.

+

The token argument must be a token produced by a catchpad instruction. +If the specified catchpad is not the most-recently-entered not-yet-exited +funclet pad (as described in the EH documentation), +the catchret’s behavior is undefined.

+
+
+
Example:
+
catchret from %catch to label %continue
+
+
+
+
+
+

‘cleanupret’ Instruction

+
+
Syntax:
+
cleanupret from <value> unwind label <continue>
+cleanupret from <value> unwind to caller
+
+
+
+
+
Overview:
+

The ‘cleanupret’ instruction is a terminator instruction that has +an optional successor.

+
+
+
Arguments:
+

The ‘cleanupret’ instruction requires one argument, which indicates +which cleanuppad it exits, and must be a cleanuppad. +If the specified cleanuppad is not the most-recently-entered not-yet-exited +funclet pad (as described in the EH documentation), +the cleanupret’s behavior is undefined.

+

The ‘cleanupret’ instruction also has an optional successor, continue, +which must be the label of another basic block beginning with either a +cleanuppad or catchswitch instruction. This unwind destination must +be a legal target with respect to the parent links, as described in the +exception handling documentation.

+
+
+
Semantics:
+

The ‘cleanupret’ instruction indicates to the +personality function that one +cleanuppad it transferred control to has ended. +It transfers control to continue or unwinds out of the function.

+
+
+
Example:
+
cleanupret from %cleanup unwind to caller
+cleanupret from %cleanup unwind label %continue
+
+
+
+
+
+

‘unreachable’ Instruction

+
+
Syntax:
+
unreachable
+
+
+
+
+
Overview:
+

The ‘unreachable’ instruction has no defined semantics. This +instruction is used to inform the optimizer that a particular portion of +the code is not reachable. This can be used to indicate that the code +after a no-return function cannot be reached, and other facts.

+
+
+
Semantics:
+

The ‘unreachable’ instruction has no defined semantics.

+
+
+
+
+

Unary Operations

+

Unary operators require a single operand, execute an operation on +it, and produce a single value. The operand might represent multiple +data, as is the case with the vector data type. The +result value has the same type as its operand.

+
+

‘fneg’ Instruction

+
+
Syntax:
+
<result> = fneg [fast-math flags]* <ty> <op1>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘fneg’ instruction returns the negation of its operand.

+
+
+
Arguments:
+

The argument to the ‘fneg’ instruction must be a +floating-point or vector of +floating-point values.

+
+
+
Semantics:
+

The value produced is a copy of the operand with its sign bit flipped. +This instruction can also take any number of fast-math +flags, which are optimization hints to enable otherwise +unsafe floating-point optimizations.

+
+
+
Example:
+
<result> = fneg float %val          ; yields float:result = -%val
+
+
+
+
+
+
+

Binary Operations

+

Binary operators are used to do most of the computation in a program. +They require two operands of the same type, execute an operation on +them, and produce a single value. The operands might represent multiple +data, as is the case with the vector data type. The +result value has the same type as its operands.

+

There are several different binary operators:

+
+

‘add’ Instruction

+
+
Syntax:
+
<result> = add <ty> <op1>, <op2>          ; yields ty:result
+<result> = add nuw <ty> <op1>, <op2>      ; yields ty:result
+<result> = add nsw <ty> <op1>, <op2>      ; yields ty:result
+<result> = add nuw nsw <ty> <op1>, <op2>  ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘add’ instruction returns the sum of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘add’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the integer sum of the two operands.

+

If the sum has unsigned overflow, the result returned is the +mathematical result modulo 2^n, where n is the bit width of +the result.

+

Because LLVM integers use a two’s complement representation, this +instruction is appropriate for both signed and unsigned integers.

+

nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”, +respectively. If the nuw and/or nsw keywords are present, the +result value of the add is a poison value if +unsigned and/or signed overflow, respectively, occurs.

+
+
+
Example:
+
<result> = add i32 4, %var          ; yields i32:result = 4 + %var
+
+
+
+
+
+

‘fadd’ Instruction

+
+
Syntax:
+
<result> = fadd [fast-math flags]* <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘fadd’ instruction returns the sum of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘fadd’ instruction must be +floating-point or vector of +floating-point values. Both arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the floating-point sum of the two operands. +This instruction is assumed to execute in the default floating-point +environment. +This instruction can also take any number of fast-math +flags, which are optimization hints to enable otherwise +unsafe floating-point optimizations.

+
+
+
Example:
+
<result> = fadd float 4.0, %var          ; yields float:result = 4.0 + %var
+
+
+
+
+
+

‘sub’ Instruction

+
+
Syntax:
+
<result> = sub <ty> <op1>, <op2>          ; yields ty:result
+<result> = sub nuw <ty> <op1>, <op2>      ; yields ty:result
+<result> = sub nsw <ty> <op1>, <op2>      ; yields ty:result
+<result> = sub nuw nsw <ty> <op1>, <op2>  ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘sub’ instruction returns the difference of its two operands.

+

Note that the ‘sub’ instruction is used to represent the ‘neg’ +instruction present in most other intermediate representations.

+
+
+
Arguments:
+

The two arguments to the ‘sub’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the integer difference of the two operands.

+

If the difference has unsigned overflow, the result returned is the +mathematical result modulo 2^n, where n is the bit width of +the result.

+

Because LLVM integers use a two’s complement representation, this +instruction is appropriate for both signed and unsigned integers.

+

nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”, +respectively. If the nuw and/or nsw keywords are present, the +result value of the sub is a poison value if +unsigned and/or signed overflow, respectively, occurs.

+
+
+
Example:
+
<result> = sub i32 4, %var          ; yields i32:result = 4 - %var
+<result> = sub i32 0, %val          ; yields i32:result = -%val
+
+
+
+
+
+

‘fsub’ Instruction

+
+
Syntax:
+
<result> = fsub [fast-math flags]* <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘fsub’ instruction returns the difference of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘fsub’ instruction must be +floating-point or vector of +floating-point values. Both arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the floating-point difference of the two operands. +This instruction is assumed to execute in the default floating-point +environment. +This instruction can also take any number of fast-math +flags, which are optimization hints to enable otherwise +unsafe floating-point optimizations.

+
+
+
Example:
+
<result> = fsub float 4.0, %var           ; yields float:result = 4.0 - %var
+<result> = fsub float -0.0, %val          ; yields float:result = -%val
+
+
+
+
+
+

‘mul’ Instruction

+
+
Syntax:
+
<result> = mul <ty> <op1>, <op2>          ; yields ty:result
+<result> = mul nuw <ty> <op1>, <op2>      ; yields ty:result
+<result> = mul nsw <ty> <op1>, <op2>      ; yields ty:result
+<result> = mul nuw nsw <ty> <op1>, <op2>  ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘mul’ instruction returns the product of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘mul’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the integer product of the two operands.

+

If the result of the multiplication has unsigned overflow, the result +returned is the mathematical result modulo 2^n, where n is the +bit width of the result.

+

Because LLVM integers use a two’s complement representation, and the +result is the same width as the operands, this instruction returns the +correct result for both signed and unsigned integers. If a full product +(e.g. i32 * i32 -> i64) is needed, the operands should be +sign-extended or zero-extended as appropriate to the width of the full +product.

+

nuw and nsw stand for “No Unsigned Wrap” and “No Signed Wrap”, +respectively. If the nuw and/or nsw keywords are present, the +result value of the mul is a poison value if +unsigned and/or signed overflow, respectively, occurs.

+
+
+
Example:
+
<result> = mul i32 4, %var          ; yields i32:result = 4 * %var
+
+
+
+
+
+

‘fmul’ Instruction

+
+
Syntax:
+
<result> = fmul [fast-math flags]* <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘fmul’ instruction returns the product of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘fmul’ instruction must be +floating-point or vector of +floating-point values. Both arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the floating-point product of the two operands. +This instruction is assumed to execute in the default floating-point +environment. +This instruction can also take any number of fast-math +flags, which are optimization hints to enable otherwise +unsafe floating-point optimizations.

+
+
+
Example:
+
<result> = fmul float 4.0, %var          ; yields float:result = 4.0 * %var
+
+
+
+
+
+

‘udiv’ Instruction

+
+
Syntax:
+
<result> = udiv <ty> <op1>, <op2>         ; yields ty:result
+<result> = udiv exact <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘udiv’ instruction returns the quotient of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘udiv’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the unsigned integer quotient of the two operands.

+

Note that unsigned integer division and signed integer division are +distinct operations; for signed integer division, use ‘sdiv’.

+

Division by zero is undefined behavior. For vectors, if any element +of the divisor is zero, the operation has undefined behavior.

+

If the exact keyword is present, the result value of the udiv is +a poison value if %op1 is not a multiple of %op2 (as +such, “((a udiv exact b) mul b) == a”).

+
+
+
Example:
+
<result> = udiv i32 4, %var          ; yields i32:result = 4 / %var
+
+
+
+
+
+

‘sdiv’ Instruction

+
+
Syntax:
+
<result> = sdiv <ty> <op1>, <op2>         ; yields ty:result
+<result> = sdiv exact <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘sdiv’ instruction returns the quotient of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘sdiv’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the signed integer quotient of the two operands +rounded towards zero.

+

Note that signed integer division and unsigned integer division are +distinct operations; for unsigned integer division, use ‘udiv’.

+

Division by zero is undefined behavior. For vectors, if any element +of the divisor is zero, the operation has undefined behavior. +Overflow also leads to undefined behavior; this is a rare case, but can +occur, for example, by doing a 32-bit division of -2147483648 by -1.

+

If the exact keyword is present, the result value of the sdiv is +a poison value if the result would be rounded.

+
+
+
Example:
+
<result> = sdiv i32 4, %var          ; yields i32:result = 4 / %var
+
+
+
+
+
+

‘fdiv’ Instruction

+
+
Syntax:
+
<result> = fdiv [fast-math flags]* <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘fdiv’ instruction returns the quotient of its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘fdiv’ instruction must be +floating-point or vector of +floating-point values. Both arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the floating-point quotient of the two operands. +This instruction is assumed to execute in the default floating-point +environment. +This instruction can also take any number of fast-math +flags, which are optimization hints to enable otherwise +unsafe floating-point optimizations.

+
+
+
Example:
+
<result> = fdiv float 4.0, %var          ; yields float:result = 4.0 / %var
+
+
+
+
+
+

‘urem’ Instruction

+
+
Syntax:
+
<result> = urem <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘urem’ instruction returns the remainder from the unsigned +division of its two arguments.

+
+
+
Arguments:
+

The two arguments to the ‘urem’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

This instruction returns the unsigned integer remainder of a division. +This instruction always performs an unsigned division to get the +remainder.

+

Note that unsigned integer remainder and signed integer remainder are +distinct operations; for signed integer remainder, use ‘srem’.

+

Taking the remainder of a division by zero is undefined behavior. +For vectors, if any element of the divisor is zero, the operation has +undefined behavior.

+
+
+
Example:
+
<result> = urem i32 4, %var          ; yields i32:result = 4 % %var
+
+
+
+
+
+

‘srem’ Instruction

+
+
Syntax:
+
<result> = srem <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘srem’ instruction returns the remainder from the signed +division of its two operands. This instruction can also take +vector versions of the values in which case the elements +must be integers.

+
+
+
Arguments:
+

The two arguments to the ‘srem’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

This instruction returns the remainder of a division (where the result +is either zero or has the same sign as the dividend, op1), not the +modulo operator (where the result is either zero or has the same sign +as the divisor, op2) of a value. For more information about the +difference, see The Math +Forum. For a +table of how this is implemented in various languages, please see +Wikipedia: modulo +operation.

+

Note that signed integer remainder and unsigned integer remainder are +distinct operations; for unsigned integer remainder, use ‘urem’.

+

Taking the remainder of a division by zero is undefined behavior. +For vectors, if any element of the divisor is zero, the operation has +undefined behavior. +Overflow also leads to undefined behavior; this is a rare case, but can +occur, for example, by taking the remainder of a 32-bit division of +-2147483648 by -1. (The remainder doesn’t actually overflow, but this +rule lets srem be implemented using instructions that return both the +result of the division and the remainder.)

+
+
+
Example:
+
<result> = srem i32 4, %var          ; yields i32:result = 4 % %var
+
+
+
+
+
+

‘frem’ Instruction

+
+
Syntax:
+
<result> = frem [fast-math flags]* <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘frem’ instruction returns the remainder from the division of +its two operands.

+
+
+
Arguments:
+

The two arguments to the ‘frem’ instruction must be +floating-point or vector of +floating-point values. Both arguments must have identical types.

+
+
+
Semantics:
+

The value produced is the floating-point remainder of the two operands. +This is the same output as a libm ‘fmod’ function, but without any +possibility of setting errno. The remainder has the same sign as the +dividend. +This instruction is assumed to execute in the default floating-point +environment. +This instruction can also take any number of fast-math +flags, which are optimization hints to enable otherwise +unsafe floating-point optimizations.

+
+
+
Example:
+
<result> = frem float 4.0, %var          ; yields float:result = 4.0 % %var
+
+
+
+
+
+
+

Bitwise Binary Operations

+

Bitwise binary operators are used to do various forms of bit-twiddling +in a program. They are generally very efficient instructions and can +commonly be strength reduced from other instructions. They require two +operands of the same type, execute an operation on them, and produce a +single value. The resulting value is the same type as its operands.

+
+

‘shl’ Instruction

+
+
Syntax:
+
<result> = shl <ty> <op1>, <op2>           ; yields ty:result
+<result> = shl nuw <ty> <op1>, <op2>       ; yields ty:result
+<result> = shl nsw <ty> <op1>, <op2>       ; yields ty:result
+<result> = shl nuw nsw <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘shl’ instruction returns the first operand shifted to the left +a specified number of bits.

+
+
+
Arguments:
+

Both arguments to the ‘shl’ instruction must be the same +integer or vector of integer type. +‘op2’ is treated as an unsigned value.

+
+
+
Semantics:
+

The value produced is op1 * 2^op2 mod 2^n, +where n is the width of the result. If op2 is (statically or +dynamically) equal to or larger than the number of bits in +op1, this instruction returns a poison value. +If the arguments are vectors, each vector element of op1 is shifted +by the corresponding shift amount in op2.

+

If the nuw keyword is present, then the shift produces a poison +value if it shifts out any non-zero bits. +If the nsw keyword is present, then the shift produces a poison +value if it shifts out any bits that disagree with the resultant sign bit.

+
+
+
Example:
+
<result> = shl i32 4, %var   ; yields i32: 4 << %var
+<result> = shl i32 4, 2      ; yields i32: 16
+<result> = shl i32 1, 10     ; yields i32: 1024
+<result> = shl i32 1, 32     ; undefined
+<result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 2, i32 4>
+
+
+
+
+
+

‘lshr’ Instruction

+
+
Syntax:
+
<result> = lshr <ty> <op1>, <op2>         ; yields ty:result
+<result> = lshr exact <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘lshr’ instruction (logical shift right) returns the first +operand shifted to the right a specified number of bits with zero fill.

+
+
+
Arguments:
+

Both arguments to the ‘lshr’ instruction must be the same +integer or vector of integer type. +‘op2’ is treated as an unsigned value.

+
+
+
Semantics:
+

This instruction always performs a logical shift right operation. The +most significant bits of the result will be filled with zero bits after +the shift. If op2 is (statically or dynamically) equal to or larger +than the number of bits in op1, this instruction returns a poison +value. If the arguments are vectors, each vector element +of op1 is shifted by the corresponding shift amount in op2.

+

If the exact keyword is present, the result value of the lshr is +a poison value if any of the bits shifted out are non-zero.

+
+
+
Example:
+
<result> = lshr i32 4, 1   ; yields i32:result = 2
+<result> = lshr i32 4, 2   ; yields i32:result = 1
+<result> = lshr i8  4, 3   ; yields i8:result = 0
+<result> = lshr i8 -2, 1   ; yields i8:result = 0x7F
+<result> = lshr i32 1, 32  ; undefined
+<result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>
+
+
+
+
+
+

‘ashr’ Instruction

+
+
Syntax:
+
<result> = ashr <ty> <op1>, <op2>         ; yields ty:result
+<result> = ashr exact <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘ashr’ instruction (arithmetic shift right) returns the first +operand shifted to the right a specified number of bits with sign +extension.

+
+
+
Arguments:
+

Both arguments to the ‘ashr’ instruction must be the same +integer or vector of integer type. +‘op2’ is treated as an unsigned value.

+
+
+
Semantics:
+

This instruction always performs an arithmetic shift right operation. +The most significant bits of the result will be filled with the sign bit +of op1. If op2 is (statically or dynamically) equal to or larger +than the number of bits in op1, this instruction returns a poison +value. If the arguments are vectors, each vector element +of op1 is shifted by the corresponding shift amount in op2.

+

If the exact keyword is present, the result value of the ashr is +a poison value if any of the bits shifted out are non-zero.

+
+
+
Example:
+
<result> = ashr i32 4, 1   ; yields i32:result = 2
+<result> = ashr i32 4, 2   ; yields i32:result = 1
+<result> = ashr i8  4, 3   ; yields i8:result = 0
+<result> = ashr i8 -2, 1   ; yields i8:result = -1
+<result> = ashr i32 1, 32  ; undefined
+<result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3>   ; yields: result=<2 x i32> < i32 -1, i32 0>
+
+
+
+
+
+

‘and’ Instruction

+
+
Syntax:
+
<result> = and <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘and’ instruction returns the bitwise logical and of its two +operands.

+
+
+
Arguments:
+

The two arguments to the ‘and’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The truth table used for the ‘and’ instruction is:

In0 | In1 | Out
----+-----+----
 0  |  0  |  0
 0  |  1  |  0
 1  |  0  |  0
 1  |  1  |  1

+
+
+
Example:
+
<result> = and i32 4, %var         ; yields i32:result = 4 & %var
+<result> = and i32 15, 40          ; yields i32:result = 8
+<result> = and i32 4, 8            ; yields i32:result = 0
+
+
+
+
+
+

‘or’ Instruction

+
+
Syntax:
+
<result> = or <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘or’ instruction returns the bitwise logical inclusive or of its +two operands.

+
+
+
Arguments:
+

The two arguments to the ‘or’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The truth table used for the ‘or’ instruction is:

In0 | In1 | Out
----+-----+----
 0  |  0  |  0
 0  |  1  |  1
 1  |  0  |  1
 1  |  1  |  1

+
+
+
Example:
+
<result> = or i32 4, %var         ; yields i32:result = 4 | %var
+<result> = or i32 15, 40          ; yields i32:result = 47
+<result> = or i32 4, 8            ; yields i32:result = 12
+
+
+
+
+
+

‘xor’ Instruction

+
+
Syntax:
+
<result> = xor <ty> <op1>, <op2>   ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘xor’ instruction returns the bitwise logical exclusive or of +its two operands. The xor is used to implement the “one’s +complement” operation, which is the “~” operator in C.

+
+
+
Arguments:
+

The two arguments to the ‘xor’ instruction must be +integer or vector of integer values. Both +arguments must have identical types.

+
+
+
Semantics:
+

The truth table used for the ‘xor’ instruction is:

In0 | In1 | Out
----+-----+----
 0  |  0  |  0
 0  |  1  |  1
 1  |  0  |  1
 1  |  1  |  0

+
+
+
Example:
+
<result> = xor i32 4, %var         ; yields i32:result = 4 ^ %var
+<result> = xor i32 15, 40          ; yields i32:result = 39
+<result> = xor i32 4, 8            ; yields i32:result = 12
+<result> = xor i32 %V, -1          ; yields i32:result = ~%V
+
+
+
+
+
+
+

Vector Operations

+

LLVM supports several instructions to represent vector operations in a +target-independent manner. These instructions cover the element-access +and vector-specific operations needed to process vectors effectively. +While LLVM does directly support these vector operations, many +sophisticated algorithms will want to use target-specific intrinsics to +take full advantage of a specific target.

+
+

‘extractelement’ Instruction

+
+
Syntax:
+
<result> = extractelement <n x <ty>> <val>, <ty2> <idx>  ; yields <ty>
+<result> = extractelement <vscale x n x <ty>> <val>, <ty2> <idx> ; yields <ty>
+
+
+
+
+
Overview:
+

The ‘extractelement’ instruction extracts a single scalar element +from a vector at a specified index.

+
+
+
Arguments:
+

The first operand of an ‘extractelement’ instruction is a value of +vector type. The second operand is an index indicating +the position from which to extract the element. The index may be a +variable of any integer type.

+
+
+
Semantics:
+

The result is a scalar of the same type as the element type of val. +Its value is the value at position idx of val. If idx +exceeds the length of val for a fixed-length vector, the result is a +poison value. For a scalable vector, if the value +of idx exceeds the runtime length of the vector, the result is a +poison value.

+
+
+
Example:
+
<result> = extractelement <4 x i32> %vec, i32 0    ; yields i32
+
+
+
+
+
+

‘insertelement’ Instruction

+
+
Syntax:
+
<result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>    ; yields <n x <ty>>
+<result> = insertelement <vscale x n x <ty>> <val>, <ty> <elt>, <ty2> <idx> ; yields <vscale x n x <ty>>
+
+
+
+
+
Overview:
+

The ‘insertelement’ instruction inserts a scalar element into a +vector at a specified index.

+
+
+
Arguments:
+

The first operand of an ‘insertelement’ instruction is a value of +vector type. The second operand is a scalar value whose +type must equal the element type of the first operand. The third operand +is an index indicating the position at which to insert the value. The +index may be a variable of any integer type.

+
+
+
Semantics:
+

The result is a vector of the same type as val. Its element values +are those of val except at position idx, where it gets the value +elt. If idx exceeds the length of val for a fixed-length vector, +the result is a poison value. For a scalable vector, +if the value of idx exceeds the runtime length of the vector, the result +is a poison value.

+
+
+
Example:
+
<result> = insertelement <4 x i32> %vec, i32 1, i32 0    ; yields <4 x i32>
+
+
+
+
+
+

‘shufflevector’ Instruction

+
+
Syntax:
+
<result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>    ; yields <m x <ty>>
+<result> = shufflevector <vscale x n x <ty>> <v1>, <vscale x n x <ty>> <v2>, <vscale x m x i32> <mask>  ; yields <vscale x m x <ty>>
+
+
+
+
+
Overview:
+

The ‘shufflevector’ instruction constructs a permutation of elements +from two input vectors, returning a vector with the same element type as +the input and length that is the same as the shuffle mask.

+
+
+
Arguments:
+

The first two operands of a ‘shufflevector’ instruction are vectors +with the same type. The third argument is a shuffle mask vector constant +whose element type is i32. The mask vector elements must be constant +integers or undef values. The result of the instruction is a vector +whose length is the same as the shuffle mask and whose element type is the +same as the element type of the first two operands.

+
+
+
Semantics:
+

The elements of the two input vectors are numbered from left to right +across both of the vectors. For each element of the result vector, the +shuffle mask selects an element from one of the input vectors to copy +to the result. Non-negative elements in the mask represent an index +into the concatenated pair of input vectors.

+

If the shuffle mask is undefined, the result vector is undefined. If +the shuffle mask selects an undefined element from one of the input +vectors, the resulting element is undefined. An undefined element +in the mask vector specifies that the resulting element is undefined. +An undefined element in the mask vector prevents a poisoned vector +element from propagating.

+

For scalable vectors, the only valid mask values at present are +zeroinitializer and undef, since we cannot write all indices as +literals for a vector with a length unknown at compile time.

+
+
+
Example:
+
<result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+                        <4 x i32> <i32 0, i32 4, i32 1, i32 5>  ; yields <4 x i32>
+<result> = shufflevector <4 x i32> %v1, <4 x i32> undef,
+                        <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32> - Identity shuffle.
+<result> = shufflevector <8 x i32> %v1, <8 x i32> undef,
+                        <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32>
+<result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+                        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >  ; yields <8 x i32>
+
+
+
+
+
+
+

Aggregate Operations

+

LLVM supports several instructions for working with +aggregate values.

+
+

‘extractvalue’ Instruction

+
+
Syntax:
+
<result> = extractvalue <aggregate type> <val>, <idx>{, <idx>}*
+
+
+
+
+
Overview:
+

The ‘extractvalue’ instruction extracts the value of a member field +from an aggregate value.

+
+
+
Arguments:
+

The first operand of an ‘extractvalue’ instruction is a value of +struct or array type. The other operands are +constant indices to specify which value to extract in a similar manner +as indices in a ‘getelementptr’ instruction.

+

The major differences to getelementptr indexing are:

+
    +
  • Since the value being indexed is not a pointer, the first index is +omitted and assumed to be zero.

  • +
  • At least one index must be specified.

  • +
  • Not only struct indices but also array indices must be in bounds.

  • +
+
+
+
Semantics:
+

The result is the value at the position in the aggregate specified by +the index operands.

+
+
+
Example:
+
<result> = extractvalue {i32, float} %agg, 0    ; yields i32
+
+
+
+
+
+

‘insertvalue’ Instruction

+
+
Syntax:
+
<result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>{, <idx>}*    ; yields <aggregate type>
+
+
+
+
+
Overview:
+

The ‘insertvalue’ instruction inserts a value into a member field in +an aggregate value.

+
+
+
Arguments:
+

The first operand of an ‘insertvalue’ instruction is a value of +struct or array type. The second operand is +a first-class value to insert. The following operands are constant +indices indicating the position at which to insert the value in a +similar manner as indices in an ‘extractvalue’ instruction. The value +to insert must have the same type as the value identified by the +indices.

+
+
+
Semantics:
+

The result is an aggregate of the same type as val. Its value is +that of val except that the value at the position specified by the +indices is that of elt.

+
+
+
Example:
+
%agg1 = insertvalue {i32, float} undef, i32 1, 0              ; yields {i32 1, float undef}
+%agg2 = insertvalue {i32, float} %agg1, float %val, 1         ; yields {i32 1, float %val}
+%agg3 = insertvalue {i32, {float}} undef, float %val, 1, 0    ; yields {i32 undef, {float %val}}
+
+
+
+
+
+
+

Memory Access and Addressing Operations

+

A key design point of an SSA-based representation is how it represents +memory. In LLVM, no memory locations are in SSA form, which makes things +very simple. This section describes how to read, write, and allocate +memory in LLVM.

+
+

‘alloca’ Instruction

+
+
Syntax:
+
<result> = alloca [inalloca] <type> [, <ty> <NumElements>] [, align <alignment>] [, addrspace(<num>)]     ; yields type addrspace(num)*:result
+
+
+
+
+
Overview:
+

The ‘alloca’ instruction allocates memory on the stack frame of the +currently executing function, to be automatically released when this +function returns to its caller. If the address space is not explicitly +specified, the object is allocated in the alloca address space from the +datalayout string.

+
+
+
Arguments:
+

The ‘alloca’ instruction allocates sizeof(<type>)*NumElements +bytes of memory on the runtime stack, returning a pointer of the +appropriate type to the program. If “NumElements” is specified, it is +the number of elements allocated, otherwise “NumElements” is defaulted +to be one. If a constant alignment is specified, the value result of the +allocation is guaranteed to be aligned to at least that boundary. The +alignment may not be greater than 1 << 29. If not specified, or if +zero, the target can choose to align the allocation on any convenient +boundary compatible with the type.

+

‘type’ may be any sized type.

+
+
+
Semantics:
+

Memory is allocated; a pointer is returned. The allocated memory is +uninitialized, and loading from uninitialized memory produces an undefined +value. The operation itself is undefined if there is insufficient stack +space for the allocation. ‘alloca’d memory is automatically released +when the function returns. The ‘alloca’ instruction is commonly used +to represent automatic variables that must have an address available. When +the function returns (either with the ret or resume instructions), +the memory is reclaimed. Allocating zero bytes is legal, but the returned +pointer may not be unique. The order in which memory is allocated (i.e., +which way the stack grows) is not specified.

+

Note that ‘alloca’ outside of the alloca address space from the +datalayout string is meaningful only if the +target has assigned it a semantics.

+

If the returned pointer is used by llvm.lifetime.start, +the returned object is initially dead. +See llvm.lifetime.start and +llvm.lifetime.end for the precise semantics of +lifetime-manipulating intrinsics.

+
+
+
Example:
+
%ptr = alloca i32                             ; yields i32*:ptr
+%ptr = alloca i32, i32 4                      ; yields i32*:ptr
+%ptr = alloca i32, i32 4, align 1024          ; yields i32*:ptr
+%ptr = alloca i32, align 1024                 ; yields i32*:ptr
+
+
+
+
+
+

‘load’ Instruction

+
+
Syntax:
+
<result> = load [volatile] <ty>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.load !<empty_node>][, !invariant.group !<empty_node>][, !nonnull !<empty_node>][, !dereferenceable !<deref_bytes_node>][, !dereferenceable_or_null !<deref_bytes_node>][, !align !<align_node>][, !noundef !<empty_node>]
+<result> = load atomic [volatile] <ty>, <ty>* <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>]
+!<nontemp_node> = !{ i32 1 }
+!<empty_node> = !{}
+!<deref_bytes_node> = !{ i64 <dereferenceable_bytes> }
+!<align_node> = !{ i64 <value_alignment> }
+
+
+
+
+
Overview:
+

The ‘load’ instruction is used to read from memory.

+
+
+
Arguments:
+

The argument to the load instruction specifies the memory address from which +to load. The type specified must be a first class type of +known size (i.e. not containing an opaque structural type). If +the load is marked as volatile, then the optimizer is not allowed to +modify the number or order of execution of this load with other +volatile operations.

+

If the load is marked as atomic, it takes an extra ordering and optional syncscope("<target-scope>") argument. The +release and acq_rel orderings are not valid on load instructions. +Atomic loads produce defined results when they may see +multiple atomic stores. The type of the pointee must be an integer, pointer, or +floating-point type whose bit width is a power of two greater than or equal to +eight and less than or equal to a target-specific size limit. align must be +explicitly specified on atomic loads, and the load has undefined behavior if the +alignment is not set to a value which is at least the size in bytes of the +pointee. !nontemporal does not have any defined semantics for atomic loads.

+

The optional constant align argument specifies the alignment of the +operation (that is, the alignment of the memory address). A value of 0 +or an omitted align argument means that the operation has the ABI +alignment for the target. It is the responsibility of the code emitter +to ensure that the alignment information is correct. Overestimating the +alignment results in undefined behavior. Underestimating the alignment +may produce less efficient code. An alignment of 1 is always safe. The +maximum possible alignment is 1 << 29. An alignment value higher +than the size of the loaded type implies memory up to the alignment +value bytes can be safely loaded without trapping in the default +address space. Accessing the high bytes can interfere with debugging +tools, so they should not be accessed if the function has the +sanitize_thread or sanitize_address attributes.

+

The optional !nontemporal metadata must reference a single +metadata name <nontemp_node> corresponding to a metadata node with one +i32 entry of value 1. The existence of the !nontemporal +metadata on the instruction tells the optimizer and code generator +that this load is not expected to be reused in the cache. The code +generator may select special instructions to save cache bandwidth, such +as the MOVNT instruction on x86.

+

The optional !invariant.load metadata must reference a single +metadata name <empty_node> corresponding to a metadata node with no +entries. If a load instruction tagged with the !invariant.load +metadata is executed, the memory location referenced by the load has +to contain the same value at all points in the program where the +memory location is dereferenceable; otherwise, the behavior is +undefined.

+
+
The optional !invariant.group metadata must reference a single metadata name

<empty_node> corresponding to a metadata node with no entries. +See invariant.group metadata invariant.group.

+
+
+

The optional !nonnull metadata must reference a single +metadata name <empty_node> corresponding to a metadata node with no +entries. The existence of the !nonnull metadata on the +instruction tells the optimizer that the value loaded is known to +never be null. If the value is null at runtime, the behavior is undefined. +This is analogous to the nonnull attribute on parameters and return +values. This metadata can only be applied to loads of a pointer type.

+

The optional !dereferenceable metadata must reference a single metadata +name <deref_bytes_node> corresponding to a metadata node with one i64 +entry. +See dereferenceable metadata dereferenceable.

+

The optional !dereferenceable_or_null metadata must reference a single +metadata name <deref_bytes_node> corresponding to a metadata node with one +i64 entry. +See dereferenceable_or_null metadata dereferenceable_or_null.

+

The optional !align metadata must reference a single metadata name +<align_node> corresponding to a metadata node with one i64 entry. +The existence of the !align metadata on the instruction tells the +optimizer that the value loaded is known to be aligned to a boundary specified +by the integer value in the metadata node. The alignment must be a power of 2. +This is analogous to the ‘’align’’ attribute on parameters and return values. +This metadata can only be applied to loads of a pointer type. If the returned +value is not appropriately aligned at runtime, the behavior is undefined.

+

The optional !noundef metadata must reference a single metadata name +<empty_node> corresponding to a node with no entries. The existence of +!noundef metadata on the instruction tells the optimizer that the value +loaded is known to be well defined. +If the value isn’t well defined, the behavior is undefined.

+
+
+
Semantics:
+

The location of memory pointed to is loaded. If the value being loaded +is of scalar type then the number of bytes read does not exceed the +minimum number of bytes needed to hold all bits of the type. For +example, loading an i24 reads at most three bytes. When loading a +value of a type like i20 with a size that is not an integral number +of bytes, the result is undefined if the value was not originally +written using a store of the same type. +If the value being loaded is of aggregate type, the bytes that correspond to +padding may be accessed but are ignored, because it is impossible to observe +padding from the loaded aggregate value. +If <pointer> is not a well-defined value, the behavior is undefined.

+
+
+
Examples:
+
%ptr = alloca i32                               ; yields i32*:ptr
+store i32 3, i32* %ptr                          ; yields void
+%val = load i32, i32* %ptr                      ; yields i32:val = i32 3
+
+
+
+
+
+

‘store’ Instruction

+
+
Syntax:
+
store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<nontemp_node>][, !invariant.group !<empty_node>]        ; yields void
+store atomic [volatile] <ty> <value>, <ty>* <pointer> [syncscope("<target-scope>")] <ordering>, align <alignment> [, !invariant.group !<empty_node>] ; yields void
+!<nontemp_node> = !{ i32 1 }
+!<empty_node> = !{}
+
+
+
+
+
Overview:
+

The ‘store’ instruction is used to write to memory.

+
+
+
Arguments:
+

There are two arguments to the store instruction: a value to store and an +address at which to store it. The type of the <pointer> operand must be a +pointer to the first class type of the <value> +operand. If the store is marked as volatile, then the optimizer is not +allowed to modify the number or order of execution of this store with other +volatile operations. Only values of first class types of known size (i.e. not containing an opaque +structural type) can be stored.

+

If the store is marked as atomic, it takes an extra ordering and optional syncscope("<target-scope>") argument. The +acquire and acq_rel orderings aren’t valid on store instructions. +Atomic loads produce defined results when they may see +multiple atomic stores. The type of the pointee must be an integer, pointer, or +floating-point type whose bit width is a power of two greater than or equal to +eight and less than or equal to a target-specific size limit. align must be +explicitly specified on atomic stores, and the store has undefined behavior if +the alignment is not set to a value which is at least the size in bytes of the +pointee. !nontemporal does not have any defined semantics for atomic stores.

+

The optional constant align argument specifies the alignment of the +operation (that is, the alignment of the memory address). A value of 0 +or an omitted align argument means that the operation has the ABI +alignment for the target. It is the responsibility of the code emitter +to ensure that the alignment information is correct. Overestimating the +alignment results in undefined behavior. Underestimating the +alignment may produce less efficient code. An alignment of 1 is always +safe. The maximum possible alignment is 1 << 29. An alignment +value higher than the size of the stored type implies memory up to the +alignment value bytes can be stored to without trapping in the default +address space. Storing to the higher bytes however may result in data +races if another thread can access the same address. Introducing a +data race is not allowed. Storing to the extra bytes is not allowed +even in situations where a data race is known to not exist if the +function has the sanitize_address attribute.

+

The optional !nontemporal metadata must reference a single metadata +name <nontemp_node> corresponding to a metadata node with one i32 entry +of value 1. The existence of the !nontemporal metadata on the instruction +tells the optimizer and code generator that this store is not expected to +be reused in the cache. The code generator may select special +instructions to save cache bandwidth, such as the MOVNT instruction on +x86.

+

The optional !invariant.group metadata must reference a +single metadata name <empty_node>. See invariant.group metadata.

+
+
+
Semantics:
+

The contents of memory are updated to contain <value> at the +location specified by the <pointer> operand. If <value> is +of scalar type then the number of bytes written does not exceed the +minimum number of bytes needed to hold all bits of the type. For +example, storing an i24 writes at most three bytes. When writing a +value of a type like i20 with a size that is not an integral number +of bytes, it is unspecified what happens to the extra bits that do not +belong to the type, but they will typically be overwritten. +If <value> is of aggregate type, padding is filled with +undef. +If <pointer> is not a well-defined value, the behavior is undefined.

+
+
+
Example:
+
%ptr = alloca i32                               ; yields i32*:ptr
+store i32 3, i32* %ptr                          ; yields void
+%val = load i32, i32* %ptr                      ; yields i32:val = i32 3
+
+
+
+
+
+

‘fence’ Instruction

+
+
Syntax:
+
fence [syncscope("<target-scope>")] <ordering>  ; yields void
+
+
+
+
+
Overview:
+

The ‘fence’ instruction is used to introduce happens-before edges +between operations.

+
+
+
Arguments:
+

‘fence’ instructions take an ordering argument which +defines what synchronizes-with edges they add. They can only be given +acquire, release, acq_rel, and seq_cst orderings.

+
+
+
Semantics:
+

A fence A which has (at least) release ordering semantics +synchronizes with a fence B with (at least) acquire ordering +semantics if and only if there exist atomic operations X and Y, both +operating on some atomic object M, such that A is sequenced before X, X +modifies M (either directly or through some side effect of a sequence +headed by X), Y is sequenced before B, and Y observes M. This provides a +happens-before dependency between A and B. Rather than an explicit +fence, one (but not both) of the atomic operations X or Y might +provide a release or acquire (resp.) ordering constraint and +still synchronize-with the explicit fence and establish the +happens-before edge.

+

A fence which has seq_cst ordering, in addition to having both +acquire and release semantics specified above, participates in +the global program order of other seq_cst operations and/or fences.

+

A fence instruction can also take an optional +“syncscope” argument.

+
+
+
Example:
+
fence acquire                                        ; yields void
+fence syncscope("singlethread") seq_cst              ; yields void
+fence syncscope("agent") seq_cst                     ; yields void
+
+
+
+
+
+

‘cmpxchg’ Instruction

+
+
Syntax:
+
cmpxchg [weak] [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [syncscope("<target-scope>")] <success ordering> <failure ordering>[, align <alignment>] ; yields  { ty, i1 }
+
+
+
+
+
Overview:
+

The ‘cmpxchg’ instruction is used to atomically modify memory. It +loads a value in memory and compares it to a given value. If they are +equal, it tries to store a new value into the memory.

+
+
+
Arguments:
+

There are three arguments to the ‘cmpxchg’ instruction: an address +to operate on, a value to compare to the value currently at that +address, and a new value to place at that address if the compared values +are equal. The type of ‘<cmp>’ must be an integer or pointer type whose +bit width is a power of two greater than or equal to eight and less +than or equal to a target-specific size limit. ‘<cmp>’ and ‘<new>’ must +have the same type, and the type of ‘<pointer>’ must be a pointer to +that type. If the cmpxchg is marked as volatile, then the +optimizer is not allowed to modify the number or order of execution of +this cmpxchg with other volatile operations.

+

The success and failure ordering arguments specify how this +cmpxchg synchronizes with other atomic operations. Both ordering parameters +must be at least monotonic; the failure ordering cannot be either +release or acq_rel.

+

A cmpxchg instruction can also take an optional +“syncscope” argument.

+

The instruction can take an optional align attribute. +The alignment must be a power of two greater than or equal to the size of the +<value> type. If unspecified, the alignment is assumed to be equal to the +size of the ‘<value>’ type. Note that this default alignment assumption is +different from the alignment used for the load/store instructions when align +isn’t specified.

+

The pointer passed into cmpxchg must have alignment greater than or +equal to the size in memory of the operand.

+
+
+
Semantics:
+

The contents of memory at the location specified by the ‘<pointer>’ operand +is read and compared to ‘<cmp>’; if the values are equal, ‘<new>’ is +written to the location. The original value at the location is returned, +together with a flag indicating success (true) or failure (false).

+

If the cmpxchg operation is marked as weak then a spurious failure is +permitted: the operation may not write <new> even if the comparison +matched.

+

If the cmpxchg operation is strong (the default), the i1 value is 1 if and only +if the value loaded equals cmp.

+

A successful cmpxchg is a read-modify-write instruction for the purpose of +identifying release sequences. A failed cmpxchg is equivalent to an atomic +load with an ordering parameter determined by the second ordering parameter.

+
+
+
Example:
+
entry:
+  %orig = load atomic i32, i32* %ptr unordered, align 4                      ; yields i32
+  br label %loop
+
+loop:
+  %cmp = phi i32 [ %orig, %entry ], [%value_loaded, %loop]
+  %squared = mul i32 %cmp, %cmp
+  %val_success = cmpxchg i32* %ptr, i32 %cmp, i32 %squared acq_rel monotonic ; yields  { i32, i1 }
+  %value_loaded = extractvalue { i32, i1 } %val_success, 0
+  %success = extractvalue { i32, i1 } %val_success, 1
+  br i1 %success, label %done, label %loop
+
+done:
+  ...
+
+
+
+
+
+

‘atomicrmw’ Instruction

+
+
Syntax:
+
atomicrmw [volatile] <operation> <ty>* <pointer>, <ty> <value> [syncscope("<target-scope>")] <ordering>[, align <alignment>]  ; yields ty
+
+
+
+
+
Overview:
+

The ‘atomicrmw’ instruction is used to atomically modify memory.

+
+
+
Arguments:
+

There are three arguments to the ‘atomicrmw’ instruction: an +operation to apply, an address whose value to modify, and an argument to the +operation. The operation must be one of the following keywords:

+
    +
  • xchg

  • +
  • add

  • +
  • sub

  • +
  • and

  • +
  • nand

  • +
  • or

  • +
  • xor

  • +
  • max

  • +
  • min

  • +
  • umax

  • +
  • umin

  • +
  • fadd

  • +
  • fsub

  • +
+

For most of these operations, the type of ‘<value>’ must be an integer +type whose bit width is a power of two greater than or equal to eight +and less than or equal to a target-specific size limit. For xchg, this +may also be a floating point type with the same size constraints as +integers. For fadd/fsub, this must be a floating point type. The +type of the ‘<pointer>’ operand must be a pointer to that type. If +the atomicrmw is marked as volatile, then the optimizer is not +allowed to modify the number or order of execution of this +atomicrmw with other volatile operations.

+

The instruction can take an optional align attribute. +The alignment must be a power of two greater than or equal to the size of the +<value> type. If unspecified, the alignment is assumed to be equal to the +size of the ‘<value>’ type. Note that this default alignment assumption is +different from the alignment used for the load/store instructions when align +isn’t specified.

+

An atomicrmw instruction can also take an optional +“syncscope” argument.

+
+
+
Semantics:
+

The contents of memory at the location specified by the ‘<pointer>’ +operand are atomically read, modified, and written back. The original +value at the location is returned. The modification is specified by the +operation argument:

+
    +
  • xchg: *ptr = val

  • +
  • add: *ptr = *ptr + val

  • +
  • sub: *ptr = *ptr - val

  • +
  • and: *ptr = *ptr & val

  • +
  • nand: *ptr = ~(*ptr & val)

  • +
  • or: *ptr = *ptr | val

  • +
  • xor: *ptr = *ptr ^ val

  • +
  • max: *ptr = *ptr > val ? *ptr : val (using a signed comparison)

  • +
  • min: *ptr = *ptr < val ? *ptr : val (using a signed comparison)

  • +
  • umax: *ptr = *ptr > val ? *ptr : val (using an unsigned comparison)

  • +
  • umin: *ptr = *ptr < val ? *ptr : val (using an unsigned comparison)

  • +
  • fadd: *ptr = *ptr + val (using floating point arithmetic)

  • +
  • fsub: *ptr = *ptr - val (using floating point arithmetic)

  • +
+
+
+
Example:
+
%old = atomicrmw add i32* %ptr, i32 1 acquire                        ; yields i32
+
+
+
+
+
+

‘getelementptr’ Instruction

+
+
Syntax:
+
<result> = getelementptr <ty>, <ty>* <ptrval>{, [inrange] <ty> <idx>}*
+<result> = getelementptr inbounds <ty>, <ty>* <ptrval>{, [inrange] <ty> <idx>}*
+<result> = getelementptr <ty>, <ptr vector> <ptrval>, [inrange] <vector index type> <idx>
+
+
+
+
+
Overview:
+

The ‘getelementptr’ instruction is used to get the address of a +subelement of an aggregate data structure. It performs +address calculation only and does not access memory. The instruction can also +be used to calculate a vector of such addresses.

+
+
+
Arguments:
+

The first argument is always a type used as the basis for the calculations. +The second argument is always a pointer or a vector of pointers, and is the +base address to start from. The remaining arguments are indices +that indicate which of the elements of the aggregate object are indexed. +The interpretation of each index is dependent on the type being indexed +into. The first index always indexes the pointer value given as the +second argument, the second index indexes a value of the type pointed to +(not necessarily the value directly pointed to, since the first index +can be non-zero), etc. The first type indexed into must be a pointer +value, subsequent types can be arrays, vectors, and structs. Note that +subsequent types being indexed into can never be pointers, since that +would require loading the pointer before continuing calculation.

+

The type of each index argument depends on the type it is indexing into. +When indexing into a (optionally packed) structure, only i32 integer +constants are allowed (when using a vector of indices they must all +be the same i32 integer constant). When indexing into an array, +pointer or vector, integers of any width are allowed, and they are not +required to be constant. These integers are treated as signed values +where relevant.

+

For example, let’s consider a C code fragment and how it gets compiled +to LLVM:

+
struct RT {
+  char A;
+  int B[10][20];
+  char C;
+};
+struct ST {
+  int X;
+  double Y;
+  struct RT Z;
+};
+
+int *foo(struct ST *s) {
+  return &s[1].Z.B[5][13];
+}
+
+
+

The LLVM code generated by Clang is:

+
%struct.RT = type { i8, [10 x [20 x i32]], i8 }
+%struct.ST = type { i32, double, %struct.RT }
+
+define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
+entry:
+  %arrayidx = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
+  ret i32* %arrayidx
+}
+
+
+
+
+
Semantics:
+

In the example above, the first index is indexing into the +‘%struct.ST*’ type, which is a pointer, yielding a ‘%struct.ST’ += ‘{ i32, double, %struct.RT }’ type, a structure. The second index +indexes into the third element of the structure, yielding a +‘%struct.RT’ = ‘{ i8 , [10 x [20 x i32]], i8 }’ type, another +structure. The third index indexes into the second element of the +structure, yielding a ‘[10 x [20 x i32]]’ type, an array. The two +dimensions of the array are subscripted into, yielding an ‘i32’ +type. The ‘getelementptr’ instruction returns a pointer to this +element, thus computing a value of ‘i32*’ type.

+

Note that it is perfectly legal to index partially through a structure, +returning a pointer to an inner element. Because of this, the LLVM code +for the given testcase is equivalent to:

+
define i32* @foo(%struct.ST* %s) {
+  %t1 = getelementptr %struct.ST, %struct.ST* %s, i32 1                        ; yields %struct.ST*:%t1
+  %t2 = getelementptr %struct.ST, %struct.ST* %t1, i32 0, i32 2                ; yields %struct.RT*:%t2
+  %t3 = getelementptr %struct.RT, %struct.RT* %t2, i32 0, i32 1                ; yields [10 x [20 x i32]]*:%t3
+  %t4 = getelementptr [10 x [20 x i32]], [10 x [20 x i32]]* %t3, i32 0, i32 5  ; yields [20 x i32]*:%t4
+  %t5 = getelementptr [20 x i32], [20 x i32]* %t4, i32 0, i32 13               ; yields i32*:%t5
+  ret i32* %t5
+}
+
+
+

If the inbounds keyword is present, the result value of the +getelementptr is a poison value if one of the +following rules is violated:

+
    +
  • The base pointer has an in bounds address of an allocated object, which +means that it points into an allocated object, or to its end. The only +in bounds address for a null pointer in the default address-space is the +null pointer itself.

  • +
  • If the type of an index is larger than the pointer index type, the +truncation to the pointer index type preserves the signed value.

  • +
  • The multiplication of an index by the type size does not wrap the pointer +index type in a signed sense (nsw).

  • +
  • The successive addition of offsets (without adding the base address) does +not wrap the pointer index type in a signed sense (nsw).

  • +
  • The successive addition of the current address, interpreted as an unsigned +number, and an offset, interpreted as a signed number, does not wrap the +unsigned address space and remains in bounds of the allocated object. +As a corollary, if the added offset is non-negative, the addition does not +wrap in an unsigned sense (nuw).

  • +
  • In cases where the base is a vector of pointers, the inbounds keyword +applies to each of the computations element-wise.

  • +
+

These rules are based on the assumption that no allocated object may cross +the unsigned address space boundary, and no allocated object may be larger +than half the pointer index type space.

+

If the inbounds keyword is not present, the offsets are added to the +base address with silently-wrapping two’s complement arithmetic. If the +offsets have a different width from the pointer, they are sign-extended +or truncated to the width of the pointer. The result value of the +getelementptr may be outside the object pointed to by the base +pointer. The result value may not necessarily be used to access memory +though, even if it happens to point into allocated storage. See the +Pointer Aliasing Rules section for more +information.

+

If the inrange keyword is present before any index, loading from or +storing to any pointer derived from the getelementptr has undefined +behavior if the load or store would access memory outside of the bounds of +the element selected by the index marked as inrange. The result of a +pointer comparison or ptrtoint (including ptrtoint-like operations +involving memory) involving a pointer derived from a getelementptr with +the inrange keyword is undefined, with the exception of comparisons +in the case where both operands are in the range of the element selected +by the inrange keyword, inclusive of the address one past the end of +that element. Note that the inrange keyword is currently only allowed +in constant getelementptr expressions.

+

The getelementptr instruction is often confusing. For some more insight +into how it works, see the getelementptr FAQ.

+
+
+
Example:
+
; yields [12 x i8]*:aptr
+%aptr = getelementptr {i32, [12 x i8]}, {i32, [12 x i8]}* %saptr, i64 0, i32 1
+; yields i8*:vptr
+%vptr = getelementptr {i32, <2 x i8>}, {i32, <2 x i8>}* %svptr, i64 0, i32 1, i32 1
+; yields i8*:eptr
+%eptr = getelementptr [12 x i8], [12 x i8]* %aptr, i64 0, i32 1
+; yields i32*:iptr
+%iptr = getelementptr [10 x i32], [10 x i32]* @arr, i16 0, i16 0
+
+
+
+
+
Vector of pointers:
+

The getelementptr returns a vector of pointers, instead of a single address, +when one or more of its arguments is a vector. In such cases, all vector +arguments should have the same number of elements, and every scalar argument +will be effectively broadcast into a vector during address calculation.

+
; All arguments are vectors:
+;   A[i] = ptrs[i] + offsets[i]*sizeof(i8)
+%A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets
+
+; Add the same scalar offset to each pointer of a vector:
+;   A[i] = ptrs[i] + offset*sizeof(i8)
+%A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset
+
+; Add distinct offsets to the same pointer:
+;   A[i] = ptr + offsets[i]*sizeof(i8)
+%A = getelementptr i8, i8* %ptr, <4 x i64> %offsets
+
+; In all cases described above the type of the result is <4 x i8*>
+
+
+

The two following instructions are equivalent:

+
getelementptr  %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
+  <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
+  <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+  <4 x i32> %ind4,
+  <4 x i64> <i64 13, i64 13, i64 13, i64 13>
+
+getelementptr  %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1,
+  i32 2, i32 1, <4 x i32> %ind4, i64 13
+
+
+

Let’s look at the C code, where the vector version of getelementptr +makes sense:

+
// Let's assume that we vectorize the following loop:
+double *A, *B; int *C;
+for (int i = 0; i < size; ++i) {
+  A[i] = B[C[i]];
+}
+
+
+
; get pointers for 8 elements from array B
+%ptrs = getelementptr double, double* %B, <8 x i32> %C
+; load 8 elements from array B into A
+%A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> %ptrs,
+     i32 8, <8 x i1> %mask, <8 x double> %passthru)
+
+
+
+
+
+
+

Conversion Operations

+

The instructions in this category are the conversion instructions +(casting) which all take a single operand and a type. They perform +various bit conversions on the operand.

+
+

‘trunc .. to’ Instruction

+
+
Syntax:
+
<result> = trunc <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘trunc’ instruction truncates its operand to the type ty2.

+
+
+
Arguments:
+

The ‘trunc’ instruction takes a value to trunc, and a type to trunc +it to. Both types must be of integer types, or vectors +of the same number of integers. The bit size of the value must be +larger than the bit size of the destination type, ty2. Equal sized +types are not allowed.

+
+
+
Semantics:
+

The ‘trunc’ instruction truncates the high order bits in value +and converts the remaining bits to ty2. Since the source size must +be larger than the destination size, trunc cannot be a no-op cast. +It will always truncate bits.

+
+
+
Example:
+
%X = trunc i32 257 to i8                        ; yields i8:1
+%Y = trunc i32 123 to i1                        ; yields i1:true
+%Z = trunc i32 122 to i1                        ; yields i1:false
+%W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
+
+
+
+
+
+

‘zext .. to’ Instruction

+
+
Syntax:
+
<result> = zext <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘zext’ instruction zero extends its operand to type ty2.

+
+
+
Arguments:
+

The ‘zext’ instruction takes a value to cast, and a type to cast it +to. Both types must be of integer types, or vectors of +the same number of integers. The bit size of the value must be +smaller than the bit size of the destination type, ty2.

+
+
+
Semantics:
+

The zext fills the high order bits of the value with zero bits +until it reaches the size of the destination type, ty2.

+

When zero extending from i1, the result will always be either 0 or 1.

+
+
+
Example:
+
%X = zext i32 257 to i64              ; yields i64:257
+%Y = zext i1 true to i32              ; yields i32:1
+%Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
+
+
+
+
+

‘sext .. to’ Instruction

+
+
Syntax:
+
<result> = sext <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘sext’ instruction sign extends value to the type ty2.

+
+
+
Arguments:
+

The ‘sext’ instruction takes a value to cast, and a type to cast it +to. Both types must be of integer types, or vectors of +the same number of integers. The bit size of the value must be +smaller than the bit size of the destination type, ty2.

+
+
+
Semantics:
+

The ‘sext’ instruction performs a sign extension by copying the sign +bit (highest order bit) of the value until it reaches the bit size +of the type ty2.

+

When sign extending from i1, the extension always results in -1 or 0.

+
+
+
Example:
+
%X = sext i8  -1 to i16              ; yields i16   :65535
+%Y = sext i1 true to i32             ; yields i32:-1
+%Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
+
+
+
+
+

‘fptrunc .. to’ Instruction

+
+
Syntax:
+
<result> = fptrunc <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘fptrunc’ instruction truncates value to type ty2.

+
+
+
Arguments:
+

The ‘fptrunc’ instruction takes a floating-point +value to cast and a floating-point type to cast it to. +The size of value must be larger than the size of ty2. This +implies that fptrunc cannot be used to make a no-op cast.

+
+
+
Semantics:
+

The ‘fptrunc’ instruction casts a value from a larger +floating-point type to a smaller floating-point type. +This instruction is assumed to execute in the default floating-point +environment.

+
+
+
Example:
+
%X = fptrunc double 16777217.0 to float    ; yields float:16777216.0
+%Y = fptrunc double 1.0E+300 to half       ; yields half:+infinity
+
+
+
+
+
+

‘fpext .. to’ Instruction

+
+
Syntax:
+
<result> = fpext <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘fpext’ instruction extends a floating-point value to a larger floating-point +value.

+
+
+
Arguments:
+

The ‘fpext’ instruction takes a floating-point +value to cast, and a floating-point type to cast it +to. The source type must be smaller than the destination type.

+
+
+
Semantics:
+

The ‘fpext’ instruction extends the value from a smaller +floating-point type to a larger floating-point type. The fpext cannot be used to make a +no-op cast because it always changes bits. Use bitcast to make a +no-op cast for a floating-point cast.

+
+
+
Example:
+
%X = fpext float 3.125 to double         ; yields double:3.125000e+00
+%Y = fpext double %X to fp128            ; yields fp128:0xL00000000000000004000900000000000
+
+
+
+
+
+

‘fptoui .. to’ Instruction

+
+
Syntax:
+
<result> = fptoui <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘fptoui’ instruction converts a floating-point value to its unsigned +integer equivalent of type ty2.

+
+
+
Arguments:
+

The ‘fptoui’ instruction takes a value to cast, which must be a +scalar or vector floating-point value, and a type to +cast it to ty2, which must be an integer type. If +ty is a vector floating-point type, ty2 must be a vector integer +type with the same number of elements as ty.

+
+
+
Semantics:
+

The ‘fptoui’ instruction converts its floating-point operand into the nearest (rounding towards zero) +unsigned integer value. If the value cannot fit in ty2, the result +is a poison value.

+
+
+
Example:
+
%X = fptoui double 123.0 to i32      ; yields i32:123
+%Y = fptoui float 1.0E+300 to i1     ; yields undefined:1
+%Z = fptoui float 1.04E+17 to i8     ; yields undefined:1
+
+
+
+
+
+

‘fptosi .. to’ Instruction

+
+
Syntax:
+
<result> = fptosi <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘fptosi’ instruction converts a floating-point +value to its signed integer equivalent of type ty2.

+
+
+
Arguments:
+

The ‘fptosi’ instruction takes a value to cast, which must be a +scalar or vector floating-point value, and a type to +cast it to ty2, which must be an integer type. If +ty is a vector floating-point type, ty2 must be a vector integer +type with the same number of elements as ty.

+
+
+
Semantics:
+

The ‘fptosi’ instruction converts its floating-point operand into the nearest (rounding towards zero) +signed integer value. If the value cannot fit in ty2, the result +is a poison value.

+
+
+
Example:
+
%X = fptosi double -123.0 to i32      ; yields i32:-123
+%Y = fptosi float 1.0E-247 to i1      ; yields undefined:1
+%Z = fptosi float 1.04E+17 to i8      ; yields undefined:1
+
+
+
+
+
+

‘uitofp .. to’ Instruction

+
+
Syntax:
+
<result> = uitofp <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘uitofp’ instruction regards value as an unsigned integer +and converts that value to the ty2 type.

+
+
+
Arguments:
+

The ‘uitofp’ instruction takes a value to cast, which must be a +scalar or vector integer value, and a type to cast it to +ty2, which must be a floating-point type. If +ty is a vector integer type, ty2 must be a vector floating-point +type with the same number of elements as ty.

+
+
+
Semantics:
+

The ‘uitofp’ instruction interprets its operand as an unsigned +integer quantity and converts it to the corresponding floating-point +value. If the value cannot be exactly represented, it is rounded using +the default rounding mode.

+
+
+
Example:
+
%X = uitofp i32 257 to float         ; yields float:257.0
+%Y = uitofp i8 -1 to double          ; yields double:255.0
+
+
+
+
+
+

‘sitofp .. to’ Instruction

+
+
Syntax:
+
<result> = sitofp <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘sitofp’ instruction regards value as a signed integer and +converts that value to the ty2 type.

+
+
+
Arguments:
+

The ‘sitofp’ instruction takes a value to cast, which must be a +scalar or vector integer value, and a type to cast it to +ty2, which must be a floating-point type. If +ty is a vector integer type, ty2 must be a vector floating-point +type with the same number of elements as ty.

+
+
+
Semantics:
+

The ‘sitofp’ instruction interprets its operand as a signed integer +quantity and converts it to the corresponding floating-point value. If the +value cannot be exactly represented, it is rounded using the default rounding +mode.

+
+
+
Example:
+
%X = sitofp i32 257 to float         ; yields float:257.0
+%Y = sitofp i8 -1 to double          ; yields double:-1.0
+
+
+
+
+
+

‘ptrtoint .. to’ Instruction

+
+
Syntax:
+
<result> = ptrtoint <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘ptrtoint’ instruction converts the pointer or a vector of +pointers value to the integer (or vector of integers) type ty2.

+
+
+
Arguments:
+

The ‘ptrtoint’ instruction takes a value to cast, which must be +a value of type pointer or a vector of pointers, and a +type to cast it to ty2, which must be an integer or +a vector of integers type.

+
+
+
Semantics:
+

The ‘ptrtoint’ instruction converts value to integer type +ty2 by interpreting the pointer value as an integer and either +truncating or zero extending that value to the size of the integer type. +If value is smaller than ty2 then a zero extension is done. If +value is larger than ty2 then a truncation is done. If they are +the same size, then nothing is done (no-op cast) other than a type +change.

+
+
+
Example:
+
%X = ptrtoint i32* %P to i8                         ; yields truncation on 32-bit architecture
+%Y = ptrtoint i32* %P to i64                        ; yields zero extension on 32-bit architecture
+%Z = ptrtoint <4 x i32*> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture
+
+
+
+
+
+

‘inttoptr .. to’ Instruction

+
+
Syntax:
+
<result> = inttoptr <ty> <value> to <ty2>[, !dereferenceable !<deref_bytes_node>][, !dereferenceable_or_null !<deref_bytes_node>]             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘inttoptr’ instruction converts an integer value to a +pointer type, ty2.

+
+
+
Arguments:
+

The ‘inttoptr’ instruction takes an integer value to +cast, and a type to cast it to, which must be a pointer +type.

+

The optional !dereferenceable metadata must reference a single metadata +name <deref_bytes_node> corresponding to a metadata node with one i64 +entry. +See dereferenceable metadata.

+

The optional !dereferenceable_or_null metadata must reference a single +metadata name <deref_bytes_node> corresponding to a metadata node with one +i64 entry. +See dereferenceable_or_null metadata.

+
+
+
Semantics:
+

The ‘inttoptr’ instruction converts value to type ty2 by +applying either a zero extension or a truncation depending on the size +of the integer value. If value is larger than the size of a +pointer then a truncation is done. If value is smaller than the size +of a pointer then a zero extension is done. If they are the same size, +nothing is done (no-op cast).

+
+
+
Example:
+
%X = inttoptr i32 255 to i32*          ; yields zero extension on 64-bit architecture
+%Y = inttoptr i32 255 to i32*          ; yields no-op on 32-bit architecture
+%Z = inttoptr i64 0 to i32*            ; yields truncation on 32-bit architecture
+%Z = inttoptr <4 x i32> %G to <4 x i8*>; yields truncation of vector G to four pointers
+
+
+
+
+
+

‘bitcast .. to’ Instruction

+
+
Syntax:
+
<result> = bitcast <ty> <value> to <ty2>             ; yields ty2
+
+
+
+
+
Overview:
+

The ‘bitcast’ instruction converts value to type ty2 without +changing any bits.

+
+
+
Arguments:
+

The ‘bitcast’ instruction takes a value to cast, which must be a +non-aggregate first class value, and a type to cast it to, which must +also be a non-aggregate first class type. The +bit sizes of value and the destination type, ty2, must be +identical. If the source type is a pointer, the destination type must +also be a pointer of the same size. This instruction supports bitwise +conversion of vectors to integers and to vectors of other types (as +long as they have the same size).

+
+
+
Semantics:
+

The ‘bitcast’ instruction converts value to type ty2. It +is always a no-op cast because no bits change with this +conversion. The conversion is done as if the value had been stored +to memory and read back as type ty2. Pointer (or vector of +pointers) types may only be converted to other pointer (or vector of +pointers) types with the same address space through this instruction. +To convert pointers to other types, use the inttoptr +or ptrtoint instructions first.

+

There is a caveat for bitcasts involving vector types in relation to +endianness. For example bitcast <2 x i8> <value> to i16 puts element zero +of the vector in the least significant bits of the i16 for little-endian while +element zero ends up in the most significant bits for big-endian.

+
+
+
Example:
+
%X = bitcast i8 255 to i8          ; yields i8 :-1
+%Y = bitcast i32* %x to sint*      ; yields sint*:%x
+%Z = bitcast <2 x int> %V to i64;  ; yields i64: %V (depends on endianness)
+%Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*>
+
+
+
+
+
+

‘addrspacecast .. to’ Instruction

+
+
Syntax:
+
<result> = addrspacecast <pty> <ptrval> to <pty2>       ; yields pty2
+
+
+
+
+
Overview:
+

The ‘addrspacecast’ instruction converts ptrval from pty in +address space n to type pty2 in address space m.

+
+
+
Arguments:
+

The ‘addrspacecast’ instruction takes a pointer or vector of pointer value +to cast and a pointer type to cast it to, which must have a different +address space.

+
+
+
Semantics:
+

The ‘addrspacecast’ instruction converts the pointer value +ptrval to type pty2. It can be a no-op cast or a complex +value modification, depending on the target and the address space +pair. Pointer conversions within the same address space must be +performed with the bitcast instruction. Note that if the address space +conversion is legal then both result and operand refer to the same memory +location.

+
+
+
Example:
+
%X = addrspacecast i32* %x to i32 addrspace(1)*    ; yields i32 addrspace(1)*:%x
+%Y = addrspacecast i32 addrspace(1)* %y to i64 addrspace(2)*    ; yields i64 addrspace(2)*:%y
+%Z = addrspacecast <4 x i32*> %z to <4 x float addrspace(3)*>   ; yields <4 x float addrspace(3)*>:%z
+
+
+
+
+
+
+

Other Operations

+

The instructions in this category are the “miscellaneous” instructions, +which defy better classification.

+
+

‘icmp’ Instruction

+
+
Syntax:
+
<result> = icmp <cond> <ty> <op1>, <op2>   ; yields i1 or <N x i1>:result
+
+
+
+
+
Overview:
+

The ‘icmp’ instruction returns a boolean value or a vector of +boolean values based on comparison of its two integer, integer vector, +pointer, or pointer vector operands.

+
+
+
Arguments:
+

The ‘icmp’ instruction takes three operands. The first operand is +the condition code indicating the kind of comparison to perform. It is +not a value, just a keyword. The possible condition codes are:

+
    +
  1. eq: equal

  2. +
  3. ne: not equal

  4. +
  5. ugt: unsigned greater than

  6. +
  7. uge: unsigned greater or equal

  8. +
  9. ult: unsigned less than

  10. +
  11. ule: unsigned less or equal

  12. +
  13. sgt: signed greater than

  14. +
  15. sge: signed greater or equal

  16. +
  17. slt: signed less than

  18. +
  19. sle: signed less or equal

  20. +
+

The remaining two arguments must be integer or +pointer or integer vector typed. They +must also be identical types.

+
+
+
Semantics:
+

The ‘icmp’ instruction compares op1 and op2 according to the condition +code given as cond. The comparison performed always yields either an +i1 or vector of i1 result, as follows:

+
    +
  1. eq: yields true if the operands are equal, false +otherwise. No sign interpretation is necessary or performed.

  2. +
  3. ne: yields true if the operands are unequal, false +otherwise. No sign interpretation is necessary or performed.

  4. +
  5. ugt: interprets the operands as unsigned values and yields +true if op1 is greater than op2.

  6. +
  7. uge: interprets the operands as unsigned values and yields +true if op1 is greater than or equal to op2.

  8. +
  9. ult: interprets the operands as unsigned values and yields +true if op1 is less than op2.

  10. +
  11. ule: interprets the operands as unsigned values and yields +true if op1 is less than or equal to op2.

  12. +
  13. sgt: interprets the operands as signed values and yields true +if op1 is greater than op2.

  14. +
  15. sge: interprets the operands as signed values and yields true +if op1 is greater than or equal to op2.

  16. +
  17. slt: interprets the operands as signed values and yields true +if op1 is less than op2.

  18. +
  19. sle: interprets the operands as signed values and yields true +if op1 is less than or equal to op2.

  20. +
+

If the operands are pointer typed, the pointer values +are compared as if they were integers.

+

If the operands are integer vectors, then they are compared element by +element. The result is an i1 vector with the same number of elements +as the values being compared. Otherwise, the result is an i1.

+
+
+
Example:
+
<result> = icmp eq i32 4, 5          ; yields: result=false
+<result> = icmp ne float* %X, %X     ; yields: result=false
+<result> = icmp ult i16  4, 5        ; yields: result=true
+<result> = icmp sgt i16  4, 5        ; yields: result=false
+<result> = icmp ule i16 -4, 5        ; yields: result=false
+<result> = icmp sge i16  4, 5        ; yields: result=false
+
+
+
+
+
+

‘fcmp’ Instruction

+
+
Syntax:
+
<result> = fcmp [fast-math flags]* <cond> <ty> <op1>, <op2>     ; yields i1 or <N x i1>:result
+
+
+
+
+
Overview:
+

The ‘fcmp’ instruction returns a boolean value or vector of boolean +values based on comparison of its operands.

+

If the operands are floating-point scalars, then the result type is a +boolean (i1).

+

If the operands are floating-point vectors, then the result type is a +vector of boolean with the same number of elements as the operands being +compared.

+
+
+
Arguments:
+

The ‘fcmp’ instruction takes three operands. The first operand is +the condition code indicating the kind of comparison to perform. It is +not a value, just a keyword. The possible condition codes are:

+
    +
  1. false: no comparison, always returns false

  2. +
  3. oeq: ordered and equal

  4. +
  5. ogt: ordered and greater than

  6. +
  7. oge: ordered and greater than or equal

  8. +
  9. olt: ordered and less than

  10. +
  11. ole: ordered and less than or equal

  12. +
  13. one: ordered and not equal

  14. +
  15. ord: ordered (no nans)

  16. +
  17. ueq: unordered or equal

  18. +
  19. ugt: unordered or greater than

  20. +
  21. uge: unordered or greater than or equal

  22. +
  23. ult: unordered or less than

  24. +
  25. ule: unordered or less than or equal

  26. +
  27. une: unordered or not equal

  28. +
  29. uno: unordered (either nans)

  30. +
  31. true: no comparison, always returns true

  32. +
+

Ordered means that neither operand is a QNAN while unordered means +that either operand may be a QNAN.

+

Each of the op1 and op2 arguments must be of either a floating-point type or a vector of floating-point type. +They must have identical types.

+
+
+
Semantics:
+

The ‘fcmp’ instruction compares op1 and op2 according to the +condition code given as cond. If the operands are vectors, then the +vectors are compared element by element. Each comparison performed +always yields an i1 result, as follows:

+
    +
  1. false: always yields false, regardless of operands.

  2. +
  3. oeq: yields true if both operands are not a QNAN and op1 +is equal to op2.

  4. +
  5. ogt: yields true if both operands are not a QNAN and op1 +is greater than op2.

  6. +
  7. oge: yields true if both operands are not a QNAN and op1 +is greater than or equal to op2.

  8. +
  9. olt: yields true if both operands are not a QNAN and op1 +is less than op2.

  10. +
  11. ole: yields true if both operands are not a QNAN and op1 +is less than or equal to op2.

  12. +
  13. one: yields true if both operands are not a QNAN and op1 +is not equal to op2.

  14. +
  15. ord: yields true if both operands are not a QNAN.

  16. +
  17. ueq: yields true if either operand is a QNAN or op1 is +equal to op2.

  18. +
  19. ugt: yields true if either operand is a QNAN or op1 is +greater than op2.

  20. +
  21. uge: yields true if either operand is a QNAN or op1 is +greater than or equal to op2.

  22. +
  23. ult: yields true if either operand is a QNAN or op1 is +less than op2.

  24. +
  25. ule: yields true if either operand is a QNAN or op1 is +less than or equal to op2.

  26. +
  27. une: yields true if either operand is a QNAN or op1 is +not equal to op2.

  28. +
  29. uno: yields true if either operand is a QNAN.

  30. +
  31. true: always yields true, regardless of operands.

  32. +
+

The fcmp instruction can also optionally take any number of +fast-math flags, which are optimization hints to enable +otherwise unsafe floating-point optimizations.

+

Any set of fast-math flags is legal on an fcmp instruction, but the +only flags that have any effect on its semantics are those that allow +assumptions to be made about the values of input arguments; namely +nnan, ninf, and reassoc. See Fast-Math Flags for more information.

+
+
+
Example:
+
<result> = fcmp oeq float 4.0, 5.0    ; yields: result=false
+<result> = fcmp one float 4.0, 5.0    ; yields: result=true
+<result> = fcmp olt float 4.0, 5.0    ; yields: result=true
+<result> = fcmp ueq double 1.0, 2.0   ; yields: result=false
+
+
+
+
+
+

‘phi’ Instruction

+
+
Syntax:
+
<result> = phi [fast-math-flags] <ty> [ <val0>, <label0>], ...
+
+
+
+
+
Overview:
+

The ‘phi’ instruction is used to implement the φ node in the SSA +graph representing the function.

+
+
+
Arguments:
+

The type of the incoming values is specified with the first type field. +After this, the ‘phi’ instruction takes a list of pairs as +arguments, with one pair for each predecessor basic block of the current +block. Only values of first class type may be used as +the value arguments to the PHI node. Only labels may be used as the +label arguments.

+

There must be no non-phi instructions between the start of a basic block +and the PHI instructions: i.e. PHI instructions must be first in a basic +block.

+

For the purposes of the SSA form, the use of each incoming value is +deemed to occur on the edge from the corresponding predecessor block to +the current block (but after any definition of an ‘invoke’ +instruction’s return value on the same edge).

+

The optional fast-math-flags marker indicates that the phi has one +or more fast-math-flags. These are optimization hints +to enable otherwise unsafe floating-point optimizations. Fast-math-flags +are only valid for phis that return a floating-point scalar or vector +type, or an array (nested to any depth) of floating-point scalar or vector +types.

+
+
+
Semantics:
+

At runtime, the ‘phi’ instruction logically takes on the value +specified by the pair corresponding to the predecessor basic block that +executed just prior to the current block.

+
+
+
Example:
+
Loop:       ; Infinite loop that counts from 0 on up...
+  %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
+  %nextindvar = add i32 %indvar, 1
+  br label %Loop
+
+
+
+
+
+

‘select’ Instruction

+
+
Syntax:
+
<result> = select [fast-math flags] selty <cond>, <ty> <val1>, <ty> <val2>             ; yields ty
+
+selty is either i1 or {<N x i1>}
+
+
+
+
+
Overview:
+

The ‘select’ instruction is used to choose one value based on a +condition, without IR-level branching.

+
+
+
Arguments:
+

The ‘select’ instruction requires an ‘i1’ value or a vector of ‘i1’ +values indicating the condition, and two values of the same first +class type.

+
    +
  1. The optional fast-math flags marker indicates that the select has one or more +fast-math flags. These are optimization hints to enable +otherwise unsafe floating-point optimizations. Fast-math flags are only valid +for selects that return a floating-point scalar or vector type, or an array +(nested to any depth) of floating-point scalar or vector types.

  2. +
+
+
+
Semantics:
+

If the condition is an i1 and it evaluates to 1, the instruction returns +the first value argument; otherwise, it returns the second value +argument.

+

If the condition is a vector of i1, then the value arguments must be +vectors of the same size, and the selection is done element by element.

+

If the condition is an i1 and the value arguments are vectors of the +same size, then an entire vector is selected.

+
+
+
Example:
+
%X = select i1 true, i8 17, i8 42          ; yields i8:17
+
+
+
+
+
+

‘freeze’ Instruction

+
+
Syntax:
+
<result> = freeze ty <val>    ; yields ty:result
+
+
+
+
+
Overview:
+

The ‘freeze’ instruction is used to stop propagation of +undef and poison values.

+
+
+
Arguments:
+

The ‘freeze’ instruction takes a single argument.

+
+
+
Semantics:
+

If the argument is undef or poison, ‘freeze’ returns an +arbitrary, but fixed, value of type ‘ty’. +Otherwise, this instruction is a no-op and returns the input argument. +All uses of a value returned by the same ‘freeze’ instruction are +guaranteed to always observe the same value, while different ‘freeze’ +instructions may yield different values.

+

While undef and poison pointers can be frozen, the result is a +non-dereferenceable pointer. See the +Pointer Aliasing Rules section for more information. +If an aggregate value or vector is frozen, the operand is frozen element-wise. +The padding of an aggregate isn’t considered, since it isn’t visible +without storing it into memory and loading it with a different type.

+
+
+
Example:
+
%w = i32 undef
+%x = freeze i32 %w
+%y = add i32 %w, %w         ; undef
+%z = add i32 %x, %x         ; even number because all uses of %x observe
+                            ; the same value
+%x2 = freeze i32 %w
+%cmp = icmp eq i32 %x, %x2  ; can be true or false
+
+; example with vectors
+%v = <2 x i32> <i32 undef, i32 poison>
+%a = extractelement <2 x i32> %v, i32 0    ; undef
+%b = extractelement <2 x i32> %v, i32 1    ; poison
+%add = add i32 %a, %a                      ; undef
+
+%v.fr = freeze <2 x i32> %v                ; element-wise freeze
+%d = extractelement <2 x i32> %v.fr, i32 0 ; not undef
+%add.f = add i32 %d, %d                    ; even number
+
+; branching on frozen value
+%poison = add nsw i1 %k, undef   ; poison
+%c = freeze i1 %poison
+br i1 %c, label %foo, label %bar ; non-deterministic branch to %foo or %bar
+
+
+
+
+
+

‘call’ Instruction

+
+
Syntax:
+
<result> = [tail | musttail | notail ] call [fast-math flags] [cconv] [ret attrs] [addrspace(<num>)]
+           <ty>|<fnty> <fnptrval>(<function args>) [fn attrs] [ operand bundles ]
+
+
+
+
+
Overview:
+

The ‘call’ instruction represents a simple function call.

+
+
+
Arguments:
+

This instruction requires several arguments:

+
    +
  1. The optional tail and musttail markers indicate that the optimizers +should perform tail call optimization. The tail marker is a hint that +can be ignored. The musttail marker +means that the call must be tail call optimized in order for the program to +be correct. The musttail marker provides these guarantees:

    +
      +
    1. The call will not cause unbounded stack growth if it is part of a +recursive cycle in the call graph.

    2. +
    3. Arguments with the inalloca or +preallocated attribute are forwarded in place.

    4. +
    5. If the musttail call appears in a function with the "thunk" attribute +and the caller and callee both have varargs, then any unprototyped +arguments in register or memory are forwarded to the callee. Similarly, +the return value of the callee is returned to the caller’s caller, even +if a void return type is in use.

    6. +
    +

    Both markers imply that the callee does not access allocas from the caller. +The tail marker additionally implies that the callee does not access +varargs from the caller. Calls marked musttail must obey the following +additional rules:

    +
      +
    • The call must immediately precede a ret instruction, +or a pointer bitcast followed by a ret instruction.

    • +
    • The ret instruction must return the (possibly bitcasted) value +produced by the call, undef, or void.

    • +
    • The calling conventions of the caller and callee must match.

    • +
    • The callee must be varargs iff the caller is varargs. Bitcasting a +non-varargs function to the appropriate varargs type is legal so +long as the non-varargs prefixes obey the other rules.

    • +
    • The return type must not undergo automatic conversion to an sret pointer.

    • +
    +
  2. +
+
+

In addition, if the calling convention is not swifttailcc or tailcc:

+
+
    +
  • All ABI-impacting function attributes, such as sret, byval, inreg, +returned, and inalloca, must match.

  • +
  • The caller and callee prototypes must match. Pointer types of parameters +or return types may differ in pointee type, but not in address space.

  • +
+
+

On the other hand, if the calling convention is swifttailcc or tailcc:

+
+
    +
  • Only these ABI-impacting attributes are allowed: sret, byval, +swiftself, and swiftasync.

  • +
  • Prototypes are not required to match.

  • +
+

Tail call optimization for calls marked tail is guaranteed to occur if +the following conditions are met:

+
    +
  • Caller and callee both have the calling convention fastcc or tailcc.

  • +
  • The call is in tail position (ret immediately follows call and ret +uses value of call or is void).

  • +
  • Option -tailcallopt is enabled, +llvm::GuaranteedTailCallOpt is true, or the calling convention +is tailcc.

  • +
  • Platform-specific constraints are +met.

  • +
+
+
+
    +
  1. The optional notail marker indicates that the optimizers should not add +tail or musttail markers to the call. It is used to prevent tail +call optimization from being performed on the call.

  2. +
  3. The optional fast-math flags marker indicates that the call has one or more +fast-math flags, which are optimization hints to enable +otherwise unsafe floating-point optimizations. Fast-math flags are only valid +for calls that return a floating-point scalar or vector type, or an array +(nested to any depth) of floating-point scalar or vector types.

  4. +
  5. The optional “cconv” marker indicates which calling +convention the call should use. If none is +specified, the call defaults to using C calling conventions. The +calling convention of the call must match the calling convention of +the target function, or else the behavior is undefined.

  6. +
  7. The optional Parameter Attributes list for return +values. Only ‘zeroext’, ‘signext’, and ‘inreg’ attributes +are valid here.

  8. +
  9. The optional addrspace attribute can be used to indicate the address space +of the called function. If it is not specified, the program address space +from the datalayout string will be used.

  10. +
  11. ‘ty’: the type of the call instruction itself which is also the +type of the return value. Functions that return no value are marked +void.

  12. +
  13. ‘fnty’: shall be the signature of the function being called. The +argument types must match the types implied by this signature. This +type can be omitted if the function is not varargs.

  14. +
  15. ‘fnptrval’: An LLVM value containing a pointer to a function to +be called. In most cases, this is a direct function call, but +indirect calls are just as possible, calling an arbitrary pointer +to function value.

  16. +
  17. ‘function args’: argument list whose types match the function +signature argument types and parameter attributes. All arguments must +be of first class type. If the function signature +indicates the function accepts a variable number of arguments, the +extra arguments can be specified.

  18. +
  19. The optional function attributes list.

  20. +
  21. The optional operand bundles list.

  22. +
+
+
+
Semantics:
+

The ‘call’ instruction is used to cause control flow to transfer to +a specified function, with its incoming arguments bound to the specified +values. Upon a ‘ret’ instruction in the called function, control +flow continues with the instruction after the function call, and the +return value of the function is bound to the result argument.

+
+
+
Example:
+
%retval = call i32 @test(i32 %argc)
+call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)        ; yields i32
+%X = tail call i32 @foo()                                    ; yields i32
+%Y = tail call fastcc i32 @foo()  ; yields i32
+call void %foo(i8 97 signext)
+
+%struct.A = type { i32, i8 }
+%r = call %struct.A @foo()                        ; yields { i32, i8 }
+%gr = extractvalue %struct.A %r, 0                ; yields i32
+%gr1 = extractvalue %struct.A %r, 1               ; yields i8
+%Z = call void @foo() noreturn                    ; indicates that %foo never returns normally
+%ZZ = call zeroext i32 @bar()                     ; Return value is %zero extended
+
+
+

LLVM treats calls to some functions with names and arguments that match +the standard C99 library as being the C99 library functions, and may +perform optimizations or generate code for them under that assumption. +This is something we’d like to change in the future to provide better +support for freestanding environments and non-C-based languages.

+
+
+
+

‘va_arg’ Instruction

+
+
Syntax:
+
<resultval> = va_arg <va_list*> <arglist>, <argty>
+
+
+
+
+
Overview:
+

The ‘va_arg’ instruction is used to access arguments passed through +the “variable argument” area of a function call. It is used to implement +the va_arg macro in C.

+
+
+
Arguments:
+

This instruction takes a va_list* value and the type of the +argument. It returns a value of the specified argument type and +increments the va_list to point to the next argument. The actual +type of va_list is target specific.

+
+
+
Semantics:
+

The ‘va_arg’ instruction loads an argument of the specified type +from the specified va_list and causes the va_list to point to +the next argument. For more information, see the variable argument +handling Intrinsic Functions.

+

It is legal for this instruction to be called in a function which does +not take a variable number of arguments, for example, the vfprintf +function.

+

va_arg is an LLVM instruction instead of an intrinsic +function because it takes a type as an argument.

+
+
+
Example:
+

See the variable argument processing section.

+

Note that the code generator does not yet fully support va_arg on many +targets. Also, it does not currently support va_arg with aggregate +types on any target.

+
+
+
+

‘landingpad’ Instruction

+
+
Syntax:
+
<resultval> = landingpad <resultty> <clause>+
+<resultval> = landingpad <resultty> cleanup <clause>*
+
+<clause> := catch <type> <value>
+<clause> := filter <array constant type> <array constant>
+
+
+
+
+
Overview:
+

The ‘landingpad’ instruction is used by LLVM’s exception handling +system to specify that a basic block +is a landing pad — one where the exception lands, and corresponds to the +code found in the catch portion of a try/catch sequence. It +defines values supplied by the personality function upon +re-entry to the function. The resultval has the type resultty.

+
+
+
Arguments:
+

The optional +cleanup flag indicates that the landing pad block is a cleanup.

+

A clause begins with the clause type — catch or filter — and +contains the global variable representing the “type” that may be caught +or filtered respectively. Unlike the catch clause, the filter +clause takes an array constant as its argument. Use +“[0 x i8**] undef” for a filter which cannot throw. The +‘landingpad’ instruction must contain at least one clause or +the cleanup flag.

+
+
+
Semantics:
+

The ‘landingpad’ instruction defines the values which are set by the +personality function upon re-entry to the function, and +therefore the “result type” of the landingpad instruction. As with +calling conventions, how the personality function results are +represented in LLVM IR is target specific.

+

The clauses are applied in order from top to bottom. If two +landingpad instructions are merged together through inlining, the +clauses from the calling function are appended to the list of clauses. +When the call stack is being unwound due to an exception being thrown, +the exception is compared against each clause in turn. If it doesn’t +match any of the clauses, and the cleanup flag is not set, then +unwinding continues further up the call stack.

+

The landingpad instruction has several restrictions:

+
    +
  • A landing pad block is a basic block which is the unwind destination +of an ‘invoke’ instruction.

  • +
  • A landing pad block must have a ‘landingpad’ instruction as its +first non-PHI instruction.

  • +
  • There can be only one ‘landingpad’ instruction within the landing +pad block.

  • +
  • A basic block that is not a landing pad block may not include a +‘landingpad’ instruction.

  • +
+
+
+
Example:
+
;; A landing pad which can catch an integer.
+%res = landingpad { i8*, i32 }
+         catch i8** @_ZTIi
+;; A landing pad that is a cleanup.
+%res = landingpad { i8*, i32 }
+         cleanup
+;; A landing pad which can catch an integer and can only throw a double.
+%res = landingpad { i8*, i32 }
+         catch i8** @_ZTIi
+         filter [1 x i8**] [@_ZTId]
+
+
+
+
+
+

‘catchpad’ Instruction

+
+
Syntax:
+
<resultval> = catchpad within <catchswitch> [<args>*]
+
+
+
+
+
Overview:
+

The ‘catchpad’ instruction is used by LLVM’s exception handling +system to specify that a basic block +begins a catch handler — one where a personality routine attempts to transfer +control to catch an exception.

+
+
+
Arguments:
+

The catchswitch operand must always be a token produced by a +catchswitch instruction in a predecessor block. This +ensures that each catchpad has exactly one predecessor block, and it always +terminates in a catchswitch.

+

The args correspond to whatever information the personality routine +requires to know if this is an appropriate handler for the exception. Control +will transfer to the catchpad if this is the first appropriate handler for +the exception.

+

The resultval has the type token and is used to match the +catchpad to corresponding catchrets and other nested EH +pads.

+
+
+
Semantics:
+

When the call stack is being unwound due to an exception being thrown, the +exception is compared against the args. If it doesn’t match, control will +not reach the catchpad instruction. The representation of args is +entirely target and personality function-specific.

+

Like the landingpad instruction, the catchpad +instruction must be the first non-phi of its parent basic block.

+

The meaning of the tokens produced and consumed by catchpad and other “pad” +instructions is described in the +Windows exception handling documentation.

+

When a catchpad has been “entered” but not yet “exited” (as +described in the EH documentation), +it is undefined behavior to execute a call or invoke +that does not carry an appropriate “funclet” bundle.

+
+
+
Example:
+
dispatch:
+  %cs = catchswitch within none [label %handler0] unwind to caller
+  ;; A catch block which can catch an integer.
+handler0:
+  %tok = catchpad within %cs [i8** @_ZTIi]
+
+
+
+
+
+

‘cleanuppad’ Instruction

+
+
Syntax:
+
<resultval> = cleanuppad within <parent> [<args>*]
+
+
+
+
+
Overview:
+

The ‘cleanuppad’ instruction is used by LLVM’s exception handling +system to specify that a basic block +is a cleanup block — one where a personality routine attempts to +transfer control to run cleanup actions. +The args correspond to whatever additional +information the personality function requires to +execute the cleanup. +The resultval has the type token and is used to +match the cleanuppad to corresponding cleanuprets. +The parent argument is the token of the funclet that contains the +cleanuppad instruction. If the cleanuppad is not inside a funclet, +this operand may be the token none.

+
+
+
Arguments:
+

The instruction takes a list of arbitrary values which are interpreted +by the personality function.

+
+
+
Semantics:
+

When the call stack is being unwound due to an exception being thrown, +the personality function transfers control to the +cleanuppad with the aid of the personality-specific arguments. +As with calling conventions, how the personality function results are +represented in LLVM IR is target specific.

+

The cleanuppad instruction has several restrictions:

+
    +
  • A cleanup block is a basic block which is the unwind destination of +an exceptional instruction.

  • +
  • A cleanup block must have a ‘cleanuppad’ instruction as its +first non-PHI instruction.

  • +
  • There can be only one ‘cleanuppad’ instruction within the +cleanup block.

  • +
  • A basic block that is not a cleanup block may not include a +‘cleanuppad’ instruction.

  • +
+

When a cleanuppad has been “entered” but not yet “exited” (as +described in the EH documentation), +it is undefined behavior to execute a call or invoke +that does not carry an appropriate “funclet” bundle.

+
+
+
Example:
+
%tok = cleanuppad within %cs []
+
+
+
+
+
+
+
+

Intrinsic Functions

+

LLVM supports the notion of an “intrinsic function”. These functions +have well known names and semantics and are required to follow certain +restrictions. Overall, these intrinsics represent an extension mechanism +for the LLVM language that does not require changing all of the +transformations in LLVM when adding to the language (or the bitcode +reader/writer, the parser, etc…).

+

Intrinsic function names must all start with an “llvm.” prefix. This +prefix is reserved in LLVM for intrinsic names; thus, function names may +not begin with this prefix. Intrinsic functions must always be external +functions: you cannot define the body of intrinsic functions. Intrinsic +functions may only be used in call or invoke instructions: it is illegal +to take the address of an intrinsic function. Additionally, because +intrinsic functions are part of the LLVM language, it is required if any +are added that they be documented here.

+

Some intrinsic functions can be overloaded, i.e., the intrinsic +represents a family of functions that perform the same operation but on +different data types. Because LLVM can represent over 8 million +different integer types, overloading is used commonly to allow an +intrinsic function to operate on any integer type. One or more of the +argument types or the result type can be overloaded to accept any +integer type. Argument types may also be defined as exactly matching a +previous argument’s type or the result type. This allows an intrinsic +function which accepts multiple arguments, but needs all of them to be +of the same type, to only be overloaded with respect to a single +argument or the result.

+

An overloaded intrinsic will have the names of its overloaded argument +types encoded into its function name, each preceded by a period. Only +those types which are overloaded result in a name suffix. Arguments +whose type is matched against another type do not. For example, the +llvm.ctpop function can take an integer of any width and returns an +integer of exactly the same integer width. This leads to a family of +functions such as i8 @llvm.ctpop.i8(i8 %val) and +i29 @llvm.ctpop.i29(i29 %val). Only one type, the return type, is +overloaded, and only one type suffix is required. Because the argument’s +type is matched against the return type, it does not require its own +name suffix.

+

Unnamed types are encoded as s_s. Overloaded intrinsics +that depend on an unnamed type in one of their overloaded argument types get an +additional .<number> suffix. This allows differentiating intrinsics with +different unnamed types as arguments. (For example: +llvm.ssa.copy.p0s_s.2(%42*)) The number is tracked in the LLVM module and +it ensures unique names in the module. While linking together two modules, it is +still possible to get a name clash. In that case one of the names will be +changed by getting a new number.

+

For target developers who are defining intrinsics for back-end code +generation, any intrinsic overloads based solely on the distinction between +integer or floating point types should not be relied upon for correct +code generation. In such cases, the recommended approach for target +maintainers when defining intrinsics is to create separate integer and +FP intrinsics rather than rely on overloading. For example, if different +codegen is required for llvm.target.foo(<4 x i32>) and +llvm.target.foo(<4 x float>) then these should be split into +different intrinsics.

+

To learn how to add an intrinsic function, please see the Extending +LLVM Guide.

+
+

Variable Argument Handling Intrinsics

+

Variable argument support is defined in LLVM with the +va_arg instruction and these three intrinsic +functions. These functions are related to the similarly named macros +defined in the <stdarg.h> header file.

+

All of these functions operate on arguments that use a target-specific +value type “va_list”. The LLVM assembly language reference manual +does not define what this type is, so all transformations should be +prepared to handle these functions regardless of the type used.

+

This example shows how the va_arg instruction and the +variable argument handling intrinsic functions are used.

+
; This struct is different for every platform. For most platforms,
+; it is merely an i8*.
+%struct.va_list = type { i8* }
+
+; For Unix x86_64 platforms, va_list is the following struct:
+; %struct.va_list = type { i32, i32, i8*, i8* }
+
+define i32 @test(i32 %X, ...) {
+  ; Initialize variable argument processing
+  %ap = alloca %struct.va_list
+  %ap2 = bitcast %struct.va_list* %ap to i8*
+  call void @llvm.va_start(i8* %ap2)
+
+  ; Read a single integer argument
+  %tmp = va_arg i8* %ap2, i32
+
+  ; Demonstrate usage of llvm.va_copy and llvm.va_end
+  %aq = alloca i8*
+  %aq2 = bitcast i8** %aq to i8*
+  call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+  call void @llvm.va_end(i8* %aq2)
+
+  ; Stop processing of arguments.
+  call void @llvm.va_end(i8* %ap2)
+  ret i32 %tmp
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+
+
+
+

‘llvm.va_start’ Intrinsic

+
+
Syntax:
+
declare void @llvm.va_start(i8* <arglist>)
+
+
+
+
+
Overview:
+

The ‘llvm.va_start’ intrinsic initializes *<arglist> for +subsequent use by va_arg.

+
+
+
Arguments:
+

The argument is a pointer to a va_list element to initialize.

+
+
+
Semantics:
+

The ‘llvm.va_start’ intrinsic works just like the va_start macro +available in C. In a target-dependent way, it initializes the +va_list element to which the argument points, so that the next call +to va_arg will produce the first variable argument passed to the +function. Unlike the C va_start macro, this intrinsic does not need +to know the last argument of the function as the compiler can figure +that out.

+
+
+
+

‘llvm.va_end’ Intrinsic

+
+
Syntax:
+
declare void @llvm.va_end(i8* <arglist>)
+
+
+
+
+
Overview:
+

The ‘llvm.va_end’ intrinsic destroys *<arglist>, which has been +initialized previously with llvm.va_start or llvm.va_copy.

+
+
+
Arguments:
+

The argument is a pointer to a va_list to destroy.

+
+
+
Semantics:
+

The ‘llvm.va_end’ intrinsic works just like the va_end macro +available in C. In a target-dependent way, it destroys the va_list +element to which the argument points. Calls to +llvm.va_start and +llvm.va_copy must be matched exactly with calls to +llvm.va_end.

+
+
+
+

‘llvm.va_copy’ Intrinsic

+
+
Syntax:
+
declare void @llvm.va_copy(i8* <destarglist>, i8* <srcarglist>)
+
+
+
+
+
Overview:
+

The ‘llvm.va_copy’ intrinsic copies the current argument position +from the source argument list to the destination argument list.

+
+
+
Arguments:
+

The first argument is a pointer to a va_list element to initialize. +The second argument is a pointer to a va_list element to copy from.

+
+
+
Semantics:
+

The ‘llvm.va_copy’ intrinsic works just like the va_copy macro +available in C. In a target-dependent way, it copies the source +va_list element into the destination va_list element. This +intrinsic is necessary because the llvm.va_start intrinsic may be +arbitrarily complex and require, for example, memory allocation.

+
+
+
+
+

Accurate Garbage Collection Intrinsics

+

LLVM’s support for Accurate Garbage Collection +(GC) requires the frontend to generate code containing appropriate intrinsic +calls and select an appropriate GC strategy which knows how to lower these +intrinsics in a manner which is appropriate for the target collector.

+

These intrinsics allow identification of GC roots on the +stack, as well as garbage collector implementations that +require read and write barriers. +Frontends for type-safe garbage collected languages should generate +these intrinsics to make use of the LLVM garbage collectors. For more +details, see Garbage Collection with LLVM.

+

LLVM provides a second experimental set of intrinsics for describing garbage +collection safepoints in compiled code. These intrinsics are an alternative +to the llvm.gcroot intrinsics, but are compatible with the ones for +read and write barriers. The +differences in approach are covered in the Garbage Collection with LLVM documentation. The intrinsics themselves are +described in Garbage Collection Safepoints in LLVM.

+
+

‘llvm.gcroot’ Intrinsic

+
+
Syntax:
+
declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
+
+
+
+
+
Overview:
+

The ‘llvm.gcroot’ intrinsic declares the existence of a GC root to +the code generator, and allows some metadata to be associated with it.

+
+
+
Arguments:
+

The first argument specifies the address of a stack object that contains +the root pointer. The second pointer (which must be either a constant or +a global value address) contains the meta-data to be associated with the +root.

+
+
+
Semantics:
+

At runtime, a call to this intrinsic stores a null pointer into the +“ptrloc” location. At compile-time, the code generator generates +information to allow the runtime to find the pointer at GC safe points. +The ‘llvm.gcroot’ intrinsic may only be used in a function which +specifies a GC algorithm.

+
+
+
+

‘llvm.gcread’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
+
+
+
+
+
Overview:
+

The ‘llvm.gcread’ intrinsic identifies reads of references from heap +locations, allowing garbage collector implementations that require read +barriers.

+
+
+
Arguments:
+

The second argument is the address to read from, which should be an +address allocated from the garbage collector. The first argument is a +pointer to the start of the referenced object, if needed by the language +runtime (otherwise null).

+
+
+
Semantics:
+

The ‘llvm.gcread’ intrinsic has the same semantics as a load +instruction, but may be replaced with substantially more complex code by +the garbage collector runtime, as needed. The ‘llvm.gcread’ +intrinsic may only be used in a function which specifies a GC +algorithm.

+
+
+
+

‘llvm.gcwrite’ Intrinsic

+
+
Syntax:
+
declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
+
+
+
+
+
Overview:
+

The ‘llvm.gcwrite’ intrinsic identifies writes of references to heap +locations, allowing garbage collector implementations that require write +barriers (such as generational or reference counting collectors).

+
+
+
Arguments:
+

The first argument is the reference to store, the second is the start of +the object to store it to, and the third is the address of the field of +Obj to store to. If the runtime does not require a pointer to the +object, Obj may be null.

+
+
+
Semantics:
+

The ‘llvm.gcwrite’ intrinsic has the same semantics as a store +instruction, but may be replaced with substantially more complex code by +the garbage collector runtime, as needed. The ‘llvm.gcwrite’ +intrinsic may only be used in a function which specifies a GC +algorithm.

+
+
+
+

‘llvm.experimental.gc.statepoint’ Intrinsic

+
+
Syntax:
+
declare token
+  @llvm.experimental.gc.statepoint(i64 <id>, i32 <num patch bytes>,
+                 func_type <target>,
+                 i64 <#call args>, i64 <flags>,
+                 ... (call parameters),
+                 i64 0, i64 0)
+
+
+
+
+
Overview:
+

The statepoint intrinsic represents a call which is parse-able by the +runtime.

+
+
+
Operands:
+

The ‘id’ operand is a constant integer that is reported as the ID +field in the generated stackmap. LLVM does not interpret this +parameter in any way and its meaning is up to the statepoint user to +decide. Note that LLVM is free to duplicate code containing +statepoint calls, and this may transform IR that had a unique ‘id’ per +lexical call to statepoint to IR that does not.

+

If ‘num patch bytes’ is non-zero then the call instruction +corresponding to the statepoint is not emitted and LLVM emits ‘num +patch bytes’ bytes of nops in its place. LLVM will emit code to +prepare the function arguments and retrieve the function return value +in accordance with the calling convention; the former before the nop +sequence and the latter after the nop sequence. It is expected that +the user will patch over the ‘num patch bytes’ bytes of nops with a +calling sequence specific to their runtime before executing the +generated machine code. There are no guarantees with respect to the +alignment of the nop sequence. Unlike Stack maps and patch points in LLVM, statepoints do +not have a concept of shadow bytes. Note that semantically the +statepoint still represents a call or invoke to ‘target’, and the nop +sequence after patching is expected to represent an operation +equivalent to a call or invoke to ‘target’.

+

The ‘target’ operand is the function actually being called. The +target can be specified as either a symbolic LLVM function, or as an +arbitrary Value of appropriate function type. Note that the function +type must match the signature of the callee and the types of the ‘call +parameters’ arguments.

+

The ‘#call args’ operand is the number of arguments to the actual +call. It must exactly match the number of arguments passed in the +‘call parameters’ variable length section.

+

The ‘flags’ operand is used to specify extra information about the +statepoint. This is currently only used to mark certain statepoints +as GC transitions. This operand is a 64-bit integer with the following +layout, where bit 0 is the least significant bit:

+
+
++++ + + + + + + + + + + + + + +

Bit #

Usage

0

Set if the statepoint is a GC transition, cleared +otherwise.

1-63

Reserved for future use; must be cleared.

+
+

The ‘call parameters’ arguments are simply the arguments which need to +be passed to the call target. They will be lowered according to the +specified calling convention and otherwise handled like a normal call +instruction. The number of arguments must exactly match what is +specified in ‘#call args’. The types must match the signature of +‘target’.

+

The ‘call parameter’ attributes must be followed by two ‘i64 0’ constants. +These were originally the length prefixes for ‘gc transition parameter’ and +‘deopt parameter’ arguments, but the role of these parameter sets has been +entirely replaced with the corresponding operand bundles. In a future +revision, these now redundant arguments will be removed.

+
+
+
Semantics:
+

A statepoint is assumed to read and write all memory. As a result, +memory operations can not be reordered past a statepoint. It is +illegal to mark a statepoint as being either ‘readonly’ or ‘readnone’.

+

Note that legal IR can not perform any memory operation on a ‘gc +pointer’ argument of the statepoint in a location statically reachable +from the statepoint. Instead, the explicitly relocated value (from a +gc.relocate) must be used.

+
+
+
+

‘llvm.experimental.gc.result’ Intrinsic

+
+
Syntax:
+
declare type*
+  @llvm.experimental.gc.result(token %statepoint_token)
+
+
+
+
+
Overview:
+

gc.result extracts the result of the original call instruction +which was replaced by the gc.statepoint. The gc.result +intrinsic is actually a family of three intrinsics due to an +implementation limitation. Other than the type of the return value, +the semantics are the same.

+
+
+
Operands:
+

The first and only argument is the gc.statepoint which starts +the safepoint sequence of which this gc.result is a part. +Despite the typing of this as a generic token, only the value defined +by a gc.statepoint is legal here.

+
+
+
Semantics:
+

The gc.result represents the return value of the call target of +the statepoint. The type of the gc.result must exactly match +the type of the target. If the call target returns void, there will +be no gc.result.

+

A gc.result is modeled as a ‘readnone’ pure function. It has no +side effects since it is just a projection of the return value of the +previous call represented by the gc.statepoint.

+
+
+
+

‘llvm.experimental.gc.relocate’ Intrinsic

+
+
Syntax:
+
declare <pointer type>
+  @llvm.experimental.gc.relocate(token %statepoint_token,
+                                 i32 %base_offset,
+                                 i32 %pointer_offset)
+
+
+
+
+
Overview:
+

A gc.relocate returns the potentially relocated value of a pointer +at the safepoint.

+
+
+
Operands:
+

The first argument is the gc.statepoint which starts the +safepoint sequence of which this gc.relocate is a part. +Despite the typing of this as a generic token, only the value defined +by a gc.statepoint is legal here.

+

The second and third arguments are both indices into operands of the +corresponding statepoint’s gc-live operand bundle.

+

The second argument is an index which specifies the allocation for the pointer +being relocated. The associated value must be within the object with which the +pointer being relocated is associated. The optimizer is free to change which +interior derived pointer is reported, provided that it does not replace an +actual base pointer with another interior derived pointer. Collectors are +allowed to rely on the base pointer operand remaining an actual base pointer if +so constructed.

+

The third argument is an index which specifies the (potentially) derived pointer +being relocated. It is legal for this index to be the same as the second +argument if-and-only-if a base pointer is being relocated.

+
+
+
Semantics:
+

The return value of gc.relocate is the potentially relocated value +of the pointer specified by its arguments. It is unspecified how the +value of the returned pointer relates to the argument to the +gc.statepoint other than that a) it points to the same source +language object with the same offset, and b) the ‘based-on’ +relationship of the newly relocated pointers is a projection of the +unrelocated pointers. In particular, the integer value of the pointer +returned is unspecified.

+

A gc.relocate is modeled as a readnone pure function. It has no +side effects since it is just a way to extract information about work +done during the actual call modeled by the gc.statepoint.

+
+
+
+

‘llvm.experimental.gc.get.pointer.base’ Intrinsic

+
+
Syntax:
+
declare <pointer type>
+  @llvm.experimental.gc.get.pointer.base(
+    <pointer type> readnone nocapture %derived_ptr)
+    nounwind readnone willreturn
+
+
+
+
+
Overview:
+

gc.get.pointer.base for a derived pointer returns its base pointer.

+
+
+
Operands:
+

The only argument is a pointer which is based on some object with +an unknown offset from the base of said object.

+
+
+
Semantics:
+

This intrinsic is used in the abstract machine model for GC to represent +the base pointer for an arbitrary derived pointer.

+

This intrinsic is inlined by the RewriteStatepointsForGC pass by +replacing all uses of this callsite with the base pointer value of the +derived pointer. The replacement is done as part of the lowering to the +explicit statepoint model.

+

The return pointer type must be the same as the type of the parameter.

+
+
+
+

‘llvm.experimental.gc.get.pointer.offset’ Intrinsic

+
+
Syntax:
+
declare i64
+  @llvm.experimental.gc.get.pointer.offset(
+    <pointer type> readnone nocapture %derived_ptr)
+    nounwind readnone willreturn
+
+
+
+
+
Overview:
+

gc.get.pointer.offset for a derived pointer returns the offset from its +base pointer.

+
+
+
Operands:
+

The only argument is a pointer which is based on some object with +an unknown offset from the base of said object.

+
+
+
Semantics:
+

This intrinsic is used in the abstract machine model for GC to represent +the offset of an arbitrary derived pointer from its base pointer.

+

This intrinsic is inlined by the RewriteStatepointsForGC pass by +replacing all uses of this callsite with the offset of a derived pointer from +its base pointer value. The replacement is done as part of the lowering to the +explicit statepoint model.

+

Basically, this call calculates the difference between the derived pointer and its +base pointer (see ‘llvm.experimental.gc.get.pointer.base’ Intrinsic), both cast with ptrtoint. However, +performing this cast outside the RewriteStatepointsForGC pass could result +in the pointers being lost for further lowering from the abstract model to the +explicit physical one.

+
+
+
+
+

Code Generator Intrinsics

+

These intrinsics are provided by LLVM to expose special features that +may only be implemented with code generator support.

+
+

‘llvm.returnaddress’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.returnaddress(i32 <level>)
+
+
+
+
+
Overview:
+

The ‘llvm.returnaddress’ intrinsic attempts to compute a +target-specific value indicating the return address of the current +function or one of its callers.

+
+
+
Arguments:
+

The argument to this intrinsic indicates which function to return the +address for. Zero indicates the calling function, one indicates its +caller, etc. The argument is required to be a constant integer +value.

+
+
+
Semantics:
+

The ‘llvm.returnaddress’ intrinsic either returns a pointer +indicating the return address of the specified call frame, or zero if it +cannot be identified. The value returned by this intrinsic is likely to +be incorrect or 0 for arguments other than zero, so it should only be +used for debugging purposes.

+

Note that calling this intrinsic does not prevent function inlining or +other aggressive transformations, so the value returned may not be that +of the obvious source-language caller.

+
+
+
+

‘llvm.addressofreturnaddress’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.addressofreturnaddress()
+
+
+
+
+
Overview:
+

The ‘llvm.addressofreturnaddress’ intrinsic returns a target-specific +pointer to the place in the stack frame where the return address of the +current function is stored.

+
+
+
Semantics:
+

Note that calling this intrinsic does not prevent function inlining or +other aggressive transformations, so the value returned may not be that +of the obvious source-language caller.

+

This intrinsic is only implemented for x86 and aarch64.

+
+
+
+

‘llvm.sponentry’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.sponentry()
+
+
+
+
+
Overview:
+

The ‘llvm.sponentry’ intrinsic returns the stack pointer value at +the entry of the current function calling this intrinsic.

+
+
+
Semantics:
+

Note this intrinsic is only verified on AArch64.

+
+
+
+

‘llvm.frameaddress’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.frameaddress(i32 <level>)
+
+
+
+
+
Overview:
+

The ‘llvm.frameaddress’ intrinsic attempts to return the +target-specific frame pointer value for the specified stack frame.

+
+
+
Arguments:
+

The argument to this intrinsic indicates which function to return the +frame pointer for. Zero indicates the calling function, one indicates +its caller, etc. The argument is required to be a constant integer +value.

+
+
+
Semantics:
+

The ‘llvm.frameaddress’ intrinsic either returns a pointer +indicating the frame address of the specified call frame, or zero if it +cannot be identified. The value returned by this intrinsic is likely to +be incorrect or 0 for arguments other than zero, so it should only be +used for debugging purposes.

+

Note that calling this intrinsic does not prevent function inlining or +other aggressive transformations, so the value returned may not be that +of the obvious source-language caller.

+
+
+
+

‘llvm.swift.async.context.addr’ Intrinsic

+
+
Syntax:
+
declare i8** @llvm.swift.async.context.addr()
+
+
+
+
+
Overview:
+

The ‘llvm.swift.async.context.addr’ intrinsic returns a pointer to +the part of the extended frame record containing the asynchronous +context of a Swift execution.

+
+
+
Semantics:
+

If the caller has a swiftasync parameter, that argument will initially +be stored at the returned address. If not, it will be initialized to null.

+
+
+
+

‘llvm.localescape’ and ‘llvm.localrecover’ Intrinsics

+
+
Syntax:
+
declare void @llvm.localescape(...)
+declare i8* @llvm.localrecover(i8* %func, i8* %fp, i32 %idx)
+
+
+
+
+
Overview:
+

The ‘llvm.localescape’ intrinsic escapes offsets of a collection of static +allocas, and the ‘llvm.localrecover’ intrinsic applies those offsets to a +live frame pointer to recover the address of the allocation. The offset is +computed during frame layout of the caller of llvm.localescape.

+
+
+
Arguments:
+

All arguments to ‘llvm.localescape’ must be pointers to static allocas or +casts of static allocas. Each function can only call ‘llvm.localescape’ +once, and it can only do so from the entry block.

+

The func argument to ‘llvm.localrecover’ must be a constant +bitcasted pointer to a function defined in the current module. The code +generator cannot determine the frame allocation offset of functions defined in +other modules.

+

The fp argument to ‘llvm.localrecover’ must be a frame pointer of a +call frame that is currently live. The return value of ‘llvm.localaddress’ +is one way to produce such a value, but various runtimes also expose a suitable +pointer in platform-specific ways.

+

The idx argument to ‘llvm.localrecover’ indicates which alloca passed to +‘llvm.localescape’ to recover. It is zero-indexed.

+
+
+
Semantics:
+

These intrinsics allow a group of functions to share access to a set of local +stack allocations of one parent function. The parent function may call the +‘llvm.localescape’ intrinsic once from the function entry block, and the +child functions can use ‘llvm.localrecover’ to access the escaped allocas. +The ‘llvm.localescape’ intrinsic blocks inlining, as inlining changes where +the escaped allocas are allocated, which would break attempts to use +‘llvm.localrecover’.

+
+
+
+

‘llvm.seh.try.begin’ and ‘llvm.seh.try.end’ Intrinsics

+
+
Syntax:
+
declare void @llvm.seh.try.begin()
+declare void @llvm.seh.try.end()
+
+
+
+
+
Overview:
+

The ‘llvm.seh.try.begin’ and ‘llvm.seh.try.end’ intrinsics mark +the boundary of a _try region for Windows SEH Asynchronous Exception Handling.

+
+
+
Semantics:
+

When a C-function is compiled with Windows SEH Asynchronous Exception option, +-feh_asynch (aka MSVC -EHa), these two intrinsics are injected to mark _try +boundary and to prevent potential exceptions from being moved across boundary. +Any set of operations can then be confined to the region by reading their leaf +inputs via volatile loads and writing their root outputs via volatile stores.

+
+
+
+

‘llvm.seh.scope.begin’ and ‘llvm.seh.scope.end’ Intrinsics

+
+
Syntax:
+
declare void @llvm.seh.scope.begin()
+declare void @llvm.seh.scope.end()
+
+
+
+
+
Overview:
+

The ‘llvm.seh.scope.begin’ and ‘llvm.seh.scope.end’ intrinsics mark +the boundary of a CPP object lifetime for Windows SEH Asynchronous Exception +Handling (MSVC option -EHa).

+
+
+
Semantics:
+

LLVM’s ordinary exception-handling representation associates EH cleanups and +handlers only with invokes, which normally correspond only to call sites.  To +support arbitrary faulting instructions, it must be possible to recover the current +EH scope for any instruction.  Turning every operation in LLVM that could fault +into an invoke of a new, potentially-throwing intrinsic would require adding a +large number of intrinsics, impede optimization of those operations, and make +compilation slower by introducing many extra basic blocks. These intrinsics can +be used instead to mark the region protected by a cleanup, such as for a local +C++ object with a non-trivial destructor. llvm.seh.scope.begin is used to mark +the start of the region; it is always called with invoke, with the unwind block +being the desired unwind destination for any potentially-throwing instructions +within the region. llvm.seh.scope.end is used to mark when the scope ends +and the EH cleanup is no longer required (e.g. because the destructor is being +called).

+
+
+
+

‘llvm.read_register’, ‘llvm.read_volatile_register’, and ‘llvm.write_register’ Intrinsics

+
+
Syntax:
+
declare i32 @llvm.read_register.i32(metadata)
+declare i64 @llvm.read_register.i64(metadata)
+declare i32 @llvm.read_volatile_register.i32(metadata)
+declare i64 @llvm.read_volatile_register.i64(metadata)
+declare void @llvm.write_register.i32(metadata, i32 @value)
+declare void @llvm.write_register.i64(metadata, i64 @value)
+!0 = !{!"sp\00"}
+
+
+
+
+
Overview:
+

The ‘llvm.read_register’, ‘llvm.read_volatile_register’, and +‘llvm.write_register’ intrinsics provide access to the named register. +The register must be valid on the architecture being compiled to. The type +needs to be compatible with the register being read.

+
+
+
Semantics:
+

The ‘llvm.read_register’ and ‘llvm.read_volatile_register’ intrinsics +return the current value of the register, where possible. The +‘llvm.write_register’ intrinsic sets the current value of the register, +where possible.

+

A call to ‘llvm.read_volatile_register’ is assumed to have side-effects +and possibly return a different value each time (e.g. for a timer register).

+

This is useful to implement named register global variables that need +to always be mapped to a specific register, as is common practice on +bare-metal programs including OS kernels.

+

The compiler doesn’t check for register availability or use of the used +register in surrounding code, including inline assembly. Because of that, +allocatable registers are not supported.

+

Warning: So far it only works with the stack pointer on selected +architectures (ARM, AArch64, PowerPC and x86_64). A significant amount of +work is needed to support other registers and even more so, allocatable +registers.

+
+
+
+

‘llvm.stacksave’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.stacksave()
+
+
+
+
+
Overview:
+

The ‘llvm.stacksave’ intrinsic is used to remember the current state +of the function stack, for use with +llvm.stackrestore. This is useful for +implementing language features like scoped automatic variable sized +arrays in C99.

+
+
+
Semantics:
+

This intrinsic returns an opaque pointer value that can be passed to +llvm.stackrestore. When an +llvm.stackrestore intrinsic is executed with a value saved from +llvm.stacksave, it effectively restores the state of the stack to +the state it was in when the llvm.stacksave intrinsic executed. In +practice, this pops any alloca blocks from the stack that +were allocated after the llvm.stacksave was executed.

+
+
+
+

‘llvm.stackrestore’ Intrinsic

+
+
Syntax:
+
declare void @llvm.stackrestore(i8* %ptr)
+
+
+
+
+
Overview:
+

The ‘llvm.stackrestore’ intrinsic is used to restore the state of +the function stack to the state it was in when the corresponding +llvm.stacksave intrinsic executed. This is +useful for implementing language features like scoped automatic variable +sized arrays in C99.

+
+
+
Semantics:
+

See the description for llvm.stacksave.

+
+
+
+

‘llvm.get.dynamic.area.offset’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.get.dynamic.area.offset.i32()
+declare i64 @llvm.get.dynamic.area.offset.i64()
+
+
+
+
+
Overview:
+
+

The ‘llvm.get.dynamic.area.offset.*’ intrinsic family is used to +get the offset from native stack pointer to the address of the most +recent dynamic alloca on the caller’s stack. These intrinsics are +intended for use in combination with +llvm.stacksave to get a +pointer to the most recent dynamic alloca. This is useful, for example, +for AddressSanitizer’s stack unpoisoning routines.

+
+
+
+
Semantics:
+
+

These intrinsics return a non-negative integer value that can be used to +get the address of the most recent dynamic alloca, allocated by alloca +on the caller’s stack. In particular, for targets where stack grows downwards, +adding this offset to the native stack pointer would get the address of the most +recent dynamic alloca. For targets where stack grows upwards, the situation is a bit more +complicated, because subtracting this value from stack pointer would get the address +one past the end of the most recent dynamic alloca.

+

Although for most targets llvm.get.dynamic.area.offset +returns just a zero, for others, such as PowerPC and PowerPC64, it returns a +compile-time-known constant value.

+

The return value type of llvm.get.dynamic.area.offset +must match the target’s default address space’s (address space 0) pointer type.

+
+
+
+
+

‘llvm.prefetch’ Intrinsic

+
+
Syntax:
+
declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>, i32 <cache type>)
+
+
+
+
+
Overview:
+

The ‘llvm.prefetch’ intrinsic is a hint to the code generator to +insert a prefetch instruction if supported; otherwise, it is a noop. +Prefetches have no effect on the behavior of the program but can change +its performance characteristics.

+
+
+
Arguments:
+

address is the address to be prefetched, rw is the specifier +determining if the fetch should be for a read (0) or write (1), and +locality is a temporal locality specifier ranging from (0) - no +locality, to (3) - extremely local keep in cache. The cache type +specifies whether the prefetch is performed on the data (1) or +instruction (0) cache. The rw, locality and cache type +arguments must be constant integers.

+
+
+
Semantics:
+

This intrinsic does not modify the behavior of the program. In +particular, prefetches cannot trap and do not produce a value. On +targets that support this intrinsic, the prefetch can provide hints to +the processor cache for better performance.

+
+
+
+

‘llvm.pcmarker’ Intrinsic

+
+
Syntax:
+
declare void @llvm.pcmarker(i32 <id>)
+
+
+
+
+
Overview:
+

The ‘llvm.pcmarker’ intrinsic is a method to export a Program +Counter (PC) in a region of code to simulators and other tools. The +method is target specific, but it is expected that the marker will use +exported symbols to transmit the PC of the marker. The marker makes no +guarantees that it will remain with any specific instruction after +optimizations. It is possible that the presence of a marker will inhibit +optimizations. The intended use is to be inserted after optimizations to +allow correlations of simulation runs.

+
+
+
Arguments:
+

id is a numerical id identifying the marker.

+
+
+
Semantics:
+

This intrinsic does not modify the behavior of the program. Backends +that do not support this intrinsic may ignore it.

+
+
+
+

‘llvm.readcyclecounter’ Intrinsic

+
+
Syntax:
+
declare i64 @llvm.readcyclecounter()
+
+
+
+
+
Overview:
+

The ‘llvm.readcyclecounter’ intrinsic provides access to the cycle +counter register (or similar low latency, high accuracy clocks) on those +targets that support it. On X86, it should map to RDTSC. On Alpha, it +should map to RPCC. As the backing counters overflow quickly (on the +order of 9 seconds on alpha), this should only be used for small +timings.

+
+
+
Semantics:
+

When directly supported, reading the cycle counter should not modify any +memory. Implementations are allowed to either return an application +specific value or a system wide value. On backends without support, this +is lowered to a constant 0.

+

Note that runtime support may be conditional on the privilege level the code is +running at and on the host platform.

+
+
+
+

‘llvm.clear_cache’ Intrinsic

+
+
Syntax:
+
declare void @llvm.clear_cache(i8*, i8*)
+
+
+
+
+
Overview:
+

The ‘llvm.clear_cache’ intrinsic ensures visibility of modifications +in the specified range to the execution unit of the processor. On +targets with non-unified instruction and data cache, the implementation +flushes the instruction cache.

+
+
+
Semantics:
+

On platforms with coherent instruction and data caches (e.g. x86), this +intrinsic is a nop. On platforms with non-coherent instruction and data +cache (e.g. ARM, MIPS), the intrinsic is lowered either to appropriate +instructions or a system call, if cache flushing requires special +privileges.

+

The default behavior is to emit a call to __clear_cache from the run +time library.

+

This intrinsic does not empty the instruction pipeline. Modifications +of the current function are outside the scope of the intrinsic.

+
+
+
+

‘llvm.instrprof.increment’ Intrinsic

+
+
Syntax:
+
declare void @llvm.instrprof.increment(i8* <name>, i64 <hash>,
+                                       i32 <num-counters>, i32 <index>)
+
+
+
+
+
Overview:
+

The ‘llvm.instrprof.increment’ intrinsic can be emitted by a +frontend for use with instrumentation based profiling. These will be +lowered by the -instrprof pass to generate execution counts of a +program at runtime.

+
+
+
Arguments:
+

The first argument is a pointer to a global variable containing the +name of the entity being instrumented. This should generally be the +(mangled) function name for a set of counters.

+

The second argument is a hash value that can be used by the consumer +of the profile data to detect changes to the instrumented source, and +the third is the number of counters associated with name. It is an +error if hash or num-counters differ between two instances of +instrprof.increment that refer to the same name.

+

The last argument refers to which of the counters for name should +be incremented. It should be a value between 0 and num-counters.

+
+
+
Semantics:
+

This intrinsic represents an increment of a profiling counter. It will +cause the -instrprof pass to generate the appropriate data +structures and the code to increment the appropriate value, in a +format that can be written out by a compiler runtime and consumed via +the llvm-profdata tool.

+
+
+
+

‘llvm.instrprof.increment.step’ Intrinsic

+
+
Syntax:
+
declare void @llvm.instrprof.increment.step(i8* <name>, i64 <hash>,
+                                            i32 <num-counters>,
+                                            i32 <index>, i64 <step>)
+
+
+
+
+
Overview:
+

The ‘llvm.instrprof.increment.step’ intrinsic is an extension to +the ‘llvm.instrprof.increment’ intrinsic with an additional fifth +argument to specify the step of the increment.

+
+
+
Arguments:
+

The first four arguments are the same as ‘llvm.instrprof.increment’ +intrinsic.

+

The last argument specifies the value of the increment of the counter variable.

+
+
+
Semantics:
+

See description of ‘llvm.instrprof.increment’ intrinsic.

+
+
+
+

‘llvm.instrprof.value.profile’ Intrinsic

+
+
Syntax:
+
declare void @llvm.instrprof.value.profile(i8* <name>, i64 <hash>,
+                                           i64 <value>, i32 <value_kind>,
+                                           i32 <index>)
+
+
+
+
+
Overview:
+

The ‘llvm.instrprof.value.profile’ intrinsic can be emitted by a +frontend for use with instrumentation based profiling. This will be +lowered by the -instrprof pass to find out the target values that +instrumented expressions take in a program at runtime.

+
+
+
Arguments:
+

The first argument is a pointer to a global variable containing the +name of the entity being instrumented. name should generally be the +(mangled) function name for a set of counters.

+

The second argument is a hash value that can be used by the consumer +of the profile data to detect changes to the instrumented source. It +is an error if hash differs between two instances of +llvm.instrprof.* that refer to the same name.

+

The third argument is the value of the expression being profiled. The profiled +expression’s value should be representable as an unsigned 64-bit value. The +fourth argument represents the kind of value profiling that is being done. The +supported value profiling kinds are enumerated through the +InstrProfValueKind type declared in the +<include/llvm/ProfileData/InstrProf.h> header file. The last argument is the +index of the instrumented expression within name. It should be >= 0.

+
+
+
Semantics:
+

This intrinsic represents the point where a call to a runtime routine +should be inserted for value profiling of target expressions. -instrprof +pass will generate the appropriate data structures and replace the +llvm.instrprof.value.profile intrinsic with the call to the profile +runtime library with proper arguments.

+
+
+
+

‘llvm.thread.pointer’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.thread.pointer()
+
+
+
+
+
Overview:
+

The ‘llvm.thread.pointer’ intrinsic returns the value of the thread +pointer.

+
+
+
Semantics:
+

The ‘llvm.thread.pointer’ intrinsic returns a pointer to the TLS area +for the current thread. The exact semantics of this value are target +specific: it may point to the start of TLS area, to the end, or somewhere +in the middle. Depending on the target, this intrinsic may read a register, +call a helper function, read from an alternate memory space, or perform +other operations necessary to locate the TLS area. Not all targets support +this intrinsic.

+
+
+
+

‘llvm.call.preallocated.setup’ Intrinsic

+
+
Syntax:
+
declare token @llvm.call.preallocated.setup(i32 %num_args)
+
+
+
+
+
Overview:
+

The ‘llvm.call.preallocated.setup’ intrinsic returns a token which can +be used with a call’s "preallocated" operand bundle to indicate that +certain arguments are allocated and initialized before the call.

+
+
+
Semantics:
+

The ‘llvm.call.preallocated.setup’ intrinsic returns a token which is +associated with at most one call. The token can be passed to +‘@llvm.call.preallocated.arg’ to get a pointer to the +corresponding argument. The token must be the parameter to a +"preallocated" operand bundle for the corresponding call.

+

Nested calls to ‘llvm.call.preallocated.setup’ are allowed, but must +be properly nested. e.g.

+

.. code-block:: llvm

+
+

%t1 = call token @llvm.call.preallocated.setup(i32 0) +%t2 = call token @llvm.call.preallocated.setup(i32 0) +call void foo() ["preallocated"(token %t2)] +call void foo() ["preallocated"(token %t1)]

+
+

is allowed, but not

+

.. code-block:: llvm

+
+

%t1 = call token @llvm.call.preallocated.setup(i32 0) +%t2 = call token @llvm.call.preallocated.setup(i32 0) +call void foo() ["preallocated"(token %t1)] +call void foo() ["preallocated"(token %t2)]

+
+
+
+
+

‘llvm.call.preallocated.arg’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.call.preallocated.arg(token %setup_token, i32 %arg_index)
+
+
+
+
+
Overview:
+

The ‘llvm.call.preallocated.arg’ intrinsic returns a pointer to the +corresponding preallocated argument for the preallocated call.

+
+
+
Semantics:
+

The ‘llvm.call.preallocated.arg’ intrinsic returns a pointer to the +%arg_index-th argument with the preallocated attribute for +the call associated with the %setup_token, which must be from +‘llvm.call.preallocated.setup’.

+

A call to ‘llvm.call.preallocated.arg’ must have a call site +preallocated attribute. The type of the preallocated attribute must +match the type used by the preallocated attribute of the corresponding +argument at the preallocated call. The type is used in the case that an +llvm.call.preallocated.setup does not have a corresponding call (e.g. due +to DCE), where otherwise we cannot know how large the arguments are.

+

It is undefined behavior if this is called with a token from an +‘llvm.call.preallocated.setup’ if another +‘llvm.call.preallocated.setup’ has already been called or if the +preallocated call corresponding to the ‘llvm.call.preallocated.setup’ +has already been called.

+
+
+
+

‘llvm.call.preallocated.teardown’ Intrinsic

+
+
Syntax:
+
declare void @llvm.call.preallocated.teardown(token %setup_token)
+
+
+
+
+
Overview:
+

The ‘llvm.call.preallocated.teardown’ intrinsic cleans up the stack +created by a ‘llvm.call.preallocated.setup’.

+
+
+
Semantics:
+

The token argument must be a ‘llvm.call.preallocated.setup’.

+

The ‘llvm.call.preallocated.teardown’ intrinsic cleans up the stack +allocated by the corresponding ‘llvm.call.preallocated.setup’. Exactly +one of this or the preallocated call must be called to prevent stack leaks. +It is undefined behavior to call both a ‘llvm.call.preallocated.teardown’ +and the preallocated call for a given ‘llvm.call.preallocated.setup’.

+

For example, if the stack is allocated for a preallocated call by a +‘llvm.call.preallocated.setup’, then an initializer function called on an +allocated argument throws an exception, there should be a +‘llvm.call.preallocated.teardown’ in the exception handler to prevent +stack leaks.

+

Following the nesting rules in ‘llvm.call.preallocated.setup’, nested +calls to ‘llvm.call.preallocated.setup’ and +‘llvm.call.preallocated.teardown’ are allowed but must be properly +nested.

+
+
+
Example:
+
    %cs = call token @llvm.call.preallocated.setup(i32 1)
+    %x = call i8* @llvm.call.preallocated.arg(token %cs, i32 0) preallocated(i32)
+    %y = bitcast i8* %x to i32*
+    invoke void @constructor(i32* %y) to label %conta unwind label %contb
+conta:
+    call void @foo1(i32* preallocated(i32) %y) ["preallocated"(token %cs)]
+    ret void
+contb:
+    %s = catchswitch within none [label %catch] unwind to caller
+catch:
+    %p = catchpad within %s []
+    call void @llvm.call.preallocated.teardown(token %cs)
+    ret void
+
+
+
+
+
+
+

Standard C/C++ Library Intrinsics

+

LLVM provides intrinsics for a few important standard C/C++ library +functions. These intrinsics allow source-language front-ends to pass +information about the alignment of the pointer arguments to the code +generator, providing opportunity for more efficient code generation.

+
+

‘llvm.abs.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.abs on any +integer bit width or any vector of integer elements.

+
declare i32 @llvm.abs.i32(i32 <src>, i1 <is_int_min_poison>)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32> <src>, i1 <is_int_min_poison>)
+
+
+
+
+
Overview:
+

The ‘llvm.abs’ family of intrinsic functions returns the absolute value +of an argument.

+
+
+
Arguments:
+

The first argument is the value for which the absolute value is to be returned. +This argument may be of any integer type or a vector with integer element type. +The return type must match the first argument type.

+

The second argument must be a constant and is a flag to indicate whether the +result value of the ‘llvm.abs’ intrinsic is a +poison value if the argument is statically or dynamically +an INT_MIN value.

+
+
+
Semantics:
+

The ‘llvm.abs’ intrinsic returns the magnitude (always positive) of the +argument or each element of a vector argument. If the argument is INT_MIN, +then the result is also INT_MIN if is_int_min_poison == 0 and +poison otherwise.

+
+
+
+

‘llvm.smax.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use @llvm.smax on any +integer bit width or any vector of integer elements.

+
declare i32 @llvm.smax.i32(i32 %a, i32 %b)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

Return the larger of %a and %b comparing the values as signed integers. +Vector intrinsics operate on a per-element basis. The larger element of %a +and %b at a given index is returned for that index.

+
+
+
Arguments:
+

The arguments (%a and %b) may be of any integer type or a vector with +integer element type. The argument types must match each other, and the return +type must match the argument type.

+
+
+
+

‘llvm.smin.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use @llvm.smin on any +integer bit width or any vector of integer elements.

+
declare i32 @llvm.smin.i32(i32 %a, i32 %b)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

Return the smaller of %a and %b comparing the values as signed integers. +Vector intrinsics operate on a per-element basis. The smaller element of %a +and %b at a given index is returned for that index.

+
+
+
Arguments:
+

The arguments (%a and %b) may be of any integer type or a vector with +integer element type. The argument types must match each other, and the return +type must match the argument type.

+
+
+
+

‘llvm.umax.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use @llvm.umax on any +integer bit width or any vector of integer elements.

+
declare i32 @llvm.umax.i32(i32 %a, i32 %b)
+declare <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

Return the larger of %a and %b comparing the values as unsigned +integers. Vector intrinsics operate on a per-element basis. The larger element +of %a and %b at a given index is returned for that index.

+
+
+
Arguments:
+

The arguments (%a and %b) may be of any integer type or a vector with +integer element type. The argument types must match each other, and the return +type must match the argument type.

+
+
+
+

‘llvm.umin.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use @llvm.umin on any +integer bit width or any vector of integer elements.

+
declare i32 @llvm.umin.i32(i32 %a, i32 %b)
+declare <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

Return the smaller of %a and %b comparing the values as unsigned +integers. Vector intrinsics operate on a per-element basis. The smaller element +of %a and %b at a given index is returned for that index.

+
+
+
Arguments:
+

The arguments (%a and %b) may be of any integer type or a vector with +integer element type. The argument types must match each other, and the return +type must match the argument type.

+
+
+
+

‘llvm.memcpy’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memcpy on any +integer bit width and for different address spaces. Not all targets +support all bit widths however.

+
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                        i32 <len>, i1 <isvolatile>)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+                                        i64 <len>, i1 <isvolatile>)
+
+
+
+
+
Overview:
+

The ‘llvm.memcpy.*’ intrinsics copy a block of memory from the +source location to the destination location.

+

Note that, unlike the standard libc function, the llvm.memcpy.* +intrinsics do not return a value, take an extra isvolatile +argument, and the pointers can be in specified address spaces.

+
+
+
Arguments:
+

The first argument is a pointer to the destination, the second is a +pointer to the source. The third argument is an integer argument +specifying the number of bytes to copy, and the fourth is a +boolean indicating a volatile access.

+

The align parameter attribute can be provided +for the first and second arguments.

+

If the isvolatile parameter is true, the llvm.memcpy call is +a volatile operation. The detailed access behavior is not +very cleanly specified and it is unwise to depend on it.

+
+
+
Semantics:
+

The ‘llvm.memcpy.*’ intrinsics copy a block of memory from the source +location to the destination location, which must either be equal or +non-overlapping. It copies “len” bytes of memory over. If the argument is known +to be aligned to some boundary, this can be specified as an attribute on the +argument.

+

If <len> is 0, it is no-op modulo the behavior of attributes attached to +the arguments. +If <len> is not a well-defined value, the behavior is undefined. +If <len> is not zero, both <dest> and <src> should be well-defined, +otherwise the behavior is undefined.

+
+
+
+

‘llvm.memcpy.inline’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memcpy.inline on any +integer bit width and for different address spaces. Not all targets +support all bit widths however.

+
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                               i32 <len>, i1 <isvolatile>)
+declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+                                               i64 <len>, i1 <isvolatile>)
+
+
+
+
+
Overview:
+

The ‘llvm.memcpy.inline.*’ intrinsics copy a block of memory from the +source location to the destination location and guarantees that no external +functions are called.

+

Note that, unlike the standard libc function, the llvm.memcpy.inline.* +intrinsics do not return a value, take an extra isvolatile +argument, and the pointers can be in specified address spaces.

+
+
+
Arguments:
+

The first argument is a pointer to the destination, the second is a +pointer to the source. The third argument is a constant integer argument +specifying the number of bytes to copy, and the fourth is a +boolean indicating a volatile access.

+

The align parameter attribute can be provided +for the first and second arguments.

+

If the isvolatile parameter is true, the llvm.memcpy.inline call is +a volatile operation. The detailed access behavior is not +very cleanly specified and it is unwise to depend on it.

+
+
+
Semantics:
+

The ‘llvm.memcpy.inline.*’ intrinsics copy a block of memory from the +source location to the destination location, which are not allowed to +overlap. It copies “len” bytes of memory over. If the argument is known +to be aligned to some boundary, this can be specified as an attribute on +the argument. +The behavior of ‘llvm.memcpy.inline.*’ is equivalent to the behavior of +‘llvm.memcpy.*’, but the generated code is guaranteed not to call any +external functions.

+
+
+
+

‘llvm.memmove’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memmove on any integer +bit width and for different address spaces. Not all targets support all +bit widths however.

+
declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                         i32 <len>, i1 <isvolatile>)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+                                         i64 <len>, i1 <isvolatile>)
+
+
+
+
+
Overview:
+

The ‘llvm.memmove.*’ intrinsics move a block of memory from the +source location to the destination location. It is similar to the +‘llvm.memcpy’ intrinsic but allows the two memory locations to +overlap.

+

Note that, unlike the standard libc function, the llvm.memmove.* +intrinsics do not return a value, take an extra isvolatile +argument, and the pointers can be in specified address spaces.

+
+
+
Arguments:
+

The first argument is a pointer to the destination, the second is a +pointer to the source. The third argument is an integer argument +specifying the number of bytes to copy, and the fourth is a +boolean indicating a volatile access.

+

The align parameter attribute can be provided +for the first and second arguments.

+

If the isvolatile parameter is true, the llvm.memmove call +is a volatile operation. The detailed access behavior is +not very cleanly specified and it is unwise to depend on it.

+
+
+
Semantics:
+

The ‘llvm.memmove.*’ intrinsics copy a block of memory from the +source location to the destination location, which may overlap. It +copies “len” bytes of memory over. If the argument is known to be +aligned to some boundary, this can be specified as an attribute on +the argument.

+

If <len> is 0, it is no-op modulo the behavior of attributes attached to +the arguments. +If <len> is not a well-defined value, the behavior is undefined. +If <len> is not zero, both <dest> and <src> should be well-defined, +otherwise the behavior is undefined.

+
+
+
+

llvm.memset.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memset on any integer +bit width and for different address spaces. However, not all targets +support all bit widths.

+
declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
+                                   i32 <len>, i1 <isvolatile>)
+declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
+                                   i64 <len>, i1 <isvolatile>)
+
+
+
+
+
Overview:
+

The ‘llvm.memset.*’ intrinsics fill a block of memory with a +particular byte value.

+

Note that, unlike the standard libc function, the llvm.memset +intrinsic does not return a value and takes an extra isvolatile +argument. Also, the destination can be in an arbitrary address space.

+
+
+
Arguments:
+

The first argument is a pointer to the destination to fill, the second +is the byte value with which to fill it, the third argument is an +integer argument specifying the number of bytes to fill, and the fourth +is a boolean indicating a volatile access.

+

The align parameter attribute can be provided +for the first argument.

+

If the isvolatile parameter is true, the llvm.memset call is +a volatile operation. The detailed access behavior is not +very cleanly specified and it is unwise to depend on it.

+
+
+
Semantics:
+

The ‘llvm.memset.*’ intrinsics fill “len” bytes of memory starting +at the destination location. If the argument is known to be +aligned to some boundary, this can be specified as an attribute on +the argument.

+

If <len> is 0, it is a no-op modulo the behavior of attributes attached to +the arguments. +If <len> is not a well-defined value, the behavior is undefined. +If <len> is not zero, <dest> should be well-defined, +otherwise the behavior is undefined.

+
+
+
+

llvm.sqrt.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.sqrt on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.sqrt.f32(float %Val)
+declare double    @llvm.sqrt.f64(double %Val)
+declare x86_fp80  @llvm.sqrt.f80(x86_fp80 %Val)
+declare fp128     @llvm.sqrt.f128(fp128 %Val)
+declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.sqrt’ intrinsics return the square root of the specified value.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘sqrt’ function but without +trapping or setting errno. For types specified by IEEE-754, the result +matches a conforming libm implementation.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.powi.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.powi on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+

Generally, the only supported type for the exponent is the one matching +the C type int.

+
declare float     @llvm.powi.f32.i32(float  %Val, i32 %power)
+declare double    @llvm.powi.f64.i16(double %Val, i16 %power)
+declare x86_fp80  @llvm.powi.f80.i32(x86_fp80  %Val, i32 %power)
+declare fp128     @llvm.powi.f128.i32(fp128 %Val, i32 %power)
+declare ppc_fp128 @llvm.powi.ppcf128.i32(ppc_fp128  %Val, i32 %power)
+
+
+
+
+
Overview:
+

The ‘llvm.powi.*’ intrinsics return the first operand raised to the +specified (positive or negative) power. The order of evaluation of +multiplications is not defined. When a vector of floating-point type is +used, the second argument remains a scalar integer value.

+
+
+
Arguments:
+

The second argument is an integer power, and the first is a value to +raise to that power.

+
+
+
Semantics:
+

This function returns the first value raised to the second power with an +unspecified sequence of rounding operations.

+
+
+
+

llvm.sin.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.sin on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.sin.f32(float  %Val)
+declare double    @llvm.sin.f64(double %Val)
+declare x86_fp80  @llvm.sin.f80(x86_fp80  %Val)
+declare fp128     @llvm.sin.f128(fp128 %Val)
+declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.sin.*’ intrinsics return the sine of the operand.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘sin’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.cos.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.cos on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.cos.f32(float  %Val)
+declare double    @llvm.cos.f64(double %Val)
+declare x86_fp80  @llvm.cos.f80(x86_fp80  %Val)
+declare fp128     @llvm.cos.f128(fp128 %Val)
+declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.cos.*’ intrinsics return the cosine of the operand.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘cos’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.pow.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.pow on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.pow.f32(float  %Val, float %Power)
+declare double    @llvm.pow.f64(double %Val, double %Power)
+declare x86_fp80  @llvm.pow.f80(x86_fp80  %Val, x86_fp80 %Power)
+declare fp128     @llvm.pow.f128(fp128 %Val, fp128 %Power)
+declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128  %Val, ppc_fp128 %Power)
+
+
+
+
+
Overview:
+

The ‘llvm.pow.*’ intrinsics return the first operand raised to the +specified (positive or negative) power.

+
+
+
Arguments:
+

The arguments and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘pow’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.exp.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.exp on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.exp.f32(float  %Val)
+declare double    @llvm.exp.f64(double %Val)
+declare x86_fp80  @llvm.exp.f80(x86_fp80  %Val)
+declare fp128     @llvm.exp.f128(fp128 %Val)
+declare ppc_fp128 @llvm.exp.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.exp.*’ intrinsics compute the base-e exponential of the specified +value.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘exp’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.exp2.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.exp2 on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.exp2.f32(float  %Val)
+declare double    @llvm.exp2.f64(double %Val)
+declare x86_fp80  @llvm.exp2.f80(x86_fp80  %Val)
+declare fp128     @llvm.exp2.f128(fp128 %Val)
+declare ppc_fp128 @llvm.exp2.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.exp2.*’ intrinsics compute the base-2 exponential of the +specified value.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘exp2’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.log.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.log on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.log.f32(float  %Val)
+declare double    @llvm.log.f64(double %Val)
+declare x86_fp80  @llvm.log.f80(x86_fp80  %Val)
+declare fp128     @llvm.log.f128(fp128 %Val)
+declare ppc_fp128 @llvm.log.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.log.*’ intrinsics compute the base-e logarithm of the specified +value.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘log’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.log10.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.log10 on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.log10.f32(float  %Val)
+declare double    @llvm.log10.f64(double %Val)
+declare x86_fp80  @llvm.log10.f80(x86_fp80  %Val)
+declare fp128     @llvm.log10.f128(fp128 %Val)
+declare ppc_fp128 @llvm.log10.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.log10.*’ intrinsics compute the base-10 logarithm of the +specified value.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘log10’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.log2.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.log2 on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.log2.f32(float  %Val)
+declare double    @llvm.log2.f64(double %Val)
+declare x86_fp80  @llvm.log2.f80(x86_fp80  %Val)
+declare fp128     @llvm.log2.f128(fp128 %Val)
+declare ppc_fp128 @llvm.log2.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.log2.*’ intrinsics compute the base-2 logarithm of the specified +value.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘log2’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.fma.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.fma on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.fma.f32(float  %a, float  %b, float  %c)
+declare double    @llvm.fma.f64(double %a, double %b, double %c)
+declare x86_fp80  @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
+declare fp128     @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
+declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
+
+
+
+
+
Overview:
+

The ‘llvm.fma.*’ intrinsics perform the fused multiply-add operation.

+
+
+
Arguments:
+

The arguments and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

Return the same value as a corresponding libm ‘fma’ function but without +trapping or setting errno.

+

When specified with the fast-math-flag ‘afn’, the result may be approximated +using a less accurate calculation.

+
+
+
+

llvm.fabs.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.fabs on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.fabs.f32(float  %Val)
+declare double    @llvm.fabs.f64(double %Val)
+declare x86_fp80  @llvm.fabs.f80(x86_fp80 %Val)
+declare fp128     @llvm.fabs.f128(fp128 %Val)
+declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.fabs.*’ intrinsics return the absolute value of the +operand.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm fabs functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.minnum.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.minnum on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.minnum.f32(float %Val0, float %Val1)
+declare double    @llvm.minnum.f64(double %Val0, double %Val1)
+declare x86_fp80  @llvm.minnum.f80(x86_fp80 %Val0, x86_fp80 %Val1)
+declare fp128     @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)
+declare ppc_fp128 @llvm.minnum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1)
+
+
+
+
+
Overview:
+

The ‘llvm.minnum.*’ intrinsics return the minimum of the two +arguments.

+
+
+
Arguments:
+

The arguments and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

Follows the IEEE-754 semantics for minNum, except for handling of +signaling NaNs. This matches the behavior of libm’s fmin.

+

If either operand is a NaN, returns the other non-NaN operand. Returns +NaN only if both operands are NaN. The returned NaN is always +quiet. If the operands compare equal, returns a value that compares +equal to both operands. This means that fmin(+/-0.0, +/-0.0) could +return either -0.0 or 0.0.

+

Unlike the IEEE-754 2008 behavior, this does not distinguish between +signaling and quiet NaN inputs. If a target’s implementation follows +the standard and returns a quiet NaN if either input is a signaling +NaN, the intrinsic lowering is responsible for quieting the inputs to +correctly return the non-NaN input (e.g. by using the equivalent of +llvm.canonicalize).

+
+
+
+

llvm.maxnum.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.maxnum on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.maxnum.f32(float  %Val0, float  %Val1)
+declare double    @llvm.maxnum.f64(double %Val0, double %Val1)
+declare x86_fp80  @llvm.maxnum.f80(x86_fp80  %Val0, x86_fp80  %Val1)
+declare fp128     @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)
+declare ppc_fp128 @llvm.maxnum.ppcf128(ppc_fp128  %Val0, ppc_fp128  %Val1)
+
+
+
+
+
Overview:
+

The ‘llvm.maxnum.*’ intrinsics return the maximum of the two +arguments.

+
+
+
Arguments:
+

The arguments and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

Follows the IEEE-754 semantics for maxNum except for the handling of +signaling NaNs. This matches the behavior of libm’s fmax.

+

If either operand is a NaN, returns the other non-NaN operand. Returns +NaN only if both operands are NaN. The returned NaN is always +quiet. If the operands compare equal, returns a value that compares +equal to both operands. This means that fmax(+/-0.0, +/-0.0) could +return either -0.0 or 0.0.

+

Unlike the IEEE-754 2008 behavior, this does not distinguish between +signaling and quiet NaN inputs. If a target’s implementation follows +the standard and returns a quiet NaN if either input is a signaling +NaN, the intrinsic lowering is responsible for quieting the inputs to +correctly return the non-NaN input (e.g. by using the equivalent of +llvm.canonicalize).

+
+
+
+

llvm.minimum.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.minimum on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.minimum.f32(float %Val0, float %Val1)
+declare double    @llvm.minimum.f64(double %Val0, double %Val1)
+declare x86_fp80  @llvm.minimum.f80(x86_fp80 %Val0, x86_fp80 %Val1)
+declare fp128     @llvm.minimum.f128(fp128 %Val0, fp128 %Val1)
+declare ppc_fp128 @llvm.minimum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1)
+
+
+
+
+
Overview:
+

The ‘llvm.minimum.*’ intrinsics return the minimum of the two +arguments, propagating NaNs and treating -0.0 as less than +0.0.

+
+
+
Arguments:
+

The arguments and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

If either operand is a NaN, returns NaN. Otherwise returns the lesser +of the two arguments. -0.0 is considered to be less than +0.0 for this +intrinsic. Note that these are the semantics specified in the draft of +IEEE 754-2018.

+
+
+
+

llvm.maximum.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.maximum on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.maximum.f32(float %Val0, float %Val1)
+declare double    @llvm.maximum.f64(double %Val0, double %Val1)
+declare x86_fp80  @llvm.maximum.f80(x86_fp80 %Val0, x86_fp80 %Val1)
+declare fp128     @llvm.maximum.f128(fp128 %Val0, fp128 %Val1)
+declare ppc_fp128 @llvm.maximum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1)
+
+
+
+
+
Overview:
+

The ‘llvm.maximum.*’ intrinsics return the maximum of the two +arguments, propagating NaNs and treating -0.0 as less than +0.0.

+
+
+
Arguments:
+

The arguments and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

If either operand is a NaN, returns NaN. Otherwise returns the greater +of the two arguments. -0.0 is considered to be less than +0.0 for this +intrinsic. Note that these are the semantics specified in the draft of +IEEE 754-2018.

+
+
+
+

llvm.copysign.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.copysign on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.copysign.f32(float  %Mag, float  %Sgn)
+declare double    @llvm.copysign.f64(double %Mag, double %Sgn)
+declare x86_fp80  @llvm.copysign.f80(x86_fp80  %Mag, x86_fp80  %Sgn)
+declare fp128     @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)
+declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128  %Mag, ppc_fp128  %Sgn)
+
+
+
+
+
Overview:
+

The ‘llvm.copysign.*’ intrinsics return a value with the magnitude of the +first operand and the sign of the second operand.

+
+
+
Arguments:
+

The arguments and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm copysign +functions would, and handles error conditions in the same way.

+
+
+
+

llvm.floor.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.floor on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.floor.f32(float  %Val)
+declare double    @llvm.floor.f64(double %Val)
+declare x86_fp80  @llvm.floor.f80(x86_fp80  %Val)
+declare fp128     @llvm.floor.f128(fp128 %Val)
+declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.floor.*’ intrinsics return the floor of the operand.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm floor functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.ceil.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.ceil on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.ceil.f32(float  %Val)
+declare double    @llvm.ceil.f64(double %Val)
+declare x86_fp80  @llvm.ceil.f80(x86_fp80  %Val)
+declare fp128     @llvm.ceil.f128(fp128 %Val)
+declare ppc_fp128 @llvm.ceil.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.ceil.*’ intrinsics return the ceiling of the operand.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm ceil functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.trunc.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.trunc on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.trunc.f32(float  %Val)
+declare double    @llvm.trunc.f64(double %Val)
+declare x86_fp80  @llvm.trunc.f80(x86_fp80  %Val)
+declare fp128     @llvm.trunc.f128(fp128 %Val)
+declare ppc_fp128 @llvm.trunc.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.trunc.*’ intrinsics return the operand rounded to the +nearest integer not larger in magnitude than the operand.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm trunc functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.rint.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.rint on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.rint.f32(float  %Val)
+declare double    @llvm.rint.f64(double %Val)
+declare x86_fp80  @llvm.rint.f80(x86_fp80  %Val)
+declare fp128     @llvm.rint.f128(fp128 %Val)
+declare ppc_fp128 @llvm.rint.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.rint.*’ intrinsics return the operand rounded to the +nearest integer. It may raise an inexact floating-point exception if the +operand isn’t an integer.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm rint functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.nearbyint.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.nearbyint on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.nearbyint.f32(float  %Val)
+declare double    @llvm.nearbyint.f64(double %Val)
+declare x86_fp80  @llvm.nearbyint.f80(x86_fp80  %Val)
+declare fp128     @llvm.nearbyint.f128(fp128 %Val)
+declare ppc_fp128 @llvm.nearbyint.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.nearbyint.*’ intrinsics return the operand rounded to the +nearest integer.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm nearbyint +functions would, and handles error conditions in the same way.

+
+
+
+

llvm.round.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.round on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.round.f32(float  %Val)
+declare double    @llvm.round.f64(double %Val)
+declare x86_fp80  @llvm.round.f80(x86_fp80  %Val)
+declare fp128     @llvm.round.f128(fp128 %Val)
+declare ppc_fp128 @llvm.round.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.round.*’ intrinsics return the operand rounded to the +nearest integer.

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same +type.

+
+
+
Semantics:
+

This function returns the same values as the libm round +functions would, and handles error conditions in the same way.

+
+
+
+

llvm.roundeven.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.roundeven on any +floating-point or vector of floating-point type. Not all targets support +all types however.

+
declare float     @llvm.roundeven.f32(float  %Val)
+declare double    @llvm.roundeven.f64(double %Val)
+declare x86_fp80  @llvm.roundeven.f80(x86_fp80  %Val)
+declare fp128     @llvm.roundeven.f128(fp128 %Val)
+declare ppc_fp128 @llvm.roundeven.ppcf128(ppc_fp128  %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.roundeven.*’ intrinsics return the operand rounded to the nearest +integer in floating-point format rounding halfway cases to even (that is, to the +nearest value that is an even integer).

+
+
+
Arguments:
+

The argument and return value are floating-point numbers of the same type.

+
+
+
Semantics:
+

This function implements IEEE-754 operation roundToIntegralTiesToEven. It +also behaves in the same way as C standard function roundeven, except that +it does not raise floating point exceptions.

+
+
+
+

llvm.lround.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.lround on any +floating-point type. Not all targets support all types however.

+
declare i32 @llvm.lround.i32.f32(float %Val)
+declare i32 @llvm.lround.i32.f64(double %Val)
+declare i32 @llvm.lround.i32.f80(x86_fp80 %Val)
+declare i32 @llvm.lround.i32.f128(fp128 %Val)
+declare i32 @llvm.lround.i32.ppcf128(ppc_fp128 %Val)
+
+declare i64 @llvm.lround.i64.f32(float %Val)
+declare i64 @llvm.lround.i64.f64(double %Val)
+declare i64 @llvm.lround.i64.f80(x86_fp80 %Val)
+declare i64 @llvm.lround.i64.f128(fp128 %Val)
+declare i64 @llvm.lround.i64.ppcf128(ppc_fp128 %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.lround.*’ intrinsics return the operand rounded to the nearest +integer with ties away from zero.

+
+
+
Arguments:
+

The argument is a floating-point number and the return value is an integer +type.

+
+
+
Semantics:
+

This function returns the same values as the libm lround +functions would, but without setting errno.

+
+
+
+

llvm.llround.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.llround on any +floating-point type. Not all targets support all types however.

+
declare i64 @llvm.llround.i64.f32(float %Val)
+declare i64 @llvm.llround.i64.f64(double %Val)
+declare i64 @llvm.llround.i64.f80(x86_fp80 %Val)
+declare i64 @llvm.llround.i64.f128(fp128 %Val)
+declare i64 @llvm.llround.i64.ppcf128(ppc_fp128 %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.llround.*’ intrinsics return the operand rounded to the nearest +integer with ties away from zero.

+
+
+
Arguments:
+

The argument is a floating-point number and the return value is an integer +type.

+
+
+
Semantics:
+

This function returns the same values as the libm llround +functions would, but without setting errno.

+
+
+
+

llvm.lrint.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.lrint on any +floating-point type. Not all targets support all types however.

+
declare i32 @llvm.lrint.i32.f32(float %Val)
+declare i32 @llvm.lrint.i32.f64(double %Val)
+declare i32 @llvm.lrint.i32.f80(x86_fp80 %Val)
+declare i32 @llvm.lrint.i32.f128(fp128 %Val)
+declare i32 @llvm.lrint.i32.ppcf128(ppc_fp128 %Val)
+
+declare i64 @llvm.lrint.i64.f32(float %Val)
+declare i64 @llvm.lrint.i64.f64(double %Val)
+declare i64 @llvm.lrint.i64.f80(x86_fp80 %Val)
+declare i64 @llvm.lrint.i64.f128(fp128 %Val)
+declare i64 @llvm.lrint.i64.ppcf128(ppc_fp128 %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.lrint.*’ intrinsics return the operand rounded to the nearest +integer.

+
+
+
Arguments:
+

The argument is a floating-point number and the return value is an integer +type.

+
+
+
Semantics:
+

This function returns the same values as the libm lrint +functions would, but without setting errno.

+
+
+
+

llvm.llrint.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.llrint on any +floating-point type. Not all targets support all types however.

+
declare i64 @llvm.llrint.i64.f32(float %Val)
+declare i64 @llvm.llrint.i64.f64(double %Val)
+declare i64 @llvm.llrint.i64.f80(x86_fp80 %Val)
+declare i64 @llvm.llrint.i64.f128(fp128 %Val)
+declare i64 @llvm.llrint.i64.ppcf128(ppc_fp128 %Val)
+
+
+
+
+
Overview:
+

The ‘llvm.llrint.*’ intrinsics return the operand rounded to the nearest +integer.

+
+
+
Arguments:
+

The argument is a floating-point number and the return value is an integer +type.

+
+
+
Semantics:
+

This function returns the same values as the libm llrint +functions would, but without setting errno.

+
+
+
+
+

Bit Manipulation Intrinsics

+

LLVM provides intrinsics for a few important bit manipulation +operations. These allow efficient code generation for some algorithms.

+
+

llvm.bitreverse.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic function. You can use bitreverse on any +integer type.

+
declare i16 @llvm.bitreverse.i16(i16 <id>)
+declare i32 @llvm.bitreverse.i32(i32 <id>)
+declare i64 @llvm.bitreverse.i64(i64 <id>)
+declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> <id>)
+
+
+
+
+
Overview:
+

The ‘llvm.bitreverse’ family of intrinsics is used to reverse the +bitpattern of an integer value or vector of integer values; for example +0b10110110 becomes 0b01101101.

+
+
+
Semantics:
+

The llvm.bitreverse.iN intrinsic returns an iN value that has bit +M in the input moved to bit N-M-1 in the output. The vector +intrinsics, such as llvm.bitreverse.v4i32, operate on a per-element +basis and the element order is not affected.

+
+
+
+

llvm.bswap.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic function. You can use bswap on any +integer type that is an even number of bytes (i.e. BitWidth % 16 == 0).

+
declare i16 @llvm.bswap.i16(i16 <id>)
+declare i32 @llvm.bswap.i32(i32 <id>)
+declare i64 @llvm.bswap.i64(i64 <id>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32> <id>)
+
+
+
+
+
Overview:
+

The ‘llvm.bswap’ family of intrinsics is used to byte swap an integer +value or vector of integer values with an even number of bytes (positive +multiple of 16 bits).

+
+
+
Semantics:
+

The llvm.bswap.i16 intrinsic returns an i16 value that has the high +and low byte of the input i16 swapped. Similarly, the llvm.bswap.i32 +intrinsic returns an i32 value that has the four bytes of the input i32 +swapped, so that if the input bytes are numbered 0, 1, 2, 3 then the +returned i32 will have its bytes in 3, 2, 1, 0 order. The +llvm.bswap.i48, llvm.bswap.i64 and other intrinsics extend this +concept to additional even-byte lengths (6 bytes, 8 bytes and more, +respectively). The vector intrinsics, such as llvm.bswap.v4i32, +operate on a per-element basis and the element order is not affected.

+
+
+
+

llvm.ctpop.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.ctpop on any integer +bit width, or on any vector with integer elements. Not all targets +support all bit widths or vector types, however.

+
declare i8 @llvm.ctpop.i8(i8  <src>)
+declare i16 @llvm.ctpop.i16(i16 <src>)
+declare i32 @llvm.ctpop.i32(i32 <src>)
+declare i64 @llvm.ctpop.i64(i64 <src>)
+declare i256 @llvm.ctpop.i256(i256 <src>)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32> <src>)
+
+
+
+
+
Overview:
+

The ‘llvm.ctpop’ family of intrinsics counts the number of bits set +in a value.

+
+
+
Arguments:
+

The only argument is the value to be counted. The argument may be of any +integer type, or a vector with integer elements. The return type must +match the argument type.

+
+
+
Semantics:
+

The ‘llvm.ctpop’ intrinsic counts the 1’s in a variable, or within +each element of a vector.

+
+
+
+

llvm.ctlz.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.ctlz on any +integer bit width, or any vector whose elements are integers. Not all +targets support all bit widths or vector types, however.

+
declare i8   @llvm.ctlz.i8  (i8   <src>, i1 <is_zero_undef>)
+declare i16  @llvm.ctlz.i16 (i16  <src>, i1 <is_zero_undef>)
+declare i32  @llvm.ctlz.i32 (i32  <src>, i1 <is_zero_undef>)
+declare i64  @llvm.ctlz.i64 (i64  <src>, i1 <is_zero_undef>)
+declare i256 @llvm.ctlz.i256(i256 <src>, i1 <is_zero_undef>)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+
+
+
+
+
Overview:
+

The ‘llvm.ctlz’ family of intrinsic functions counts the number of +leading zeros in a variable.

+
+
+
Arguments:
+

The first argument is the value to be counted. This argument may be of +any integer type, or a vector with integer element type. The return +type must match the first argument type.

+

The second argument must be a constant and is a flag to indicate whether +the intrinsic should ensure that a zero as the first argument produces a +defined result. Historically some architectures did not provide a +defined result for zero values as efficiently, and many algorithms are +now predicated on avoiding zero-value inputs.

+
+
+
Semantics:
+

The ‘llvm.ctlz’ intrinsic counts the leading (most significant) +zeros in a variable, or within each element of the vector. If +src == 0 then the result is the size in bits of the type of src +if is_zero_undef == 0 and undef otherwise. For example, +llvm.ctlz(i32 2) = 30.

+
+
+
+

‘llvm.cttz.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.cttz on any +integer bit width, or any vector of integer elements. Not all targets +support all bit widths or vector types, however.

+
declare i8   @llvm.cttz.i8  (i8   <src>, i1 <is_zero_undef>)
+declare i16  @llvm.cttz.i16 (i16  <src>, i1 <is_zero_undef>)
+declare i32  @llvm.cttz.i32 (i32  <src>, i1 <is_zero_undef>)
+declare i64  @llvm.cttz.i64 (i64  <src>, i1 <is_zero_undef>)
+declare i256 @llvm.cttz.i256(i256 <src>, i1 <is_zero_undef>)
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+
+
+
+
+
Overview:
+

The ‘llvm.cttz’ family of intrinsic functions counts the number of +trailing zeros.

+
+
+
Arguments:
+

The first argument is the value to be counted. This argument may be of +any integer type, or a vector with integer element type. The return +type must match the first argument type.

+

The second argument must be a constant and is a flag to indicate whether +the intrinsic should ensure that a zero as the first argument produces a +defined result. Historically some architectures did not provide a +defined result for zero values as efficiently, and many algorithms are +now predicated on avoiding zero-value inputs.

+
+
+
Semantics:
+

The ‘llvm.cttz’ intrinsic counts the trailing (least significant) +zeros in a variable, or within each element of a vector. If src == 0 +then the result is the size in bits of the type of src if +is_zero_undef == 0 and undef otherwise. For example, +llvm.cttz(2) = 1.

+
+
+
+

‘llvm.fshl.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.fshl on any +integer bit width or any vector of integer elements. Not all targets +support all bit widths or vector types, however.

+
declare i8  @llvm.fshl.i8 (i8 %a, i8 %b, i8 %c)
+declare i67 @llvm.fshl.i67(i67 %a, i67 %b, i67 %c)
+declare <2 x i32> @llvm.fshl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
+
+
+
+
+
Overview:
+

The ‘llvm.fshl’ family of intrinsic functions performs a funnel shift left: +the first two values are concatenated as { %a : %b } (%a is the most significant +bits of the wide value), the combined value is shifted left, and the most +significant bits are extracted to produce a result that is the same size as the +original arguments. If the first 2 arguments are identical, this is equivalent +to a rotate left operation. For vector types, the operation occurs for each +element of the vector. The shift argument is treated as an unsigned amount +modulo the element size of the arguments.

+
+
+
Arguments:
+

The first two arguments are the values to be concatenated. The third +argument is the shift amount. The arguments may be any integer type or a +vector with integer element type. All arguments and the return value must +have the same type.

+
+
+
Example:
+
%r = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %z)  ; %r = i8: msb_extract((concat(x, y) << (z % 8)), 8)
+%r = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)  ; %r = i8: 128 (0b10000000)
+%r = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)  ; %r = i8: 120 (0b01111000)
+%r = call i8 @llvm.fshl.i8(i8 0, i8 255, i8 8)   ; %r = i8: 0   (0b00000000)
+
+
+
+
+
+

‘llvm.fshr.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.fshr on any +integer bit width or any vector of integer elements. Not all targets +support all bit widths or vector types, however.

+
declare i8  @llvm.fshr.i8 (i8 %a, i8 %b, i8 %c)
+declare i67 @llvm.fshr.i67(i67 %a, i67 %b, i67 %c)
+declare <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
+
+
+
+
+
Overview:
+

The ‘llvm.fshr’ family of intrinsic functions performs a funnel shift right: +the first two values are concatenated as { %a : %b } (%a is the most significant +bits of the wide value), the combined value is shifted right, and the least +significant bits are extracted to produce a result that is the same size as the +original arguments. If the first 2 arguments are identical, this is equivalent +to a rotate right operation. For vector types, the operation occurs for each +element of the vector. The shift argument is treated as an unsigned amount +modulo the element size of the arguments.

+
+
+
Arguments:
+

The first two arguments are the values to be concatenated. The third +argument is the shift amount. The arguments may be any integer type or a +vector with integer element type. All arguments and the return value must +have the same type.

+
+
+
Example:
+
%r = call i8 @llvm.fshr.i8(i8 %x, i8 %y, i8 %z)  ; %r = i8: lsb_extract((concat(x, y) >> (z % 8)), 8)
+%r = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)  ; %r = i8: 254 (0b11111110)
+%r = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)  ; %r = i8: 225 (0b11100001)
+%r = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)   ; %r = i8: 255 (0b11111111)
+
+
+
+
+
+
+

Arithmetic with Overflow Intrinsics

+

LLVM provides intrinsics for fast arithmetic overflow checking.

+

Each of these intrinsics returns a two-element struct. The first +element of this struct contains the result of the corresponding +arithmetic operation modulo 2^n, where n is the bit width of +the result. Therefore, for example, the first element of the struct +returned by llvm.sadd.with.overflow.i32 is always the same as the +result of a 32-bit add instruction with the same operands, where +the add is not modified by an nsw or nuw flag.

+

The second element of the result is an i1 that is 1 if the +arithmetic operation overflowed and 0 otherwise. An operation +overflows if, for any values of its operands A and B and for +any N larger than the operands’ width, ext(A op B) to iN is +not equal to (ext(A) to iN) op (ext(B) to iN) where ext is +sext for signed overflow and zext for unsigned overflow, and +op is the underlying arithmetic operation.

+

The behavior of these intrinsics is well-defined for all argument +values.

+
+

‘llvm.sadd.with.overflow.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.sadd.with.overflow +on any integer bit width or vectors of integers.

+
declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
+declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
+declare {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

The ‘llvm.sadd.with.overflow’ family of intrinsic functions perform +a signed addition of the two arguments, and indicate whether an overflow +occurred during the signed summation.

+
+
+
Arguments:
+

The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +i1. %a and %b are the two values that will undergo signed +addition.

+
+
+
Semantics:
+

The ‘llvm.sadd.with.overflow’ family of intrinsic functions perform +a signed addition of the two variables. They return a structure — the +first element of which is the signed summation, and the second element +of which is a bit specifying if the signed summation resulted in an +overflow.

+
+
+
Examples:
+
%res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+%sum = extractvalue {i32, i1} %res, 0
+%obit = extractvalue {i32, i1} %res, 1
+br i1 %obit, label %overflow, label %normal
+
+
+
+
+
+

‘llvm.uadd.with.overflow.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.uadd.with.overflow +on any integer bit width or vectors of integers.

+
declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

The ‘llvm.uadd.with.overflow’ family of intrinsic functions perform +an unsigned addition of the two arguments, and indicate whether a carry +occurred during the unsigned summation.

+
+
+
Arguments:
+

The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +i1. %a and %b are the two values that will undergo unsigned +addition.

+
+
+
Semantics:
+

The ‘llvm.uadd.with.overflow’ family of intrinsic functions perform +an unsigned addition of the two arguments. They return a structure — the +first element of which is the sum, and the second element of which is a +bit specifying if the unsigned summation resulted in a carry.

+
+
+
Examples:
+
%res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+%sum = extractvalue {i32, i1} %res, 0
+%obit = extractvalue {i32, i1} %res, 1
+br i1 %obit, label %carry, label %normal
+
+
+
+
+
+

‘llvm.ssub.with.overflow.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.ssub.with.overflow +on any integer bit width or vectors of integers.

+
declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
+declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

The ‘llvm.ssub.with.overflow’ family of intrinsic functions perform +a signed subtraction of the two arguments, and indicate whether an +overflow occurred during the signed subtraction.

+
+
+
Arguments:
+

The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +i1. %a and %b are the two values that will undergo signed +subtraction.

+
+
+
Semantics:
+

The ‘llvm.ssub.with.overflow’ family of intrinsic functions perform +a signed subtraction of the two arguments. They return a structure — the +first element of which is the subtraction, and the second element of +which is a bit specifying if the signed subtraction resulted in an +overflow.

+
+
+
Examples:
+
%res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+%sum = extractvalue {i32, i1} %res, 0
+%obit = extractvalue {i32, i1} %res, 1
+br i1 %obit, label %overflow, label %normal
+
+
+
+
+
+

‘llvm.usub.with.overflow.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.usub.with.overflow +on any integer bit width or vectors of integers.

+
declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

The ‘llvm.usub.with.overflow’ family of intrinsic functions perform +an unsigned subtraction of the two arguments, and indicate whether an +overflow occurred during the unsigned subtraction.

+
+
+
Arguments:
+

The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +i1. %a and %b are the two values that will undergo unsigned +subtraction.

+
+
+
Semantics:
+

The ‘llvm.usub.with.overflow’ family of intrinsic functions perform +an unsigned subtraction of the two arguments. They return a structure — +the first element of which is the subtraction, and the second element of +which is a bit specifying if the unsigned subtraction resulted in an +overflow.

+
+
+
Examples:
+
%res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+%sum = extractvalue {i32, i1} %res, 0
+%obit = extractvalue {i32, i1} %res, 1
+br i1 %obit, label %overflow, label %normal
+
+
+
+
+
+

‘llvm.smul.with.overflow.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.smul.with.overflow +on any integer bit width or vectors of integers.

+
declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
+declare {<4 x i32>, <4 x i1>} @llvm.smul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

The ‘llvm.smul.with.overflow’ family of intrinsic functions perform +a signed multiplication of the two arguments, and indicate whether an +overflow occurred during the signed multiplication.

+
+
+
Arguments:
+

The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +i1. %a and %b are the two values that will undergo signed +multiplication.

+
+
+
Semantics:
+

The ‘llvm.smul.with.overflow’ family of intrinsic functions perform +a signed multiplication of the two arguments. They return a structure — +the first element of which is the multiplication, and the second element +of which is a bit specifying if the signed multiplication resulted in an +overflow.

+
+
+
Examples:
+
%res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+%sum = extractvalue {i32, i1} %res, 0
+%obit = extractvalue {i32, i1} %res, 1
+br i1 %obit, label %overflow, label %normal
+
+
+
+
+
+

‘llvm.umul.with.overflow.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.umul.with.overflow +on any integer bit width or vectors of integers.

+
declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
+declare {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview:
+

The ‘llvm.umul.with.overflow’ family of intrinsic functions perform +an unsigned multiplication of the two arguments, and indicate whether an +overflow occurred during the unsigned multiplication.

+
+
+
Arguments:
+

The arguments (%a and %b) and the first element of the result structure +may be of integer types of any bit width, but they must have the same +bit width. The second element of the result structure must be of type +i1. %a and %b are the two values that will undergo unsigned +multiplication.

+
+
+
Semantics:
+

The ‘llvm.umul.with.overflow’ family of intrinsic functions perform +an unsigned multiplication of the two arguments. They return a structure — +the first element of which is the multiplication, and the second +element of which is a bit specifying if the unsigned multiplication +resulted in an overflow.

+
+
+
Examples:
+
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+%sum = extractvalue {i32, i1} %res, 0
+%obit = extractvalue {i32, i1} %res, 1
+br i1 %obit, label %overflow, label %normal
+
+
+
+
+
+
+

Saturation Arithmetic Intrinsics

+

Saturation arithmetic is a version of arithmetic in which operations are +limited to a fixed range between a minimum and maximum value. If the result of +an operation is greater than the maximum value, the result is set (or +“clamped”) to this maximum. If it is below the minimum, it is clamped to this +minimum.

+
+

‘llvm.sadd.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.sadd.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.sadd.sat.i16(i16 %a, i16 %b)
+declare i32 @llvm.sadd.sat.i32(i32 %a, i32 %b)
+declare i64 @llvm.sadd.sat.i64(i64 %a, i64 %b)
+declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview
+

The ‘llvm.sadd.sat’ family of intrinsic functions perform signed +saturating addition on the 2 arguments.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo signed addition.

+
+
+
Semantics:
+

The maximum value this operation can clamp to is the largest signed value +representable by the bit width of the arguments. The minimum value is the +smallest signed value representable by this bit width.

+
+
+
Examples
+
%res = call i4 @llvm.sadd.sat.i4(i4 1, i4 2)  ; %res = 3
+%res = call i4 @llvm.sadd.sat.i4(i4 5, i4 6)  ; %res = 7
+%res = call i4 @llvm.sadd.sat.i4(i4 -4, i4 2)  ; %res = -2
+%res = call i4 @llvm.sadd.sat.i4(i4 -4, i4 -5)  ; %res = -8
+
+
+
+
+
+

‘llvm.uadd.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.uadd.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.uadd.sat.i16(i16 %a, i16 %b)
+declare i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
+declare i64 @llvm.uadd.sat.i64(i64 %a, i64 %b)
+declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview
+

The ‘llvm.uadd.sat’ family of intrinsic functions perform unsigned +saturating addition on the 2 arguments.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo unsigned addition.

+
+
+
Semantics:
+

The maximum value this operation can clamp to is the largest unsigned value +representable by the bit width of the arguments. Because this is an unsigned +operation, the result will never saturate towards zero.

+
+
+
Examples
+
%res = call i4 @llvm.uadd.sat.i4(i4 1, i4 2)  ; %res = 3
+%res = call i4 @llvm.uadd.sat.i4(i4 5, i4 6)  ; %res = 11
+%res = call i4 @llvm.uadd.sat.i4(i4 8, i4 8)  ; %res = 15
+
+
+
+
+
+

‘llvm.ssub.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.ssub.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.ssub.sat.i16(i16 %a, i16 %b)
+declare i32 @llvm.ssub.sat.i32(i32 %a, i32 %b)
+declare i64 @llvm.ssub.sat.i64(i64 %a, i64 %b)
+declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview
+

The ‘llvm.ssub.sat’ family of intrinsic functions perform signed +saturating subtraction on the 2 arguments.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo signed subtraction.

+
+
+
Semantics:
+

The maximum value this operation can clamp to is the largest signed value +representable by the bit width of the arguments. The minimum value is the +smallest signed value representable by this bit width.

+
+
+
Examples
+
%res = call i4 @llvm.ssub.sat.i4(i4 2, i4 1)  ; %res = 1
+%res = call i4 @llvm.ssub.sat.i4(i4 2, i4 6)  ; %res = -4
+%res = call i4 @llvm.ssub.sat.i4(i4 -4, i4 5)  ; %res = -8
+%res = call i4 @llvm.ssub.sat.i4(i4 4, i4 -5)  ; %res = 7
+
+
+
+
+
+

‘llvm.usub.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.usub.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.usub.sat.i16(i16 %a, i16 %b)
+declare i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
+declare i64 @llvm.usub.sat.i64(i64 %a, i64 %b)
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview
+

The ‘llvm.usub.sat’ family of intrinsic functions perform unsigned +saturating subtraction on the 2 arguments.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo unsigned subtraction.

+
+
+
Semantics:
+

The minimum value this operation can clamp to is 0, which is the smallest +unsigned value representable by the bit width of the unsigned arguments. +Because this is an unsigned operation, the result will never saturate towards +the largest possible value representable by this bit width.

+
+
+
Examples
+
%res = call i4 @llvm.usub.sat.i4(i4 2, i4 1)  ; %res = 1
+%res = call i4 @llvm.usub.sat.i4(i4 2, i4 6)  ; %res = 0
+
+
+
+
+
+

‘llvm.sshl.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.sshl.sat +on integers or vectors of integers of any bit width.

+
declare i16 @llvm.sshl.sat.i16(i16 %a, i16 %b)
+declare i32 @llvm.sshl.sat.i32(i32 %a, i32 %b)
+declare i64 @llvm.sshl.sat.i64(i64 %a, i64 %b)
+declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview
+

The ‘llvm.sshl.sat’ family of intrinsic functions perform signed +saturating left shift on the first argument.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any +bit width, but they must have the same bit width. %a is the value to be +shifted, and %b is the amount to shift by. If b is (statically or +dynamically) equal to or larger than the integer bit width of the arguments, +the result is a poison value. If the arguments are +vectors, each vector element of a is shifted by the corresponding shift +amount in b.

+
+
+
Semantics:
+

The maximum value this operation can clamp to is the largest signed value +representable by the bit width of the arguments. The minimum value is the +smallest signed value representable by this bit width.

+
+
+
Examples
+
%res = call i4 @llvm.sshl.sat.i4(i4 2, i4 1)  ; %res = 4
+%res = call i4 @llvm.sshl.sat.i4(i4 2, i4 2)  ; %res = 7
+%res = call i4 @llvm.sshl.sat.i4(i4 -5, i4 1)  ; %res = -8
+%res = call i4 @llvm.sshl.sat.i4(i4 -1, i4 1)  ; %res = -2
+
+
+
+
+
+

‘llvm.ushl.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.ushl.sat +on integers or vectors of integers of any bit width.

+
declare i16 @llvm.ushl.sat.i16(i16 %a, i16 %b)
+declare i32 @llvm.ushl.sat.i32(i32 %a, i32 %b)
+declare i64 @llvm.ushl.sat.i64(i64 %a, i64 %b)
+declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
+
+
+
+
+
Overview
+

The ‘llvm.ushl.sat’ family of intrinsic functions perform unsigned +saturating left shift on the first argument.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any +bit width, but they must have the same bit width. %a is the value to be +shifted, and %b is the amount to shift by. If b is (statically or +dynamically) equal to or larger than the integer bit width of the arguments, +the result is a poison value. If the arguments are +vectors, each vector element of a is shifted by the corresponding shift +amount in b.

+
+
+
Semantics:
+

The maximum value this operation can clamp to is the largest unsigned value +representable by the bit width of the arguments.

+
+
+
Examples
+
%res = call i4 @llvm.ushl.sat.i4(i4 2, i4 1)  ; %res = 4
+%res = call i4 @llvm.ushl.sat.i4(i4 3, i4 3)  ; %res = 15
+
+
+
+
+
+
+

Fixed Point Arithmetic Intrinsics

+

A fixed point number represents a real data type for a number that has a fixed +number of digits after a radix point (equivalent to the decimal point ‘.’). +The number of digits after the radix point is referred to as the scale. These +are useful for representing fractional values to a specific precision. The +following intrinsics perform fixed point arithmetic operations on 2 operands +of the same scale, specified as the third argument.

+

The llvm.*mul.fix family of intrinsic functions represents a multiplication +of fixed point numbers through scaled integers. Therefore, fixed point +multiplication can be represented as

+
%result = call i4 @llvm.smul.fix.i4(i4 %a, i4 %b, i32 %scale)
+
+; Expands to
+%a2 = sext i4 %a to i8
+%b2 = sext i4 %b to i8
+%mul = mul nsw nuw i8 %a2, %b2
+%scale2 = trunc i32 %scale to i8
+%r = ashr i8 %mul, %scale2  ; this is for a target rounding down towards negative infinity
+%result = trunc i8 %r to i4
+
+
+

The llvm.*div.fix family of intrinsic functions represents a division of +fixed point numbers through scaled integers. Fixed point division can be +represented as:

+
%result = call i4 @llvm.sdiv.fix.i4(i4 %a, i4 %b, i32 %scale)
+
+; Expands to
+%a2 = sext i4 %a to i8
+%b2 = sext i4 %b to i8
+%scale2 = trunc i32 %scale to i8
+%a3 = shl i8 %a2, %scale2
+%r = sdiv i8 %a3, %b2 ; this is for a target rounding towards zero
+%result = trunc i8 %r to i4
+
+
+

For each of these functions, if the result cannot be represented exactly with +the provided scale, the result is rounded. Rounding is unspecified since +preferred rounding may vary for different targets. Rounding is specified +through a target hook. Different pipelines should legalize or optimize this +using the rounding specified by this hook if it is provided. Operations like +constant folding, instruction combining, KnownBits, and ValueTracking should +also use this hook, if provided, and not assume the direction of rounding. A +rounded result must always be within one unit of precision from the true +result. That is, the error between the returned result and the true result must +be less than 1/2^(scale).

+
+

‘llvm.smul.fix.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.smul.fix +on any integer bit width or vectors of integers.

+
declare i16 @llvm.smul.fix.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.smul.fix.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.smul.fix.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.smul.fix’ family of intrinsic functions perform signed +fixed point multiplication on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. The arguments may also work with +int vectors of the same length and int size. %a and %b are the two +values that will undergo signed fixed point multiplication. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs fixed point multiplication on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

It is undefined behavior if the result value does not fit within the range of +the fixed point type.

+
+
+
Examples
+
%res = call i4 @llvm.smul.fix.i4(i4 3, i4 2, i32 0)  ; %res = 6 (2 x 3 = 6)
+%res = call i4 @llvm.smul.fix.i4(i4 3, i4 2, i32 1)  ; %res = 3 (1.5 x 1 = 1.5)
+%res = call i4 @llvm.smul.fix.i4(i4 3, i4 -2, i32 1)  ; %res = -3 (1.5 x -1 = -1.5)
+
+; The result in the following could be rounded up to -2 or down to -2.5
+%res = call i4 @llvm.smul.fix.i4(i4 3, i4 -3, i32 1)  ; %res = -5 (or -4) (1.5 x -1.5 = -2.25)
+
+
+
+
+
+

‘llvm.umul.fix.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.umul.fix +on any integer bit width or vectors of integers.

+
declare i16 @llvm.umul.fix.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.umul.fix.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.umul.fix.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.umul.fix’ family of intrinsic functions perform unsigned +fixed point multiplication on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. The arguments may also work with +int vectors of the same length and int size. %a and %b are the two +values that will undergo unsigned fixed point multiplication. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs unsigned fixed point multiplication on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

It is undefined behavior if the result value does not fit within the range of +the fixed point type.

+
+
+
Examples
+
%res = call i4 @llvm.umul.fix.i4(i4 3, i4 2, i32 0)  ; %res = 6 (2 x 3 = 6)
+%res = call i4 @llvm.umul.fix.i4(i4 3, i4 2, i32 1)  ; %res = 3 (1.5 x 1 = 1.5)
+
+; The result in the following could be rounded down to 3.5 or up to 4
+%res = call i4 @llvm.umul.fix.i4(i4 15, i4 1, i32 1)  ; %res = 7 (or 8) (7.5 x 0.5 = 3.75)
+
+
+
+
+
+

‘llvm.smul.fix.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.smul.fix.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.smul.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.smul.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.smul.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.smul.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.smul.fix.sat’ family of intrinsic functions perform signed +fixed point saturating multiplication on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo signed fixed point multiplication. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs fixed point multiplication on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

The maximum value this operation can clamp to is the largest signed value +representable by the bit width of the first 2 arguments. The minimum value is the +smallest signed value representable by this bit width.

+
+
+
Examples
+
%res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 2, i32 0)  ; %res = 6 (2 x 3 = 6)
+%res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 2, i32 1)  ; %res = 3 (1.5 x 1 = 1.5)
+%res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 -2, i32 1)  ; %res = -3 (1.5 x -1 = -1.5)
+
+; The result in the following could be rounded up to -2 or down to -2.5
+%res = call i4 @llvm.smul.fix.sat.i4(i4 3, i4 -3, i32 1)  ; %res = -5 (or -4) (1.5 x -1.5 = -2.25)
+
+; Saturation
+%res = call i4 @llvm.smul.fix.sat.i4(i4 7, i4 2, i32 0)  ; %res = 7
+%res = call i4 @llvm.smul.fix.sat.i4(i4 7, i4 4, i32 2)  ; %res = 7
+%res = call i4 @llvm.smul.fix.sat.i4(i4 -8, i4 5, i32 2)  ; %res = -8
+%res = call i4 @llvm.smul.fix.sat.i4(i4 -8, i4 -2, i32 1)  ; %res = 7
+
+; Scale can affect the saturation result
+%res = call i4 @llvm.smul.fix.sat.i4(i4 2, i4 4, i32 0)  ; %res = 7 (2 x 4 -> clamped to 7)
+%res = call i4 @llvm.smul.fix.sat.i4(i4 2, i4 4, i32 1)  ; %res = 4 (1 x 2 = 2)
+
+
+
+
+
+

‘llvm.umul.fix.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.umul.fix.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.umul.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.umul.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.umul.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.umul.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.umul.fix.sat’ family of intrinsic functions perform unsigned +fixed point saturating multiplication on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo unsigned fixed point multiplication. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs fixed point multiplication on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

The maximum value this operation can clamp to is the largest unsigned value +representable by the bit width of the first 2 arguments. The minimum value is the +smallest unsigned value representable by this bit width (zero).

+
+
+
Examples
+
%res = call i4 @llvm.umul.fix.sat.i4(i4 3, i4 2, i32 0)  ; %res = 6 (2 x 3 = 6)
+%res = call i4 @llvm.umul.fix.sat.i4(i4 3, i4 2, i32 1)  ; %res = 3 (1.5 x 1 = 1.5)
+
+; The result in the following could be rounded down to 2 or up to 2.5
+%res = call i4 @llvm.umul.fix.sat.i4(i4 3, i4 3, i32 1)  ; %res = 4 (or 5) (1.5 x 1.5 = 2.25)
+
+; Saturation
+%res = call i4 @llvm.umul.fix.sat.i4(i4 8, i4 2, i32 0)  ; %res = 15 (8 x 2 -> clamped to 15)
+%res = call i4 @llvm.umul.fix.sat.i4(i4 8, i4 8, i32 2)  ; %res = 15 (2 x 2 -> clamped to 3.75)
+
+; Scale can affect the saturation result
+%res = call i4 @llvm.umul.fix.sat.i4(i4 2, i4 4, i32 0)  ; %res = 8 (2 x 4 = 8, fits in unsigned i4 so no clamping)
+%res = call i4 @llvm.umul.fix.sat.i4(i4 2, i4 4, i32 1)  ; %res = 4 (1 x 2 = 2)
+
+
+
+
+
+

‘llvm.sdiv.fix.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.sdiv.fix +on any integer bit width or vectors of integers.

+
declare i16 @llvm.sdiv.fix.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.sdiv.fix.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.sdiv.fix.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.sdiv.fix.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.sdiv.fix’ family of intrinsic functions perform signed +fixed point division on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. The arguments may also work with +int vectors of the same length and int size. %a and %b are the two +values that will undergo signed fixed point division. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs fixed point division on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

It is undefined behavior if the result value does not fit within the range of +the fixed point type, or if the second argument is zero.

+
+
+
Examples
+
%res = call i4 @llvm.sdiv.fix.i4(i4 6, i4 2, i32 0)  ; %res = 3 (6 / 2 = 3)
+%res = call i4 @llvm.sdiv.fix.i4(i4 6, i4 4, i32 1)  ; %res = 3 (3 / 2 = 1.5)
+%res = call i4 @llvm.sdiv.fix.i4(i4 3, i4 -2, i32 1) ; %res = -3 (1.5 / -1 = -1.5)
+
+; The result in the following could be rounded up to 1 or down to 0.5
+%res = call i4 @llvm.sdiv.fix.i4(i4 3, i4 4, i32 1)  ; %res = 2 (or 1) (1.5 / 2 = 0.75)
+
+
+
+
+
+

‘llvm.udiv.fix.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.udiv.fix +on any integer bit width or vectors of integers.

+
declare i16 @llvm.udiv.fix.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.udiv.fix.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.udiv.fix.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.udiv.fix.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.udiv.fix’ family of intrinsic functions perform unsigned +fixed point division on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. The arguments may also work with +int vectors of the same length and int size. %a and %b are the two +values that will undergo unsigned fixed point division. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs fixed point division on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

It is undefined behavior if the result value does not fit within the range of +the fixed point type, or if the second argument is zero.

+
+
+
Examples
+
%res = call i4 @llvm.udiv.fix.i4(i4 6, i4 2, i32 0)  ; %res = 3 (6 / 2 = 3)
+%res = call i4 @llvm.udiv.fix.i4(i4 6, i4 4, i32 1)  ; %res = 3 (3 / 2 = 1.5)
+%res = call i4 @llvm.udiv.fix.i4(i4 1, i4 -8, i32 4) ; %res = 2 (0.0625 / 0.5 = 0.125)
+
+; The result in the following could be rounded up to 1 or down to 0.5
+%res = call i4 @llvm.udiv.fix.i4(i4 3, i4 4, i32 1)  ; %res = 2 (or 1) (1.5 / 2 = 0.75)
+
+
+
+
+
+

‘llvm.sdiv.fix.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.sdiv.fix.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.sdiv.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.sdiv.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.sdiv.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.sdiv.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.sdiv.fix.sat’ family of intrinsic functions perform signed +fixed point saturating division on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo signed fixed point division. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs fixed point division on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

The maximum value this operation can clamp to is the largest signed value +representable by the bit width of the first 2 arguments. The minimum value is the +smallest signed value representable by this bit width.

+

It is undefined behavior if the second argument is zero.

+
+
+
Examples
+
%res = call i4 @llvm.sdiv.fix.sat.i4(i4 6, i4 2, i32 0)  ; %res = 3 (6 / 2 = 3)
+%res = call i4 @llvm.sdiv.fix.sat.i4(i4 6, i4 4, i32 1)  ; %res = 3 (3 / 2 = 1.5)
+%res = call i4 @llvm.sdiv.fix.sat.i4(i4 3, i4 -2, i32 1) ; %res = -3 (1.5 / -1 = -1.5)
+
+; The result in the following could be rounded up to 1 or down to 0.5
+%res = call i4 @llvm.sdiv.fix.sat.i4(i4 3, i4 4, i32 1)  ; %res = 2 (or 1) (1.5 / 2 = 0.75)
+
+; Saturation
+%res = call i4 @llvm.sdiv.fix.sat.i4(i4 -8, i4 -1, i32 0)  ; %res = 7 (-8 / -1 = 8 => 7)
+%res = call i4 @llvm.sdiv.fix.sat.i4(i4 4, i4 2, i32 2)  ; %res = 7 (1 / 0.5 = 2 => 1.75)
+%res = call i4 @llvm.sdiv.fix.sat.i4(i4 -4, i4 1, i32 2)  ; %res = -8 (-1 / 0.25 = -4 => -2)
+
+
+
+
+
+

‘llvm.udiv.fix.sat.*’ Intrinsics

+
+
Syntax
+

This is an overloaded intrinsic. You can use llvm.udiv.fix.sat +on any integer bit width or vectors of integers.

+
declare i16 @llvm.udiv.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
+declare i32 @llvm.udiv.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
+declare i64 @llvm.udiv.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
+declare <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+
+
+
+
Overview
+

The ‘llvm.udiv.fix.sat’ family of intrinsic functions perform unsigned +fixed point saturating division on 2 arguments of the same scale.

+
+
+
Arguments
+

The arguments (%a and %b) and the result may be of integer types of any bit +width, but they must have the same bit width. %a and %b are the two +values that will undergo unsigned fixed point division. The argument +%scale represents the scale of both operands, and must be a constant +integer.

+
+
+
Semantics:
+

This operation performs fixed point division on the 2 arguments of a +specified scale. The result will also be returned in the same scale specified +in the third argument.

+

If the result value cannot be precisely represented in the given scale, the +value is rounded up or down to the closest representable value. The rounding +direction is unspecified.

+

The maximum value this operation can clamp to is the largest unsigned value +representable by the bit width of the first 2 arguments. The minimum value is the +smallest unsigned value representable by this bit width (zero).

+

It is undefined behavior if the second argument is zero.

+
+
+
Examples
+
%res = call i4 @llvm.udiv.fix.sat.i4(i4 6, i4 2, i32 0)  ; %res = 3 (6 / 2 = 3)
+%res = call i4 @llvm.udiv.fix.sat.i4(i4 6, i4 4, i32 1)  ; %res = 3 (3 / 2 = 1.5)
+
+; The result in the following could be rounded down to 0.5 or up to 1
+%res = call i4 @llvm.udiv.fix.sat.i4(i4 3, i4 4, i32 1)  ; %res = 1 (or 2) (1.5 / 2 = 0.75)
+
+; Saturation
+%res = call i4 @llvm.udiv.fix.sat.i4(i4 8, i4 2, i32 2)  ; %res = 15 (2 / 0.5 = 4 => 3.75)
+
+
+
+
+
+
+

Specialised Arithmetic Intrinsics

+
+

‘llvm.canonicalize.*’ Intrinsic

+
+
Syntax:
+
declare float @llvm.canonicalize.f32(float %a)
+declare double @llvm.canonicalize.f64(double %b)
+
+
+
+
+
Overview:
+

The ‘llvm.canonicalize.*’ intrinsic returns the platform specific canonical +encoding of a floating-point number. This canonicalization is useful for +implementing certain numeric primitives such as frexp. The canonical encoding is +defined by IEEE-754-2008 to be:

+
2.1.8 canonical encoding: The preferred encoding of a floating-point
+representation in a format. Applied to declets, significands of finite
+numbers, infinities, and NaNs, especially in decimal formats.
+
+
+

This operation can also be considered equivalent to the IEEE-754-2008 +conversion of a floating-point value to the same format. NaNs are handled +according to section 6.2.

+

Examples of non-canonical encodings:

+
    +
  • x87 pseudo denormals, pseudo NaNs, pseudo Infinity, Unnormals. These are +converted to a canonical representation per hardware-specific protocol.

  • +
  • Many normal decimal floating-point numbers have non-canonical alternative +encodings.

  • +
  • Some machines, like GPUs or ARMv7 NEON, do not support subnormal values. +These are treated as non-canonical encodings of zero and will be flushed to +a zero of the same sign by this operation.

  • +
+

Note that per IEEE-754-2008 6.2, systems that support signaling NaNs with +default exception handling must signal an invalid exception, and produce a +quiet NaN result.

+

This function should always be implementable as multiplication by 1.0, provided +that the compiler does not constant fold the operation. Likewise, division by +1.0 and llvm.minnum(x, x) are possible implementations. Addition with +-0.0 is also sufficient provided that the rounding mode is not -Infinity.

+

@llvm.canonicalize must preserve the equality relation. That is:

+
    +
  • (@llvm.canonicalize(x) == x) is equivalent to (x == x)

  • +
  • (@llvm.canonicalize(x) == @llvm.canonicalize(y)) is equivalent to +(x == y)

  • +
+

Additionally, the sign of zero must be conserved: +@llvm.canonicalize(-0.0) = -0.0 and @llvm.canonicalize(+0.0) = +0.0

+

The payload bits of a NaN must be conserved, with two exceptions. +First, environments which use only a single canonical representation of NaN +must perform said canonicalization. Second, SNaNs must be quieted per the +usual methods.

+

The canonicalization operation may be optimized away if:

+
    +
  • The input is known to be canonical. For example, it was produced by a +floating-point operation that is required by the standard to be canonical.

  • +
  • The result is consumed only by (or fused with) other floating-point +operations. That is, the bits of the floating-point value are not examined.

  • +
+
+
+
+

‘llvm.fmuladd.*’ Intrinsic

+
+
Syntax:
+
declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
+declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
+
+
+
+
+
Overview:
+

The ‘llvm.fmuladd.*’ intrinsic functions represent multiply-add +expressions that can be fused if the code generator determines that (a) the +target instruction set has support for a fused operation, and (b) that the +fused operation is more efficient than the equivalent, separate pair of mul +and add instructions.

+
+
+
Arguments:
+

The ‘llvm.fmuladd.*’ intrinsics each take three arguments: two +multiplicands, a and b, and an addend c.

+
+
+
Semantics:
+

The expression:

+
%0 = call float @llvm.fmuladd.f32(%a, %b, %c)
+
+
+

is equivalent to the expression a * b + c, except that it is unspecified +whether rounding will be performed between the multiplication and addition +steps. Fusion is not guaranteed, even if the target platform supports it. +If a fused multiply-add is required, the corresponding +llvm.fma intrinsic function should be used instead. +Like ‘llvm.fma.*’, this intrinsic never sets errno.

+
+
+
Examples:
+
%r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields float:r2 = (a * b) + c
+
+
+
+
+
+
+

Hardware-Loop Intrinsics

+

LLVM supports several intrinsics to mark a loop as a hardware-loop. They are +hints to the backend, which is required to lower these intrinsics further to target +specific instructions, or revert the hardware-loop to a normal loop if target +specific restrictions are not met and a hardware-loop can’t be generated.

+

These intrinsics may be modified in the future and are not intended to be used +outside the backend. Thus, front-end and mid-level optimizations should not be +generating these intrinsics.

+
+

‘llvm.set.loop.iterations.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare void @llvm.set.loop.iterations.i32(i32)
+declare void @llvm.set.loop.iterations.i64(i64)
+
+
+
+
+
Overview:
+

The ‘llvm.set.loop.iterations.*’ intrinsics are used to specify the +hardware-loop trip count. They are placed in the loop preheader basic block and +are marked as IntrNoDuplicate to avoid optimizers duplicating these +instructions.

+
+
+
Arguments:
+

The integer operand is the loop trip count of the hardware-loop, and thus +not e.g. the loop back-edge taken count.

+
+
+
Semantics:
+

The ‘llvm.set.loop.iterations.*’ intrinsics do not perform any arithmetic +on their operand. It’s a hint to the backend that can use this to set up the +hardware-loop count with a target specific instruction, usually a move of this +value to a special register or a hardware-loop instruction.

+
+
+
+

‘llvm.start.loop.iterations.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.start.loop.iterations.i32(i32)
+declare i64 @llvm.start.loop.iterations.i64(i64)
+
+
+
+
+
Overview:
+

The ‘llvm.start.loop.iterations.*’ intrinsics are similar to the +‘llvm.set.loop.iterations.*’ intrinsics, used to specify the +hardware-loop trip count but also produce a value identical to the input +that can be used as the input to the loop. They are placed in the loop +preheader basic block and the output is expected to be the input to the +phi for the induction variable of the loop, decremented by the +‘llvm.loop.decrement.reg.*’.

+
+
+
Arguments:
+

The integer operand is the loop trip count of the hardware-loop, and thus +not e.g. the loop back-edge taken count.

+
+
+
Semantics:
+

The ‘llvm.start.loop.iterations.*’ intrinsics do not perform any arithmetic +on their operand. It’s a hint to the backend that can use this to set up the +hardware-loop count with a target specific instruction, usually a move of this +value to a special register or a hardware-loop instruction.

+
+
+
+

‘llvm.test.set.loop.iterations.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i1 @llvm.test.set.loop.iterations.i32(i32)
+declare i1 @llvm.test.set.loop.iterations.i64(i64)
+
+
+
+
+
Overview:
+

The ‘llvm.test.set.loop.iterations.*’ intrinsics are used to specify the +loop trip count, and also test that the given count is not zero, allowing +it to control entry to a while-loop. They are placed in the loop preheader’s +predecessor basic block, and are marked as IntrNoDuplicate to avoid +optimizers duplicating these instructions.

+
+
+
Arguments:
+

The integer operand is the loop trip count of the hardware-loop, and thus +not e.g. the loop back-edge taken count.

+
+
+
Semantics:
+

The ‘llvm.test.set.loop.iterations.*’ intrinsics do not perform any +arithmetic on their operand. It’s a hint to the backend that can use this to +set up the hardware-loop count with a target specific instruction, usually a +move of this value to a special register or a hardware-loop instruction. +The result is the conditional value of whether the given count is not zero.

+
+
+
+

‘llvm.test.start.loop.iterations.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare {i32, i1} @llvm.test.start.loop.iterations.i32(i32)
+declare {i64, i1} @llvm.test.start.loop.iterations.i64(i64)
+
+
+
+
+
Overview:
+

The ‘llvm.test.start.loop.iterations.*’ intrinsics are similar to the +‘llvm.test.set.loop.iterations.*’ and ‘llvm.start.loop.iterations.*’ +intrinsics, used to specify the hardware-loop trip count, but also produce a +value identical to the input that can be used as the input to the loop. The +second i1 output controls entry to a while-loop.

+
+
+
Arguments:
+

The integer operand is the loop trip count of the hardware-loop, and thus +not e.g. the loop back-edge taken count.

+
+
+
Semantics:
+

The ‘llvm.test.start.loop.iterations.*’ intrinsics do not perform any +arithmetic on their operand. It’s a hint to the backend that can use this to +set up the hardware-loop count with a target specific instruction, usually a +move of this value to a special register or a hardware-loop instruction. +The result is a pair of the input and a conditional value of whether the +given count is not zero.

+
+
+
+

‘llvm.loop.decrement.reg.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.loop.decrement.reg.i32(i32, i32)
+declare i64 @llvm.loop.decrement.reg.i64(i64, i64)
+
+
+
+
+
Overview:
+

The ‘llvm.loop.decrement.reg.*’ intrinsics are used to lower the loop +iteration counter and return an updated value that will be used in the next +loop test check.

+
+
+
Arguments:
+

Both arguments must have identical integer types. The first operand is the +loop iteration counter. The second operand is the maximum number of elements +processed in an iteration.

+
+
+
Semantics:
+

The ‘llvm.loop.decrement.reg.*’ intrinsics do an integer SUB of their +two operands, which is not allowed to wrap. They return the remaining number of +iterations still to be executed, and can be used together with a PHI, +ICMP and BR to control the number of loop iterations executed. Any +optimisations are allowed to treat it as a SUB, and it is supported by +SCEV, so it’s the backend’s responsibility to handle cases where it may be +optimised. These intrinsics are marked as IntrNoDuplicate to avoid +optimizers duplicating these instructions.

+
+
+
+

‘llvm.loop.decrement.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i1 @llvm.loop.decrement.i32(i32)
+declare i1 @llvm.loop.decrement.i64(i64)
+
+
+
+
+
Overview:
+

The HardwareLoops pass allows the loop decrement value to be specified with an +option. It defaults to a loop decrement value of 1, but it can be an unsigned +integer value provided by this option. The ‘llvm.loop.decrement.*’ +intrinsics decrement the loop iteration counter with this value, and return a +false predicate if the loop should exit, and true otherwise. +This is emitted if the loop counter is not updated via a PHI node, which +can also be controlled with an option.

+
+
+
Arguments:
+

The integer argument is the loop decrement value used to decrement the loop +iteration counter.

+
+
+
Semantics:
+

The ‘llvm.loop.decrement.*’ intrinsics do a SUB of the loop iteration +counter with the given loop decrement value, and return false if the loop +should exit. This SUB is not allowed to wrap. The result is a condition +that is used by the conditional branch controlling the loop.

+
+
+
+
+

Vector Reduction Intrinsics

+

Horizontal reductions of vectors can be expressed using the following +intrinsics. Each one takes a vector operand as an input and applies its +respective operation across all elements of the vector, returning a single +scalar result of the same element type.

+
+

‘llvm.vector.reduce.add.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.add.*’ intrinsics do an integer ADD +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.fadd.*’ Intrinsic

+
+
Syntax:
+
declare float @llvm.vector.reduce.fadd.v4f32(float %start_value, <4 x float> %a)
+declare double @llvm.vector.reduce.fadd.v2f64(double %start_value, <2 x double> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.fadd.*’ intrinsics do a floating-point +ADD reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input.

+

If the intrinsic call has the ‘reassoc’ flag set, then the reduction will not +preserve the associativity of an equivalent scalarized counterpart. Otherwise +the reduction will be sequential, thus implying that the operation respects +the associativity of a scalarized reduction. That is, the reduction begins with +the start value and performs an fadd operation with consecutively increasing +vector element indices. See the following pseudocode:

+
float sequential_fadd(start_value, input_vector)
+  result = start_value
+  for i = 0 to length(input_vector)
+    result = result + input_vector[i]
+  return result
+
+
+
+
+
Arguments:
+

The first argument to this intrinsic is a scalar start value for the reduction. +The type of the start value matches the element-type of the vector input. +The second argument must be a vector of floating-point values.

+

To ignore the start value, negative zero (-0.0) can be used, as it is +the neutral value of floating point addition.

+
+
+
Examples:
+
%unord = call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %input) ; relaxed reduction
+%ord = call float @llvm.vector.reduce.fadd.v4f32(float %start_value, <4 x float> %input) ; sequential reduction
+
+
+
+
+
+

‘llvm.vector.reduce.mul.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a)
+declare i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.mul.*’ intrinsics do an integer MUL +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.fmul.*’ Intrinsic

+
+
Syntax:
+
declare float @llvm.vector.reduce.fmul.v4f32(float %start_value, <4 x float> %a)
+declare double @llvm.vector.reduce.fmul.v2f64(double %start_value, <2 x double> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.fmul.*’ intrinsics do a floating-point +MUL reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input.

+

If the intrinsic call has the ‘reassoc’ flag set, then the reduction will not +preserve the associativity of an equivalent scalarized counterpart. Otherwise +the reduction will be sequential, thus implying that the operation respects +the associativity of a scalarized reduction. That is, the reduction begins with +the start value and performs an fmul operation with consecutively increasing +vector element indices. See the following pseudocode:

+
float sequential_fmul(start_value, input_vector)
+  result = start_value
+  for i = 0 to length(input_vector)
+    result = result * input_vector[i]
+  return result
+
+
+
+
+
Arguments:
+

The first argument to this intrinsic is a scalar start value for the reduction. +The type of the start value matches the element-type of the vector input. +The second argument must be a vector of floating-point values.

+

To ignore the start value, one (1.0) can be used, as it is the neutral +value of floating point multiplication.

+
+
+
Examples:
+
%unord = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.0, <4 x float> %input) ; relaxed reduction
+%ord = call float @llvm.vector.reduce.fmul.v4f32(float %start_value, <4 x float> %input) ; sequential reduction
+
+
+
+
+
+

‘llvm.vector.reduce.and.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.and.*’ intrinsics do a bitwise AND +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.or.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.or.*’ intrinsics do a bitwise OR reduction +of a vector, returning the result as a scalar. The return type matches the +element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.xor.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.xor.*’ intrinsics do a bitwise XOR +reduction of a vector, returning the result as a scalar. The return type matches +the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.smax.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.smax.*’ intrinsics do a signed integer +MAX reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.smin.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.smin.*’ intrinsics do a signed integer +MIN reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.umax.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.umax.*’ intrinsics do an unsigned +integer MAX reduction of a vector, returning the result as a scalar. The +return type matches the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.umin.*’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.umin.*’ intrinsics do an unsigned +integer MIN reduction of a vector, returning the result as a scalar. The +return type matches the element-type of the vector input.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of integer values.

+
+
+
+

‘llvm.vector.reduce.fmax.*’ Intrinsic

+
+
Syntax:
+
declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
+declare double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.fmax.*’ intrinsics do a floating-point +MAX reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input.

+

This instruction has the same comparison semantics as the ‘llvm.maxnum.*’ +intrinsic. That is, the result will always be a number unless all elements of +the vector are NaN. For a vector with maximum element magnitude 0.0 and +containing both +0.0 and -0.0 elements, the sign of the result is unspecified.

+

If the intrinsic call has the nnan fast-math flag, then the operation can +assume that NaNs are not present in the input vector.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of floating-point values.

+
+
+
+

‘llvm.vector.reduce.fmin.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
+declare double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.vector.reduce.fmin.*’ intrinsics do a floating-point +MIN reduction of a vector, returning the result as a scalar. The return type +matches the element-type of the vector input.

+

This instruction has the same comparison semantics as the ‘llvm.minnum.*’ +intrinsic. That is, the result will always be a number unless all elements of +the vector are NaN. For a vector with minimum element magnitude 0.0 and +containing both +0.0 and -0.0 elements, the sign of the result is unspecified.

+

If the intrinsic call has the nnan fast-math flag, then the operation can +assume that NaNs are not present in the input vector.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector of floating-point values.

+
+
+
+

‘llvm.experimental.vector.insert’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.experimental.vector.insert +to insert a fixed-width vector into a scalable vector, but not the other way +around.

+
declare <vscale x 4 x float> @llvm.experimental.vector.insert.v4f32(<vscale x 4 x float> %vec, <4 x float> %subvec, i64 %idx)
+declare <vscale x 2 x double> @llvm.experimental.vector.insert.v2f64(<vscale x 2 x double> %vec, <2 x double> %subvec, i64 %idx)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.vector.insert.*’ intrinsics insert a vector into another vector +starting from a given index. The return type matches the type of the vector we +insert into. Conceptually, this can be used to build a scalable vector out of +non-scalable vectors.

+
+
+
Arguments:
+

The vec is the vector which subvec will be inserted into. +The subvec is the vector that will be inserted.

+

idx represents the starting element number at which subvec will be +inserted. idx must be a constant multiple of subvec’s known minimum +vector length. If subvec is a scalable vector, idx is first scaled by +the runtime scaling factor of subvec. The elements of vec starting at +idx are overwritten with subvec. Elements idx through (idx + +num_elements(subvec) - 1) must be valid vec indices. If this condition +cannot be determined statically but is false at runtime, then the result vector +is undefined.

+
+
+
+

llvm.experimental.vector.extract’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use +llvm.experimental.vector.extract to extract a fixed-width vector from a +scalable vector, but not the other way around.

+
declare <4 x float> @llvm.experimental.vector.extract.v4f32(<vscale x 4 x float> %vec, i64 %idx)
+declare <2 x double> @llvm.experimental.vector.extract.v2f64(<vscale x 2 x double> %vec, i64 %idx)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.vector.extract.*’ intrinsics extract a vector from +within another vector starting from a given index. The return type must be +explicitly specified. Conceptually, this can be used to decompose a scalable +vector into non-scalable parts.

+
+
+
Arguments:
+

The vec is the vector from which we will extract a subvector.

+

The idx specifies the starting element number within vec from which a +subvector is extracted. idx must be a constant multiple of the known-minimum +vector length of the result type. If the result type is a scalable vector, +idx is first scaled by the result type’s runtime scaling factor. Elements +idx through (idx + num_elements(result_type) - 1) must be valid vector +indices. If this condition cannot be determined statically but is false at +runtime, then the result vector is undefined. The idx parameter must be a +vector index constant type (for most targets this will be an integer pointer +type).

+
+
+
+

llvm.experimental.vector.reverse’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
+declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.vector.reverse.*’ intrinsics reverse a vector. +The intrinsic takes a single vector and returns a vector of matching type but +with the original lane order reversed. These intrinsics work for both fixed +and scalable vectors. While this intrinsic is marked as experimental, the +recommended way to express reverse operations for fixed-width vectors is still +to use a shufflevector, as that may allow for more optimization opportunities.

+
+
+
Arguments:
+

The argument to this intrinsic must be a vector.

+
+
+
+

llvm.experimental.vector.splice’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %vec1, <2 x double> %vec2, i32 %imm)
+declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, i32 %imm)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.vector.splice.*’ intrinsics construct a vector by +concatenating elements from the first input vector with elements of the second +input vector, returning a vector of the same type as the input vectors. The +signed immediate, modulo the number of elements in the vector, is the index +into the first vector from which to extract the result value. This means +conceptually that for a positive immediate, a vector is extracted from +concat(%vec1, %vec2) starting at index imm, whereas for a negative +immediate, it extracts -imm trailing elements from the first vector, and +the remaining elements from %vec2.

+

These intrinsics work for both fixed and scalable vectors. While this intrinsic +is marked as experimental, the recommended way to express this operation for +fixed-width vectors is still to use a shufflevector, as that may allow for more +optimization opportunities.

+

For example:

+
llvm.experimental.vector.splice(<A,B,C,D>, <E,F,G,H>, 1)  ==> <B, C, D, E> ; index
+llvm.experimental.vector.splice(<A,B,C,D>, <E,F,G,H>, -3) ==> <B, C, D, E> ; trailing elements
+
+
+
+
+
Arguments:
+

The first two operands are vectors with the same type. The third argument +imm is the start index, modulo VL, where VL is the runtime vector length of +the source/result vector. The imm is a signed integer constant in the range +-VL <= imm < VL. For values outside of this range the result is poison.

+
+
+
+

llvm.experimental.stepvector’ Intrinsic

+

This is an overloaded intrinsic. You can use llvm.experimental.stepvector +to generate a vector whose lane values comprise the linear sequence +<0, 1, 2, …>. It is primarily intended for scalable vectors.

+
declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+
+
+

The ‘llvm.experimental.stepvector’ intrinsics are used to create vectors +of integers whose elements contain a linear sequence of values starting from 0 +with a step of 1. This experimental intrinsic can only be used for vectors +with integer elements that are at least 8 bits in size. If the sequence value +exceeds the allowed limit for the element type then the result for that lane is +undefined.

+

These intrinsics work for both fixed and scalable vectors. While this intrinsic +is marked as experimental, the recommended way to express this operation for +fixed-width vectors is still to generate a constant vector instead.

+
+
Arguments:
+

None.

+
+
+
+
+

Matrix Intrinsics

+

Operations on matrixes requiring shape information (like number of rows/columns +or the memory layout) can be expressed using the matrix intrinsics. These +intrinsics require matrix dimensions to be passed as immediate arguments, and +matrixes are passed and returned as vectors. This means that for an R x +C matrix, element i of column j is at index j * R + i in the +corresponding vector, with indices starting at 0. Currently column-major layout +is assumed. The intrinsics support both integer and floating point matrixes.

+
+

llvm.matrix.transpose.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare vectorty @llvm.matrix.transpose.*(vectorty %In, i32 <Rows>, i32 <Cols>)
+
+
+
+
+
Overview:
+

The ‘llvm.matrix.transpose.*’ intrinsics treat %In as a <Rows> x +<Cols> matrix and return the transposed matrix in the result vector.

+
+
+
Arguments:
+

The first argument %In is a vector that corresponds to a <Rows> x +<Cols> matrix. Thus, arguments <Rows> and <Cols> correspond to the +number of rows and columns, respectively, and must be positive, constant +integers. The returned vector must have <Rows> * <Cols> elements, and have +the same float or integer element type as %In.

+
+
+
+

llvm.matrix.multiply.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare vectorty @llvm.matrix.multiply.*(vectorty %A, vectorty %B, i32 <OuterRows>, i32 <Inner>, i32 <OuterColumns>)
+
+
+
+
+
Overview:
+

The ‘llvm.matrix.multiply.*’ intrinsics treat %A as a <OuterRows> x +<Inner> matrix, %B as a <Inner> x <OuterColumns> matrix, and +multiply them. The result matrix is returned in the result vector.

+
+
+
Arguments:
+

The first vector argument %A corresponds to a matrix with <OuterRows> * +<Inner> elements, and the second argument %B to a matrix with +<Inner> * <OuterColumns> elements. Arguments <OuterRows>, +<Inner> and <OuterColumns> must be positive, constant integers. The +returned vector must have <OuterRows> * <OuterColumns> elements. +Vectors %A, %B, and the returned vector all have the same float or +integer element type.

+
+
+
+

llvm.matrix.column.major.load.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare vectorty @llvm.matrix.column.major.load.*(
+    ptrty %Ptr, i64 %Stride, i1 <IsVolatile>, i32 <Rows>, i32 <Cols>)
+
+
+
+
+
Overview:
+

The ‘llvm.matrix.column.major.load.*’ intrinsics load a <Rows> x <Cols> +matrix using a stride of %Stride to compute the start address of the +different columns. The offset is computed using %Stride’s bitwidth. This +allows for convenient loading of sub matrixes. If <IsVolatile> is true, the +intrinsic is considered a volatile memory access. The result +matrix is returned in the result vector. If the %Ptr argument is known to +be aligned to some boundary, this can be specified as an attribute on the +argument.

+
+
+
Arguments:
+

The first argument %Ptr is a pointer type to the returned vector type, and +corresponds to the start address to load from. The second argument %Stride +is a positive, constant integer with %Stride >= <Rows>. %Stride is used +to compute the column memory addresses. I.e., for a column C, its start +memory address is calculated with %Ptr + C * %Stride. The third argument +<IsVolatile> is a boolean value. The fourth and fifth arguments, +<Rows> and <Cols>, correspond to the number of rows and columns, +respectively, and must be positive, constant integers. The returned vector must +have <Rows> * <Cols> elements.

+

The align parameter attribute can be provided for the +%Ptr arguments.

+
+
+
+

llvm.matrix.column.major.store.*’ Intrinsic

+
+
Syntax:
+
declare void @llvm.matrix.column.major.store.*(
+    vectorty %In, ptrty %Ptr, i64 %Stride, i1 <IsVolatile>, i32 <Rows>, i32 <Cols>)
+
+
+
+
+
Overview:
+

The ‘llvm.matrix.column.major.store.*’ intrinsics store the <Rows> x +<Cols> matrix in %In to memory using a stride of %Stride between +columns. The offset is computed using %Stride’s bitwidth. If +<IsVolatile> is true, the intrinsic is considered a +volatile memory access.

+

If the %Ptr argument is known to be aligned to some boundary, this can be +specified as an attribute on the argument.

+
+
+
Arguments:
+

The first argument %In is a vector that corresponds to a <Rows> x +<Cols> matrix to be stored to memory. The second argument %Ptr is a +pointer to the vector type of %In, and is the start address of the matrix +in memory. The third argument %Stride is a positive, constant integer with +%Stride >= <Rows>. %Stride is used to compute the column memory +addresses. I.e., for a column C, its start memory address is calculated +with %Ptr + C * %Stride. The fourth argument <IsVolatile> is a boolean +value. The arguments <Rows> and <Cols> correspond to the number of rows +and columns, respectively, and must be positive, constant integers.

+

The align parameter attribute can be provided +for the %Ptr arguments.

+
+
+
+
+

Half Precision Floating-Point Intrinsics

+

For most target platforms, half precision floating-point is a +storage-only format. This means that it is a dense encoding (in memory) +but does not support computation in the format.

+

This means that code must first load the half-precision floating-point +value as an i16, then convert it to float with +llvm.convert.from.fp16. Computation can +then be performed on the float value (including extending to double +etc). To store the value back to memory, it is first converted to float +if needed, then converted to i16 with +llvm.convert.to.fp16, then stored as an +i16 value.

+
+

llvm.convert.to.fp16’ Intrinsic

+
+
Syntax:
+
declare i16 @llvm.convert.to.fp16.f32(float %a)
+declare i16 @llvm.convert.to.fp16.f64(double %a)
+
+
+
+
+
Overview:
+

The ‘llvm.convert.to.fp16’ intrinsic function performs a conversion from a +conventional floating-point type to half precision floating-point format.

+
+
+
Arguments:
+

The intrinsic function takes a single argument - the value to be +converted.

+
+
+
Semantics:
+

The ‘llvm.convert.to.fp16’ intrinsic function performs a conversion from a +conventional floating-point format to half precision floating-point format. The +return value is an i16 which contains the converted number.

+
+
+
Examples:
+
%res = call i16 @llvm.convert.to.fp16.f32(float %a)
+store i16 %res, i16* @x, align 2
+
+
+
+
+
+

llvm.convert.from.fp16’ Intrinsic

+
+
Syntax:
+
declare float @llvm.convert.from.fp16.f32(i16 %a)
+declare double @llvm.convert.from.fp16.f64(i16 %a)
+
+
+
+
+
Overview:
+

The ‘llvm.convert.from.fp16’ intrinsic function performs a +conversion from half precision floating-point format to single precision +floating-point format.

+
+
+
Arguments:
+

The intrinsic function takes a single argument - the value to be +converted.

+
+
+
Semantics:
+

The ‘llvm.convert.from.fp16’ intrinsic function performs a +conversion from half precision floating-point format to single +precision floating-point format. The input half-float value is +represented by an i16 value.

+
+
+
Examples:
+
%a = load i16, i16* @x, align 2
+%res = call float @llvm.convert.from.fp16.f32(i16 %a)
+
+
+
+
+
+
+

Saturating floating-point to integer conversions

+

The fptoui and fptosi instructions return a +poison value if the rounded-towards-zero value is not +representable by the result type. These intrinsics provide an alternative +conversion, which will saturate towards the smallest and largest representable +integer values instead.

+
+

llvm.fptoui.sat.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.fptoui.sat on any +floating-point argument type and any integer result type, or vectors thereof. +Not all targets may support all types, however.

+
declare i32 @llvm.fptoui.sat.i32.f32(float %f)
+declare i19 @llvm.fptoui.sat.i19.f64(double %f)
+declare <4 x i100> @llvm.fptoui.sat.v4i100.v4f128(<4 x fp128> %f)
+
+
+
+
+
Overview:
+

This intrinsic converts the argument into an unsigned integer using saturating +semantics.

+
+
+
Arguments:
+

The argument may be any floating-point or vector of floating-point type. The +return value may be any integer or vector of integer type. The number of vector +elements in argument and return must be the same.

+
+
+
Semantics:
+

The conversion to integer is performed subject to the following rules:

+
    +
  • If the argument is any NaN, zero is returned.

  • +
  • If the argument is smaller than zero (this includes negative infinity), +zero is returned.

  • +
  • If the argument is larger than the largest representable unsigned integer of +the result type (this includes positive infinity), the largest representable +unsigned integer is returned.

  • +
  • Otherwise, the result of rounding the argument towards zero is returned.

  • +
+
+
+
Example:
+
%a = call i8 @llvm.fptoui.sat.i8.f32(float 123.9)              ; yields i8: 123
+%b = call i8 @llvm.fptoui.sat.i8.f32(float -5.7)               ; yields i8:   0
+%c = call i8 @llvm.fptoui.sat.i8.f32(float 377.0)              ; yields i8: 255
+%d = call i8 @llvm.fptoui.sat.i8.f32(float 0xFFF8000000000000) ; yields i8:   0
+
+
+
+
+
+

llvm.fptosi.sat.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.fptosi.sat on any +floating-point argument type and any integer result type, or vectors thereof. +Not all targets may support all types, however.

+
declare i32 @llvm.fptosi.sat.i32.f32(float %f)
+declare i19 @llvm.fptosi.sat.i19.f64(double %f)
+declare <4 x i100> @llvm.fptosi.sat.v4i100.v4f128(<4 x fp128> %f)
+
+
+
+
+
Overview:
+

This intrinsic converts the argument into a signed integer using saturating +semantics.

+
+
+
Arguments:
+

The argument may be any floating-point or vector of floating-point type. The +return value may be any integer or vector of integer type. The number of vector +elements in argument and return must be the same.

+
+
+
Semantics:
+

The conversion to integer is performed subject to the following rules:

+
    +
  • If the argument is any NaN, zero is returned.

  • +
  • If the argument is smaller than the smallest representable signed integer of +the result type (this includes negative infinity), the smallest +representable signed integer is returned.

  • +
  • If the argument is larger than the largest representable signed integer of +the result type (this includes positive infinity), the largest representable +signed integer is returned.

  • +
  • Otherwise, the result of rounding the argument towards zero is returned.

  • +
+
+
+
Example:
+
%a = call i8 @llvm.fptosi.sat.i8.f32(float 23.9)               ; yields i8:   23
+%b = call i8 @llvm.fptosi.sat.i8.f32(float -130.8)             ; yields i8: -128
+%c = call i8 @llvm.fptosi.sat.i8.f32(float 999.0)              ; yields i8:  127
+%d = call i8 @llvm.fptosi.sat.i8.f32(float 0xFFF8000000000000) ; yields i8:    0
+
+
+
+
+
+
+

Debugger Intrinsics

+

The LLVM debugger intrinsics (which all start with the llvm.dbg. +prefix) are described in the LLVM Source Level +Debugging +document.

+
+
+

Exception Handling Intrinsics

+

The LLVM exception handling intrinsics (which all start with the +llvm.eh. prefix) are described in the LLVM Exception +Handling document.

+
+
+

Trampoline Intrinsics

+

These intrinsics make it possible to excise one parameter, marked with +the nest attribute, from a function. The result is a +callable function pointer lacking the nest parameter - the caller does +not need to provide a value for it. Instead, the value to use is stored +in advance in a “trampoline”, a block of memory usually allocated on the +stack, which also contains code to splice the nest value into the +argument list. This is used to implement the GCC nested function address +extension.

+

For example, if the function is i32 f(i8* nest %c, i32 %x, i32 %y) +then the resulting function pointer has signature i32 (i32, i32)*. +It can be created as follows:

+
%tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
+%tramp1 = getelementptr [10 x i8], [10 x i8]* %tramp, i32 0, i32 0
+call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
+%p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
+%fp = bitcast i8* %p to i32 (i32, i32)*
+
+
+

The call %val = call i32 %fp(i32 %x, i32 %y) is then equivalent to +%val = call i32 %f(i8* %nval, i32 %x, i32 %y).

+
+

llvm.init.trampoline’ Intrinsic

+
+
Syntax:
+
declare void @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
+
+
+
+
+
Overview:
+

This fills the memory pointed to by tramp with executable code, +turning it into a trampoline.

+
+
+
Arguments:
+

The llvm.init.trampoline intrinsic takes three arguments, all +pointers. The tramp argument must point to a sufficiently large and +sufficiently aligned block of memory; this memory is written to by the +intrinsic. Note that the size and the alignment are target-specific - +LLVM currently provides no portable way of determining them, so a +front-end that generates this intrinsic needs to have some +target-specific knowledge. The func argument must hold a function +bitcast to an i8*.

+
+
+
Semantics:
+

The block of memory pointed to by tramp is filled with target +dependent code, turning it into a function. Then tramp needs to be +passed to llvm.adjust.trampoline to get a pointer which can +be bitcast (to a new function) and called. The new +function’s signature is the same as that of func with any arguments +marked with the nest attribute removed. At most one such nest +argument is allowed, and it must be of pointer type. Calling the new +function is equivalent to calling func with the same argument list, +but with nval used for the missing nest argument. If, after +calling llvm.init.trampoline, the memory pointed to by tramp is +modified, then the effect of any later call to the returned function +pointer is undefined.

+
+
+
+

llvm.adjust.trampoline’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.adjust.trampoline(i8* <tramp>)
+
+
+
+
+
Overview:
+

This performs any required machine-specific adjustment to the address of +a trampoline (passed as tramp).

+
+
+
Arguments:
+

tramp must point to a block of memory which already has trampoline +code filled in by a previous call to +llvm.init.trampoline.

+
+
+
Semantics:
+

On some architectures the address of the code to be executed needs to be +different than the address where the trampoline is actually stored. This +intrinsic returns the executable address corresponding to tramp +after performing the required machine specific adjustments. The pointer +returned can then be bitcast and executed.

+
+
+
+
+

Vector Predication Intrinsics

+

VP intrinsics are intended for predicated SIMD/vector code. A typical VP +operation takes a vector mask and an explicit vector length parameter as in:

+
<W x T> llvm.vp.<opcode>.*(<W x T> %x, <W x T> %y, <W x i1> %mask, i32 %evl)
+
+
+

The vector mask parameter (%mask) always has a vector of i1 type, for example +<32 x i1>. The explicit vector length parameter always has the type i32 and +is an unsigned integer value. The explicit vector length parameter (%evl) is in +the range:

+
0 <= %evl <= W,  where W is the number of vector elements
+
+
+

Note that for scalable vector types W is the runtime +length of the vector.

+

The VP intrinsic has undefined behavior if %evl > W. The explicit vector +length (%evl) creates a mask, %EVLmask, with all elements 0 <= i < %evl set +to True, and all other lanes %evl <= i < W to False. A new mask %M is +calculated with an element-wise AND from %mask and %EVLmask:

+
M = %mask AND %EVLmask
+
+
+

A vector operation <opcode> on vectors A and B calculates:

+
A <opcode> B =  {  A[i] <opcode> B[i]   M[i] = True, and
+                {  undef otherwise
+
+
+
+

Optimization Hint

+

Some targets, such as AVX512, do not support the %evl parameter in hardware. +The use of an effective %evl is discouraged for those targets. The function +TargetTransformInfo::hasActiveVectorLength() returns true when the target +has native support for %evl.

+
+
+

llvm.vp.select.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.select.v16i32 (<16 x i1> <condition>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <evl>)
+declare <vscale x 4 x i64>  @llvm.vp.select.nxv4i64 (<vscale x 4 x i1> <condition>, <vscale x 4 x i64> <on_true>, <vscale x 4 x i64> <on_false>, i32 <evl>)
+
+
+
+
+
Overview:
+

The ‘llvm.vp.select’ intrinsic is used to choose one value based on a +condition vector, without IR-level branching.

+
+
+
Arguments:
+

The first operand is a vector of i1 and indicates the condition. The +second operand is the value that is selected where the condition vector is +true. The third operand is the value that is selected where the condition +vector is false. The vectors must be of the same size. The fourth operand is +the explicit vector length.

+
    +
  1. The optional fast-math flags marker indicates that the select has one or +more fast-math flags. These are optimization hints to +enable otherwise unsafe floating-point optimizations. Fast-math flags are +only valid for selects that return a floating-point scalar or vector type, +or an array (nested to any depth) of floating-point scalar or vector types.

  2. +
+
+
+
Semantics:
+

The intrinsic selects lanes from the second and third operand depending on a +condition vector.

+

All result lanes at positions greater than or equal to %evl are undefined. +For all lanes below %evl where the condition vector is true the lane is +taken from the second operand. Otherwise, the lane is taken from the third +operand.

+
+
+
Example:
+
%r = call <4 x i32> @llvm.vp.select.v4i32(<4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false, i32 %evl)
+
+;;; Expansion.
+;; Any result is legal on lanes at and above %evl.
+%also.r = select <4 x i1> %cond, <4 x i32> %on_true, <4 x i32> %on_false
+
+
+
+
+
+

llvm.vp.add.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.add.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.add.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.add.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer addition of two vectors of integers.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.add’ intrinsic performs integer addition (add) +of the first and second vector operand on each enabled lane. The result on +disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = add <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

llvm.vp.sub.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.sub.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.sub.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.sub.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer subtraction of two vectors of integers.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.sub’ intrinsic performs integer subtraction +(sub) of the first and second vector operand on each enabled +lane. The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = sub <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

llvm.vp.mul.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.mul.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.mul.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.mul.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer multiplication of two vectors of integers.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.mul’ intrinsic performs integer multiplication +(mul) of the first and second vector operand on each enabled +lane. The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = mul <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

llvm.vp.sdiv.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.sdiv.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.sdiv.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.sdiv.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated, signed division of two vectors of integers.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.sdiv’ intrinsic performs signed division (sdiv) +of the first and second vector operand on each enabled lane. The result on +disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = sdiv <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

llvm.vp.udiv.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.udiv.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.udiv.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.udiv.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated, unsigned division of two vectors of integers.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The third operand is the vector mask and has the same number of elements as the result vector type. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.udiv’ intrinsic performs unsigned division +(udiv) of the first and second vector operand on each enabled +lane. The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = udiv <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

llvm.vp.srem.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.srem.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.srem.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.srem.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated computations of the signed remainder of two integer vectors.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.srem’ intrinsic computes the remainder of the signed division +(srem) of the first and second vector operand on each enabled +lane. The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = srem <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

llvm.vp.urem.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.urem.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.urem.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.urem.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated computation of the unsigned remainder of two integer vectors.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.urem’ intrinsic computes the remainder of the unsigned division +(urem) of the first and second vector operand on each enabled +lane. The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = urem <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

‘llvm.vp.ashr.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.ashr.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.ashr.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.ashr.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Vector-predicated arithmetic right-shift.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.ashr’ intrinsic computes the arithmetic right shift +(ashr) of the first operand by the second operand on each +enabled lane. The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = ashr <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

‘llvm.vp.lshr.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.lshr.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.lshr.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.lshr.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Vector-predicated logical right-shift.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.lshr’ intrinsic computes the logical right shift +(lshr) of the first operand by the second operand on each +enabled lane. The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = lshr <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

‘llvm.vp.shl.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.shl.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.shl.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.shl.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Vector-predicated left shift.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.shl’ intrinsic computes the left shift (shl) of +the first operand by the second operand on each enabled lane. The result on +disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = shl <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

‘llvm.vp.or.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.or.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.or.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.or.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Vector-predicated or.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.or’ intrinsic performs a bitwise or (or) of the +first two operands on each enabled lane. The result on disabled lanes is +undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = or <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

‘llvm.vp.and.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.and.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.and.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.and.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Vector-predicated and.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.and’ intrinsic performs a bitwise and (and) of +the first two operands on each enabled lane. The result on disabled lanes is +undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = and <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

‘llvm.vp.xor.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x i32>  @llvm.vp.xor.v16i32 (<16 x i32> <left_op>, <16 x i32> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x i32>  @llvm.vp.xor.nxv4i32 (<vscale x 4 x i32> <left_op>, <vscale x 4 x i32> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x i64>  @llvm.vp.xor.v256i64 (<256 x i64> <left_op>, <256 x i64> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Vector-predicated, bitwise xor.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of integer type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.xor’ intrinsic performs a bitwise xor (xor) of +the first two operands on each enabled lane. +The result on disabled lanes is undefined.

+
+
+
Examples:
+
%r = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = xor <4 x i32> %a, %b
+%also.r = select <4 x i1> %mask, <4 x i32> %t, <4 x i32> undef
+
+
+
+
+
+

‘llvm.vp.fadd.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x float>  @llvm.vp.fadd.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x float>  @llvm.vp.fadd.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x double>  @llvm.vp.fadd.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point addition of two vectors of floating-point values.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of floating-point type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.fadd’ intrinsic performs floating-point addition (fadd) +of the first and second vector operand on each enabled lane. The result on +disabled lanes is undefined. The operation is performed in the default +floating-point environment.

+
+
+
Examples:
+
%r = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = fadd <4 x float> %a, %b
+%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
+
+
+
+
+

‘llvm.vp.fsub.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x float>  @llvm.vp.fsub.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x float>  @llvm.vp.fsub.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x double>  @llvm.vp.fsub.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point subtraction of two vectors of floating-point values.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of floating-point type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.fsub’ intrinsic performs floating-point subtraction (fsub) +of the first and second vector operand on each enabled lane. The result on +disabled lanes is undefined. The operation is performed in the default +floating-point environment.

+
+
+
Examples:
+
%r = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = fsub <4 x float> %a, %b
+%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
+
+
+
+
+

‘llvm.vp.fmul.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x float>  @llvm.vp.fmul.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x float>  @llvm.vp.fmul.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x double>  @llvm.vp.fmul.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point multiplication of two vectors of floating-point values.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of floating-point type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.fmul’ intrinsic performs floating-point multiplication (fmul) +of the first and second vector operand on each enabled lane. The result on +disabled lanes is undefined. The operation is performed in the default +floating-point environment.

+
+
+
Examples:
+
%r = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = fmul <4 x float> %a, %b
+%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
+
+
+
+
+

‘llvm.vp.fdiv.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x float>  @llvm.vp.fdiv.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x float>  @llvm.vp.fdiv.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x double>  @llvm.vp.fdiv.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point division of two vectors of floating-point values.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of floating-point type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.fdiv’ intrinsic performs floating-point division (fdiv) +of the first and second vector operand on each enabled lane. The result on +disabled lanes is undefined. The operation is performed in the default +floating-point environment.

+
+
+
Examples:
+
%r = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = fdiv <4 x float> %a, %b
+%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
+
+
+
+
+

‘llvm.vp.frem.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <16 x float>  @llvm.vp.frem.v16f32 (<16 x float> <left_op>, <16 x float> <right_op>, <16 x i1> <mask>, i32 <vector_length>)
+declare <vscale x 4 x float>  @llvm.vp.frem.nxv4f32 (<vscale x 4 x float> <left_op>, <vscale x 4 x float> <right_op>, <vscale x 4 x i1> <mask>, i32 <vector_length>)
+declare <256 x double>  @llvm.vp.frem.v256f64 (<256 x double> <left_op>, <256 x double> <right_op>, <256 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point remainder of two vectors of floating-point values.

+
+
+
Arguments:
+

The first two operands and the result have the same vector of floating-point type. The +third operand is the vector mask and has the same number of elements as the +result vector type. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.frem’ intrinsic performs floating-point remainder (frem) +of the first and second vector operand on each enabled lane. The result on +disabled lanes is undefined. The operation is performed in the default +floating-point environment.

+
+
+
Examples:
+
%r = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> %a, <4 x float> %b, <4 x i1> %mask, i32 %evl)
+;; For all lanes below %evl, %r is lane-wise equivalent to %also.r
+
+%t = frem <4 x float> %a, %b
+%also.r = select <4 x i1> %mask, <4 x float> %t, <4 x float> undef
+
+
+
+
+
+

‘llvm.vp.reduce.add.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.add.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.add.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer ADD reduction of a vector and a scalar starting value, +returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.add’ intrinsic performs the integer ADD reduction +(llvm.vector.reduce.add) of the vector operand +val on each enabled lane, adding it to the scalar start_value. Disabled +lanes are treated as containing the neutral value 0 (i.e. having no effect +on the reduction operation). If the vector length is zero, the result is equal +to start_value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> zeroinitializer
+%reduction = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %masked.a)
+%also.r = add i32 %reduction, %start
+
+
+
+
+
+

‘llvm.vp.reduce.fadd.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare float @llvm.vp.reduce.fadd.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare double @llvm.vp.reduce.fadd.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point ADD reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +floating-point type equal to the result type. The second operand is the vector +on which the reduction is performed and must be a vector of floating-point +values whose element type is the result/start type. The third operand is the +vector mask and is a vector of boolean values with the same number of elements +as the vector operand. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.fadd’ intrinsic performs the floating-point ADD +reduction (llvm.vector.reduce.fadd) of the +vector operand val on each enabled lane, adding it to the scalar +start_value. Disabled lanes are treated as containing the neutral value +-0.0 (i.e. having no effect on the reduction operation). If no lanes are +enabled, the resulting value will be equal to start_value.

+

To ignore the start value, the neutral value can be used.

+

See the unpredicated version (llvm.vector.reduce.fadd) for more detail on the semantics of the reduction.

+
+
+
Examples:
+
%r = call float @llvm.vp.reduce.fadd.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>
+%also.r = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %masked.a)
+
+
+
+
+
+

‘llvm.vp.reduce.mul.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.mul.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.mul.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer MUL reduction of a vector and a scalar starting value, +returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.mul’ intrinsic performs the integer MUL reduction +(llvm.vector.reduce.mul) of the vector operand val +on each enabled lane, multiplying it by the scalar start_value. Disabled +lanes are treated as containing the neutral value 1 (i.e. having no effect +on the reduction operation). If the vector length is zero, the result is the +start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i32 @llvm.vp.reduce.mul.v4i32(i32 %start, <4 x i32> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+%reduction = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %masked.a)
+%also.r = mul i32 %reduction, %start
+
+
+
+
+
+

‘llvm.vp.reduce.fmul.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare float @llvm.vp.reduce.fmul.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare double @llvm.vp.reduce.fmul.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point MUL reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +floating-point type equal to the result type. The second operand is the vector +on which the reduction is performed and must be a vector of floating-point +values whose element type is the result/start type. The third operand is the +vector mask and is a vector of boolean values with the same number of elements +as the vector operand. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.fmul’ intrinsic performs the floating-point MUL +reduction (llvm.vector.reduce.fmul) of the +vector operand val on each enabled lane, multiplying it by the scalar +start_value. Disabled lanes are treated as containing the neutral value +1.0 (i.e. having no effect on the reduction operation). If no lanes are +enabled, the resulting value will be equal to the starting value.

+

To ignore the start value, the neutral value can be used.

+

See the unpredicated version (llvm.vector.reduce.fmul) for more detail on the semantics.

+
+
+
Examples:
+
%r = call float @llvm.vp.reduce.fmul.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>
+%also.r = call float @llvm.vector.reduce.fmul.v4f32(float %start, <4 x float> %masked.a)
+
+
+
+
+
+

‘llvm.vp.reduce.and.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.and.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.and.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer AND reduction of a vector and a scalar starting value, +returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.and’ intrinsic performs the integer AND reduction +(llvm.vector.reduce.and) of the vector operand +val on each enabled lane, performing an ‘and’ of that with the +scalar start_value. Disabled lanes are treated as containing the neutral +value UINT_MAX, or -1 (i.e. having no effect on the reduction +operation). If the vector length is zero, the result is the start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i32 @llvm.vp.reduce.and.v4i32(i32 %start, <4 x i32> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+%reduction = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %masked.a)
+%also.r = and i32 %reduction, %start
+
+
+
+
+
+

‘llvm.vp.reduce.or.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.or.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.or.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer OR reduction of a vector and a scalar starting value, +returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.or’ intrinsic performs the integer OR reduction +(llvm.vector.reduce.or) of the vector operand +val on each enabled lane, performing an ‘or’ of that with the scalar +start_value. Disabled lanes are treated as containing the neutral value +0 (i.e. having no effect on the reduction operation). If the vector length +is zero, the result is the start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i32 @llvm.vp.reduce.or.v4i32(i32 %start, <4 x i32> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+%reduction = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %masked.a)
+%also.r = or i32 %reduction, %start
+
+
+
+
+
+

‘llvm.vp.reduce.xor.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.xor.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.xor.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated integer XOR reduction of a vector and a scalar starting value, +returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.xor’ intrinsic performs the integer XOR reduction +(llvm.vector.reduce.xor) of the vector operand +val on each enabled lane, performing an ‘xor’ of that with the scalar +start_value. Disabled lanes are treated as containing the neutral value +0 (i.e. having no effect on the reduction operation). If the vector length +is zero, the result is the start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i32 @llvm.vp.reduce.xor.v4i32(i32 %start, <4 x i32> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+%reduction = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %masked.a)
+%also.r = xor i32 %reduction, %start
+
+
+
+
+
+

‘llvm.vp.reduce.smax.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.smax.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.smax.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated signed-integer MAX reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.smax’ intrinsic performs the signed-integer MAX +reduction (llvm.vector.reduce.smax) of the +vector operand val on each enabled lane, taking the maximum of that and +the scalar start_value. Disabled lanes are treated as containing the +neutral value INT_MIN (i.e. having no effect on the reduction operation). +If the vector length is zero, the result is the start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i8 @llvm.vp.reduce.smax.v4i8(i8 %start, <4 x i8> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i8> %a, <4 x i8> <i8 -128, i8 -128, i8 -128, i8 -128>
+%reduction = call i8 @llvm.vector.reduce.smax.v4i8(<4 x i8> %masked.a)
+%also.r = call i8 @llvm.smax.i8(i8 %reduction, i8 %start)
+
+
+
+
+
+

‘llvm.vp.reduce.smin.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.smin.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.smin.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated signed-integer MIN reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.smin’ intrinsic performs the signed-integer MIN +reduction (llvm.vector.reduce.smin) of the +vector operand val on each enabled lane, taking the minimum of that and +the scalar start_value. Disabled lanes are treated as containing the +neutral value INT_MAX (i.e. having no effect on the reduction operation). +If the vector length is zero, the result is the start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i8 @llvm.vp.reduce.smin.v4i8(i8 %start, <4 x i8> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i8> %a, <4 x i8> <i8 127, i8 127, i8 127, i8 127>
+%reduction = call i8 @llvm.vector.reduce.smin.v4i8(<4 x i8> %masked.a)
+%also.r = call i8 @llvm.smin.i8(i8 %reduction, i8 %start)
+
+
+
+
+
+

‘llvm.vp.reduce.umax.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.umax.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.umax.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated unsigned-integer MAX reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.umax’ intrinsic performs the unsigned-integer MAX +reduction (llvm.vector.reduce.umax) of the +vector operand val on each enabled lane, taking the maximum of that and +the scalar start_value. Disabled lanes are treated as containing the +neutral value 0 (i.e. having no effect on the reduction operation). If the +vector length is zero, the result is the start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i32 @llvm.vp.reduce.umax.v4i32(i32 %start, <4 x i32> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+%reduction = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %masked.a)
+%also.r = call i32 @llvm.umax.i32(i32 %reduction, i32 %start)
+
+
+
+
+
+

‘llvm.vp.reduce.umin.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare i32 @llvm.vp.reduce.umin.v4i32(i32 <start_value>, <4 x i32> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare i16 @llvm.vp.reduce.umin.nxv8i16(i16 <start_value>, <vscale x 8 x i16> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated unsigned-integer MIN reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +integer type equal to the result type. The second operand is the vector on +which the reduction is performed and must be a vector of integer values whose +element type is the result/start type. The third operand is the vector mask and +is a vector of boolean values with the same number of elements as the vector +operand. The fourth operand is the explicit vector length of the operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.umin’ intrinsic performs the unsigned-integer MIN +reduction (llvm.vector.reduce.umin) of the +vector operand val on each enabled lane, taking the minimum of that and the +scalar start_value. Disabled lanes are treated as containing the neutral +value UINT_MAX, or -1 (i.e. having no effect on the reduction +operation). If the vector length is zero, the result is the start value.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call i32 @llvm.vp.reduce.umin.v4i32(i32 %start, <4 x i32> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+%reduction = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %masked.a)
+%also.r = call i32 @llvm.umin.i32(i32 %reduction, i32 %start)
+
+
+
+
+
+

‘llvm.vp.reduce.fmax.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare float @llvm.vp.reduce.fmax.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare double @llvm.vp.reduce.fmax.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point MAX reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +floating-point type equal to the result type. The second operand is the vector +on which the reduction is performed and must be a vector of floating-point +values whose element type is the result/start type. The third operand is the +vector mask and is a vector of boolean values with the same number of elements +as the vector operand. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.fmax’ intrinsic performs the floating-point MAX +reduction (llvm.vector.reduce.fmax) of the +vector operand val on each enabled lane, taking the maximum of that and the +scalar start_value. Disabled lanes are treated as containing the neutral +value (i.e. having no effect on the reduction operation). If the vector length +is zero, the result is the start value.

+

The neutral value is dependent on the fast-math flags. If no +flags are set, the neutral value is -QNAN. If nnan and ninf are +both set, then the neutral value is the smallest floating-point value for the +result type. If only nnan is set then the neutral value is -Infinity.

+

This instruction has the same comparison semantics as the +llvm.vector.reduce.fmax intrinsic (and thus the +‘llvm.maxnum.*’ intrinsic). That is, the result will always be a number +unless all elements of the vector and the starting value are NaN. For a +vector with maximum element magnitude 0.0 and containing both +0.0 and +-0.0 elements, the sign of the result is unspecified.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call float @llvm.vp.reduce.fmax.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float QNAN, float QNAN, float QNAN, float QNAN>
+%reduction = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %masked.a)
+%also.r = call float @llvm.maxnum.f32(float %reduction, float %start)
+
+
+
+
+
+

‘llvm.vp.reduce.fmin.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare float @llvm.vp.reduce.fmin.v4f32(float <start_value>, <4 x float> <val>, <4 x i1> <mask>, i32 <vector_length>)
+declare double @llvm.vp.reduce.fmin.nxv8f64(double <start_value>, <vscale x 8 x double> <val>, <vscale x 8 x i1> <mask>, i32 <vector_length>)
+
+
+
+
+
Overview:
+

Predicated floating-point MIN reduction of a vector and a scalar starting +value, returning the result as a scalar.

+
+
+
Arguments:
+

The first operand is the start value of the reduction, which must be a scalar +floating-point type equal to the result type. The second operand is the vector +on which the reduction is performed and must be a vector of floating-point +values whose element type is the result/start type. The third operand is the +vector mask and is a vector of boolean values with the same number of elements +as the vector operand. The fourth operand is the explicit vector length of the +operation.

+
+
+
Semantics:
+

The ‘llvm.vp.reduce.fmin’ intrinsic performs the floating-point MIN +reduction (llvm.vector.reduce.fmin) of the +vector operand val on each enabled lane, taking the minimum of that and the +scalar start_value. Disabled lanes are treated as containing the neutral +value (i.e. having no effect on the reduction operation). If the vector length +is zero, the result is the start value.

+

The neutral value is dependent on the fast-math flags. If no +flags are set, the neutral value is +QNAN. If nnan and ninf are +both set, then the neutral value is the largest floating-point value for the +result type. If only nnan is set then the neutral value is +Infinity.

+

This instruction has the same comparison semantics as the +llvm.vector.reduce.fmin intrinsic (and thus the +‘llvm.minnum.*’ intrinsic). That is, the result will always be a number +unless all elements of the vector and the starting value are NaN. For a +vector with maximum element magnitude 0.0 and containing both +0.0 and +-0.0 elements, the sign of the result is unspecified.

+

To ignore the start value, the neutral value can be used.

+
+
+
Examples:
+
%r = call float @llvm.vp.reduce.fmin.v4f32(float %start, <4 x float> %a, <4 x i1> %mask, i32 %evl)
+; %r is equivalent to %also.r, where lanes greater than or equal to %evl
+; are treated as though %mask were false for those lanes.
+
+%masked.a = select <4 x i1> %mask, <4 x float> %a, <4 x float> <float QNAN, float QNAN, float QNAN, float QNAN>
+%reduction = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %masked.a)
+%also.r = call float @llvm.minnum.f32(float %reduction, float %start)
+
+
+
+
+
+

‘llvm.get.active.lane.mask.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic.

+
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %base, i32 %n)
+declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %base, i64 %n)
+declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %n)
+declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %base, i64 %n)
+
+
+
+
+
Overview:
+

Create a mask representing active and inactive vector lanes.

+
+
+
Arguments:
+

Both operands have the same scalar integer type. The result is a vector with +the i1 element type.

+
+
+
Semantics:
+

The ‘llvm.get.active.lane.mask.*’ intrinsics are semantically equivalent +to:

+
%m[i] = icmp ult (%base + i), %n
+
+
+

where %m is a vector (mask) of active/inactive lanes with its elements +indexed by i, and %base, %n are the two arguments to +llvm.get.active.lane.mask.*, icmp is an integer compare and ult +the unsigned less-than comparison operator. Overflow cannot occur in +(%base + i) and its comparison against %n as it is performed in integer +numbers and not in machine numbers. If %n is 0, then the result is a +poison value. The above is equivalent to:

+
%m = @llvm.get.active.lane.mask(%base, %n)
+
+
+

This can, for example, be emitted by the loop vectorizer in which case +%base is the first element of the vector induction variable (VIV) and +%n is the loop tripcount. Thus, these intrinsics perform an element-wise +less than comparison of VIV with the loop tripcount, producing a mask of +true/false values representing active/inactive vector lanes, except if the VIV +overflows in which case they return false in the lanes where the VIV overflows. +The arguments are scalar types to accommodate scalable vector types, for which +it is unknown what the type of the step vector needs to be that enumerates its +lanes without overflow.

+

This mask %m can e.g. be used in masked load/store instructions. These +intrinsics provide a hint to the backend. I.e., for a vector loop, the +back-edge taken count of the original scalar loop is explicit as the second +argument.

+
+
+
Examples:
+
%active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %elem0, i64 429)
+%wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+
+
+
+
+
+
+

Masked Vector Load and Store Intrinsics

+

LLVM provides intrinsics for predicated vector load and store operations. The predicate is specified by a mask operand, which holds one bit per vector element, switching the associated vector lane on or off. The memory addresses corresponding to the “off” lanes are not accessed. When all bits of the mask are on, the intrinsic is identical to a regular vector load or store. When all bits are off, no memory is accessed.

+
+

‘llvm.masked.load.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. The loaded data is a vector of any integer, floating-point or pointer data type.

+
declare <16 x float>  @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* <ptr>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
+declare <2 x double>  @llvm.masked.load.v2f64.p0v2f64  (<2 x double>* <ptr>, i32 <alignment>, <2 x i1>  <mask>, <2 x double> <passthru>)
+;; The data is a vector of pointers to double
+declare <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64    (<8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x double*> <passthru>)
+;; The data is a vector of function pointers
+declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f (<8 x i32 ()*>* <ptr>, i32 <alignment>, <8 x i1> <mask>, <8 x i32 ()*> <passthru>)
+
+
+
+
+
Overview:
+

Reads a vector from memory according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. The masked-off lanes in the result vector are taken from the corresponding lanes of the ‘passthru’ operand.

+
+
+
Arguments:
+

The first operand is the base pointer for the load. The second operand is the alignment of the source location. It must be a power of two constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the base pointer and the type of the ‘passthru’ operand are the same vector types.

+
+
+
Semantics:
+

The ‘llvm.masked.load’ intrinsic is designed for conditional reading of selected vector elements in a single IR operation. It is useful for targets that support vector masked loads and allows vectorizing predicated basic blocks on these targets. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar load operations. +The result of this operation is equivalent to a regular vector load instruction followed by a ‘select’ between the loaded and the passthru values, predicated on the same mask. However, using this intrinsic prevents exceptions on memory access to masked-off lanes.

+
%res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32 (<16 x float>* %ptr, i32 4, <16 x i1>%mask, <16 x float> %passthru)
+
+;; The result of the two following instructions is identical aside from potential memory access exception
+%loadlal = load <16 x float>, <16 x float>* %ptr, align 4
+%res = select <16 x i1> %mask, <16 x float> %loadlal, <16 x float> %passthru
+
+
+
+
+
+

‘llvm.masked.store.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating-point or pointer data type.

+
declare void @llvm.masked.store.v8i32.p0v8i32  (<8  x i32>   <value>, <8  x i32>*   <ptr>, i32 <alignment>,  <8  x i1> <mask>)
+declare void @llvm.masked.store.v16f32.p0v16f32 (<16 x float> <value>, <16 x float>* <ptr>, i32 <alignment>,  <16 x i1> <mask>)
+;; The data is a vector of pointers to double
+declare void @llvm.masked.store.v8p0f64.p0v8p0f64    (<8 x double*> <value>, <8 x double*>* <ptr>, i32 <alignment>, <8 x i1> <mask>)
+;; The data is a vector of function pointers
+declare void @llvm.masked.store.v4p0f_i32f.p0v4p0f_i32f (<4 x i32 ()*> <value>, <4 x i32 ()*>* <ptr>, i32 <alignment>, <4 x i1> <mask>)
+
+
+
+
+
Overview:
+

Writes a vector to memory according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes.

+
+
+
Arguments:
+

The first operand is the vector value to be written to memory. The second operand is the base pointer for the store, it has the same underlying type as the value operand. The third operand is the alignment of the destination location. It must be a power of two constant integer value. The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements.

+
+
+
Semantics:
+

The ‘llvm.masked.store’ intrinsic is designed for conditional writing of selected vector elements in a single IR operation. It is useful for targets that support vector masked store and allows vectorizing predicated basic blocks on these targets. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations. +The result of this operation is equivalent to a load-modify-store sequence. However, using this intrinsic prevents exceptions and data races on memory access to masked-off lanes.

+
call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %value, <16 x float>* %ptr, i32 4,  <16 x i1> %mask)
+
+;; The result of the following instructions is identical aside from potential data races and memory access exceptions
+%oldval = load <16 x float>, <16 x float>* %ptr, align 4
+%res = select <16 x i1> %mask, <16 x float> %value, <16 x float> %oldval
+store <16 x float> %res, <16 x float>* %ptr, align 4
+
+
+
+
+
+
+

Masked Vector Gather and Scatter Intrinsics

+

LLVM provides intrinsics for vector gather and scatter operations. They are similar to Masked Vector Load and Store, except they are designed for arbitrary memory accesses, rather than sequential memory accesses. Gather and scatter also employ a mask operand, which holds one bit per vector element, switching the associated vector lane on or off. The memory addresses corresponding to the “off” lanes are not accessed. When all bits are off, no memory is accessed.

+
+

‘llvm.masked.gather.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer, floating-point or pointer data type gathered together into one vector.

+
declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32   (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p1f64     (<2 x double addrspace(1)*> <ptrs>, i32 <alignment>, <2 x i1>  <mask>, <2 x double> <passthru>)
+declare <8 x float*> @llvm.masked.gather.v8p0f32.v8p0p0f32 (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1>  <mask>, <8 x float*> <passthru>)
+
+
+
+
+
Overview:
+

Reads scalar values from arbitrary memory locations and gathers them into one vector. The memory locations are provided in the vector of pointers ‘ptrs’. The memory is accessed according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes. The masked-off lanes in the result vector are taken from the corresponding lanes of the ‘passthru’ operand.

+
+
+
Arguments:
+

The first operand is a vector of pointers which holds all memory addresses to read. The second operand is an alignment of the source addresses. It must be 0 or a power of two constant integer value. The third operand, mask, is a vector of boolean values with the same number of elements as the return type. The fourth is a pass-through value that is used to fill the masked-off lanes of the result. The return type, underlying type of the vector of pointers and the type of the ‘passthru’ operand are the same vector types.

+
+
+
Semantics:
+

The ‘llvm.masked.gather’ intrinsic is designed for conditional reading of multiple scalar values from arbitrary memory locations in a single IR operation. It is useful for targets that support vector masked gathers and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of scalar load operations. +The semantics of this operation are equivalent to a sequence of conditional scalar loads with subsequent gathering of all loaded values into a single vector. The mask restricts memory access to certain lanes and facilitates vectorization of predicated basic blocks.

+
%res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+
+;; The gather with all-true mask is equivalent to the following instruction sequence
+%ptr0 = extractelement <4 x double*> %ptrs, i32 0
+%ptr1 = extractelement <4 x double*> %ptrs, i32 1
+%ptr2 = extractelement <4 x double*> %ptrs, i32 2
+%ptr3 = extractelement <4 x double*> %ptrs, i32 3
+
+%val0 = load double, double* %ptr0, align 8
+%val1 = load double, double* %ptr1, align 8
+%val2 = load double, double* %ptr2, align 8
+%val3 = load double, double* %ptr3, align 8
+
+%vec0    = insertelement <4 x double> undef, double %val0, i32 0
+%vec01   = insertelement <4 x double> %vec0, double %val1, i32 1
+%vec012  = insertelement <4 x double> %vec01, double %val2, i32 2
+%vec0123 = insertelement <4 x double> %vec012, double %val3, i32 3
+
+
+
+
+
+

‘llvm.masked.scatter.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating-point or pointer data type. Each vector element is stored in an arbitrary memory address. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element.

+
declare void @llvm.masked.scatter.v8i32.v8p0i32     (<8 x i32>     <value>, <8 x i32*>     <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
+declare void @llvm.masked.scatter.v16f32.v16p1f32   (<16 x float>  <value>, <16 x float addrspace(1)*>  <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+declare void @llvm.masked.scatter.v4p0f64.v4p0p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1>  <mask>)
+
+
+
+
+
Overview:
+

Writes each element from the value vector to the corresponding memory address. The memory addresses are represented as a vector of pointers. Writing is done according to the provided mask. The mask holds a bit for each vector lane, and is used to prevent memory accesses to the masked-off lanes.

+
+
+
Arguments:
+

The first operand is a vector value to be written to memory. The second operand is a vector of pointers, pointing to where the value elements should be stored. It has the same underlying type as the value operand. The third operand is an alignment of the destination addresses. It must be 0 or a power of two constant integer value. The fourth operand, mask, is a vector of boolean values. The types of the mask and the value operand must have the same number of vector elements.

+
+
+
Semantics:
+

The ‘llvm.masked.scatter’ intrinsic is designed for writing selected vector elements to arbitrary memory addresses in a single IR operation. The operation may be conditional, when not all bits in the mask are switched on. It is useful for targets that support vector masked scatter and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations.

+
;; This instruction unconditionally stores data vector in multiple addresses
+call void @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4,  <8 x i1>  <true, true, .. true>)
+
+;; It is equivalent to a list of scalar stores
+%val0 = extractelement <8 x i32> %value, i32 0
+%val1 = extractelement <8 x i32> %value, i32 1
+..
+%val7 = extractelement <8 x i32> %value, i32 7
+%ptr0 = extractelement <8 x i32*> %ptrs, i32 0
+%ptr1 = extractelement <8 x i32*> %ptrs, i32 1
+..
+%ptr7 = extractelement <8 x i32*> %ptrs, i32 7
+;; Note: the order of the following stores is important when they overlap:
+store i32 %val0, i32* %ptr0, align 4
+store i32 %val1, i32* %ptr1, align 4
+..
+store i32 %val7, i32* %ptr7, align 4
+
+
+
+
+
+
+

Masked Vector Expanding Load and Compressing Store Intrinsics

+

LLVM provides intrinsics for expanding load and compressing store operations. Data selected from a vector according to a mask is stored in consecutive memory addresses (compressed store), and vice-versa (expanding load). These operations effectively map to “if (cond.i) a[j++] = v.i” and “if (cond.i) v.i = a[j++]” patterns, respectively. Note that when the mask starts with ‘1’ bits followed by ‘0’ bits, these operations are identical to llvm.masked.store and llvm.masked.load.

+
+

‘llvm.masked.expandload.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. Several values of integer, floating point or pointer data type are loaded from consecutive memory addresses and stored into the elements of a vector according to the mask.

+
declare <16 x float>  @llvm.masked.expandload.v16f32 (float* <ptr>, <16 x i1> <mask>, <16 x float> <passthru>)
+declare <2 x i64>     @llvm.masked.expandload.v2i64 (i64* <ptr>, <2 x i1>  <mask>, <2 x i64> <passthru>)
+
+
+
+
+
Overview:
+

Reads a number of scalar values sequentially from memory location provided in ‘ptr’ and spreads them in a vector. The ‘mask’ holds a bit for each vector lane. The number of elements read from memory is equal to the number of ‘1’ bits in the mask. The loaded elements are positioned in the destination vector according to the sequence of ‘1’ and ‘0’ bits in the mask. E.g., if the mask vector is ‘10010001’, “expandload” reads 3 values from memory addresses ptr, ptr+1, ptr+2 and places them in lanes 0, 3 and 7 accordingly. The masked-off lanes are filled by elements from the corresponding lanes of the ‘passthru’ operand.

+
+
+
Arguments:
+

The first operand is the base pointer for the load. It has the same underlying type as the element of the returned vector. The second operand, mask, is a vector of boolean values with the same number of elements as the return type. The third is a pass-through value that is used to fill the masked-off lanes of the result. The return type and the type of the ‘passthru’ operand have the same vector type.

+
+
+
Semantics:
+

The ‘llvm.masked.expandload’ intrinsic is designed for reading multiple scalar values from adjacent memory addresses into possibly non-adjacent vector lanes. It is useful for targets that support vector expanding loads and allows vectorizing loops with cross-iteration dependencies like in the following example:

+
// In this loop we load from B and spread the elements into array A.
+double *A, *B; int *C;
+for (int i = 0; i < size; ++i) {
+  if (C[i] != 0)
+    A[i] = B[j++];
+}
+
+
+
; Load several elements from array B and expand them in a vector.
+; The number of loaded elements is equal to the number of '1' elements in the Mask.
+%Tmp = call <8 x double> @llvm.masked.expandload.v8f64(double* %Bptr, <8 x i1> %Mask, <8 x double> undef)
+; Store the result in A
+call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %Tmp, <8 x double>* %Aptr, i32 8, <8 x i1> %Mask)
+
+; %Bptr should be increased on each iteration according to the number of '1' elements in the Mask.
+%MaskI = bitcast <8 x i1> %Mask to i8
+%MaskIPopcnt = call i8 @llvm.ctpop.i8(i8 %MaskI)
+%MaskI64 = zext i8 %MaskIPopcnt to i64
+%BNextInd = add i64 %BInd, %MaskI64
+
+
+

Other targets may support this intrinsic differently, for example, by lowering it into a sequence of conditional scalar load operations and shuffles. +If all mask elements are ‘1’, the intrinsic behavior is equivalent to the regular unmasked vector load.

+
+
+
+

‘llvm.masked.compressstore.*’ Intrinsics

+
+
Syntax:
+

This is an overloaded intrinsic. A number of scalar values of integer, floating point or pointer data type are collected from an input vector and stored into adjacent memory addresses. A mask defines which elements to collect from the vector.

+
declare void @llvm.masked.compressstore.v8i32  (<8  x i32>   <value>, i32*   <ptr>, <8  x i1> <mask>)
+declare void @llvm.masked.compressstore.v16f32 (<16 x float> <value>, float* <ptr>, <16 x i1> <mask>)
+
+
+
+
+
Overview:
+

Selects elements from input vector ‘value’ according to the ‘mask’. All selected elements are written into adjacent memory addresses starting at address ‘ptr’, from lower to higher. The mask holds a bit for each vector lane, and is used to select elements to be stored. The number of elements to be stored is equal to the number of active bits in the mask.

+
+
+
Arguments:
+

The first operand is the input vector, from which elements are collected and written to memory. The second operand is the base pointer for the store, it has the same underlying type as the element of the input vector operand. The third operand is the mask, a vector of boolean values. The mask and the input vector must have the same number of vector elements.

+
+
+
Semantics:
+

The ‘llvm.masked.compressstore’ intrinsic is designed for compressing data in memory. It allows collecting elements from possibly non-adjacent lanes of a vector and storing them contiguously in memory in one IR operation. It is useful for targets that support compressing store operations and allows vectorizing loops with cross-iteration dependences like in the following example:

+
// In this loop we load elements from A and store them consecutively in B
+double *A, *B; int *C;
+for (int i = 0; i < size; ++i) {
+  if (C[i] != 0)
+    B[j++] = A[i];
+}
+
+
+
; Load elements from A.
+%Tmp = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %Aptr, i32 8, <8 x i1> %Mask, <8 x double> undef)
+; Store all selected elements consecutively in array B
+call void @llvm.masked.compressstore.v8f64(<8 x double> %Tmp, double* %Bptr, <8 x i1> %Mask)
+
+; %Bptr should be increased on each iteration according to the number of '1' elements in the Mask.
+%MaskI = bitcast <8 x i1> %Mask to i8
+%MaskIPopcnt = call i8 @llvm.ctpop.i8(i8 %MaskI)
+%MaskI64 = zext i8 %MaskIPopcnt to i64
+%BNextInd = add i64 %BInd, %MaskI64
+
+
+

Other targets may support this intrinsic differently, for example, by lowering it into a sequence of branches that guard scalar store operations.

+
+
+
+
+

Memory Use Markers

+

This class of intrinsics provides information about the +lifetime of memory objects and ranges where variables +are immutable.

+
+

‘llvm.lifetime.start’ Intrinsic

+
+
Syntax:
+
declare void @llvm.lifetime.start(i64 <size>, i8* nocapture <ptr>)
+
+
+
+
+
Overview:
+

The ‘llvm.lifetime.start’ intrinsic specifies the start of a memory +object’s lifetime.

+
+
+
Arguments:
+

The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object.

+
+
+
Semantics:
+

If ptr is a stack-allocated object and it points to the first byte of +the object, the object is initially marked as dead. +ptr is conservatively considered as a non-stack-allocated object if +the stack coloring algorithm that is used in the optimization pipeline cannot +conclude that ptr is a stack-allocated object.

+

After ‘llvm.lifetime.start’, the stack object that ptr points to is marked +as alive and has an uninitialized value. +The stack object is marked as dead when either +llvm.lifetime.end to the alloca is executed or the +function returns.

+

After llvm.lifetime.end is called, +‘llvm.lifetime.start’ on the stack object can be called again. +The second ‘llvm.lifetime.start’ call marks the object as alive, but it +does not change the address of the object.

+

If ptr is a non-stack-allocated object, it does not point to the first +byte of the object or it is a stack object that is already alive, it simply +fills all bytes of the object with poison.

+
+
+
+

‘llvm.lifetime.end’ Intrinsic

+
+
Syntax:
+
declare void @llvm.lifetime.end(i64 <size>, i8* nocapture <ptr>)
+
+
+
+
+
Overview:
+

The ‘llvm.lifetime.end’ intrinsic specifies the end of a memory object’s +lifetime.

+
+
+
Arguments:
+

The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object.

+
+
+
Semantics:
+

If ptr is a stack-allocated object and it points to the first byte of the +object, the object is dead. +ptr is conservatively considered as a non-stack-allocated object if +the stack coloring algorithm that is used in the optimization pipeline cannot +conclude that ptr is a stack-allocated object.

+

Calling llvm.lifetime.end on an already dead alloca is a no-op.

+

If ptr is a non-stack-allocated object or it does not point to the first +byte of the object, it is equivalent to simply filling all bytes of the object +with poison.

+
+
+
+

‘llvm.invariant.start’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. The memory object can belong to any address space.

+
declare {}* @llvm.invariant.start.p0i8(i64 <size>, i8* nocapture <ptr>)
+
+
+
+
+
Overview:
+

The ‘llvm.invariant.start’ intrinsic specifies that the contents of +a memory object will not change.

+
+
+
Arguments:
+

The first argument is a constant integer representing the size of the +object, or -1 if it is variable sized. The second argument is a pointer +to the object.

+
+
+
Semantics:
+

This intrinsic indicates that until an llvm.invariant.end that uses +the return value, the referenced memory location is constant and +unchanging.

+
+
+
+

‘llvm.invariant.end’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. The memory object can belong to any address space.

+
declare void @llvm.invariant.end.p0i8({}* <start>, i64 <size>, i8* nocapture <ptr>)
+
+
+
+
+
Overview:
+

The ‘llvm.invariant.end’ intrinsic specifies that the contents of a +memory object are mutable.

+
+
+
Arguments:
+

The first argument is the matching llvm.invariant.start intrinsic. +The second argument is a constant integer representing the size of the +object, or -1 if it is variable sized and the third argument is a +pointer to the object.

+
+
+
Semantics:
+

This intrinsic indicates that the memory is mutable again.

+
+
+
+

‘llvm.launder.invariant.group’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. The memory object can belong to any address +space. The returned pointer must belong to the same address space as the +argument.

+
declare i8* @llvm.launder.invariant.group.p0i8(i8* <ptr>)
+
+
+
+
+
Overview:
+

The ‘llvm.launder.invariant.group’ intrinsic can be used when an invariant +established by invariant.group metadata no longer holds, to obtain a new +pointer value that carries fresh invariant group information. It is an +experimental intrinsic, which means that its semantics might change in the +future.

+
+
+
Arguments:
+

The llvm.launder.invariant.group takes only one argument, which is a pointer +to the memory.

+
+
+
Semantics:
+

Returns another pointer that aliases its argument but which is considered different +for the purposes of load/store invariant.group metadata. +It does not read any accessible memory and the execution can be speculated.

+
+
+
+

‘llvm.strip.invariant.group’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. The memory object can belong to any address +space. The returned pointer must belong to the same address space as the +argument.

+
declare i8* @llvm.strip.invariant.group.p0i8(i8* <ptr>)
+
+
+
+
+
Overview:
+

The ‘llvm.strip.invariant.group’ intrinsic can be used when an invariant +established by invariant.group metadata no longer holds, to obtain a new pointer +value that does not carry the invariant information. It is an experimental +intrinsic, which means that its semantics might change in the future.

+
+
+
Arguments:
+

The llvm.strip.invariant.group takes only one argument, which is a pointer +to the memory.

+
+
+
Semantics:
+

Returns another pointer that aliases its argument but which has no associated +invariant.group metadata. +It does not read any memory and can be speculated.

+
+
+
+
+

Constrained Floating-Point Intrinsics

+

These intrinsics are used to provide special handling of floating-point +operations when specific rounding mode or floating-point exception behavior is +required. By default, LLVM optimization passes assume that the rounding mode is +round-to-nearest and that floating-point exceptions will not be monitored. +Constrained FP intrinsics are used to support non-default rounding modes and +accurately preserve exception behavior without compromising LLVM’s ability to +optimize FP code when the default behavior is used.

+

If any FP operation in a function is constrained then they all must be +constrained. This is required for correct LLVM IR. Optimizations that +move code around can create miscompiles if mixing of constrained and normal +operations is done. The correct way to mix constrained and less constrained +operations is to use the rounding mode and exception handling metadata to +mark constrained intrinsics as having LLVM’s default behavior.

+

Each of these intrinsics corresponds to a normal floating-point operation. The +data arguments and the return value are the same as the corresponding FP +operation.

+

The rounding mode argument is a metadata string specifying what +assumptions, if any, the optimizer can make when transforming constant +values. Some constrained FP intrinsics omit this argument. If required +by the intrinsic, this argument must be one of the following strings:

+
"round.dynamic"
+"round.tonearest"
+"round.downward"
+"round.upward"
+"round.towardzero"
+"round.tonearestaway"
+
+
+

If this argument is “round.dynamic” optimization passes must assume that the +rounding mode is unknown and may change at runtime. No transformations that +depend on rounding mode may be performed in this case.

+

The other possible values for the rounding mode argument correspond to the +similarly named IEEE rounding modes. If the argument is any of these values +optimization passes may perform transformations as long as they are consistent +with the specified rounding mode.

+

For example, ‘x-0’->’x’ is not a valid transformation if the rounding mode is +“round.downward” or “round.dynamic” because if the value of ‘x’ is +0 then +‘x-0’ should evaluate to ‘-0’ when rounding downward. However, this +transformation is legal for all other rounding modes.

+

For values other than “round.dynamic” optimization passes may assume that the +actual runtime rounding mode (as defined in a target-specific manner) matches +the specified rounding mode, but this is not guaranteed. Using a specific +non-dynamic rounding mode which does not match the actual rounding mode at +runtime results in undefined behavior.

+

The exception behavior argument is a metadata string describing the floating +point exception semantics that are required for the intrinsic. This argument +must be one of the following strings:

+
"fpexcept.ignore"
+"fpexcept.maytrap"
+"fpexcept.strict"
+
+
+

If this argument is “fpexcept.ignore” optimization passes may assume that the +exception status flags will not be read and that floating-point exceptions will +be masked. This allows transformations to be performed that may change the +exception semantics of the original code. For example, FP operations may be +speculatively executed in this case whereas they must not be for either of the +other possible values of this argument.

+

If the exception behavior argument is “fpexcept.maytrap” optimization passes +must avoid transformations that may raise exceptions that would not have been +raised by the original code (such as speculatively executing FP operations), but +passes are not required to preserve all exceptions that are implied by the +original code. For example, exceptions may be potentially hidden by constant +folding.

+

If the exception behavior argument is “fpexcept.strict” all transformations must +strictly preserve the floating-point exception semantics of the original code. +Any FP exception that would have been raised by the original code must be raised +by the transformed code, and the transformed code must not raise any FP +exceptions that would not have been raised by the original code. This is the +exception behavior argument that will be used if the code being compiled reads +the FP exception status flags, but this mode can also be used with code that +unmasks FP exceptions.

+

The number and order of floating-point exceptions is NOT guaranteed. For +example, a series of FP operations that each may raise exceptions may be +vectorized into a single instruction that raises each unique exception a single +time.

+

Proper function attributes usage is required for the +constrained intrinsics to function correctly.

+

All function calls done in a function that uses constrained floating +point intrinsics must have the strictfp attribute.

+

All function definitions that use constrained floating point intrinsics +must have the strictfp attribute.

+
+

‘llvm.experimental.constrained.fadd’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.fadd(<type> <op1>, <type> <op2>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fadd’ intrinsic returns the sum of its +two operands.

+
+
+
Arguments:
+

The first two arguments to the ‘llvm.experimental.constrained.fadd’ +intrinsic must be floating-point or vector +of floating-point values. Both arguments must have identical types.

+

The third and fourth arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

The value produced is the floating-point sum of the two value operands and has +the same type as the operands.

+
+
+
+

‘llvm.experimental.constrained.fsub’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.fsub(<type> <op1>, <type> <op2>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fsub’ intrinsic returns the difference +of its two operands.

+
+
+
Arguments:
+

The first two arguments to the ‘llvm.experimental.constrained.fsub’ +intrinsic must be floating-point or vector +of floating-point values. Both arguments must have identical types.

+

The third and fourth arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

The value produced is the floating-point difference of the two value operands +and has the same type as the operands.

+
+
+
+

‘llvm.experimental.constrained.fmul’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.fmul(<type> <op1>, <type> <op2>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fmul’ intrinsic returns the product of +its two operands.

+
+
+
Arguments:
+

The first two arguments to the ‘llvm.experimental.constrained.fmul’ +intrinsic must be floating-point or vector +of floating-point values. Both arguments must have identical types.

+

The third and fourth arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

The value produced is the floating-point product of the two value operands and +has the same type as the operands.

+
+
+
+

‘llvm.experimental.constrained.fdiv’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.fdiv(<type> <op1>, <type> <op2>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fdiv’ intrinsic returns the quotient of +its two operands.

+
+
+
Arguments:
+

The first two arguments to the ‘llvm.experimental.constrained.fdiv’ +intrinsic must be floating-point or vector +of floating-point values. Both arguments must have identical types.

+

The third and fourth arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

The value produced is the floating-point quotient of the two value operands and +has the same type as the operands.

+
+
+
+

‘llvm.experimental.constrained.frem’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.frem(<type> <op1>, <type> <op2>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.frem’ intrinsic returns the remainder +from the division of its two operands.

+
+
+
Arguments:
+

The first two arguments to the ‘llvm.experimental.constrained.frem’ +intrinsic must be floating-point or vector +of floating-point values. Both arguments must have identical types.

+

The third and fourth arguments specify the rounding mode and exception +behavior as described above. The rounding mode argument has no effect, since +the result of frem is never rounded, but the argument is included for +consistency with the other constrained floating-point intrinsics.

+
+
+
Semantics:
+

The value produced is the floating-point remainder from the division of the two +value operands and has the same type as the operands. The remainder has the +same sign as the dividend.

+
+
+
+

‘llvm.experimental.constrained.fma’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.fma(<type> <op1>, <type> <op2>, <type> <op3>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fma’ intrinsic returns the result of a +fused-multiply-add operation on its operands.

+
+
+
Arguments:
+

The first three arguments to the ‘llvm.experimental.constrained.fma’ +intrinsic must be floating-point or vector of floating-point values. All arguments must have identical types.

+

The fourth and fifth arguments specify the rounding mode and exception behavior +as described above.

+
+
+
Semantics:
+

The result produced is the product of the first two operands added to the third +operand computed with infinite precision, and then rounded to the target +precision.

+
+
+
+

‘llvm.experimental.constrained.fptoui’ Intrinsic

+
+
Syntax:
+
declare <ty2>
+@llvm.experimental.constrained.fptoui(<type> <value>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fptoui’ intrinsic converts a +floating-point value to its unsigned integer equivalent of type ty2.

+
+
+
Arguments:
+

The first argument to the ‘llvm.experimental.constrained.fptoui’ +intrinsic must be floating point or vector of floating point values.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

The result produced is an unsigned integer converted from the floating +point operand. The value is truncated, so it is rounded towards zero.

+
+
+
+

‘llvm.experimental.constrained.fptosi’ Intrinsic

+
+
Syntax:
+
declare <ty2>
+@llvm.experimental.constrained.fptosi(<type> <value>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fptosi’ intrinsic converts a +floating-point value to its signed integer equivalent of type ty2.

+
+
+
Arguments:
+

The first argument to the ‘llvm.experimental.constrained.fptosi’ +intrinsic must be floating point or vector of floating point values.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

The result produced is a signed integer converted from the floating +point operand. The value is truncated, so it is rounded towards zero.

+
+
+
+

‘llvm.experimental.constrained.uitofp’ Intrinsic

+
+
Syntax:
+
declare <ty2>
+@llvm.experimental.constrained.uitofp(<type> <value>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.uitofp’ intrinsic converts an +unsigned integer value to a floating-point value of type ty2.

+
+
+
Arguments:
+

The first argument to the ‘llvm.experimental.constrained.uitofp’ +intrinsic must be an integer or vector of integer values.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

An inexact floating-point exception will be raised if rounding is required. +Any result produced is a floating point value converted from the input +integer operand.

+
+
+
+

‘llvm.experimental.constrained.sitofp’ Intrinsic

+
+
Syntax:
+
declare <ty2>
+@llvm.experimental.constrained.sitofp(<type> <value>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.sitofp’ intrinsic converts a +signed integer value to a floating-point value of type ty2.

+
+
+
Arguments:
+

The first argument to the ‘llvm.experimental.constrained.sitofp’ +intrinsic must be an integer or vector of integer values.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

An inexact floating-point exception will be raised if rounding is required. +Any result produced is a floating point value converted from the input +integer operand.

+
+
+
+

‘llvm.experimental.constrained.fptrunc’ Intrinsic

+
+
Syntax:
+
declare <ty2>
+@llvm.experimental.constrained.fptrunc(<type> <value>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fptrunc’ intrinsic truncates value +to type ty2.

+
+
+
Arguments:
+

The first argument to the ‘llvm.experimental.constrained.fptrunc’ +intrinsic must be floating point or vector of floating point values. This argument must be larger in size +than the result.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

The result produced is a floating point value truncated to be smaller in size +than the operand.

+
+
+
+

‘llvm.experimental.constrained.fpext’ Intrinsic

+
+
Syntax:
+
declare <ty2>
+@llvm.experimental.constrained.fpext(<type> <value>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fpext’ intrinsic extends a +floating-point value to a larger floating-point value.

+
+
+
Arguments:
+

The first argument to the ‘llvm.experimental.constrained.fpext’ +intrinsic must be floating point or vector of floating point values. This argument must be smaller in size +than the result.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

The result produced is a floating point value extended to be larger in size +than the operand. All restrictions that apply to the fpext instruction also +apply to this intrinsic.

+
+
+
+

‘llvm.experimental.constrained.fcmp’ and ‘llvm.experimental.constrained.fcmps’ Intrinsics

+
+
Syntax:
+
declare <ty2>
+@llvm.experimental.constrained.fcmp(<type> <op1>, <type> <op2>,
+                                    metadata <condition code>,
+                                    metadata <exception behavior>)
+declare <ty2>
+@llvm.experimental.constrained.fcmps(<type> <op1>, <type> <op2>,
+                                     metadata <condition code>,
+                                     metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fcmp’ and +‘llvm.experimental.constrained.fcmps’ intrinsics return a boolean +value or vector of boolean values based on comparison of their operands.

+

If the operands are floating-point scalars, then the result type is a +boolean (i1).

+

If the operands are floating-point vectors, then the result type is a +vector of boolean with the same number of elements as the operands being +compared.

+

The ‘llvm.experimental.constrained.fcmp’ intrinsic performs a quiet +comparison operation while the ‘llvm.experimental.constrained.fcmps’ +intrinsic performs a signaling comparison operation.

+
+
+
Arguments:
+

The first two arguments to the ‘llvm.experimental.constrained.fcmp’ +and ‘llvm.experimental.constrained.fcmps’ intrinsics must be +floating-point or vector +of floating-point values. Both arguments must have identical types.

+

The third argument is the condition code indicating the kind of comparison +to perform. It must be a metadata string with one of the following values:

+
    +
  • “oeq”: ordered and equal

  • +
  • “ogt”: ordered and greater than

  • +
  • “oge”: ordered and greater than or equal

  • +
  • “olt”: ordered and less than

  • +
  • “ole”: ordered and less than or equal

  • +
  • “one”: ordered and not equal

  • +
  • “ord”: ordered (no nans)

  • +
  • “ueq”: unordered or equal

  • +
  • “ugt”: unordered or greater than

  • +
  • “uge”: unordered or greater than or equal

  • +
  • “ult”: unordered or less than

  • +
  • “ule”: unordered or less than or equal

  • +
  • “une”: unordered or not equal

  • +
  • “uno”: unordered (either nans)

  • +
+

Ordered means that neither operand is a NAN while unordered means +that either operand may be a NAN.

+

The fourth argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

op1 and op2 are compared according to the condition code given +as the third argument. If the operands are vectors, then the +vectors are compared element by element. Each comparison performed +always yields an i1 result, as follows:

+
    +
  • “oeq”: yields true if both operands are not a NAN and op1 +is equal to op2.

  • +
  • “ogt”: yields true if both operands are not a NAN and op1 +is greater than op2.

  • +
  • “oge”: yields true if both operands are not a NAN and op1 +is greater than or equal to op2.

  • +
  • “olt”: yields true if both operands are not a NAN and op1 +is less than op2.

  • +
  • “ole”: yields true if both operands are not a NAN and op1 +is less than or equal to op2.

  • +
  • “one”: yields true if both operands are not a NAN and op1 +is not equal to op2.

  • +
  • “ord”: yields true if both operands are not a NAN.

  • +
  • “ueq”: yields true if either operand is a NAN or op1 is +equal to op2.

  • +
  • “ugt”: yields true if either operand is a NAN or op1 is +greater than op2.

  • +
  • “uge”: yields true if either operand is a NAN or op1 is +greater than or equal to op2.

  • +
  • “ult”: yields true if either operand is a NAN or op1 is +less than op2.

  • +
  • “ule”: yields true if either operand is a NAN or op1 is +less than or equal to op2.

  • +
  • “une”: yields true if either operand is a NAN or op1 is +not equal to op2.

  • +
  • “uno”: yields true if either operand is a NAN.

  • +
+

The quiet comparison operation performed by +‘llvm.experimental.constrained.fcmp’ will only raise an exception +if either operand is a SNAN. The signaling comparison operation +performed by ‘llvm.experimental.constrained.fcmps’ will raise an +exception if either operand is a NAN (QNAN or SNAN). Such an exception +does not preclude a result being produced (e.g. exception might only +set a flag), therefore the distinction between ordered and unordered +comparisons is also relevant for the +‘llvm.experimental.constrained.fcmps’ intrinsic.

+
+
+
+

‘llvm.experimental.constrained.fmuladd’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.fmuladd(<type> <op1>, <type> <op2>,
+                                       <type> <op3>,
+                                       metadata <rounding mode>,
+                                       metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.fmuladd’ intrinsic represents +multiply-add expressions that can be fused if the code generator determines +that (a) the target instruction set has support for a fused operation, +and (b) that the fused operation is more efficient than the equivalent, +separate pair of mul and add instructions.

+
+
+
Arguments:
+

The first three arguments to the ‘llvm.experimental.constrained.fmuladd’ +intrinsic must be floating-point or vector of floating-point values. +All three arguments must have identical types.

+

The fourth and fifth arguments specify the rounding mode and exception behavior +as described above.

+
+
+
Semantics:
+

The expression:

+
%0 = call float @llvm.experimental.constrained.fmuladd.f32(%a, %b, %c,
+                                                           metadata <rounding mode>,
+                                                           metadata <exception behavior>)
+
+
+

is equivalent to the expression:

+
%0 = call float @llvm.experimental.constrained.fmul.f32(%a, %b,
+                                                        metadata <rounding mode>,
+                                                        metadata <exception behavior>)
+%1 = call float @llvm.experimental.constrained.fadd.f32(%0, %c,
+                                                        metadata <rounding mode>,
+                                                        metadata <exception behavior>)
+
+
+

except that it is unspecified whether rounding will be performed between the +multiplication and addition steps. Fusion is not guaranteed, even if the target +platform supports it. +If a fused multiply-add is required, the corresponding +llvm.experimental.constrained.fma intrinsic function should be +used instead. +This never sets errno, just like ‘llvm.experimental.constrained.fma.*’.

+
+
+
+
+

Constrained libm-equivalent Intrinsics

+

In addition to the basic floating-point operations for which constrained +intrinsics are described above, there are constrained versions of various +operations which provide equivalent behavior to a corresponding libm function. +These intrinsics allow the precise behavior of these operations with respect to +rounding mode and exception behavior to be controlled.

+

As with the basic constrained floating-point intrinsics, the rounding mode +and exception behavior arguments only control the behavior of the optimizer. +They do not change the runtime floating-point environment.

+
+

‘llvm.experimental.constrained.sqrt’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.sqrt(<type> <op1>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.sqrt’ intrinsic returns the square root +of the specified value, returning the same value as the libm ‘sqrt’ +functions would, but without setting errno.

+
+
+
Arguments:
+

The first argument and the return type are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the nonnegative square root of the specified value. +If the value is less than negative zero, a floating-point exception occurs +and the return value is architecture specific.

+
+
+
+

‘llvm.experimental.constrained.pow’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.pow(<type> <op1>, <type> <op2>,
+                                   metadata <rounding mode>,
+                                   metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.pow’ intrinsic returns the first operand +raised to the (positive or negative) power specified by the second operand.

+
+
+
Arguments:
+

The first two arguments and the return value are floating-point numbers of the +same type. The second argument specifies the power to which the first argument +should be raised.

+

The third and fourth arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the first value raised to the second power, +returning the same values as the libm pow functions would, and +handles error conditions in the same way.

+
+
+
+

‘llvm.experimental.constrained.powi’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.powi(<type> <op1>, i32 <op2>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.powi’ intrinsic returns the first operand +raised to the (positive or negative) power specified by the second operand. The +order of evaluation of multiplications is not defined. When a vector of +floating-point type is used, the second argument remains a scalar integer value.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type. The second argument is a 32-bit signed integer specifying the power to +which the first argument should be raised.

+

The third and fourth arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the first value raised to the second power with an +unspecified sequence of rounding operations.

+
+
+
+

llvm.experimental.constrained.sin’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.sin(<type> <op1>,
+                                   metadata <rounding mode>,
+                                   metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.sin’ intrinsic returns the sine of the +first operand.

+
+
+
Arguments:
+

The first argument and the return type are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the sine of the specified operand, returning the +same values as the libm sin functions would, and handles error +conditions in the same way.

+
+
+
+

llvm.experimental.constrained.cos’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.cos(<type> <op1>,
+                                   metadata <rounding mode>,
+                                   metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.cos’ intrinsic returns the cosine of the +first operand.

+
+
+
Arguments:
+

The first argument and the return type are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the cosine of the specified operand, returning the +same values as the libm cos functions would, and handles error +conditions in the same way.

+
+
+
+

llvm.experimental.constrained.exp’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.exp(<type> <op1>,
+                                   metadata <rounding mode>,
+                                   metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.exp’ intrinsic computes the base-e +exponential of the specified value.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm exp functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.exp2’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.exp2(<type> <op1>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.exp2’ intrinsic computes the base-2 +exponential of the specified value.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm exp2 functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.log’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.log(<type> <op1>,
+                                   metadata <rounding mode>,
+                                   metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.log’ intrinsic computes the base-e +logarithm of the specified value.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm log functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.log10’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.log10(<type> <op1>,
+                                     metadata <rounding mode>,
+                                     metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.log10’ intrinsic computes the base-10 +logarithm of the specified value.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm log10 functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.log2’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.log2(<type> <op1>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.log2’ intrinsic computes the base-2 +logarithm of the specified value.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm log2 functions +would, and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.rint’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.rint(<type> <op1>,
+                                    metadata <rounding mode>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.rint’ intrinsic returns the first +operand rounded to the nearest integer. It may raise an inexact floating-point +exception if the operand is not an integer.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm rint functions +would, and handles error conditions in the same way. The rounding mode is +described, not determined, by the rounding mode argument. The actual rounding +mode is determined by the runtime floating-point environment. The rounding +mode argument is only intended as information to the compiler.

+
+
+
+

llvm.experimental.constrained.lrint’ Intrinsic

+
+
Syntax:
+
declare <inttype>
+@llvm.experimental.constrained.lrint(<fptype> <op1>,
+                                     metadata <rounding mode>,
+                                     metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.lrint’ intrinsic returns the first +operand rounded to the nearest integer. An inexact floating-point exception +will be raised if the operand is not an integer. An invalid exception is +raised if the result is too large to fit into a supported integer type, +and in this case the result is undefined.

+
+
+
Arguments:
+

The first argument is a floating-point number. The return value is an +integer type. Not all types are supported on all targets. The supported +types are the same as the llvm.lrint intrinsic and the lrint +libm functions.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm lrint functions +would, and handles error conditions in the same way.

+

The rounding mode is described, not determined, by the rounding mode +argument. The actual rounding mode is determined by the runtime floating-point +environment. The rounding mode argument is only intended as information +to the compiler.

+

If the runtime floating-point environment is using the default rounding mode +then the results will be the same as the llvm.lrint intrinsic.

+
+
+
+

llvm.experimental.constrained.llrint’ Intrinsic

+
+
Syntax:
+
declare <inttype>
+@llvm.experimental.constrained.llrint(<fptype> <op1>,
+                                      metadata <rounding mode>,
+                                      metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.llrint’ intrinsic returns the first +operand rounded to the nearest integer. An inexact floating-point exception +will be raised if the operand is not an integer. An invalid exception is +raised if the result is too large to fit into a supported integer type, +and in this case the result is undefined.

+
+
+
Arguments:
+

The first argument is a floating-point number. The return value is an +integer type. Not all types are supported on all targets. The supported +types are the same as the llvm.llrint intrinsic and the llrint +libm functions.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm llrint functions +would, and handles error conditions in the same way.

+

The rounding mode is described, not determined, by the rounding mode +argument. The actual rounding mode is determined by the runtime floating-point +environment. The rounding mode argument is only intended as information +to the compiler.

+

If the runtime floating-point environment is using the default rounding mode +then the results will be the same as the llvm.llrint intrinsic.

+
+
+
+

llvm.experimental.constrained.nearbyint’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.nearbyint(<type> <op1>,
+                                         metadata <rounding mode>,
+                                         metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.nearbyint’ intrinsic returns the first +operand rounded to the nearest integer. It will not raise an inexact +floating-point exception if the operand is not an integer.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second and third arguments specify the rounding mode and exception +behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm nearbyint functions +would, and handles error conditions in the same way. The rounding mode is +described, not determined, by the rounding mode argument. The actual rounding +mode is determined by the runtime floating-point environment. The rounding +mode argument is only intended as information to the compiler.

+
+
+
+

llvm.experimental.constrained.maxnum’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.maxnum(<type> <op1>, <type> <op2>,
+                                      metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.maxnum’ intrinsic returns the maximum +of the two arguments.

+
+
+
Arguments:
+

The first two arguments and the return value are floating-point numbers +of the same type.

+

The third argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function follows the IEEE-754 semantics for maxNum.

+
+
+
+

llvm.experimental.constrained.minnum’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.minnum(<type> <op1>, <type> <op2>,
+                                      metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.minnum’ intrinsic returns the minimum +of the two arguments.

+
+
+
Arguments:
+

The first two arguments and the return value are floating-point numbers +of the same type.

+

The third argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function follows the IEEE-754 semantics for minNum.

+
+
+
+

llvm.experimental.constrained.maximum’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.maximum(<type> <op1>, <type> <op2>,
+                                       metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.maximum’ intrinsic returns the maximum +of the two arguments, propagating NaNs and treating -0.0 as less than +0.0.

+
+
+
Arguments:
+

The first two arguments and the return value are floating-point numbers +of the same type.

+

The third argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function follows the semantics specified in the draft of IEEE 754-2018.

+
+
+
+

llvm.experimental.constrained.minimum’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.minimum(<type> <op1>, <type> <op2>,
+                                       metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.minimum’ intrinsic returns the minimum +of the two arguments, propagating NaNs and treating -0.0 as less than +0.0.

+
+
+
Arguments:
+

The first two arguments and the return value are floating-point numbers +of the same type.

+

The third argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function follows the semantics specified in the draft of IEEE 754-2018.

+
+
+
+

llvm.experimental.constrained.ceil’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.ceil(<type> <op1>,
+                                    metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.ceil’ intrinsic returns the ceiling of the +first operand.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm ceil functions +would and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.floor’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.floor(<type> <op1>,
+                                     metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.floor’ intrinsic returns the floor of the +first operand.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm floor functions +would and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.round’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.round(<type> <op1>,
+                                     metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.round’ intrinsic returns the first +operand rounded to the nearest integer.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm round functions +would and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.roundeven’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.roundeven(<type> <op1>,
+                                         metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.roundeven’ intrinsic returns the first +operand rounded to the nearest integer in floating-point format, rounding +halfway cases to even (that is, to the nearest value that is an even integer), +regardless of the current rounding direction.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function implements IEEE-754 operation roundToIntegralTiesToEven. It +also behaves in the same way as the C standard function roundeven and can signal +the invalid operation exception for a SNAN operand.

+
+
+
+

llvm.experimental.constrained.lround’ Intrinsic

+
+
Syntax:
+
declare <inttype>
+@llvm.experimental.constrained.lround(<fptype> <op1>,
+                                      metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.lround’ intrinsic returns the first +operand rounded to the nearest integer with ties away from zero. It will +raise an inexact floating-point exception if the operand is not an integer. +An invalid exception is raised if the result is too large to fit into a +supported integer type, and in this case the result is undefined.

+
+
+
Arguments:
+

The first argument is a floating-point number. The return value is an +integer type. Not all types are supported on all targets. The supported +types are the same as the llvm.lround intrinsic and the lround +libm functions.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm lround functions +would and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.llround’ Intrinsic

+
+
Syntax:
+
declare <inttype>
+@llvm.experimental.constrained.llround(<fptype> <op1>,
+                                       metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.llround’ intrinsic returns the first +operand rounded to the nearest integer with ties away from zero. It will +raise an inexact floating-point exception if the operand is not an integer. +An invalid exception is raised if the result is too large to fit into a +supported integer type, and in this case the result is undefined.

+
+
+
Arguments:
+

The first argument is a floating-point number. The return value is an +integer type. Not all types are supported on all targets. The supported +types are the same as the llvm.llround intrinsic and the llround +libm functions.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm llround functions +would and handles error conditions in the same way.

+
+
+
+

llvm.experimental.constrained.trunc’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.experimental.constrained.trunc(<type> <op1>,
+                                     metadata <exception behavior>)
+
+
+
+
+
Overview:
+

The ‘llvm.experimental.constrained.trunc’ intrinsic returns the first +operand rounded to the nearest integer not larger in magnitude than the +operand.

+
+
+
Arguments:
+

The first argument and the return value are floating-point numbers of the same +type.

+

The second argument specifies the exception behavior as described above.

+
+
+
Semantics:
+

This function returns the same values as the libm trunc functions +would and handles error conditions in the same way.

+
+
+
+

llvm.experimental.noalias.scope.decl’ Intrinsic

+
+
Syntax:
+
declare void @llvm.experimental.noalias.scope.decl(metadata !id.scope.list)
+
+
+
+
+
Overview:
+

The llvm.experimental.noalias.scope.decl intrinsic identifies where a +noalias scope is declared. When the intrinsic is duplicated, a decision must +also be made about the scope: depending on the reason of the duplication, +the scope might need to be duplicated as well.

+
+
+
Arguments:
+

The !id.scope.list argument is metadata that is a list of noalias +metadata references. The format is identical to that required for noalias +metadata. This list must have exactly one element.

+
+
+
Semantics:
+

The llvm.experimental.noalias.scope.decl intrinsic identifies where a +noalias scope is declared. When the intrinsic is duplicated, a decision must +also be made about the scope: depending on the reason of the duplication, +the scope might need to be duplicated as well.

+

For example, when the intrinsic is used inside a loop body, and that loop is +unrolled, the associated noalias scope must also be duplicated. Otherwise, the +noalias property it signifies would spill across loop iterations, whereas it +was only valid within a single iteration.

+
; This example shows two possible positions for noalias.decl and how they impact the semantics:
+; If it is outside the loop (Version 1), then %a and %b are noalias across *all* iterations.
+; If it is inside the loop (Version 2), then %a and %b are noalias only within *one* iteration.
+define void @decl_in_loop(i8* %a.base, i8* %b.base) {
+entry:
+  ; call void @llvm.experimental.noalias.scope.decl(metadata !2) ; Version 1: noalias decl outside loop
+  br label %loop
+
+loop:
+  %a = phi i8* [ %a.base, %entry ], [ %a.inc, %loop ]
+  %b = phi i8* [ %b.base, %entry ], [ %b.inc, %loop ]
+  ; call void @llvm.experimental.noalias.scope.decl(metadata !2) ; Version 2: noalias decl inside loop
+  %val = load i8, i8* %a, !alias.scope !2
+  store i8 %val, i8* %b, !noalias !2
+  %a.inc = getelementptr inbounds i8, i8* %a, i64 1
+  %b.inc = getelementptr inbounds i8, i8* %b, i64 1
+  %cond = call i1 @cond()
+  br i1 %cond, label %loop, label %exit
+
+exit:
+  ret void
+}
+
+!0 = !{!0} ; domain
+!1 = !{!1, !0} ; scope
+!2 = !{!1} ; scope list
+
+
+

Multiple calls to @llvm.experimental.noalias.scope.decl for the same scope +are possible, but one should never dominate another. Violations are pointed out +by the verifier as they indicate a problem in either a transformation pass or +the input.

+
+
+
+
+

Floating Point Environment Manipulation intrinsics

+

These functions read or write the floating point environment, such as the rounding +mode or the state of floating point exceptions. Altering the floating point +environment requires special care. See Floating Point Environment.

+
+

llvm.flt.rounds’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.flt.rounds()
+
+
+
+
+
Overview:
+

The ‘llvm.flt.rounds’ intrinsic reads the current rounding mode.

+
+
+
Semantics:
+

The ‘llvm.flt.rounds’ intrinsic returns the current rounding mode. +Encoding of the returned values is the same as the result of FLT_ROUNDS, +specified by the C standard:

+
0  - toward zero
+1  - to nearest, ties to even
+2  - toward positive infinity
+3  - toward negative infinity
+4  - to nearest, ties away from zero
+
+
+

Other values may be used to represent additional rounding modes, supported by a +target. These values are target-specific.

+
+
+
+

llvm.set.rounding’ Intrinsic

+
+
Syntax:
+
declare void @llvm.set.rounding(i32 <val>)
+
+
+
+
+
Overview:
+

The ‘llvm.set.rounding’ intrinsic sets the current rounding mode.

+
+
+
Arguments:
+

The argument is the required rounding mode. Encoding of rounding mode is +the same as used by ‘llvm.flt.rounds’.

+
+
+
Semantics:
+

The ‘llvm.set.rounding’ intrinsic sets the current rounding mode. It is +similar to the C library function ‘fesetround’; however, this intrinsic does not +return any value and uses a platform-independent representation of IEEE rounding +modes.

+
+
+
+
+

General Intrinsics

+

This class of intrinsics is designed to be generic and has no specific +purpose.

+
+

llvm.var.annotation’ Intrinsic

+
+
Syntax:
+
declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32  <int>)
+
+
+
+
+
Overview:
+

The ‘llvm.var.annotation’ intrinsic.

+
+
+
Arguments:
+

The first argument is a pointer to a value, the second is a pointer to a +global string, the third is a pointer to a global string which is the +source file name, and the last argument is the line number.

+
+
+
Semantics:
+

This intrinsic allows annotation of local variables with arbitrary +strings. This can be useful for special purpose optimizations that want +to look for these annotations. These have no other defined use; they are +ignored by code generation and optimization.

+
+
+
+

llvm.ptr.annotation.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use ‘llvm.ptr.annotation’ on a +pointer to an integer of any width. NOTE you must specify an address space for +the pointer. The identifier for the default address space is the integer +‘0’.

+
declare i8*   @llvm.ptr.annotation.p<address space>i8(i8* <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i16*  @llvm.ptr.annotation.p<address space>i16(i16* <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i32*  @llvm.ptr.annotation.p<address space>i32(i32* <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i64*  @llvm.ptr.annotation.p<address space>i64(i64* <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i256* @llvm.ptr.annotation.p<address space>i256(i256* <val>, i8* <str>, i8* <str>, i32  <int>)
+
+
+
+
+
Overview:
+

The ‘llvm.ptr.annotation’ intrinsic.

+
+
+
Arguments:
+

The first argument is a pointer to an integer value of arbitrary bitwidth +(result of some expression), the second is a pointer to a global string, the +third is a pointer to a global string which is the source file name, and the +last argument is the line number. It returns the value of the first argument.

+
+
+
Semantics:
+

This intrinsic allows annotation of a pointer to an integer with arbitrary +strings. This can be useful for special purpose optimizations that want to look +for these annotations. These have no other defined use; they are ignored by code +generation and optimization.

+
+
+
+

llvm.annotation.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use ‘llvm.annotation’ on +any integer bit width.

+
declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32  <int>)
+declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32  <int>)
+
+
+
+
+
Overview:
+

The ‘llvm.annotation’ intrinsic.

+
+
+
Arguments:
+

The first argument is an integer value (result of some expression), the +second is a pointer to a global string, the third is a pointer to a +global string which is the source file name, and the last argument is +the line number. It returns the value of the first argument.

+
+
+
Semantics:
+

This intrinsic allows annotations to be put on arbitrary expressions +with arbitrary strings. This can be useful for special purpose +optimizations that want to look for these annotations. These have no +other defined use; they are ignored by code generation and optimization.

+
+
+
+

llvm.codeview.annotation’ Intrinsic

+
+
Syntax:
+

This annotation emits a label at its program point and an associated +S_ANNOTATION codeview record with some additional string metadata. This is +used to implement MSVC’s __annotation intrinsic. It is marked +noduplicate, so calls to this intrinsic prevent inlining and should be +considered expensive.

+
declare void @llvm.codeview.annotation(metadata)
+
+
+
+
+
Arguments:
+

The argument should be an MDTuple containing any number of MDStrings.

+
+
+
+

llvm.trap’ Intrinsic

+
+
Syntax:
+
declare void @llvm.trap() cold noreturn nounwind
+
+
+
+
+
Overview:
+

The ‘llvm.trap’ intrinsic.

+
+
+
Arguments:
+

None.

+
+
+
Semantics:
+

This intrinsic is lowered to the target dependent trap instruction. If +the target does not have a trap instruction, this intrinsic will be +lowered to a call of the abort() function.

+
+
+
+

llvm.debugtrap’ Intrinsic

+
+
Syntax:
+
declare void @llvm.debugtrap() nounwind
+
+
+
+
+
Overview:
+

The ‘llvm.debugtrap’ intrinsic.

+
+
+
Arguments:
+

None.

+
+
+
Semantics:
+

This intrinsic is lowered to code which is intended to cause an +execution trap with the intention of requesting the attention of a +debugger.

+
+
+
+

llvm.ubsantrap’ Intrinsic

+
+
Syntax:
+
declare void @llvm.ubsantrap(i8 immarg) cold noreturn nounwind
+
+
+
+
+
Overview:
+

The ‘llvm.ubsantrap’ intrinsic.

+
+
+
Arguments:
+

An integer describing the kind of failure detected.

+
+
+
Semantics:
+

This intrinsic is lowered to code which is intended to cause an execution trap, +embedding the argument into encoding of that trap somehow to discriminate +crashes if possible.

+

Equivalent to @llvm.trap for targets that do not support this behaviour.

+
+
+
+

llvm.stackprotector’ Intrinsic

+
+
Syntax:
+
declare void @llvm.stackprotector(i8* <guard>, i8** <slot>)
+
+
+
+
+
Overview:
+

The llvm.stackprotector intrinsic takes the guard and stores it +onto the stack at slot. The stack slot is adjusted to ensure that it +is placed on the stack before local variables.

+
+
+
Arguments:
+

The llvm.stackprotector intrinsic requires two pointer arguments. +The first argument is the value loaded from the stack guard +@__stack_chk_guard. The second variable is an alloca that has +enough space to hold the value of the guard.

+
+
+
Semantics:
+

This intrinsic causes the prologue/epilogue inserter to force the position of +the AllocaInst stack slot to be before local variables on the stack. This is +to ensure that if a local variable on the stack is overwritten, it will destroy +the value of the guard. When the function exits, the guard on the stack is +checked against the original guard by llvm.stackprotectorcheck. If they are +different, then llvm.stackprotectorcheck causes the program to abort by +calling the __stack_chk_fail() function.

+
+
+
+

llvm.stackguard’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.stackguard()
+
+
+
+
+
Overview:
+

The llvm.stackguard intrinsic returns the system stack guard value.

+

It should not be generated by frontends, since it is only for internal usage. +The reason why we create this intrinsic is that we still support IR form Stack +Protector in FastISel.

+
+
+
Arguments:
+

None.

+
+
+
Semantics:
+

On some platforms, the value returned by this intrinsic remains unchanged +between loads in the same thread. On other platforms, it returns the same +global variable value, if any, e.g. @__stack_chk_guard.

+

Currently some platforms have IR-level customized stack guard loading (e.g. +X86 Linux) that is not handled by llvm.stackguard(), although it should be +in the future.

+
+
+
+

llvm.objectsize’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.objectsize.i32(i8* <object>, i1 <min>, i1 <nullunknown>, i1 <dynamic>)
+declare i64 @llvm.objectsize.i64(i8* <object>, i1 <min>, i1 <nullunknown>, i1 <dynamic>)
+
+
+
+
+
Overview:
+

The llvm.objectsize intrinsic is designed to provide information to the +optimizer to determine whether a) an operation (like memcpy) will overflow a +buffer that corresponds to an object, or b) that a runtime check for overflow +isn’t necessary. An object in this context means an allocation of a specific +class, structure, array, or other object.

+
+
+
Arguments:
+

The llvm.objectsize intrinsic takes four arguments. The first argument is a +pointer to or into the object. The second argument determines whether +llvm.objectsize returns 0 (if true) or -1 (if false) when the object size is +unknown. The third argument controls how llvm.objectsize acts when null +in address space 0 is used as its pointer argument. If it’s false, +llvm.objectsize reports 0 bytes available when given null. Otherwise, if +the null is in a non-zero address space or if true is given for the +third argument of llvm.objectsize, we assume its size is unknown. The fourth +argument to llvm.objectsize determines if the value should be evaluated at +runtime.

+

The second, third, and fourth arguments only accept constants.

+
+
+
Semantics:
+

The llvm.objectsize intrinsic is lowered to a value representing the size of +the object concerned. If the size cannot be determined, llvm.objectsize +returns i32/i64 -1 or 0 (depending on the min argument).

+
+
+
+

llvm.expect’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.expect on any +integer bit width.

+
declare i1 @llvm.expect.i1(i1 <val>, i1 <expected_val>)
+declare i32 @llvm.expect.i32(i32 <val>, i32 <expected_val>)
+declare i64 @llvm.expect.i64(i64 <val>, i64 <expected_val>)
+
+
+
+
+
Overview:
+

The llvm.expect intrinsic provides information about expected (the +most probable) value of val, which can be used by optimizers.

+
+
+
Arguments:
+

The llvm.expect intrinsic takes two arguments. The first argument is +a value. The second argument is an expected value.

+
+
+
Semantics:
+

This intrinsic is lowered to the val.

+
+
+
+

llvm.expect.with.probability’ Intrinsic

+
+
Syntax:
+

This intrinsic is similar to llvm.expect. This is an overloaded intrinsic. +You can use llvm.expect.with.probability on any integer bit width.

+
declare i1 @llvm.expect.with.probability.i1(i1 <val>, i1 <expected_val>, double <prob>)
+declare i32 @llvm.expect.with.probability.i32(i32 <val>, i32 <expected_val>, double <prob>)
+declare i64 @llvm.expect.with.probability.i64(i64 <val>, i64 <expected_val>, double <prob>)
+
+
+
+
+
Overview:
+

The llvm.expect.with.probability intrinsic provides information about +the expected value of val with probability (or confidence) prob, which can +be used by optimizers.

+
+
+
Arguments:
+

The llvm.expect.with.probability intrinsic takes three arguments. The first +argument is a value. The second argument is an expected value. The third +argument is a probability.

+
+
+
Semantics:
+

This intrinsic is lowered to the val.

+
+
+
+

llvm.assume’ Intrinsic

+
+
Syntax:
+
declare void @llvm.assume(i1 %cond)
+
+
+
+
+
Overview:
+

The llvm.assume intrinsic allows the optimizer to assume that the provided +condition is true. This information can then be used in simplifying other parts +of the code.

+

More complex assumptions can be encoded as +assume operand bundles.

+
+
+
Arguments:
+

The argument of the call is the condition which the optimizer may assume is +always true.

+
+
+
Semantics:
+

The intrinsic allows the optimizer to assume that the provided condition is +always true whenever the control flow reaches the intrinsic call. No code is +generated for this intrinsic, and instructions that contribute only to the +provided condition are not used for code generation. If the condition is +violated during execution, the behavior is undefined.

+

Note that the optimizer might limit the transformations performed on values +used by the llvm.assume intrinsic in order to preserve the instructions +only used to form the intrinsic’s input argument. This might prove undesirable +if the extra information provided by the llvm.assume intrinsic does not cause +sufficient overall improvement in code quality. For this reason, +llvm.assume should not be used to document basic mathematical invariants +that the optimizer can otherwise deduce or facts that are of little use to the +optimizer.

+
+
+
+

llvm.ssa.copy’ Intrinsic

+
+
Syntax:
+
declare type @llvm.ssa.copy(type %operand) returned(1) readnone
+
+
+
+
+
Arguments:
+

The first argument is an operand which is used as the returned value.

+
+
+
Overview:
+

The llvm.ssa.copy intrinsic can be used to attach information to +operations by copying them and giving them new names. For example, +the PredicateInfo utility uses it to build Extended SSA form, and +attach various forms of information to operands that dominate specific +uses. It is not meant for general use, only for building temporary +renaming forms that require value splits at certain points.

+
+
+
+

llvm.type.test’ Intrinsic

+
+
Syntax:
+
declare i1 @llvm.type.test(i8* %ptr, metadata %type) nounwind readnone
+
+
+
+
+
Arguments:
+

The first argument is a pointer to be tested. The second argument is a +metadata object representing a type identifier.

+
+
+
Overview:
+

The llvm.type.test intrinsic tests whether the given pointer is associated +with the given type identifier.

+
+
+
+

llvm.type.checked.load’ Intrinsic

+
+
Syntax:
+
declare {i8*, i1} @llvm.type.checked.load(i8* %ptr, i32 %offset, metadata %type) argmemonly nounwind readonly
+
+
+
+
+
Arguments:
+

The first argument is a pointer from which to load a function pointer. The +second argument is the byte offset from which to load the function pointer. The +third argument is a metadata object representing a type identifier.

+
+
+
Overview:
+

The llvm.type.checked.load intrinsic safely loads a function pointer from a +virtual table pointer using type metadata. This intrinsic is used to implement +control flow integrity in conjunction with virtual call optimization. The +virtual call optimization pass will optimize away llvm.type.checked.load +intrinsics associated with devirtualized calls, thereby removing the type +check in cases where it is not needed to enforce the control flow integrity +constraint.

+

If the given pointer is associated with a type metadata identifier, this +function returns true as the second element of its return value. (Note that +the function may also return true if the given pointer is not associated +with a type metadata identifier.) If the function’s return value’s second +element is true, the following rules apply to the first element:

+
    +
  • If the given pointer is associated with the given type metadata identifier, +it is the function pointer loaded from the given byte offset from the given +pointer.

  • +
  • If the given pointer is not associated with the given type metadata +identifier, it is one of the following (the choice of which is unspecified):

    +
      +
    1. The function pointer that would have been loaded from an arbitrarily chosen +(through an unspecified mechanism) pointer associated with the type +metadata.

    2. +
    3. If the function has a non-void return type, a pointer to a function that +returns an unspecified value without causing side effects.

    4. +
    +
  • +
+

If the function’s return value’s second element is false, the value of the +first element is undefined.

+
+
+
+

llvm.arithmetic.fence’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.arithmetic.fence(<type> <op>)
+
+
+
+
+
Overview:
+

The purpose of the llvm.arithmetic.fence intrinsic +is to prevent the optimizer from performing fast-math optimizations, +particularly reassociation, +between the argument and the expression that contains the argument. +It can be used to preserve the parentheses in the source language.

+
+
+
Arguments:
+

The llvm.arithmetic.fence intrinsic takes only one argument. +The argument and the return value are floating-point numbers, +or vector floating-point numbers, of the same type.

+
+
+
Semantics:
+

This intrinsic returns the value of its operand. The optimizer can optimize +the argument, but the optimizer cannot hoist any component of the operand +to the containing context, and the optimizer cannot move the calculation of +any expression in the containing context into the operand.

+
+
+
+

llvm.donothing’ Intrinsic

+
+
Syntax:
+
declare void @llvm.donothing() nounwind readnone
+
+
+
+
+
Overview:
+

The llvm.donothing intrinsic doesn’t perform any operation. It’s one of only +three intrinsics (besides llvm.experimental.patchpoint and +llvm.experimental.gc.statepoint) that can be called with an invoke +instruction.

+
+
+
Arguments:
+

None.

+
+
+
Semantics:
+

This intrinsic does nothing, and it’s removed by optimizers and ignored +by codegen.

+
+
+
+

llvm.experimental.deoptimize’ Intrinsic

+
+
Syntax:
+
declare type @llvm.experimental.deoptimize(...) [ "deopt"(...) ]
+
+
+
+
+
Overview:
+

This intrinsic, together with deoptimization operand bundles, allows frontends to express transfer of control and +frame-local state from the currently executing (typically more specialized, +hence faster) version of a function into another (typically more generic, hence +slower) version.

+

In languages with a fully integrated managed runtime like Java and JavaScript +this intrinsic can be used to implement “uncommon trap” or “side exit” like +functionality. In unmanaged languages like C and C++, this intrinsic can be +used to represent the slow paths of specialized functions.

+
+
+
Arguments:
+

The intrinsic takes an arbitrary number of arguments, whose meaning is +decided by the lowering strategy.

+
+
+
Semantics:
+

The @llvm.experimental.deoptimize intrinsic executes an attached +deoptimization continuation (denoted using a deoptimization +operand bundle) and returns the value returned by +the deoptimization continuation. Defining the semantic properties of +the continuation itself is out of scope of the language reference – +as far as LLVM is concerned, the deoptimization continuation can +invoke arbitrary side effects, including reading from and writing to +the entire heap.

+

Deoptimization continuations expressed using "deopt" operand bundles always +continue execution to the end of the physical frame containing them, so all +calls to @llvm.experimental.deoptimize must be in “tail position”:

+
+
    +
  • @llvm.experimental.deoptimize cannot be invoked.

  • +
  • The call must immediately precede a ret instruction.

  • +
  • The ret instruction must return the value produced by the +@llvm.experimental.deoptimize call if there is one, or void.

  • +
+
+

Note that the above restrictions imply that the return type for a call to +@llvm.experimental.deoptimize will match the return type of its immediate +caller.

+

The inliner composes the "deopt" continuations of the caller into the +"deopt" continuations present in the inlinee, and also updates calls to this +intrinsic to return directly from the frame of the function it inlined into.

+

All declarations of @llvm.experimental.deoptimize must share the +same calling convention.

+
+
+
Lowering:
+

Calls to @llvm.experimental.deoptimize are lowered to calls to the +symbol __llvm_deoptimize (it is the frontend’s responsibility to +ensure that this symbol is defined). The call arguments to +@llvm.experimental.deoptimize are lowered as if they were formal +arguments of the specified types, and not as varargs.

+
+
+
+

llvm.experimental.guard’ Intrinsic

+
+
Syntax:
+
declare void @llvm.experimental.guard(i1, ...) [ "deopt"(...) ]
+
+
+
+
+
Overview:
+

This intrinsic, together with deoptimization operand bundles, allows frontends to express guards or checks on +optimistic assumptions made during compilation. The semantics of +@llvm.experimental.guard is defined in terms of +@llvm.experimental.deoptimize – its body is defined to be +equivalent to:

+
define void @llvm.experimental.guard(i1 %pred, <args...>) {
+  %realPred = and i1 %pred, undef
+  br i1 %realPred, label %continue, label %leave [, !make.implicit !{}]
+
+leave:
+  call void @llvm.experimental.deoptimize(<args...>) [ "deopt"() ]
+  ret void
+
+continue:
+  ret void
+}
+
+
+

with the optional [, !make.implicit !{}] present if and only if it +is present on the call site. For more details on !make.implicit, +see FaultMaps and implicit checks.

+

In words, @llvm.experimental.guard executes the attached +"deopt" continuation if (but not only if) its first argument +is false. Since the optimizer is allowed to replace the undef +with an arbitrary value, it can optimize guard to fail “spuriously”, +i.e. without the original condition being false (hence the “not only +if”); and this allows for “check widening” type optimizations.

+

@llvm.experimental.guard cannot be invoked.

+

After @llvm.experimental.guard was first added, a more general +formulation was found in @llvm.experimental.widenable.condition. +Support for @llvm.experimental.guard is slowly being rephrased in +terms of this alternate.

+
+
+
+

llvm.experimental.widenable.condition’ Intrinsic

+
+
Syntax:
+
declare i1 @llvm.experimental.widenable.condition()
+
+
+
+
+
Overview:
+

This intrinsic represents a “widenable condition”, which is +a boolean expression with the following property: whether this +expression is true or false, the program is correct and +well-defined.

+

Together with deoptimization operand bundles, +@llvm.experimental.widenable.condition allows frontends to +express guards or checks on optimistic assumptions made during +compilation and represent them as branch instructions on special +conditions.

+

While this may appear similar in semantics to undef, it is very +different in that an invocation produces a particular, singular +value. It is also intended to be lowered late, and remain available +for specific optimizations and transforms that can benefit from its +special properties.

+
+
+
Arguments:
+

None.

+
+
+
Semantics:
+

The intrinsic @llvm.experimental.widenable.condition() +returns either true or false. For each evaluation of a call +to this intrinsic, the program must be valid and correct both if +it returns true and if it returns false. This allows +transformation passes to replace evaluations of this intrinsic +with either value whenever one is beneficial.

+

When used in a branch condition, it allows us to choose between +two alternative correct solutions for the same problem, as +in the example below:

+
  %cond = call i1 @llvm.experimental.widenable.condition()
+  br i1 %cond, label %fast_path, label %slow_path
+
+label %fast_path:
+  ; Apply memory-consuming but fast solution for a task.
+
+label %slow_path:
+  ; Cheap in memory but slow solution.
+
+
+

Whether the result of the intrinsic’s call is true or false, +it should be correct to pick either solution. We can switch +between them by replacing the result of +@llvm.experimental.widenable.condition with different +i1 expressions.

+

This is how it can be used to represent guards as widenable branches:

+
block:
+  ; Unguarded instructions
+  call void @llvm.experimental.guard(i1 %cond, <args...>) ["deopt"(<deopt_args...>)]
+  ; Guarded instructions
+
+
+

Can be expressed in an alternative equivalent form of explicit branch using +@llvm.experimental.widenable.condition:

+
block:
+  ; Unguarded instructions
+  %widenable_condition = call i1 @llvm.experimental.widenable.condition()
+  %guard_condition = and i1 %cond, %widenable_condition
+  br i1 %guard_condition, label %guarded, label %deopt
+
+guarded:
+  ; Guarded instructions
+
+deopt:
+  call type @llvm.experimental.deoptimize(<args...>) [ "deopt"(<deopt_args...>) ]
+
+
+

So the block guarded is only reachable when %cond is true, +and it should be valid to go to the block deopt whenever %cond +is true or false.

+

@llvm.experimental.widenable.condition will never throw, thus +it cannot be invoked.

+
+
+
Guard widening:
+

When @llvm.experimental.widenable.condition() is used in +condition of a guard represented as explicit branch, it is +legal to widen the guard’s condition with any additional +conditions.

+

Guard widening looks like replacement of

+
%widenable_cond = call i1 @llvm.experimental.widenable.condition()
+%guard_cond = and i1 %cond, %widenable_cond
+br i1 %guard_cond, label %guarded, label %deopt
+
+
+

with

+
%widenable_cond = call i1 @llvm.experimental.widenable.condition()
+%new_cond = and i1 %any_other_cond, %widenable_cond
+%new_guard_cond = and i1 %cond, %new_cond
+br i1 %new_guard_cond, label %guarded, label %deopt
+
+
+

for this branch. Here %any_other_cond is an arbitrarily chosen +well-defined i1 value. By performing guard widening, we may +impose stricter conditions on the guarded block and bail to the +deopt block when the new condition is not met.

+
+
+
Lowering:
+

The default lowering strategy is to replace the result of +a call to @llvm.experimental.widenable.condition with +constant true. However, it is always correct to replace +it with any other i1 value. Any pass can +freely do it if it can benefit from non-default lowering.

+
+
+
+

llvm.load.relative’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.load.relative.iN(i8* %ptr, iN %offset) argmemonly nounwind readonly
+
+
+
+
+
Overview:
+

This intrinsic loads a 32-bit value from the address %ptr + %offset, +adds %ptr to that value and returns it. The constant folder specifically +recognizes the form of this intrinsic and the constant initializers it may +load from; if a loaded constant initializer is known to have the form +i32 trunc(x - %ptr), the intrinsic call is folded to x.

+

LLVM provides that the calculation of such a constant initializer will +not overflow at link time under the medium code model if x is an +unnamed_addr function. However, it does not provide this guarantee for +a constant initializer folded into a function body. This intrinsic can be +used to avoid the possibility of overflows when loading from such a constant.

+
+
+
+

llvm.sideeffect’ Intrinsic

+
+
Syntax:
+
declare void @llvm.sideeffect() inaccessiblememonly nounwind
+
+
+
+
+
Overview:
+

The llvm.sideeffect intrinsic doesn’t perform any operation. Optimizers +treat it as having side effects, so it can be inserted into a loop to +indicate that the loop shouldn’t be assumed to terminate (which could +potentially lead to the loop being optimized away entirely), even if it’s +an infinite loop with no other side effects.

+
+
+
Arguments:
+

None.

+
+
+
Semantics:
+

This intrinsic actually does nothing, but optimizers must assume that it +has externally observable side effects.

+
+
+
+

llvm.is.constant.*’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.is.constant with any argument type.

+
declare i1 @llvm.is.constant.i32(i32 %operand) nounwind readnone
+declare i1 @llvm.is.constant.f32(float %operand) nounwind readnone
+declare i1 @llvm.is.constant.TYPENAME(TYPE %operand) nounwind readnone
+
+
+
+
+
Overview:
+

The ‘llvm.is.constant’ intrinsic will return true if the argument +is known to be a manifest compile-time constant. It is guaranteed to +fold to either true or false before generating machine code.

+
+
+
Semantics:
+

This intrinsic generates no code. If its argument is known to be a +manifest compile-time constant value, then the intrinsic will be +converted to a constant true value. Otherwise, it will be converted to +a constant false value.

+

In particular, note that if the argument is a constant expression +which refers to a global (the address of which _is_ a constant, but +not manifest during the compile), then the intrinsic evaluates to +false.

+

The result also intentionally depends on the result of optimization +passes – e.g., the result can change depending on whether a +function gets inlined or not. A function’s parameters are +obviously not constant. However, a call like +llvm.is.constant.i32(i32 %param) can return true after the +function is inlined, if the value passed to the function parameter was +a constant.

+

On the other hand, if constant folding is not run, it will never +evaluate to true, even in simple cases.

+
+
+
+

llvm.ptrmask’ Intrinsic

+
+
Syntax:
+
declare ptrty @llvm.ptrmask(ptrty %ptr, intty %mask) readnone speculatable
+
+
+
+
+
Arguments:
+

The first argument is a pointer. The second argument is an integer.

+
+
+
Overview:
+

The llvm.ptrmask intrinsic masks out bits of the pointer according to a mask. +This allows stripping data from tagged pointers without converting them to an +integer (ptrtoint/inttoptr). As a consequence, we can preserve more information +to facilitate alias analysis and underlying-object detection.

+
+
+
Semantics:
+

The result of ptrmask(ptr, mask) is equivalent to +getelementptr ptr, (ptrtoint(ptr) & mask) - ptrtoint(ptr). Both the returned +pointer and the first argument are based on the same underlying object (for more +information on the based on terminology see +the pointer aliasing rules). If the bitwidth of the +mask argument does not match the pointer size of the target, the mask is +zero-extended or truncated accordingly.

+
+
+
+

llvm.vscale’ Intrinsic

+
+
Syntax:
+
declare i32 @llvm.vscale.i32()
+declare i64 @llvm.vscale.i64()
+
+
+
+
+
Overview:
+

The llvm.vscale intrinsic returns the value for vscale in scalable +vectors such as <vscale x 16 x i8>.

+
+
+
Semantics:
+

vscale is a positive value that is constant throughout program +execution, but is unknown at compile time. +If the result value does not fit in the result type, then the result is +a poison value.

+
+
+
+
+

Stack Map Intrinsics

+

LLVM provides experimental intrinsics to support runtime patching +mechanisms commonly desired in dynamic language JITs. These intrinsics +are described in Stack maps and patch points in LLVM.

+
+
+

Element Wise Atomic Memory Intrinsics

+

These intrinsics are similar to the standard library memory intrinsics except +that they perform memory transfer as a sequence of atomic memory accesses.

+
+

llvm.memcpy.element.unordered.atomic’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memcpy.element.unordered.atomic on +any integer bit width and for different address spaces. Not all targets +support all bit widths however.

+
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* <dest>,
+                                                                 i8* <src>,
+                                                                 i32 <len>,
+                                                                 i32 <element_size>)
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* <dest>,
+                                                                 i8* <src>,
+                                                                 i64 <len>,
+                                                                 i32 <element_size>)
+
+
+
+
+
Overview:
+

The ‘llvm.memcpy.element.unordered.atomic.*’ intrinsic is a specialization of the +‘llvm.memcpy.*’ intrinsic. It differs in that the dest and src are treated +as arrays with elements that are exactly element_size bytes, and the copy between +buffers uses a sequence of unordered atomic load/store operations +that are a positive integer multiple of the element_size in size.

+
+
+
Arguments:
+

The first three arguments are the same as they are in the @llvm.memcpy +intrinsic, with the added constraint that len is required to be a positive integer +multiple of the element_size. If len is not a positive integer multiple of +element_size, then the behaviour of the intrinsic is undefined.

+

element_size must be a compile-time constant positive power of two no greater than +a target-specific atomic access size limit.

+

For each of the input pointers the align parameter attribute must be specified. It +must be a power of two no less than the element_size. Caller guarantees that +both the source and destination pointers are aligned to that boundary.

+
+
+
Semantics:
+

The ‘llvm.memcpy.element.unordered.atomic.*’ intrinsic copies len bytes of +memory from the source location to the destination location. These locations are not +allowed to overlap. The memory copy is performed as a sequence of load/store operations +where each access is guaranteed to be a multiple of element_size bytes wide and +aligned at an element_size boundary.

+

The order of the copy is unspecified. The same value may be read from the source +buffer many times, but only one write is issued to the destination buffer per +element. It is well defined to have concurrent reads and writes to both source and +destination provided those reads and writes are unordered atomic when specified.

+

This intrinsic does not provide any additional ordering guarantees over those +provided by a set of unordered loads from the source location and stores to the +destination.

+
+
+
Lowering:
+

In the most general case, a call to the ‘llvm.memcpy.element.unordered.atomic.*’ intrinsic is +lowered to a call to the symbol __llvm_memcpy_element_unordered_atomic_*, where ‘*’ +is replaced with an actual element size. See RewriteStatepointsForGC intrinsic +lowering for details on GC specific +lowering.

+

The optimizer is allowed to inline the memory copy when it’s profitable to do so.

+
+
+
+

llvm.memmove.element.unordered.atomic’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use +llvm.memmove.element.unordered.atomic on any integer bit width and for +different address spaces. Not all targets support all bit widths however.

+
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* <dest>,
+                                                                  i8* <src>,
+                                                                  i32 <len>,
+                                                                  i32 <element_size>)
+declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* <dest>,
+                                                                  i8* <src>,
+                                                                  i64 <len>,
+                                                                  i32 <element_size>)
+
+
+
+
+
Overview:
+

The ‘llvm.memmove.element.unordered.atomic.*’ intrinsic is a specialization +of the ‘llvm.memmove.*’ intrinsic. It differs in that the dest and +src are treated as arrays with elements that are exactly element_size +bytes, and the copy between buffers uses a sequence of +unordered atomic load/store operations that are a positive +integer multiple of the element_size in size.

+
+
+
Arguments:
+

The first three arguments are the same as they are in the +@llvm.memmove intrinsic, with the added constraint that +len is required to be a positive integer multiple of the element_size. +If len is not a positive integer multiple of element_size, then the +behaviour of the intrinsic is undefined.

+

element_size must be a compile-time constant positive power of two no +greater than a target-specific atomic access size limit.

+

For each of the input pointers the align parameter attribute must be +specified. It must be a power of two no less than the element_size. Caller +guarantees that both the source and destination pointers are aligned to that +boundary.

+
+
+
Semantics:
+

The ‘llvm.memmove.element.unordered.atomic.*’ intrinsic copies len bytes +of memory from the source location to the destination location. These locations +are allowed to overlap. The memory copy is performed as a sequence of load/store +operations where each access is guaranteed to be a multiple of element_size +bytes wide and aligned at an element_size boundary.

+

The order of the copy is unspecified. The same value may be read from the source +buffer many times, but only one write is issued to the destination buffer per +element. It is well defined to have concurrent reads and writes to both source +and destination provided those reads and writes are unordered atomic when +specified.

+

This intrinsic does not provide any additional ordering guarantees over those +provided by a set of unordered loads from the source location and stores to the +destination.

+
+
+
Lowering:
+

In the most general case, a call to the +‘llvm.memmove.element.unordered.atomic.*’ intrinsic is lowered to a call to the symbol +__llvm_memmove_element_unordered_atomic_*, where ‘*’ is replaced with an +actual element size. See RewriteStatepointsForGC intrinsic lowering for details on GC specific +lowering.

+

The optimizer is allowed to inline the memory copy when it’s profitable to do so.

+
+
+
+

llvm.memset.element.unordered.atomic’ Intrinsic

+
+
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memset.element.unordered.atomic on +any integer bit width and for different address spaces. Not all targets +support all bit widths however.

+
declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* <dest>,
+                                                            i8 <value>,
+                                                            i32 <len>,
+                                                            i32 <element_size>)
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* <dest>,
+                                                            i8 <value>,
+                                                            i64 <len>,
+                                                            i32 <element_size>)
+
+
+
+
+
Overview:
+

The ‘llvm.memset.element.unordered.atomic.*’ intrinsic is a specialization of the +‘llvm.memset.*’ intrinsic. It differs in that the dest is treated as an array +with elements that are exactly element_size bytes, and the assignment to that array +uses a sequence of unordered atomic store operations +that are a positive integer multiple of the element_size in size.

+
+
+
Arguments:
+

The first three arguments are the same as they are in the @llvm.memset +intrinsic, with the added constraint that len is required to be a positive integer +multiple of the element_size. If len is not a positive integer multiple of +element_size, then the behaviour of the intrinsic is undefined.

+

element_size must be a compile-time constant positive power of two no greater than +a target-specific atomic access size limit.

+

The dest input pointer must have the align parameter attribute specified. It +must be a power of two no less than the element_size. Caller guarantees that +the destination pointer is aligned to that boundary.

+
+
+
Semantics:
+

The ‘llvm.memset.element.unordered.atomic.*’ intrinsic sets the len bytes of +memory starting at the destination location to the given value. The memory is +set with a sequence of store operations where each access is guaranteed to be a +multiple of element_size bytes wide and aligned at an element_size boundary.

+

The order of the assignment is unspecified. Only one write is issued to the +destination buffer per element. It is well defined to have concurrent reads and +writes to the destination provided those reads and writes are unordered atomic +when specified.

+

This intrinsic does not provide any additional ordering guarantees over those +provided by a set of unordered stores to the destination.

+
+
+
Lowering:
+

In the most general case, a call to the ‘llvm.memset.element.unordered.atomic.*’ intrinsic is +lowered to a call to the symbol __llvm_memset_element_unordered_atomic_*, where ‘*’ +is replaced with an actual element size.

+

The optimizer is allowed to inline the memory assignment when it’s profitable to do so.

+
+
+
+
+

Objective-C ARC Runtime Intrinsics

+

LLVM provides intrinsics that lower to Objective-C ARC runtime entry points. +LLVM is aware of the semantics of these functions, and optimizes based on that +knowledge. You can read more about the details of Objective-C ARC here.

+
+

llvm.objc.autorelease’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.autorelease(i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_autorelease.

+
+
+
+

llvm.objc.autoreleasePoolPop’ Intrinsic

+
+
Syntax:
+
declare void @llvm.objc.autoreleasePoolPop(i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_autoreleasePoolPop.

+
+
+
+

llvm.objc.autoreleasePoolPush’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.autoreleasePoolPush()
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_autoreleasePoolPush.

+
+
+
+

llvm.objc.autoreleaseReturnValue’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.autoreleaseReturnValue(i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_autoreleaseReturnValue.

+
+
+
+

llvm.objc.copyWeak’ Intrinsic

+
+
Syntax:
+
declare void @llvm.objc.copyWeak(i8**, i8**)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_copyWeak.

+
+
+
+

llvm.objc.destroyWeak’ Intrinsic

+
+
Syntax:
+
declare void @llvm.objc.destroyWeak(i8**)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_destroyWeak.

+
+
+
+

llvm.objc.initWeak’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.initWeak(i8**, i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_initWeak.

+
+
+
+

llvm.objc.loadWeak’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.loadWeak(i8**)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_loadWeak.

+
+
+
+

llvm.objc.loadWeakRetained’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.loadWeakRetained(i8**)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_loadWeakRetained.

+
+
+
+

llvm.objc.moveWeak’ Intrinsic

+
+
Syntax:
+
declare void @llvm.objc.moveWeak(i8**, i8**)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_moveWeak.

+
+
+
+

llvm.objc.release’ Intrinsic

+
+
Syntax:
+
declare void @llvm.objc.release(i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_release.

+
+
+
+

llvm.objc.retain’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.retain(i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_retain.

+
+
+
+

llvm.objc.retainAutorelease’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.retainAutorelease(i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_retainAutorelease.

+
+
+
+

‘llvm.objc.retainAutoreleaseReturnValue’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.retainAutoreleaseReturnValue(i8*)
+
+
+
+ +
+
+

‘llvm.objc.retainAutoreleasedReturnValue’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.retainAutoreleasedReturnValue(i8*)
+
+
+
+ +
+
+

‘llvm.objc.retainBlock’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.retainBlock(i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_retainBlock.

+
+
+
+

‘llvm.objc.storeStrong’ Intrinsic

+
+
Syntax:
+
declare void @llvm.objc.storeStrong(i8**, i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_storeStrong.

+
+
+
+

‘llvm.objc.storeWeak’ Intrinsic

+
+
Syntax:
+
declare i8* @llvm.objc.storeWeak(i8**, i8*)
+
+
+
+
+
Lowering:
+

Lowers to a call to objc_storeWeak.

+
+
+
+

Preserving Debug Information Intrinsics

+

These intrinsics are used to carry certain debuginfo together with +IR-level operations. For example, it may be desirable to +know the structure/union name and the original user-level field +indices. Such information is lost in the IR GetElementPtr instruction +since the IR types are different from debugInfo types and unions +are converted to structs in IR.

+
+
+

‘llvm.preserve.array.access.index’ Intrinsic

+
+
Syntax:
+
declare <ret_type>
+@llvm.preserve.array.access.index.p0s_union.anons.p0a10s_union.anons(<type> base,
+                                                                     i32 dim,
+                                                                     i32 index)
+
+
+
+
+
Overview:
+

The ‘llvm.preserve.array.access.index’ intrinsic returns the getelementptr address +based on array base base, array dimension dim and the last access index index +into the array. The return type ret_type is a pointer type to the array element. +The array dim and index are preserved which is more robust than +getelementptr instruction which may be subject to compiler transformation. +The llvm.preserve.access.index type of metadata is attached to this call instruction +to provide array or pointer debuginfo type. +The metadata is a DICompositeType or DIDerivedType representing the +debuginfo version of type.

+
+
+
Arguments:
+

The base is the array base address. The dim is the array dimension. +The base is a pointer if dim equals 0. +The index is the last access index into the array or pointer.

+

The base argument must be annotated with an elementtype attribute at the call-site. This attribute specifies the +getelementptr element type.

+
+
+
Semantics:
+

The ‘llvm.preserve.array.access.index’ intrinsic produces the same result +as a getelementptr with base base and access operands {dim's 0's, index}.

+
+
+
+

‘llvm.preserve.union.access.index’ Intrinsic

+
+
Syntax:
+
declare <type>
+@llvm.preserve.union.access.index.p0s_union.anons.p0s_union.anons(<type> base,
+                                                                  i32 di_index)
+
+
+
+
+
Overview:
+

The ‘llvm.preserve.union.access.index’ intrinsic carries the debuginfo field index +di_index and returns the base address. +The llvm.preserve.access.index type of metadata is attached to this call instruction +to provide union debuginfo type. +The metadata is a DICompositeType representing the debuginfo version of type. +The return type type is the same as the base type.

+
+
+
Arguments:
+

The base is the union base address. The di_index is the field index in debuginfo.

+
+
+
Semantics:
+

The ‘llvm.preserve.union.access.index’ intrinsic returns the base address.

+
+
+
+

‘llvm.preserve.struct.access.index’ Intrinsic

+
+
Syntax:
+
declare <ret_type>
+@llvm.preserve.struct.access.index.p0i8.p0s_struct.anon.0s(<type> base,
+                                                           i32 gep_index,
+                                                           i32 di_index)
+
+
+
+
+
Overview:
+

The ‘llvm.preserve.struct.access.index’ intrinsic returns the getelementptr address +based on struct base base and IR struct member index gep_index. +The llvm.preserve.access.index type of metadata is attached to this call instruction +to provide struct debuginfo type. +The metadata is a DICompositeType representing the debuginfo version of type. +The return type ret_type is a pointer type to the structure member.

+
+
+
Arguments:
+

The base is the structure base address. The gep_index is the struct member index +based on IR structures. The di_index is the struct member index based on debuginfo.

+

The base argument must be annotated with an elementtype attribute at the call-site. This attribute specifies the +getelementptr element type.

+
+
+
Semantics:
+

The ‘llvm.preserve.struct.access.index’ intrinsic produces the same result +as a getelementptr with base base and access operands {0, gep_index}.

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Lexicon.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Lexicon.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Lexicon.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Lexicon.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,416 @@ + + + + + + + + + The LLVM Lexicon — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The LLVM Lexicon

+
+

Note

+

This document is a work in progress!

+
+
+

Definitions

+
+

A

+
+
ADCE

Aggressive Dead Code Elimination

+
+
AST

Abstract Syntax Tree.

+

Due to Clang’s influence (mostly the fact that parsing and semantic +analysis are so intertwined for C and especially C++), the typical +working definition of AST in the LLVM community is roughly “the +compiler’s first complete symbolic (as opposed to textual) +representation of an input program”. +As such, an “AST” might be a more general graph instead of a “tree” +(consider the symbolic representation for the type of a typical “linked +list node”). This working definition is closer to what some authors +call an “annotated abstract syntax tree”.

+

Consult your favorite compiler book or search engine for more details.

+
+
+
+
+

B

+
+
BB Vectorization

Basic-Block Vectorization

+
+
BDCE

Bit-tracking dead code elimination. Some bit-wise instructions (shifts, +ands, ors, etc.) “kill” some of their input bits – that is, they make it +such that those bits can be either zero or one without affecting control or +data flow of a program. The BDCE pass removes instructions that only +compute these dead bits.

+
+
BURS

Bottom Up Rewriting System — A method of instruction selection for code +generation. An example is the BURG tool.

+
+
+
+
+

C

+
+
CFI

Call Frame Information. Used in DWARF debug info and in C++ unwind info +to show how the function prolog lays out the stack frame.

+
+
CIE

Common Information Entry. A kind of CFI used to reduce the size of FDEs. +The compiler creates a CIE which contains the information common across all +the FDEs. Each FDE then points to its CIE.

+
+
CSE

Common Subexpression Elimination. An optimization that removes common +subexpression computation. For example (a+b)*(a+b) has two +subexpressions that are the same: (a+b). This optimization would +perform the addition only once and then perform the multiply (but only if +it’s computationally correct/safe).

+
+
+
+
+

D

+
+
DAG

Directed Acyclic Graph

+
+
+
+
Derived Pointer

A pointer to the interior of an object, such that a garbage collector is +unable to use the pointer for reachability analysis. While a derived pointer +is live, the corresponding object pointer must be kept in a root, otherwise +the collector might free the referenced object. With copying collectors, +derived pointers pose an additional hazard that they may be invalidated at +any safe point. This term is used in opposition to object pointer.

+
+
DSA

Data Structure Analysis

+
+
DSE

Dead Store Elimination

+
+
+
+
+

E

+
+
ento

This namespace houses the +Clang Static Analyzer. +It is an abbreviation of entomology.

+
+

“Entomology is the scientific study of insects.”

+
+

In the past, this namespace had not only the name GR (aka. Graph Reachability) +but also entoSA.

+
+
+
+
+

F

+
+
FCA

First Class Aggregate

+
+
FDE

Frame Description Entry. A kind of CFI used to describe the stack frame of +one function.

+
+
+
+
+

G

+
+
GC

Garbage Collection. The practice of using reachability analysis instead of +explicit memory management to reclaim unused memory.

+
+
GEP

GetElementPtr. An LLVM IR instruction that is used to get the address +of a subelement of an aggregate data structure. It is documented in detail +here.

+
+
GVN

Global Value Numbering. GVN is a pass that partitions values computed by a +function into congruence classes. Values ending up in the same congruence +class are guaranteed to be the same for every execution of the program. +In that respect, congruency is a compile-time approximation of equivalence +of values at runtime.

+
+
+
+
+

H

+
+
Heap

In garbage collection, the region of memory which is managed using +reachability analysis.

+
+
+
+
+

I

+
+
ICE

Internal Compiler Error. This abbreviation is used to describe errors +that occur in LLVM or Clang as they are compiling source code. For example, +if a valid C++ source program were to trigger an assert in Clang when +compiled, that could be referred to as an “ICE”.

+
+
IPA

Inter-Procedural Analysis. Refers to any variety of code analysis that +occurs between procedures, functions or compilation units (modules).

+
+
IPO

Inter-Procedural Optimization. Refers to any variety of code optimization +that occurs between procedures, functions or compilation units (modules).

+
+
ISel

Instruction Selection

+
+
+
+
+

L

+
+
LCSSA

Loop-Closed Static Single Assignment Form

+
+
LGTM

“Looks Good To Me”. In a review thread, this indicates that the +reviewer thinks that the patch is okay to commit.

+
+
LICM

Loop Invariant Code Motion

+
+
LSDA

Language Specific Data Area. C++ “zero cost” unwinding is built on top of a +generic unwinding mechanism. As the unwinder walks each frame, it calls +a “personality” function to do language specific analysis. Each function’s +FDE points to an optional LSDA which is passed to the personality function. +For C++, the LSDA contains info about the type and location of catch +statements in that function.

+
+
Load-VN

Load Value Numbering

+
+
LTO

Link-Time Optimization

+
+
+
+
+

M

+
+
MC

Machine Code

+
+
+
+
+

N

+
+
NFC

“No functional change”. Used in a commit message to indicate that a patch +is a pure refactoring/cleanup. +Usually used in the first line, so it is visible without opening the +actual commit email.

+
+
+
+
+

O

+
+
Object Pointer

A pointer to an object such that the garbage collector is able to trace +references contained within the object. This term is used in opposition to +derived pointer.

+
+
+
+
+

P

+
+
PR

Problem report. A bug filed on the LLVM Bug Tracking System.

+
+
PRE

Partial Redundancy Elimination

+
+
+
+
+

R

+

RAUW

+
+

Replace All Uses With. The functions User::replaceUsesOfWith(), +Value::replaceAllUsesWith(), and +Constant::replaceUsesOfWithOnConstant() implement the replacement of one +Value with another by iterating over its def/use chain and fixing up all of +the pointers to point to the new value. See +also def/use chains.

+
+
+
Reassociation

Rearranging associative expressions to promote better redundancy elimination +and other optimization. For example, changing (A+B-A) into (B+A-A), +permitting it to be optimized into (B+0) then (B).

+
+
RFC

Request for Comment. An email sent to a project mailing list in order to +solicit feedback on a proposed change.

+
+
+
+
Root

In garbage collection, a pointer variable lying outside of the heap from +which the collector begins its reachability analysis. In the context of code +generation, “root” almost always refers to a “stack root” — a local or +temporary variable within an executing function.

+
+
RPO

Reverse postorder

+
+
+
+
+

S

+
+
Safe Point

In garbage collection, it is necessary to identify stack roots so that +reachability analysis may proceed. It may be infeasible to provide this +information for every instruction, so instead the information is +calculated only at designated safe points. With a copying collector, +derived pointers must not be retained across safe points and object +pointers must be reloaded from stack roots.

+
+
SDISel

Selection DAG Instruction Selection.

+
+
SCC

Strongly Connected Component

+
+
SCCP

Sparse Conditional Constant Propagation

+
+
SLP

Superword-Level Parallelism, same as Basic-Block Vectorization.

+
+
Splat

Splat refers to a vector of identical scalar elements.

+

The term is based on the PowerPC Altivec instructions that provided +this functionality in hardware. For example, “vsplth” and the corresponding +software intrinsic “vec_splat()”. Examples of other hardware names for this +action include “duplicate” (ARM) and “broadcast” (x86).

+
+
SRoA

Scalar Replacement of Aggregates

+
+
SSA

Static Single Assignment

+
+
Stack Map

In garbage collection, metadata emitted by the code generator which +identifies roots within the stack frame of an executing function.

+
+
+
+
+

T

+
+
TBAA

Type-Based Alias Analysis

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LibFuzzer.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LibFuzzer.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LibFuzzer.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LibFuzzer.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,876 @@ + + + + + + + + + libFuzzer – a library for coverage-guided fuzz testing. — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

libFuzzer – a library for coverage-guided fuzz testing.

+ +
+

Introduction

+

LibFuzzer is an in-process, coverage-guided, evolutionary fuzzing engine.

+

LibFuzzer is linked with the library under test, and feeds fuzzed inputs to the +library via a specific fuzzing entrypoint (aka “target function”); the fuzzer +then tracks which areas of the code are reached, and generates mutations on the +corpus of input data in order to maximize the code coverage. +The code coverage +information for libFuzzer is provided by LLVM’s SanitizerCoverage +instrumentation.

+

Contact: libfuzzer(#)googlegroups.com

+
+
+

Versions

+

LibFuzzer is under active development so you will need the current +(or at least a very recent) version of the Clang compiler (see building Clang from trunk)

+

Refer to https://releases.llvm.org/5.0.0/docs/LibFuzzer.html for documentation on the older version.

+
+
+

Getting Started

+ +
+

Fuzz Target

+

The first step in using libFuzzer on a library is to implement a +fuzz target – a function that accepts an array of bytes and +does something interesting with these bytes using the API under test. +Like this:

+
// fuzz_target.cc
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  DoSomethingInterestingWithMyAPI(Data, Size);
+  return 0;  // Non-zero return values are reserved for future use.
+}
+
+
+

Note that this fuzz target does not depend on libFuzzer in any way +and so it is possible and even desirable to use it with other fuzzing engines +e.g. AFL and/or Radamsa.

+

Some important things to remember about fuzz targets:

+
    +
  • The fuzzing engine will execute the fuzz target many times with different inputs in the same process.

  • +
  • It must tolerate any kind of input (empty, huge, malformed, etc).

  • +
  • It must not exit() on any input.

  • +
  • It may use threads but ideally all threads should be joined at the end of the function.

  • +
  • It must be as deterministic as possible. Non-determinism (e.g. random decisions not based on the input bytes) will make fuzzing inefficient.

  • +
  • It must be fast. Try avoiding cubic or greater complexity, logging, or excessive memory consumption.

  • +
  • Ideally, it should not modify any global state (although that’s not strict).

  • +
  • Usually, the narrower the target the better. E.g. if your target can parse several data formats, split it into several targets, one per format.

  • +
+
+
+

Fuzzer Usage

+

Recent versions of Clang (starting from 6.0) include libFuzzer, and no extra installation is necessary.

+

In order to build your fuzzer binary, use the -fsanitize=fuzzer flag during the +compilation and linking. In most cases you may want to combine libFuzzer with +AddressSanitizer (ASAN), UndefinedBehaviorSanitizer (UBSAN), or both. You can +also build with MemorySanitizer (MSAN), but support is experimental:

+
clang -g -O1 -fsanitize=fuzzer                         mytarget.c # Builds the fuzz target w/o sanitizers
+clang -g -O1 -fsanitize=fuzzer,address                 mytarget.c # Builds the fuzz target with ASAN
+clang -g -O1 -fsanitize=fuzzer,signed-integer-overflow mytarget.c # Builds the fuzz target with a part of UBSAN
+clang -g -O1 -fsanitize=fuzzer,memory                  mytarget.c # Builds the fuzz target with MSAN
+
+
+

This will perform the necessary instrumentation, as well as linking with the libFuzzer library. +Note that -fsanitize=fuzzer links in the libFuzzer’s main() symbol.

+

If modifying CFLAGS of a large project, which also compiles executables +requiring their own main symbol, it may be desirable to request just the +instrumentation without linking:

+
clang -fsanitize=fuzzer-no-link mytarget.c
+
+
+

Then libFuzzer can be linked to the desired driver by passing in +-fsanitize=fuzzer during the linking stage.

+
+
+

Corpus

+

Coverage-guided fuzzers like libFuzzer rely on a corpus of sample inputs for the +code under test. This corpus should ideally be seeded with a varied collection +of valid and invalid inputs for the code under test; for example, for a graphics +library the initial corpus might hold a variety of different small PNG/JPG/GIF +files. The fuzzer generates random mutations based around the sample inputs in +the current corpus. If a mutation triggers execution of a previously-uncovered +path in the code under test, then that mutation is saved to the corpus for +future variations.

+

LibFuzzer will work without any initial seeds, but will be less +efficient if the library under test accepts complex, +structured inputs.

+

The corpus can also act as a sanity/regression check, to confirm that the +fuzzing entrypoint still works and that all of the sample inputs run through +the code under test without problems.

+

If you have a large corpus (either generated by fuzzing or acquired by other means) +you may want to minimize it while still preserving the full coverage. One way to do that +is to use the -merge=1 flag:

+
mkdir NEW_CORPUS_DIR  # Store minimized corpus here.
+./my_fuzzer -merge=1 NEW_CORPUS_DIR FULL_CORPUS_DIR
+
+
+

You may use the same flag to add more interesting items to an existing corpus. +Only the inputs that trigger new coverage will be added to the first corpus.

+
./my_fuzzer -merge=1 CURRENT_CORPUS_DIR NEW_POTENTIALLY_INTERESTING_INPUTS_DIR
+
+
+
+
+

Running

+

To run the fuzzer, first create a Corpus directory that holds the +initial “seed” sample inputs:

+
mkdir CORPUS_DIR
+cp /some/input/samples/* CORPUS_DIR
+
+
+

Then run the fuzzer on the corpus directory:

+
./my_fuzzer CORPUS_DIR  # -max_len=1000 -jobs=20 ...
+
+
+

As the fuzzer discovers new interesting test cases (i.e. test cases that +trigger coverage of new paths through the code under test), those test cases +will be added to the corpus directory.

+

By default, the fuzzing process will continue indefinitely – at least until +a bug is found. Any crashes or sanitizer failures will be reported as usual, +stopping the fuzzing process, and the particular input that triggered the bug +will be written to disk (typically as crash-<sha1>, leak-<sha1>, +or timeout-<sha1>).

+
+
+

Parallel Fuzzing

+

Each libFuzzer process is single-threaded, unless the library under test starts +its own threads. However, it is possible to run multiple libFuzzer processes in +parallel with a shared corpus directory; this has the advantage that any new +inputs found by one fuzzer process will be available to the other fuzzer +processes (unless you disable this with the -reload=0 option).

+

This is primarily controlled by the -jobs=N option, which indicates +that N fuzzing jobs should be run to completion (i.e. until a bug is found or +time/iteration limits are reached). These jobs will be run across a set of +worker processes, by default using half of the available CPU cores; the count of +worker processes can be overridden by the -workers=N option. For example, +running with -jobs=30 on a 12-core machine would run 6 workers by default, +with each worker averaging 5 bugs by completion of the entire process.

+
+
+

Fork mode

+

Experimental mode -fork=N (where N is the number of parallel jobs) +enables oom-, timeout-, and crash-resistant +fuzzing with separate processes (using fork-exec, not just fork).

+

The top libFuzzer process will not do any fuzzing itself, but will +spawn up to N concurrent child processes providing them +small random subsets of the corpus. After a child exits, the top process +merges the corpus generated by the child back to the main corpus.

+

Related flags:

+
+
-ignore_ooms

True by default. If an OOM happens during fuzzing in one of the child processes, +the reproducer is saved on disk, and fuzzing continues.

+
+
-ignore_timeouts

True by default, same as -ignore_ooms, but for timeouts.

+
+
-ignore_crashes

False by default, same as -ignore_ooms, but for all other crashes.

+
+
+

The plan is to eventually replace -jobs=N and -workers=N with -fork=N.

+
+
+

Resuming merge

+

Merging large corpora may be time consuming, and it is often desirable to do it +on preemptable VMs, where the process may be killed at any time. +In order to seamlessly resume the merge, use the -merge_control_file flag +and use killall -SIGUSR1 /path/to/fuzzer/binary to stop the merge gracefully. Example:

+
% rm -f SomeLocalPath
+% ./my_fuzzer CORPUS1 CORPUS2 -merge=1 -merge_control_file=SomeLocalPath
+...
+MERGE-INNER: using the control file 'SomeLocalPath'
+...
+# While this is running, do `killall -SIGUSR1 my_fuzzer` in another console
+==9015== INFO: libFuzzer: exiting as requested
+
+# This will leave the file SomeLocalPath with the partial state of the merge.
+# Now, you can continue the merge by executing the same command. The merge
+# will continue from where it has been interrupted.
+% ./my_fuzzer CORPUS1 CORPUS2 -merge=1 -merge_control_file=SomeLocalPath
+...
+MERGE-OUTER: non-empty control file provided: 'SomeLocalPath'
+MERGE-OUTER: control file ok, 32 files total, first not processed file 20
+...
+
+
+
+
+
+

Options

+

To run the fuzzer, pass zero or more corpus directories as command line +arguments. The fuzzer will read test inputs from each of these corpus +directories, and any new test inputs that are generated will be written +back to the first corpus directory:

+
./fuzzer [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ]
+
+
+

If a list of files (rather than directories) is passed to the fuzzer program, +then it will re-run those files as test inputs but will not perform any fuzzing. +In this mode the fuzzer binary can be used as a regression test (e.g. on a +continuous integration system) to check the target function and saved inputs +still work.

+

The most important command line options are:

+
+
-help

Print help message (-help=1).

+
+
-seed

Random seed. If 0 (the default), the seed is generated.

+
+
-runs

Number of individual test runs, -1 (the default) to run indefinitely.

+
+
-max_len

Maximum length of a test input. If 0 (the default), libFuzzer tries to guess +a good value based on the corpus (and reports it).

+
+
-len_control

Try generating small inputs first, then try larger inputs over time. +Specifies the rate at which the length limit is increased (smaller == faster). +Default is 100. If 0, immediately try inputs with size up to max_len.

+
+
-timeout

Timeout in seconds, default 1200. If an input takes longer than this timeout, +the process is treated as a failure case.

+
+
-rss_limit_mb

Memory usage limit in Mb, default 2048. Use 0 to disable the limit. +If an input requires more than this amount of RSS memory to execute, +the process is treated as a failure case. +The limit is checked in a separate thread every second. +If running w/o ASAN/MSAN, you may use ‘ulimit -v’ instead.

+
+
-malloc_limit_mb

If non-zero, the fuzzer will exit if the target tries to allocate this +number of Mb with one malloc call. +If zero (the default), the same limit as rss_limit_mb is applied.

+
+
-timeout_exitcode

Exit code (default 77) used if libFuzzer reports a timeout.

+
+
-error_exitcode

Exit code (default 77) used if libFuzzer itself (not a sanitizer) reports a bug (leak, OOM, etc).

+
+
-max_total_time

If positive, indicates the maximum total time in seconds to run the fuzzer. +If 0 (the default), run indefinitely.

+
+
-merge

If set to 1, any corpus inputs from the 2nd, 3rd etc. corpus directories +that trigger new code coverage will be merged into the first corpus +directory. Defaults to 0. This flag can be used to minimize a corpus.

+
+
-merge_control_file

Specify a control file used for the merge process. +If a merge process gets killed it tries to leave this file in a state +suitable for resuming the merge. By default a temporary file will be used.

+
+
-minimize_crash

If 1, minimizes the provided crash input. +Use with -runs=N or -max_total_time=N to limit the number of attempts.

+
+
-reload

If set to 1 (the default), the corpus directory is re-read periodically to +check for new inputs; this allows detection of new inputs that were discovered +by other fuzzing processes.

+
+
-jobs

Number of fuzzing jobs to run to completion. Default value is 0, which runs a +single fuzzing process until completion. If the value is >= 1, then this +number of jobs performing fuzzing are run, in a collection of parallel +separate worker processes; each such worker process has its +stdout/stderr redirected to fuzz-<JOB>.log.

+
+
-workers

Number of simultaneous worker processes to run the fuzzing jobs to completion +in. If 0 (the default), min(jobs, NumberOfCpuCores()/2) is used.

+
+
-dict

Provide a dictionary of input keywords; see Dictionaries.

+
+
-use_counters

Use coverage counters to generate approximate counts of how often code +blocks are hit; defaults to 1.

+
+
-reduce_inputs

Try to reduce the size of inputs while preserving their full feature sets; +defaults to 1.

+
+
-use_value_profile

Use value profile to guide corpus expansion; defaults to 0.

+
+
-only_ascii

If 1, generate only ASCII (isprint + isspace) inputs. Defaults to 0.

+
+
-artifact_prefix

Provide a prefix to use when saving fuzzing artifacts (crash, timeout, or +slow inputs) as $(artifact_prefix)file. Defaults to empty.

+
+
-exact_artifact_path

Ignored if empty (the default). If non-empty, write the single artifact on +failure (crash, timeout) as $(exact_artifact_path). This overrides +-artifact_prefix and will not use checksum in the file name. Do not use +the same path for several parallel processes.

+
+
-print_pcs

If 1, print out newly covered PCs. Defaults to 0.

+
+
-print_final_stats

If 1, print statistics at exit. Defaults to 0.

+
+
-detect_leaks

If 1 (default) and if LeakSanitizer is enabled +try to detect memory leaks during fuzzing (i.e. not only at shut down).

+
+
-close_fd_mask

Indicate output streams to close at startup. Be careful, this will +remove diagnostic output from target code (e.g. messages on assert failure).

+
+
    +
  • 0 (default): close neither stdout nor stderr

  • +
  • 1 : close stdout

  • +
  • 2 : close stderr

  • +
  • 3 : close both stdout and stderr.

  • +
+
+
+
+

For the full list of flags run the fuzzer binary with -help=1.

+
+
+

Output

+

During operation the fuzzer prints information to stderr, for example:

+
INFO: Seed: 1523017872
+INFO: Loaded 1 modules (16 guards): [0x744e60, 0x744ea0),
+INFO: -max_len is not provided, using 64
+INFO: A corpus is not provided, starting from an empty corpus
+#0    READ units: 1
+#1    INITED cov: 3 ft: 2 corp: 1/1b exec/s: 0 rss: 24Mb
+#3811 NEW    cov: 4 ft: 3 corp: 2/2b exec/s: 0 rss: 25Mb L: 1 MS: 5 ChangeBit-ChangeByte-ChangeBit-ShuffleBytes-ChangeByte-
+#3827 NEW    cov: 5 ft: 4 corp: 3/4b exec/s: 0 rss: 25Mb L: 2 MS: 1 CopyPart-
+#3963 NEW    cov: 6 ft: 5 corp: 4/6b exec/s: 0 rss: 25Mb L: 2 MS: 2 ShuffleBytes-ChangeBit-
+#4167 NEW    cov: 7 ft: 6 corp: 5/9b exec/s: 0 rss: 25Mb L: 3 MS: 1 InsertByte-
+...
+
+
+

The early parts of the output include information about the fuzzer options and +configuration, including the current random seed (in the Seed: line; this +can be overridden with the -seed=N flag).

+

Further output lines have the form of an event code and statistics. The +possible event codes are:

+
+
READ

The fuzzer has read in all of the provided input samples from the corpus +directories.

+
+
INITED

The fuzzer has completed initialization, which includes running each of +the initial input samples through the code under test.

+
+
NEW

The fuzzer has created a test input that covers new areas of the code +under test. This input will be saved to the primary corpus directory.

+
+
REDUCE

The fuzzer has found a better (smaller) input that triggers previously +discovered features (set -reduce_inputs=0 to disable).

+
+
pulse

The fuzzer has generated 2^n inputs (generated periodically to reassure +the user that the fuzzer is still working).

+
+
DONE

The fuzzer has completed operation because it has reached the specified +iteration limit (-runs) or time limit (-max_total_time).

+
+
RELOAD

The fuzzer is performing a periodic reload of inputs from the corpus +directory; this allows it to discover any inputs discovered by other +fuzzer processes (see Parallel Fuzzing).

+
+
+

Each output line also reports the following statistics (when non-zero):

+
+
cov:

Total number of code blocks or edges covered by executing the current corpus.

+
+
ft:

libFuzzer uses different signals to evaluate the code coverage: +edge coverage, edge counters, value profiles, indirect caller/callee pairs, etc. +These signals combined are called features (ft:).

+
+
corp:

Number of entries in the current in-memory test corpus and its size in bytes.

+
+
lim:

Current limit on the length of new entries in the corpus. Increases over time +until the max length (-max_len) is reached.

+
+
exec/s:

Number of fuzzer iterations per second.

+
+
rss:

Current memory consumption.

+
+
+

For NEW and REDUCE events, the output line also includes information +about the mutation operation that produced the new input:

+
+
L:

Size of the new input in bytes.

+
+
MS: <n> <operations>

Count and list of the mutation operations used to generate the input.

+
+
+
+
+

Examples

+ +
+

Toy example

+

A simple function that does something interesting if it receives the input +“HI!”:

+
cat << EOF > test_fuzzer.cc
+#include <stdint.h>
+#include <stddef.h>
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  if (size > 0 && data[0] == 'H')
+    if (size > 1 && data[1] == 'I')
+       if (size > 2 && data[2] == '!')
+       __builtin_trap();
+  return 0;
+}
+EOF
+# Build test_fuzzer.cc with asan and link against libFuzzer.
+clang++ -fsanitize=address,fuzzer test_fuzzer.cc
+# Run the fuzzer with no corpus.
+./a.out
+
+
+

You should get an error pretty quickly:

+
INFO: Seed: 1523017872
+INFO: Loaded 1 modules (16 guards): [0x744e60, 0x744ea0),
+INFO: -max_len is not provided, using 64
+INFO: A corpus is not provided, starting from an empty corpus
+#0    READ units: 1
+#1    INITED cov: 3 ft: 2 corp: 1/1b exec/s: 0 rss: 24Mb
+#3811 NEW    cov: 4 ft: 3 corp: 2/2b exec/s: 0 rss: 25Mb L: 1 MS: 5 ChangeBit-ChangeByte-ChangeBit-ShuffleBytes-ChangeByte-
+#3827 NEW    cov: 5 ft: 4 corp: 3/4b exec/s: 0 rss: 25Mb L: 2 MS: 1 CopyPart-
+#3963 NEW    cov: 6 ft: 5 corp: 4/6b exec/s: 0 rss: 25Mb L: 2 MS: 2 ShuffleBytes-ChangeBit-
+#4167 NEW    cov: 7 ft: 6 corp: 5/9b exec/s: 0 rss: 25Mb L: 3 MS: 1 InsertByte-
+==31511== ERROR: libFuzzer: deadly signal
+...
+artifact_prefix='./'; Test unit written to ./crash-b13e8756b13a00cf168300179061fb4b91fefbed
+
+
+
+
+

More examples

+

Examples of real-life fuzz targets and the bugs they find can be found +at http://tutorial.libfuzzer.info. Among other things you can learn how +to detect Heartbleed in one second.

+
+
+
+

Advanced features

+ +
+

Dictionaries

+

LibFuzzer supports user-supplied dictionaries with input language keywords +or other interesting byte sequences (e.g. multi-byte magic values). +Use -dict=DICTIONARY_FILE. For some input languages using a dictionary +may significantly improve the search speed. +The dictionary syntax is similar to that used by AFL for its -x option:

+
# Lines starting with '#' and empty lines are ignored.
+
+# Adds "blah" (w/o quotes) to the dictionary.
+kw1="blah"
+# Use \\ for backslash and \" for quotes.
+kw2="\"ac\\dc\""
+# Use \xAB for hex values
+kw3="\xF7\xF8"
+# the name of the keyword followed by '=' may be omitted:
+"foo\x0Abar"
+
+
+
+
+

Tracing CMP instructions

+

With an additional compiler flag -fsanitize-coverage=trace-cmp +(on by default as part of -fsanitize=fuzzer, see SanitizerCoverageTraceDataFlow) +libFuzzer will intercept CMP instructions and guide mutations based +on the arguments of intercepted CMP instructions. This may slow down +the fuzzing but is very likely to improve the results.

+
+
+

Value Profile

+

With -fsanitize-coverage=trace-cmp (default with -fsanitize=fuzzer) +and extra run-time flag -use_value_profile=1 the fuzzer will +collect value profiles for the parameters of compare instructions +and treat some new values as new coverage.

+

The current implementation does roughly the following:

+
    +
  • The compiler instruments all CMP instructions with a callback that receives both CMP arguments.

  • +
  • The callback computes (caller_pc&4095) | (popcnt(Arg1 ^ Arg2) << 12) and uses this value to set a bit in a bitset.

  • +
  • Every new observed bit in the bitset is treated as new coverage.

  • +
+

This feature has the potential to discover many interesting inputs, +but there are two downsides. +First, the extra instrumentation may bring up to 2x additional slowdown. +Second, the corpus may grow by several times.

+
+
+

Fuzzer-friendly build mode

+

Sometimes the code under test is not fuzzing-friendly. Examples:

+
+
    +
  • The target code uses a PRNG seeded e.g. by system time and +thus two consecutive invocations may potentially execute different code paths +even if the end result will be the same. This will cause a fuzzer to treat +two similar inputs as significantly different and it will blow up the test corpus. +E.g. libxml uses rand() inside its hash table.

  • +
  • The target code uses checksums to protect from invalid inputs. +E.g. png checks CRC for every chunk.

  • +
+
+

In many cases it makes sense to build a special fuzzing-friendly build +with certain fuzzing-unfriendly features disabled. We propose to use a common build macro +for all such cases for consistency: FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION.

+
void MyInitPRNG() {
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  // In fuzzing mode the behavior of the code should be deterministic.
+  srand(0);
+#else
+  srand(time(0));
+#endif
+}
+
+
+
+
+

AFL compatibility

+

LibFuzzer can be used together with AFL on the same test corpus. +Both fuzzers expect the test corpus to reside in a directory, one file per input. +You can run both fuzzers on the same corpus, one after another:

+
./afl-fuzz -i testcase_dir -o findings_dir /path/to/program @@
+./llvm-fuzz testcase_dir findings_dir  # Will write new tests to testcase_dir
+
+
+

Periodically restart both fuzzers so that they can use each other’s findings. +Currently, there is no simple way to run both fuzzing engines in parallel while sharing the same corpus dir.

+

You may also use AFL on your target function LLVMFuzzerTestOneInput: +see an example here.

+
+
+

How good is my fuzzer?

+

Once you implement your target function LLVMFuzzerTestOneInput and fuzz it to death, +you will want to know whether the function or the corpus can be improved further. +One easy to use metric is, of course, code coverage.

+

We recommend using +Clang Coverage +to visualize and study your code coverage +(example).

+
+
+

User-supplied mutators

+

LibFuzzer allows the use of custom (user-supplied) mutators; see +Structure-Aware Fuzzing +for more details.

+
+
+

Startup initialization

+

If the library being tested needs to be initialized, there are several options.

+

The simplest way is to have a statically initialized global object inside +LLVMFuzzerTestOneInput (or in global scope if that works for you):

+
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  static bool Initialized = DoInitialization();
+  ...
+
+
+

Alternatively, you may define an optional init function and it will receive +the program arguments that you can read and modify. Do this only if you +really need to access argv/argc.

+
extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
+ ReadAndMaybeModify(argc, argv);
+ return 0;
+}
+
+
+
+
+

Using libFuzzer as a library

+

If the code being fuzzed must provide its own main, it’s possible to +invoke libFuzzer as a library. Be sure to pass -fsanitize=fuzzer-no-link +during compilation, and link your binary against the no-main version of +libFuzzer. On Linux installations, this is typically located at:

+
/usr/lib/<llvm-version>/lib/clang/<clang-version>/lib/linux/libclang_rt.fuzzer_no_main-<architecture>.a
+
+
+

If building libFuzzer from source, this is located at the following path +in the build output directory:

+
lib/linux/libclang_rt.fuzzer_no_main-<architecture>.a
+
+
+

From here, the code can do whatever setup it requires, and when it’s ready +to start fuzzing, it can call LLVMFuzzerRunDriver, passing in the program +arguments and a callback. This callback is invoked just like +LLVMFuzzerTestOneInput, and has the same signature.

+
extern "C" int LLVMFuzzerRunDriver(int *argc, char ***argv,
+                  int (*UserCb)(const uint8_t *Data, size_t Size));
+
+
+
+
+

Leaks

+

Binaries built with AddressSanitizer or LeakSanitizer will try to detect +memory leaks at the process shutdown. +For in-process fuzzing this is inconvenient +since the fuzzer needs to report a leak with a reproducer as soon as the leaky +mutation is found. However, running full leak detection after every mutation +is expensive.

+

By default (-detect_leaks=1) libFuzzer will count the number of +malloc and free calls when executing every mutation. +If the numbers don’t match (which by itself doesn’t mean there is a leak) +libFuzzer will invoke the more expensive LeakSanitizer +pass and if the actual leak is found, it will be reported with the reproducer +and the process will exit.

+

If your target has massive leaks and the leak detection is disabled +you will eventually run out of RAM (see the -rss_limit_mb flag).

+
+
+
+

Developing libFuzzer

+

LibFuzzer is built as part of the LLVM project by default on macOS and Linux. +Users of other operating systems can explicitly request compilation using the +-DCOMPILER_RT_BUILD_LIBFUZZER=ON flag. +Tests are run using the check-fuzzer target from the build directory +which was configured with the -DCOMPILER_RT_INCLUDE_TESTS=ON flag.

+
ninja check-fuzzer
+
+
+
+
+

FAQ

+
+

Q. Why doesn’t libFuzzer use any of the LLVM support?

+

There are two reasons.

+

First, we want this library to be used outside of the LLVM without users having to +build the rest of LLVM. This may sound unconvincing for many LLVM folks, +but in practice the need for building the whole LLVM frightens many potential +users – and we want more users to use this code.

+

Second, there is a subtle technical reason not to rely on the rest of LLVM, or +any other large body of code (maybe not even STL). When coverage instrumentation +is enabled, it will also instrument the LLVM support code which will blow up the +coverage set of the process (since the fuzzer is in-process). In other words, by +using more external dependencies we will slow down the fuzzer while the main +reason for it to exist is extreme speed.

+
+
+

Q. Does libFuzzer Support Windows?

+

Yes, libFuzzer now supports Windows. Initial support was added in r341082. +Any build of Clang 9 supports it. You can download a build of Clang for Windows +that has libFuzzer from +LLVM Snapshot Builds.

+

Using libFuzzer on Windows without ASAN is unsupported. Building fuzzers with the +/MD (dynamic runtime library) compile option is unsupported. Support for these +may be added in the future. Linking fuzzers with the /INCREMENTAL link option +(or the /DEBUG option which implies it) is also unsupported.

+

Send any questions or comments to the mailing list: libfuzzer(#)googlegroups.com

+
+
+

Q. When is libFuzzer not a good solution for a problem?

+
    +
  • If the test inputs are validated by the target library and the validator +asserts/crashes on invalid inputs, in-process fuzzing is not applicable.

  • +
  • Bugs in the target library may accumulate without being detected. E.g. a memory +corruption that goes undetected at first and then leads to a crash while +testing another input. This is why it is highly recommended to run this +in-process fuzzer with all sanitizers to detect most bugs on the spot.

  • +
  • It is harder to protect the in-process fuzzer from excessive memory +consumption and infinite loops in the target library (still possible).

  • +
  • The target library should not have significant global state that is not +reset between the runs.

  • +
  • Many interesting target libraries are not designed in a way that supports +the in-process fuzzer interface (e.g. require a file path instead of a +byte array).

  • +
  • If a single test run takes a considerable fraction of a second (or +more) the speed benefit from the in-process fuzzer is negligible.

  • +
  • If the target library runs persistent threads (that outlive +execution of one test) the fuzzing results will be unreliable.

  • +
+
+
+

Q. So, what exactly is this Fuzzer good for?

+

This Fuzzer might be a good choice for testing libraries that have relatively +small inputs, each input takes < 10ms to run, and the library code is not expected +to crash on invalid inputs. +Examples: regular expression matchers, text or binary format parsers, compression, +network, crypto.

+
+
+

Q. LibFuzzer crashes on my complicated fuzz target (but works fine for me on smaller targets).

+

Check if your fuzz target uses dlclose. +Currently, libFuzzer doesn’t support targets that call dlclose; +this may be fixed in the future.

+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LinkTimeOptimization.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LinkTimeOptimization.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LinkTimeOptimization.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LinkTimeOptimization.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,421 @@ + + + + + + + + + LLVM Link Time Optimization: Design and Implementation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ + + + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LoopTerminology.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LoopTerminology.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/LoopTerminology.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/LoopTerminology.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,756 @@ + + + + + + + + + LLVM Loop Terminology (and Canonical Forms) — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Loop Terminology (and Canonical Forms)

+ +
+

Loop Definition

+

Loops are an important concept for a code optimizer. In LLVM, detection +of loops in a control-flow graph is done by LoopInfo. It is based +on the following definition.

+

A loop is a subset of nodes from the control-flow graph (CFG; where +nodes represent basic blocks) with the following properties:

+
    +
  1. The induced subgraph (which is the subgraph that contains all the +edges from the CFG within the loop) is strongly connected +(every node is reachable from all others).

  2. +
  3. All edges from outside the subset into the subset point to the same +node, called the header. As a consequence, the header dominates +all nodes in the loop (i.e. every execution path to any of the loop’s +nodes will have to pass through the header).

  4. +
  5. The loop is the maximum subset with these properties. That is, no +additional nodes from the CFG can be added such that the induced +subgraph would still be strongly connected and the header would +remain the same.

  6. +
+

In computer science literature, this is often called a natural loop. +In LLVM, this is the only definition of a loop.

+
+

Terminology

+

The definition of a loop comes with some additional terminology:

+
    +
  • An entering block (or loop predecessor) is a non-loop node +that has an edge into the loop (necessarily the header). If there is +only one entering block, and its only edge is to the +header, it is also called the loop’s preheader. The preheader +dominates the loop without itself being part of the loop.

  • +
  • A latch is a loop node that has an edge to the header.

  • +
  • A backedge is an edge from a latch to the header.

  • +
  • An exiting edge is an edge from inside the loop to a node outside +of the loop. The source of such an edge is called an exiting block, its +target is an exit block.

  • +
+_images/loop-terminology.svg +
+
+

Important Notes

+

This loop definition has some noteworthy consequences:

+
    +
  • A node can be the header of at most one loop. As such, a loop can be +identified by its header. Due to the header being the only entry into +a loop, it can be called a Single-Entry-Multiple-Exits (SEME) region.

  • +
  • For basic blocks that are not reachable from the function’s entry, the +concept of loops is undefined. This follows from the concept of +dominance being undefined as well.

  • +
  • The smallest loop consists of a single basic block that branches to +itself. In this case that block is the header, latch (and exiting +block if it has another edge to a different block) at the same time. +A single block that has no branch to itself is not considered a loop, +even though it is trivially strongly connected.

  • +
+_images/loop-single.svg +

In this case, the role of header, exiting block and latch fall to the +same node. LoopInfo reports this as:

+
$ opt input.ll -loops -analyze
+Loop at depth 1 containing: %for.body<header><latch><exiting>
+
+
+
    +
  • Loops can be nested inside each other. That is, a loop’s node set can +be a subset of another loop with a different loop header. The loop +hierarchy in a function forms a forest: Each top-level loop is the +root of the tree of the loops nested inside it.

  • +
+_images/loop-nested.svg +
    +
  • It is not possible that two loops share only a few of their nodes. +Two loops are either disjoint or one is nested inside the other. In +the example below the left and right subsets both violate the +maximality condition. Only the merge of both sets is considered a loop.

  • +
+_images/loop-nonmaximal.svg +
    +
  • It is also possible that two logical loops share a header, but are +considered a single loop by LLVM:

  • +
+
for (int i = 0; i < 128; ++i)
+  for (int j = 0; j < 128; ++j)
+    body(i,j);
+
+
+

which might be represented in LLVM-IR as follows. Note that there is +only a single header and hence just a single loop.

+_images/loop-merge.svg +

The LoopSimplify pass will +detect the loop and ensure separate headers for the outer and inner loop.

+_images/loop-separate.svg +
    +
  • A cycle in the CFG does not imply there is a loop. The example below +shows such a CFG, where there is no header node that dominates all +other nodes in the cycle. This is called irreducible control-flow.

  • +
+_images/loop-irreducible.svg +

The term reducible results from the ability to collapse the CFG into a +single node by successively replacing one of three base structures with +a single node: A sequential execution of basic blocks, a conditional +branching (or switch) with re-joining, and a basic block looping on itself. +Wikipedia +has a more formal definition, which basically says that every cycle has +a dominating header.

+
    +
  • Irreducible control-flow can occur at any level of the loop nesting. +That is, a loop that itself does not contain any loops can still have +cyclic control flow in its body; a loop that is not nested inside +another loop can still be part of an outer cycle; and there can be +additional cycles between any two loops where one is contained in the other.

  • +
  • Exiting edges are not the only way to break out of a loop. Other +possibilities are unreachable terminators, [[noreturn]] functions, +exceptions, signals, and your computer’s power button.

  • +
  • A basic block “inside” the loop that does not have a path back to the +loop (i.e. to a latch or header) is not considered part of the loop. +This is illustrated by the following code.

  • +
+
for (unsigned i = 0; i <= n; ++i) {
+  if (c1) {
+    // When reaching this block, we will have exited the loop.
+    do_something();
+    break;
+  }
+  if (c2) {
+    // abort(), never returns, so we have exited the loop.
+    abort();
+  }
+  if (c3) {
+    // The unreachable allows the compiler to assume that this will not rejoin the loop.
+    do_something();
+    __builtin_unreachable();
+  }
+  if (c4) {
+    // This statically infinite loop is not nested because control-flow will not continue with the for-loop.
+    while(true) {
+      do_something();
+    }
+  }
+}
+
+
+
    +
  • There is no requirement for the control flow to eventually leave the +loop, i.e. a loop can be infinite. A statically infinite loop is a +loop that has no exiting edges. A dynamically infinite loop has +exiting edges, but it is possible that they are never taken. This may happen +only under some circumstances, such as when n == UINT_MAX in the code +below.

  • +
+
for (unsigned i = 0; i <= n; ++i)
+  body(i);
+
+
+

It is possible for the optimizer to turn a dynamically infinite loop +into a statically infinite loop, for instance when it can prove that the +exiting condition is always false. Because the exiting edge is never +taken, the optimizer can change the conditional branch into an +unconditional one.

+

Note that under some circumstances the compiler may assume that a loop will +eventually terminate without proving it. For instance, it may remove a loop +that does not do anything in its body. If the loop was infinite, this +optimization resulted in an “infinite” performance speed-up. A call +to the intrinsic llvm.sideeffect can be added +into the loop to ensure that the optimizer does not make this assumption +without proof.

+
    +
  • The number of executions of the loop header before leaving the loop is +the loop trip count (or iteration count). If the loop should +not be executed at all, a loop guard must skip the entire loop:

  • +
+_images/loop-guard.svg +

Since the first thing a loop header might do is to check whether there +is another execution and if not, immediately exit without doing any work +(also see Rotated Loops), loop trip count is not +the best measure of a loop’s number of iterations. For instance, the +number of header executions of the code below for a non-positive n +(before loop rotation) is 1, even though the loop body is not executed +at all.

+
for (int i = 0; i < n; ++i)
+  body(i);
+
+
+

A better measure is the backedge-taken count, which is the number of +times any of the backedges is taken before the loop exits. It is one less than +the trip count for executions that enter the header.

+
+
+
+

LoopInfo

+

LoopInfo is the core analysis for obtaining information about loops. +There are a few key implications of the definitions given above which +are important for working successfully with this interface.

+
    +
  • LoopInfo does not contain information about non-loop cycles. As a +result, it is not suitable for any algorithm which requires complete +cycle detection for correctness.

  • +
  • LoopInfo provides an interface for enumerating all top level loops +(i.e. those not contained in any other loop). From there, you may +walk the tree of sub-loops rooted in that top level loop.

  • +
  • Loops which become statically unreachable during optimization must +be removed from LoopInfo. If this can not be done for some reason, +then the optimization is required to preserve the static +reachability of the loop.

  • +
+
+
+

Loop Simplify Form

+

The Loop Simplify Form is a canonical form that makes +several analyses and transformations simpler and more effective. +It is ensured by the LoopSimplify +(-loop-simplify) pass and is automatically +added by the pass managers when scheduling a LoopPass. +This pass is implemented in +LoopSimplify.h. +When it is successful, the loop has:

+
    +
  • A preheader.

  • +
  • A single backedge (which implies that there is a single latch).

  • +
  • Dedicated exits. That is, no exit block for the loop +has a predecessor that is outside the loop. This implies +that all exit blocks are dominated by the loop header.

  • +
+
+
+

Loop Closed SSA (LCSSA)

+

A program is in Loop Closed SSA Form if it is in SSA form +and all values that are defined in a loop are used only inside +this loop.

+

Programs written in LLVM IR are always in SSA form but not necessarily +in LCSSA. To achieve the latter, for each value that is live across the +loop boundary, single entry PHI nodes are inserted to each of the exit blocks +1 in order to “close” these values inside the loop. +In particular, consider the following loop:

+
c = ...;
+for (...) {
+  if (c)
+    X1 = ...
+  else
+    X2 = ...
+  X3 = phi(X1, X2);  // X3 defined
+}
+
+... = X3 + 4;  // X3 used, i.e. live
+               // outside the loop
+
+
+

In the loop above, X3 is defined inside the loop, but used +outside of it. In Loop Closed SSA form, this would be represented as follows:

+
c = ...;
+for (...) {
+  if (c)
+    X1 = ...
+  else
+    X2 = ...
+  X3 = phi(X1, X2);
+}
+X4 = phi(X3);
+
+... = X4 + 4;
+
+
+

This is still valid LLVM; the extra phi nodes are purely redundant, +but all LoopPass’es are required to preserve them. +This form is ensured by the LCSSA (-lcssa) +pass and is added automatically by the LoopPassManager when +scheduling a LoopPass. +After the loop optimizations are done, these extra phi nodes +will be deleted by -instcombine.

+

Note that an exit block is outside of a loop, so how can such a phi “close” +the value inside the loop since it uses it outside of it? First of all, +for phi nodes, as +mentioned in the LangRef: +“the use of each incoming value is deemed to occur on the edge from the +corresponding predecessor block to the current block”. Now, an +edge to an exit block is considered outside of the loop because +if we take that edge, it leads us clearly out of the loop.

+

However, an edge doesn’t actually contain any IR, so in source code, +we have to choose a convention of whether the use happens in +the current block or in the respective predecessor. For LCSSA’s purpose, +we consider the use happens in the latter (so as to consider the +use inside) 2.

+

The major benefit of LCSSA is that it makes many other loop optimizations +simpler.

+

First of all, a simple observation is that if one needs to see all +the outside users, they can just iterate over all the (loop closing) +PHI nodes in the exit blocks (the alternative would be to +scan the def-use chain 3 of all instructions in the loop).

+

Then, consider for example +-loop-unswitch ing the loop above. +Because it is in LCSSA form, we know that any value defined inside of +the loop will be used either only inside the loop or in a loop closing +PHI node. In this case, the only loop closing PHI node is X4. +This means that we can just copy the loop and change the X4 +accordingly, like so:

+
c = ...;
+if (c) {
+  for (...) {
+    if (true)
+      X1 = ...
+    else
+      X2 = ...
+    X3 = phi(X1, X2);
+  }
+} else {
+  for (...) {
+    if (false)
+      X1' = ...
+    else
+      X2' = ...
+    X3' = phi(X1', X2');
+  }
+}
+X4 = phi(X3, X3')
+
+
+

Now, all uses of X4 will get the updated value (in general, +if a loop is in LCSSA form, in any loop transformation, +we only need to update the loop closing PHI nodes for the changes +to take effect). If we did not have Loop Closed SSA form, it means that X3 could +possibly be used outside the loop. So, we would have to introduce the +X4 (which is the new X3) and replace all uses of X3 with that. +However, we should note that because LLVM keeps a def-use chain +3 for each Value, we wouldn’t need +to perform data-flow analysis to find and replace all the uses +(there is even a utility function, replaceAllUsesWith(), +that performs this transformation by iterating the def-use chain).

+

Another important advantage is that the behavior of all uses +of an induction variable is the same. Without this, you need to +distinguish the case when the variable is used outside of +the loop it is defined in, for example:

+
for (i = 0; i < 100; i++) {
+  for (j = 0; j < 100; j++) {
+    k = i + j;
+    use(k);    // use 1
+  }
+  use(k);      // use 2
+}
+
+
+

Looking from the outer loop with the normal SSA form, the first use of k +is not well-behaved, while the second one is an induction variable with +base 100 and step 1. In practice, however, and in the LLVM context, +such cases can be handled effectively by SCEV. Scalar Evolution +(scalar-evolution), or SCEV, is an +(analysis) pass that analyzes and categorizes the evolution of scalar +expressions in loops.

+

In general, it’s easier to use SCEV in loops that are in LCSSA form. +The evolution of a scalar (loop-variant) expression that +SCEV can analyze is, by definition, relative to a loop. +An expression is represented in LLVM by an +llvm::Instruction. +If the expression is inside two (or more) loops (which can only +happen if the loops are nested, like in the example above) and you want +to get an analysis of its evolution (from SCEV), +you have to also specify relative to what Loop you want it. +Specifically, you have to use +getSCEVAtScope().

+

However, if all loops are in LCSSA form, each expression is actually +represented by two different llvm::Instructions. One inside the loop +and one outside, which is the loop-closing PHI node and represents +the value of the expression after the last iteration (effectively, +we break each loop-variant expression into two expressions and so, every +expression is at most in one loop). You can now just use +getSCEV(), +and which of these two llvm::Instructions you pass to it disambiguates +the context / scope / relative loop.

+

Footnotes

+
+
1
+

To insert these loop-closing PHI nodes, one has to +(re-)compute dominance frontiers (if the loop has multiple exits).

+
+
2
+

Considering the point of use of a PHI entry value +to be in the respective predecessor is a convention across the whole LLVM. +The reason is mostly practical; for example it preserves the dominance +property of SSA. It is also just an overapproximation of the actual +number of uses; the incoming block could branch to another block in which +case the value is not actually used but there are no side-effects (it might +increase its live range which is not relevant in LCSSA though). +Furthermore, we can gain some intuition if we consider liveness: +A PHI is usually inserted in the current block because the value can’t +be used from this point and onwards (i.e. the current block is a dominance +frontier). It doesn’t make sense to consider that the value is used in +the current block (because of the PHI) since the value stops being live +before the PHI. In some sense the PHI definition just “replaces” the original +value definition and doesn’t actually use it. It should be stressed that +this analogy is only used as an example and does not pose any strict +requirements. For example, the value might dominate the current block +but we can still insert a PHI (as we do with LCSSA PHI nodes) and +use the original value afterwards (in which case the two live ranges overlap, +although in LCSSA (the whole point is that) we never do that).

+
+
3(1,2)
+

A property of SSA is that there exists a def-use chain +for each definition, which is a list of all the uses of this definition. +LLVM implements this property by keeping a list of all the uses of a Value +in an internal data structure.

+
+
+
+
+

“More Canonical” Loops

+
+

Rotated Loops

+

Loops are rotated by the LoopRotate (loop-rotate) +pass, which converts loops into do/while style loops and is +implemented in +LoopRotation.h. Example:

+
void test(int n) {
+  for (int i = 0; i < n; i += 1)
+    // Loop body
+}
+
+
+

is transformed to:

+
void test(int n) {
+  int i = 0;
+  do {
+    // Loop body
+    i += 1;
+  } while (i < n);
+}
+
+
+

Warning: This transformation is valid only if the compiler +can prove that the loop body will be executed at least once. Otherwise, +it has to insert a guard which will test it at runtime. In the example +above, that would be:

+
void test(int n) {
+  int i = 0;
+  if (n > 0) {
+    do {
+      // Loop body
+      i += 1;
+    } while (i < n);
+  }
+}
+
+
+

It’s important to understand the effect of loop rotation +at the LLVM IR level. We follow with the previous examples +in LLVM IR while also providing a graphical representation +of the control-flow graphs (CFG). You can get the same graphical +results by utilizing the view-cfg pass.

+

The initial for loop could be translated to:

+
define void @test(i32 %n) {
+entry:
+  br label %for.header
+
+for.header:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
+  %cond = icmp slt i32 %i, %n
+  br i1 %cond, label %body, label %exit
+
+body:
+  ; Loop body
+  br label %latch
+
+latch:
+  %i.next = add nsw i32 %i, 1
+  br label %for.header
+
+exit:
+  ret void
+}
+
+
+_images/loop-terminology-initial-loop.png +

Before we explain how LoopRotate will actually +transform this loop, here’s how we could convert +it (by hand) to a do-while style loop.

+
define void @test(i32 %n) {
+entry:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
+  ; Loop body
+  br label %latch
+
+latch:
+  %i.next = add nsw i32 %i, 1
+  %cond = icmp slt i32 %i.next, %n
+  br i1 %cond, label %body, label %exit
+
+exit:
+  ret void
+}
+
+
+_images/loop-terminology-rotated-loop.png +

Note two things:

+
    +
  • The condition check was moved to the “bottom” of the loop, i.e. +the latch. This is something that LoopRotate does by copying the header +of the loop to the latch.

  • +
  • The compiler in this case can’t deduce that the loop will +definitely execute at least once so the above transformation +is not valid. As mentioned above, a guard has to be inserted, +which is something that LoopRotate will do.

  • +
+

This is how LoopRotate transforms this loop:

+
define void @test(i32 %n) {
+entry:
+  %guard_cond = icmp slt i32 0, %n
+  br i1 %guard_cond, label %loop.preheader, label %exit
+
+loop.preheader:
+  br label %body
+
+body:
+  %i2 = phi i32 [ 0, %loop.preheader ], [ %i.next, %latch ]
+  br label %latch
+
+latch:
+  %i.next = add nsw i32 %i2, 1
+  %cond = icmp slt i32 %i.next, %n
+  br i1 %cond, label %body, label %loop.exit
+
+loop.exit:
+  br label %exit
+
+exit:
+  ret void
+}
+
+
+_images/loop-terminology-guarded-loop.png +

The result is a little bit more complicated than we may expect +because LoopRotate ensures that the loop is in +Loop Simplify Form +after rotation. +In this case, it inserted the %loop.preheader basic block so +that the loop has a preheader and it introduced the %loop.exit +basic block so that the loop has dedicated exits +(otherwise, %exit would be jumped to from both %latch and %entry, +but %entry is not contained in the loop). +Note that a loop has to be in Loop Simplify Form beforehand +too for LoopRotate to be applied successfully.

+

The main advantage of this form is that it allows hoisting +invariant instructions, especially loads, into the preheader. +That could be done in non-rotated loops as well but with +some disadvantages. Let’s illustrate them with an example:

+
for (int i = 0; i < n; ++i) {
+  auto v = *p;
+  use(v);
+}
+
+
+

We assume that loading from p is invariant and use(v) is some +statement that uses v. +If we wanted to execute the load only once we could move it +“out” of the loop body, resulting in this:

+
auto v = *p;
+for (int i = 0; i < n; ++i) {
+  use(v);
+}
+
+
+

However, now, in the case that n <= 0, in the initial form, +the loop body would never execute, and so, the load would +never execute. This is a problem mainly for semantic reasons. +Consider the case in which n <= 0 and loading from p is invalid. +In the initial program there would be no error. However, with this +transformation we would introduce one, effectively breaking +the initial semantics.

+

To avoid both of these problems, we can insert a guard:

+
if (n > 0) {  // loop guard
+  auto v = *p;
+  for (int i = 0; i < n; ++i) {
+    use(v);
+  }
+}
+
+
+

This is certainly better but it could be improved slightly. Notice +that the check for whether n is bigger than 0 is executed twice (and +n does not change in between). Once when we check the guard condition +and once in the first execution of the loop. To avoid that, we could +do an unconditional first execution and insert the loop condition +at the end. This effectively means transforming the loop into a do-while loop:

+
if (0 < n) {
+  auto v = *p;
+  do {
+    use(v);
+    ++i;
+  } while (i < n);
+}
+
+
+

Note that LoopRotate does not generally do such +hoisting. Rather, it is an enabling transformation for other +passes like Loop-Invariant Code Motion (-licm).

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MarkdownQuickstartTemplate.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MarkdownQuickstartTemplate.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MarkdownQuickstartTemplate.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MarkdownQuickstartTemplate.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,277 @@ + + + + + + + + + Markdown Quickstart Template — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Markdown Quickstart Template

+
+

Introduction and Quickstart

+

This document is meant to get you writing documentation as fast as possible +even if you have no previous experience with Markdown. The goal is to take +someone in the state of “I want to write documentation and get it added to +LLVM’s docs” and turn that into useful documentation mailed to llvm-commits +with as little nonsense as possible.

+

You can find this document in docs/MarkdownQuickstartTemplate.md. You +should copy it, open the new file in your text editor, write your docs, and +then send the new document to llvm-commits for review.

+

Focus on content. It is easy to fix the Markdown syntax +later if necessary, although Markdown tries to imitate common +plain-text conventions so it should be quite natural. A basic knowledge of +Markdown syntax is useful when writing the document, so the last +~half of this document (starting with Example Section) gives examples +which should cover 99% of use cases.

+

Let me say that again: focus on content. But if you really need to verify +Sphinx’s output, see docs/README.txt for information.

+

Once you have finished with the content, please send the .md file to +llvm-commits for review.

+
+
+

Guidelines

+

Try to answer the following questions in your first section:

+
    +
  1. Why would I want to read this document?

  2. +
  3. What should I know to be able to follow along with this document?

  4. +
  5. What will I have learned by the end of this document?

  6. +
+

Common names for the first section are Introduction, Overview, or +Background.

+

If possible, make your document a “how to”. Give it a name HowTo*.md +like the other “how to” documents. This format is usually the easiest +for another person to understand and also the most useful.

+

You generally should not be writing documentation other than a “how to” +unless there is already a “how to” about your topic. The reason for this +is that without a “how to” document to read first, it is difficult for a +person to understand a more advanced document.

+

Focus on content (yes, I had to say it again).

+

The rest of this document shows example Markdown markup constructs +that are meant to be read by you in your text editor after you have copied +this file into a new file for the documentation you are about to write.

+
+
+

Example Section

+

Your text can be emphasized, bold, or monospace.

+

Use blank lines to separate paragraphs.

+

Headings (like Example Section just above) give your document its +structure.

+
+

Example Subsection

+

Make a link like this. There is also a more +sophisticated syntax which can be more readable for longer links since +it disrupts the flow less. You can put the [link name]: <URL> block +pretty much anywhere later in the document.

+

Lists can be made like this:

+
    +
  1. A list starting with [0-9]. will be automatically numbered.

  2. +
  3. This is a second list element.

    +
      +
    1. Use indentation to create nested lists.

    2. +
    +
  4. +
+

You can also use unordered lists.

+
    +
  • Stuff.

    +
      +
    • Deeper stuff.

    • +
    +
  • +
  • More stuff.

  • +
+
+

Example Subsubsection

+

You can make blocks of code like this:

+
int main() {
+  return 0;
+}
+
+
+

As an extension to markdown, you can also specify a highlighter to use.

+
int main() {
+  return 0;
+}
+
+
+

For a shell session, use a console code block.

+
$ echo "Goodbye cruel world!"
+$ rm -rf /
+
+
+

If you need to show LLVM IR use the llvm code block.

+
define i32 @test1() {
+entry:
+  ret i32 0
+}
+
+
+

Some other common code blocks you might need are c, objc, make, +and cmake. If you need something beyond that, you can look at the full +list of supported code blocks.

+

However, don’t waste time fiddling with syntax highlighting when you could +be adding meaningful content. When in doubt, show preformatted text +without any syntax highlighting like this:

+
                      .
+                       +:.
+                   ..:: ::
+                .++:+:: ::+:.:.
+               .:+           :
+        ::.::..::            .+.
+      ..:+    ::              :
+......+:.                    ..
+      :++.    ..              :
+        .+:::+::              :
+        ..   . .+            ::
+                 +.:      .::+.
+                  ...+. .: .
+                     .++:..
+                      ...
+
+
+
+
Hopefully you won’t need to be this deep
+

If you need to do fancier things than what has been shown in this document, +you can mail the list or check the Common Mark spec. Sphinx specific +integration documentation can be found in the recommonmark docs.

+
+
+
+
+
+

Generating the documentation

+

see Sphinx Quickstart Template

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MarkedUpDisassembly.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MarkedUpDisassembly.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MarkedUpDisassembly.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MarkedUpDisassembly.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,214 @@ + + + + + + + + + LLVM’s Optional Rich Disassembly Output — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM’s Optional Rich Disassembly Output

+ +
+

Introduction

+

LLVM’s default disassembly output is raw text. To allow consumers more ability +to introspect the instructions’ textual representation or to reformat for a more +user friendly display there is an optional rich disassembly output.

+

This optional output is sufficient to reference into individual portions of the +instruction text. This is intended for clients like disassemblers, list file +generators, and pretty-printers, which need more than the raw instructions and +the ability to print them.

+

To provide this functionality the assembly text is marked up with annotations. +The markup is simple enough in syntax to be robust even in the case of version +mismatches between consumers and producers. That is, the syntax generally does +not carry semantics beyond “this text has an annotation,” so consumers can +simply ignore annotations they do not understand or do not care about.

+

After calling LLVMCreateDisasm() to create a disassembler context the +optional output is enabled with this call:

+
LLVMSetDisasmOptions(DC, LLVMDisassembler_Option_UseMarkup);
+
+
+

Then subsequent calls to LLVMDisasmInstruction() will return output strings +with the marked up annotations.

+
+
+

Instruction Annotations

+
+

Contextual markups

+

Annotated assembly display will supply contextual markup to help clients more +efficiently implement things like pretty printers. Most markup will be target +independent, so clients can effectively provide good display without any target +specific knowledge.

+

Annotated assembly goes through the normal instruction printer, but optionally +includes contextual tags on portions of the instruction string. An annotation +is any ‘<’ ‘>’ delimited section of text(1).

+
annotation: '<' tag-name tag-modifier-list ':' annotated-text '>'
+tag-name: identifier
+tag-modifier-list: comma delimited identifier list
+
+
+

The tag-name is an identifier which gives the type of the annotation. For the +first pass, this will be very simple, with memory references, registers, and +immediates having the tag names “mem”, “reg”, and “imm”, respectively.

+

The tag-modifier-list is typically additional target-specific context, such as +register class.

+

Clients should accept and ignore any tag-names or tag-modifiers they do not +understand, allowing the annotations to grow in richness without breaking older +clients.

+

For example, a possible annotation of an ARM load of a stack-relative location +might be annotated as:

+
ldr <reg gpr:r0>, <mem regoffset:[<reg gpr:sp>, <imm:#4>]>
+
+
+

1: For assembly dialects in which ‘<’ and/or ‘>’ are legal tokens, a literal token is escaped by following immediately with a repeat of the character. For example, a literal ‘<’ character is output as ‘<<’ in an annotated assembly string.

+
+
+

C API Details

+

The intended consumers of this information use the C API, therefore the new C +API function for the disassembler will be added to provide an option to produce +disassembled instructions with annotations, LLVMSetDisasmOptions() and the +LLVMDisassembler_Option_UseMarkup option (see above).

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MCJITDesignAndImplementation.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MCJITDesignAndImplementation.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MCJITDesignAndImplementation.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MCJITDesignAndImplementation.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,289 @@ + + + + + + + + + MCJIT Design and Implementation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

MCJIT Design and Implementation

+
+

Introduction

+

This document describes the internal workings of the MCJIT execution +engine and the RuntimeDyld component. It is intended as a high level +overview of the implementation, showing the flow and interactions of +objects throughout the code generation and dynamic loading process.

+
+
+

Engine Creation

+

In most cases, an EngineBuilder object is used to create an instance of +the MCJIT execution engine. The EngineBuilder takes an llvm::Module +object as an argument to its constructor. The client may then set various +options that will later be passed along to the MCJIT engine, +including the selection of MCJIT as the engine type to be created. +Of particular interest is the EngineBuilder::setMCJITMemoryManager +function. If the client does not explicitly create a memory manager at +this time, a default memory manager (specifically SectionMemoryManager) +will be created when the MCJIT engine is instantiated.

+

Once the options have been set, a client calls EngineBuilder::create to +create an instance of the MCJIT engine. If the client does not use the +form of this function that takes a TargetMachine as a parameter, a new +TargetMachine will be created based on the target triple associated with +the Module that was used to create the EngineBuilder.

+_images/MCJIT-engine-builder.png +

EngineBuilder::create will call the static MCJIT::createJIT function, +passing in its pointers to the module, memory manager and target machine +objects, all of which will subsequently be owned by the MCJIT object.

+

The MCJIT class has a member variable, Dyld, which contains an instance of +the RuntimeDyld wrapper class. This member will be used for +communications between MCJIT and the actual RuntimeDyldImpl object that +gets created when an object is loaded.

+_images/MCJIT-creation.png +

Upon creation, MCJIT holds a pointer to the Module object that it received +from EngineBuilder but it does not immediately generate code for this +module. Code generation is deferred until either the +MCJIT::finalizeObject method is called explicitly or a function such as +MCJIT::getPointerToFunction is called which requires the code to have been +generated.

+
+
+

Code Generation

+

When code generation is triggered, as described above, MCJIT will first +attempt to retrieve an object image from its ObjectCache member, if one +has been set. If a cached object image cannot be retrieved, MCJIT will +call its emitObject method. MCJIT::emitObject uses a local PassManager +instance and creates a new ObjectBufferStream instance, both of which it +passes to TargetMachine::addPassesToEmitMC before calling PassManager::run +on the Module with which it was created.

+_images/MCJIT-load.png +

The PassManager::run call causes the MC code generation mechanisms to emit +a complete relocatable binary object image (in either ELF or MachO +format, depending on the target) into the ObjectBufferStream object, which +is flushed to complete the process. If an ObjectCache is being used, the +image will be passed to the ObjectCache here.

+

At this point, the ObjectBufferStream contains the raw object image. +Before the code can be executed, the code and data sections from this +image must be loaded into suitable memory, relocations must be applied and +memory permission and code cache invalidation (if required) must be completed.

+
+
+

Object Loading

+

Once an object image has been obtained, either through code generation or +having been retrieved from an ObjectCache, it is passed to RuntimeDyld to +be loaded. The RuntimeDyld wrapper class examines the object to determine +its file format and creates an instance of either RuntimeDyldELF or +RuntimeDyldMachO (both of which derive from the RuntimeDyldImpl base +class) and calls the RuntimeDyldImpl::loadObject method to perform the +actual loading.

+_images/MCJIT-dyld-load.png +

RuntimeDyldImpl::loadObject begins by creating an ObjectImage instance +from the ObjectBuffer it received. ObjectImage, which wraps the +ObjectFile class, is a helper class which parses the binary object image +and provides access to the information contained in the format-specific +headers, including section, symbol and relocation information.

+

RuntimeDyldImpl::loadObject then iterates through the symbols in the +image. Information about common symbols is collected for later use. For +each function or data symbol, the associated section is loaded into memory +and the symbol is stored in a symbol table map data structure. When the +iteration is complete, a section is emitted for the common symbols.

+

Next, RuntimeDyldImpl::loadObject iterates through the sections in the +object image and for each section iterates through the relocations for +that section. For each relocation, it calls the format-specific +processRelocationRef method, which will examine the relocation and store +it in one of two data structures, a section-based relocation list map and +an external symbol relocation map.

+_images/MCJIT-load-object.png +

When RuntimeDyldImpl::loadObject returns, all of the code and data +sections for the object will have been loaded into memory allocated by the +memory manager and relocation information will have been prepared, but the +relocations have not yet been applied and the generated code is still not +ready to be executed.

+

[Currently (as of August 2013) the MCJIT engine will immediately apply +relocations when loadObject completes. However, this shouldn’t be +happening. Because the code may have been generated for a remote target, +the client should be given a chance to re-map the section addresses before +relocations are applied. It is possible to apply relocations multiple +times, but in the case where addresses are to be re-mapped, this first +application is wasted effort.]

+
+
+

Address Remapping

+

At any time after initial code has been generated and before +finalizeObject is called, the client can remap the address of sections in +the object. Typically this is done because the code was generated for an +external process and is being mapped into that process’ address space. +The client remaps the section address by calling MCJIT::mapSectionAddress. +This should happen before the section memory is copied to its new +location.

+

When MCJIT::mapSectionAddress is called, MCJIT passes the call on to +RuntimeDyldImpl (via its Dyld member). RuntimeDyldImpl stores the new +address in an internal data structure but does not update the code at this +time, since other sections are likely to change.

+

When the client is finished remapping section addresses, it will call +MCJIT::finalizeObject to complete the remapping process.

+
+
+

Final Preparations

+

When MCJIT::finalizeObject is called, MCJIT calls +RuntimeDyld::resolveRelocations. This function will attempt to locate any +external symbols and then apply all relocations for the object.

+

External symbols are resolved by calling the memory manager’s +getPointerToNamedFunction method. The memory manager will return the +address of the requested symbol in the target address space. (Note, this +may not be a valid pointer in the host process.) RuntimeDyld will then +iterate through the list of relocations it has stored which are associated +with this symbol and invoke the resolveRelocation method which, through a +format-specific implementation, will apply the relocation to the loaded +section memory.

+

Next, RuntimeDyld::resolveRelocations iterates through the list of +sections and for each section iterates through a list of relocations that +have been saved which reference that section and calls resolveRelocation for +each entry in this list. The relocation list here is a list of +relocations for which the symbol associated with the relocation is located +in the section associated with the list. Each of these relocations will +have a target location at which the relocation will be applied that is +likely located in a different section.

+_images/MCJIT-resolve-relocations.png +

Once relocations have been applied as described above, MCJIT calls +RuntimeDyld::getEHFrameSection, and if a non-zero result is returned +passes the section data to the memory manager’s registerEHFrames method. +This allows the memory manager to call any desired target-specific +functions, such as registering the EH frame information with a debugger.

+

Finally, MCJIT calls the memory manager’s finalizeMemory method. In this +method, the memory manager will invalidate the target code cache, if +necessary, and apply final permissions to the memory pages it has +allocated for code and data memory.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MeetupGuidelines.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MeetupGuidelines.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MeetupGuidelines.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MeetupGuidelines.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,222 @@ + + + + + + + + + How to start LLVM Social in your town — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How to start LLVM Social in your town

+

Here are several ideas you can take into account when designing your specific +LLVM Social.

+

Before you start, it is essential to make sure that the meetup is as welcoming +as any other event related to LLVM. Therefore you shall follow LLVM’s +Code of Conduct.

+

Other than that - your mileage may vary. Please adapt your social to what works +best for your specific situation.

+
+

General suggestions

+
    +
  • We highly recommend that you join the official LLVM meetup organization. In +addition to covering the cost of the meetup, all LLVM meetups are advertised +together and easily found by potential attendees. Please contact +arnaud.degrandmaison@llvm.org for more details.

  • +
  • Beware of cultural differences: what works well in one region may not work in +other parts of the world.

  • +
  • Do not be alone to organize the meetup. Try to work with a couple other +organizers. This is more motivating as an organizer, and this makes the +meetup more resilient over time.

  • +
  • Each event can have a different form such as a social event, or +a hackathon/workshop, or a ‘mini-conference’ with one or more talks. You do +not have to stick to one format forever.

  • +
  • Whatever format you choose, LLVM Weekly is an +excellent topic starter: go through the 3-4 recent LLVM Weekly posts and +prepare a list of the most interesting/notable news and discuss them with the +group.

  • +
+
+ +
+

Tech talks

+
    +
  • It’s a great idea to have several talks scheduled for several upcoming +meetups to get the ball rolling.

  • +
  • Keep looking for speakers far in advance, ideally you should have 2-3 +speakers ready in the pipeline.

  • +
  • Try to record the talks if possible. It adds visibility to the meetup and +is just a good idea in general. Any modern smartphone or tablet should work, but +you can also get a camera. Though, it is recommended to get an external +microphone for better sound.

  • +
+
+
+

Where to host the meetup?

+
    +
  • Look around for bars/cafés with projectors.

  • +
  • Talk to tech companies in the area.

  • +
  • Some co-working spaces provide their facilities for non-profit (i.e., you do +not charge attendees any fees) meetups.

  • +
  • Ask nearby universities or university departments.

  • +
+
+
+

How to pick the date?

+
    +
  • Make sure you do not clash with the similar meetups in the city (e.g., +C++ user groups).

  • +
  • Prefer not to have a meetup the same week when the other similar meetups +happen (e.g., it’s not a good idea to have LLVM meetup on Thursday after +C++ meetup on Wednesday).

  • +
  • Meetups on weekends may attract people who live far away from the city, +but the people who live in the city may not attend.

  • +
  • Make a poll, but beware that not every responder will join (we had ~20 votes +on the poll, while only ~8 people attended).

  • +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MemorySSA.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MemorySSA.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MemorySSA.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MemorySSA.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,531 @@ + + + + + + + + + MemorySSA — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

MemorySSA

+ +
+

Introduction

+

MemorySSA is an analysis that allows us to cheaply reason about the +interactions between various memory operations. Its goal is to replace +MemoryDependenceAnalysis for most (if not all) use-cases. This is because, +unless you’re very careful, use of MemoryDependenceAnalysis can easily +result in quadratic-time algorithms in LLVM. Additionally, MemorySSA doesn’t +have as many arbitrary limits as MemoryDependenceAnalysis, so you should get +better results, too. One common use of MemorySSA is to quickly find out +that something definitely cannot happen (for example, reason that a hoist +out of a loop can’t happen).

+

At a high level, one of the goals of MemorySSA is to provide an SSA based +form for memory, complete with def-use and use-def chains, which +enables users to quickly find may-def and may-uses of memory operations. +It can also be thought of as a way to cheaply give versions to the complete +state of memory, and associate memory operations with those versions.

+

This document goes over how MemorySSA is structured, and some basic +intuition on how MemorySSA works.

+

A paper on MemorySSA (with notes about how it’s implemented in GCC) can be +found here. Though, it’s +relatively out-of-date; the paper references multiple memory partitions, but GCC +eventually swapped to just using one, like we now have in LLVM. Like +GCC’s, LLVM’s MemorySSA is intraprocedural.

+
+
+

MemorySSA Structure

+

MemorySSA is a virtual IR. After it’s built, MemorySSA will contain a +structure that maps Instructions to MemoryAccesses, which are +MemorySSA’s parallel to LLVM Instructions.

+

Each MemoryAccess can be one of three types:

+
    +
  • MemoryDef

  • +
  • MemoryPhi

  • +
  • MemoryUse

  • +
+

MemoryDefs are operations which may either modify memory, or which +introduce some kind of ordering constraints. Examples of MemoryDefs +include stores, function calls, loads with acquire (or higher) +ordering, volatile operations, memory fences, etc. A MemoryDef +always introduces a new version of the entire memory and is linked with a single +MemoryDef/MemoryPhi which is the version of memory that the new +version is based on. This implies that there is a single +Def chain that connects all the Defs, either directly +or indirectly. For example in:

+
b = MemoryDef(a)
+c = MemoryDef(b)
+d = MemoryDef(c)
+
+
+

d is connected directly with c and indirectly with b. +This means that d potentially clobbers (see below) c or +b or both. This in turn implies that without the use of The walker, +initially every MemoryDef clobbers every other MemoryDef.

+

MemoryPhis are PhiNodes, but for memory operations. If at any +point we have two (or more) MemoryDefs that could flow into a +BasicBlock, the block’s top MemoryAccess will be a +MemoryPhi. As in LLVM IR, MemoryPhis don’t correspond to any +concrete operation. As such, BasicBlocks are mapped to MemoryPhis +inside MemorySSA, whereas Instructions are mapped to MemoryUses +and MemoryDefs.

+

Note also that in SSA, Phi nodes merge must-reach definitions (that is, +definitions that must be new versions of variables). In MemorySSA, PHI nodes +merge may-reach definitions (that is, until disambiguated, the versions that +reach a phi node may or may not clobber a given variable).

+

MemoryUses are operations which use but don’t modify memory. An example of +a MemoryUse is a load, or a readonly function call.

+

Every function that exists has a special MemoryDef called liveOnEntry. +It dominates every MemoryAccess in the function that MemorySSA is being +run on, and implies that we’ve hit the top of the function. It’s the only +MemoryDef that maps to no Instruction in LLVM IR. Use of +liveOnEntry implies that the memory being used is either undefined or +defined before the function begins.

+

An example of all of this overlaid on LLVM IR (obtained by running opt +-passes='print<memoryssa>' -disable-output on an .ll file) is below. When +viewing this example, it may be helpful to view it in terms of clobbers. +The operands of a given MemoryAccess are all (potential) clobbers of said +MemoryAccess, and the value produced by a MemoryAccess can act as a clobber +for other MemoryAccesses.

+

If a MemoryAccess is a clobber of another, it means that these two +MemoryAccesses may access the same memory. For example, x = MemoryDef(y) +means that x potentially modifies memory that y modifies/constrains +(or has modified / constrained). +In the same manner, a = MemoryPhi({BB1,b},{BB2,c}) means that +anyone that uses a is accessing memory potentially modified / constrained +by either b or c (or both). And finally, MemoryUse(x) means +that this use accesses memory that x has modified / constrained +(as an example, think that if x = MemoryDef(...) +and MemoryUse(x) are in the same loop, the use can’t +be hoisted outside alone).

+

Another useful way of looking at it is in terms of memory versions. +In that view, operands of a given MemoryAccess are the version +of the entire memory before the operation, and if the access produces +a value (i.e. MemoryDef/MemoryPhi), +the value is the new version of the memory after the operation.

+
define void @foo() {
+entry:
+  %p1 = alloca i8
+  %p2 = alloca i8
+  %p3 = alloca i8
+  ; 1 = MemoryDef(liveOnEntry)
+  store i8 0, i8* %p3
+  br label %while.cond
+
+while.cond:
+  ; 6 = MemoryPhi({entry,1},{if.end,4})
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  ; 2 = MemoryDef(6)
+  store i8 0, i8* %p1
+  br label %if.end
+
+if.else:
+  ; 3 = MemoryDef(6)
+  store i8 1, i8* %p2
+  br label %if.end
+
+if.end:
+  ; 5 = MemoryPhi({if.then,2},{if.else,3})
+  ; MemoryUse(5)
+  %1 = load i8, i8* %p1
+  ; 4 = MemoryDef(5)
+  store i8 2, i8* %p2
+  ; MemoryUse(1)
+  %2 = load i8, i8* %p3
+  br label %while.cond
+}
+
+
+

The MemorySSA IR is shown in comments that precede the instructions they map +to (if such an instruction exists). For example, 1 = MemoryDef(liveOnEntry) +is a MemoryAccess (specifically, a MemoryDef), and it describes the LLVM +instruction store i8 0, i8* %p3. Other places in MemorySSA refer to this +particular MemoryDef as 1 (much like how one can refer to load i8, i8* +%p1 in LLVM with %1). Again, MemoryPhis don’t correspond to any LLVM +Instruction, so the line directly below a MemoryPhi isn’t special.

+

Going from the top down:

+
    +
  • 6 = MemoryPhi({entry,1},{if.end,4}) notes that, when entering +while.cond, the reaching definition for it is either 1 or 4. This +MemoryPhi is referred to in the textual IR by the number 6.

  • +
  • 2 = MemoryDef(6) notes that store i8 0, i8* %p1 is a definition, +and its reaching definition before it is 6, or the MemoryPhi after +while.cond. (See the Build-time use optimization and Precision +sections below for why this MemoryDef isn’t linked to a separate, +disambiguated MemoryPhi.)

  • +
  • 3 = MemoryDef(6) notes that store i8 1, i8* %p2 is a definition; its +reaching definition is also 6.

  • +
  • 5 = MemoryPhi({if.then,2},{if.else,3}) notes that the clobber before +this block could either be 2 or 3.

  • +
  • MemoryUse(5) notes that load i8, i8* %p1 is a use of memory, and that +it’s clobbered by 5.

  • +
  • 4 = MemoryDef(5) notes that store i8 2, i8* %p2 is a definition; its +reaching definition is 5.

  • +
  • MemoryUse(1) notes that load i8, i8* %p3 is just a user of memory, +and the last thing that could clobber this use is above while.cond (e.g. +the store to %p3). In memory versioning parlance, it really only depends on +the memory version 1, and is unaffected by the new memory versions generated since +then.

  • +
+

As an aside, MemoryAccess is a Value mostly for convenience; it’s not +meant to interact with LLVM IR.

+
+
+

Design of MemorySSA

+

MemorySSA is an analysis that can be built for any arbitrary function. When +it’s built, it does a pass over the function’s IR in order to build up its +mapping of MemoryAccesses. You can then query MemorySSA for things +like the dominance relation between MemoryAccesses, and get the +MemoryAccess for any given Instruction.

+

When MemorySSA is done building, it also hands you a MemorySSAWalker +that you can use (see below).

+
+

The walker

+

A structure that helps MemorySSA do its job is the MemorySSAWalker, or +the walker, for short. The goal of the walker is to provide answers to clobber +queries beyond what’s represented directly by MemoryAccesses. For example, +given:

+
define void @foo() {
+  %a = alloca i8
+  %b = alloca i8
+
+  ; 1 = MemoryDef(liveOnEntry)
+  store i8 0, i8* %a
+  ; 2 = MemoryDef(1)
+  store i8 0, i8* %b
+}
+
+
+

The store to %a is clearly not a clobber for the store to %b. It would +be the walker’s goal to figure this out, and return liveOnEntry when queried +for the clobber of MemoryAccess 2.

+

By default, MemorySSA provides a walker that can optimize MemoryDefs +and MemoryUses by consulting whatever alias analysis stack you happen to +be using. Walkers were built to be flexible, though, so it’s entirely reasonable +(and expected) to create more specialized walkers (e.g. one that specifically +queries GlobalsAA, one that always stops at MemoryPhi nodes, etc).

+
+

Default walker APIs

+

There are two main APIs used to retrieve the clobbering access using the walker:

+
    +
  • MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA); returns the +clobbering memory access for MA, caching all intermediate results +computed along the way as part of each access queried.

  • +
  • MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, const MemoryLocation &Loc); +returns the access clobbering memory location Loc, starting at MA. +Because this API does not request the clobbering access of a specific memory +access, there are no results that can be cached.

  • +
+
+
+

Locating clobbers yourself

+

If you choose to make your own walker, you can find the clobber for a +MemoryAccess by walking every MemoryDef that dominates said +MemoryAccess. The structure of MemoryDefs makes this relatively simple; +they ultimately form a linked list of every clobber that dominates the +MemoryAccess that you’re trying to optimize. In other words, the +definingAccess of a MemoryDef is always the nearest dominating +MemoryDef or MemoryPhi of said MemoryDef.

+
+
+
+

Build-time use optimization

+

MemorySSA will optimize some MemoryAccesses at build-time. +Specifically, we optimize the operand of every MemoryUse to point to the +actual clobber of said MemoryUse. This can be seen in the above example; the +second MemoryUse in if.end has an operand of 1, which is a +MemoryDef from the entry block. This is done to make walking, +value numbering, etc, faster and easier.

+

It is not possible to optimize MemoryDef in the same way, as we +restrict MemorySSA to one memory variable and, thus, one Phi node +per block.

+
+
+

Invalidation and updating

+

Because MemorySSA keeps track of LLVM IR, it needs to be updated whenever +the IR is updated. “Update”, in this case, includes the addition, deletion, and +motion of Instructions. The update API is being made on an as-needed basis. +If you’d like examples, GVNHoist is a user of MemorySSA’s update API.

+
+

Phi placement

+

MemorySSA only places MemoryPhis where they’re actually +needed. That is, it is a pruned SSA form, like LLVM’s SSA form. For +example, consider:

+
define void @foo() {
+entry:
+  %p1 = alloca i8
+  %p2 = alloca i8
+  %p3 = alloca i8
+  ; 1 = MemoryDef(liveOnEntry)
+  store i8 0, i8* %p3
+  br label %while.cond
+
+while.cond:
+  ; 3 = MemoryPhi({%0,1},{if.end,2})
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  br label %if.end
+
+if.else:
+  br label %if.end
+
+if.end:
+  ; MemoryUse(1)
+  %1 = load i8, i8* %p1
+  ; 2 = MemoryDef(3)
+  store i8 2, i8* %p2
+  ; MemoryUse(1)
+  %2 = load i8, i8* %p3
+  br label %while.cond
+}
+
+
+

Because we removed the stores from if.then and if.else, a MemoryPhi +for if.end would be pointless, so we don’t place one. So, if you need to +place a MemoryDef in if.then or if.else, you’ll need to also create +a MemoryPhi for if.end.

+

If it turns out that this is a large burden, we can just place MemoryPhis +everywhere. Because we have Walkers that are capable of optimizing above said +phis, doing so shouldn’t prohibit optimizations.

+
+
+
+

Non-Goals

+

MemorySSA is meant to reason about the relation between memory +operations, and enable quicker querying. +It isn’t meant to be the single source of truth for all potential memory-related +optimizations. Specifically, care must be taken when trying to use MemorySSA +to reason about atomic or volatile operations, as in:

+
define i8 @foo(i8* %a) {
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:
+  ; 1 = MemoryDef(liveOnEntry)
+  %0 = load volatile i8, i8* %a
+  br label %if.end
+
+if.end:
+  %av = phi i8 [0, %entry], [%0, %if.then]
+  ret i8 %av
+}
+
+
+

Going solely by MemorySSA’s analysis, hoisting the load to entry may +seem legal. Because it’s a volatile load, though, it’s not.

+
+
+

Design tradeoffs

+
+

Precision

+

MemorySSA in LLVM deliberately trades off precision for speed. +Let us think about memory variables as if they were disjoint partitions of the +memory (that is, if you have one variable, as above, it represents the entire +memory, and if you have multiple variables, each one represents some +disjoint portion of the memory).

+

First, because alias analysis results conflict with each other, and +each result may be what an analysis wants (e.g., +TBAA may say no-alias, and something else may say must-alias), it is +not possible to partition the memory the way every optimization wants. +Second, some alias analysis results are not transitive (i.e., A noalias B, +and B noalias C, does not mean A noalias C), so it is not possible to +come up with a precise partitioning in all cases without variables to +represent every pair of possible aliases. Thus, partitioning +precisely may require introducing at least N^2 new virtual variables, +phi nodes, etc.

+

Each of these variables may be clobbered at multiple def sites.

+

To give an example, if you were to split up struct fields into +individual variables, all aliasing operations that may-def multiple struct +fields will may-def more than one of them. This is pretty common (calls, +copies, field stores, etc).

+

Experience with SSA forms for memory in other compilers has shown that +it is simply not possible to do this precisely, and in fact, doing it +precisely is not worth it, because now all the optimizations have to +walk tons and tons of virtual variables and phi nodes.

+

So we partition. At the point at which you partition, again, +experience has shown us there is no point in partitioning to more than +one variable. It simply generates more IR, and optimizations still +have to query something to disambiguate further anyway.

+

As a result, LLVM partitions to one variable.

+
+
+

Use Optimization

+

Unlike other partitioned forms, LLVM’s MemorySSA does make one +useful guarantee - all loads are optimized to point at the thing that +actually clobbers them. This gives some nice properties. For example, +for a given store, you can find all loads actually clobbered by that +store by walking the immediate uses of the store.

+
+
+ +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MemTagSanitizer.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MemTagSanitizer.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MemTagSanitizer.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MemTagSanitizer.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,228 @@ + + + + + + + + + MemTagSanitizer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

MemTagSanitizer

+ +
+

Introduction

+

Note: this page describes a tool under development. Part of this +functionality is planned but not implemented. Hardware capable of +running MemTagSanitizer does not exist as of Oct 2019.

+

MemTagSanitizer is a fast memory error detector and a code hardening +tool based on the Armv8.5-A Memory Tagging Extension. It +detects a similar class of errors as AddressSanitizer or HardwareAssistedAddressSanitizer, but with +much lower overhead.

+

MemTagSanitizer overhead is expected to be in low single digits, both +CPU and memory. There are plans for a debug mode with slightly higher +memory overhead and better diagnostics. The primary use case of +MemTagSanitizer is code hardening in production binaries, where it is +expected to be a strong mitigation for both stack and heap-based +memory bugs.

+
+
+

Usage

+

Compile and link your program with the -fsanitize=memtag flag. This +will only work when targeting AArch64 with the MemTag extension. One +possible way to achieve that is to add -target +aarch64-linux -march=armv8+memtag to the compilation flags.

+
+
+

Implementation

+

See HardwareAssistedAddressSanitizer for a general overview of a +tag-based approach to memory safety. MemTagSanitizer follows a +similar implementation strategy, but with the tag storage (shadow) +provided by the hardware.

+

A quick overview of MTE hardware capabilities:

+
    +
  • Every 16 aligned bytes of memory can be assigned a 4-bit Allocation Tag.

  • +
  • Every pointer can have a 4-bit Address Tag that is in its most significant byte.

  • +
  • Most memory access instructions generate an exception if Address Tag != Allocation Tag.

  • +
  • Special instructions are provided for fast tag manipulation.

  • +
+
+
+

Stack instrumentation

+

Stack-based memory errors are detected by updating the Allocation Tag for +each local variable to a random value at the start of its lifetime, +and resetting it to the stack pointer Address Tag at the end of +it. Unallocated stack space is expected to match the Address Tag of +SP; this allows skipping the tagging of any variable when memory safety can +be statically proven.

+

Allocating a truly random tag for each stack variable in a large +function may incur significant code size overhead, because it means +that each variable’s address is an independent, non-rematerializable +value; thus a function with N local variables will have extra N live +values to keep through most of its life time.

+

For this reason MemTagSanitizer generates at most one random tag per +function, called a “base tag”. Other stack variables, if there are +any, are assigned tags at a fixed offset from the base.

+

Please refer to this document +for more details about stack instrumentation.

+
+
+

Heap tagging

+

Note: this part is not implemented as of Oct 2019.

+

MemTagSanitizer will use Scudo Hardened Allocator +with additional code to update memory tags when

+
    +
  • New memory is obtained from the system.

  • +
  • An allocation is freed.

  • +
+

There is no need to change Allocation Tags for the bulk of the +allocated memory in malloc(), as long as a pointer with the matching +Address Tag is returned.

+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MergeFunctions.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MergeFunctions.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MergeFunctions.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MergeFunctions.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,850 @@ + + + + + + + + + MergeFunctions pass, how it works — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

MergeFunctions pass, how it works

+ +
+

Introduction

+

Sometimes code contains equal functions, or functions that do exactly the same +thing even though they are non-equal on the IR level (e.g.: multiplication by 2 +and ‘shl 1’). It could happen due to several reasons: mainly, the usage of +templates and automatic code generators. Though, sometimes the users themselves could +write the same thing twice :-)

+

The main purpose of this pass is to recognize such functions and merge them.

+

This document is the extension to pass comments and describes the pass logic. It +describes the algorithm that is used in order to compare functions and +explains how we could combine equal functions correctly to keep the module +valid.

+

The material is presented in a top-down form, so the reader can start learning the pass +from high-level ideas and end with low-level algorithm details, thus preparing +him or her for reading the sources.

+

The main goal is to describe the algorithm and logic here and the concept. If +you don’t want to read the source code, but want to understand pass +algorithms, this document is good for you. The author tries not to repeat the +source-code and covers only common cases to avoid the cases of needing to +update this document after any minor code changes.

+
+

What should I know to be able to follow along with this document?

+

The reader should be familiar with common compiler-engineering principles and +LLVM code fundamentals. In this article, we assume the reader is familiar with the +Static Single Assignment +concept and has an understanding of +IR structure.

+

We will use terms such as +“module”, +“function”, +“basic block”, +“user”, +“value”, +“instruction”.

+

As a good starting point, the Kaleidoscope tutorial can be used:

+

LLVM Tutorial: Table of Contents

+

It’s especially important to understand chapter 3 of the tutorial:

+

Kaleidoscope Tutorial

+

The reader should also know how passes work in LLVM. They could use this +article as a reference and starting point here:

+

Writing an LLVM Pass

+

What else? Well perhaps the reader should also have some experience in LLVM pass +debugging and bug-fixing.

+
+
+

Narrative structure

+

The article consists of three parts. The first part explains pass functionality +on the top-level. The second part describes the comparison procedure itself. +The third part describes the merging process.

+

In every part, the author tries to put the contents in the top-down form. +The top-level methods will first be described followed by the terminal ones at +the end, in the tail of each part. If the reader sees the reference to the +method that wasn’t described yet, they will find its description a bit below.

+
+
+
+

Basics

+
+

How to do it?

+

Do we need to merge functions? The obvious answer is: Yes, that is quite a +possible case. We usually do have duplicates and it would be good to get rid +of them. But how do we detect duplicates? This is the idea: we split functions +into smaller bricks or parts and compare the number of “bricks”. If equal, +we compare the “bricks” themselves, and then draw our conclusions about the functions +themselves.

+

What could the difference be? For example, on a machine with 64-bit pointers +(let’s assume we have only one address space), one function stores a 64-bit +integer, while another one stores a pointer. If the target is the machine +mentioned above, and if functions are identical, except the parameter type (we +could consider it as a part of function type), then we can treat a uint64_t +and a void* as equal.

+

This is just an example; more possible details are described a bit below.

+

As another example, the reader may imagine two more functions. The first +function performs a multiplication by 2, while the second one performs a +logical left shift by 1.

+
+

Possible solutions

+

Let’s briefly consider possible options about how and what we have to implement +in order to create full-featured functions merging, and also what it would +mean for us.

+

Equal function detection obviously requires a “detector” method to be +implemented, and the latter should answer the question “whether functions are equal”. +This “detector” method consists of tiny “sub-detectors”, each of which answers +exactly the same question, but for function parts.

+

As the second step, we should merge equal functions. So it should be a “merger” +method. “Merger” accepts two functions F1 and F2, and produces F1F2 +function, the result of merging.

+

Having such routines in our hands, we can process a whole module, and merge all +equal functions.

+

In this case, we have to compare every function with every other function. As +the reader may notice, this way seems to be quite expensive. Of course we could +introduce hashing and other helpers, but it is still just an optimization, and +thus we remain at the level of O(N*N) complexity.

+

Can we reach another level? Could we introduce logarithmical search, or random +access lookup? The answer is: “yes”.

+
+
Random-access
+

How could this be done? Just convert each function to a number, and gather +all of them in a special hash-table. Functions with equal hashes are equal. +Good hashing means that every function part must be taken into account. That +means we have to convert every function part into some number, and then add it +into the hash. The look-up time would be small, but such an approach adds some +delay due to the hashing routine.

+
+ +
+
Present state
+

Both of the approaches (random-access and logarithmical) have been implemented +and tested and both give a very good improvement. What was most +surprising is that logarithmical search was faster; sometimes by up to 15%. The +hashing method needs some extra CPU time, which is the main reason why it works +slower; in most cases, total “hashing” time is greater than total +“logarithmical-search” time.

+

So, preference has been granted to the “logarithmical search”.

+

Though in the case of need, logarithmical-search (read “total-ordering”) could +be used as a milestone on our way to the random-access implementation.

+

Every comparison is based either on the numbers or on the flags comparison. In +the random-access approach, we could use the same comparison algorithm. +During comparison, we exit once we find the difference, but here we might have +to scan the whole function body every time (note, it could be slower). Like in +“total-ordering”, we will track every number and flag, but instead of +comparison, we should get the numbers sequence and then create the hash number. +So, once again, total-ordering could be considered as a milestone for even +faster (in theory) random-access approach.

+
+
+
+

MergeFunctions, main fields and runOnModule

+

There are two main important fields in the class:

+

FnTree – the set of all unique functions. It keeps items that couldn’t be +merged with each other. It is defined as:

+

std::set<FunctionNode> FnTree;

+

Here FunctionNode is a wrapper for llvm::Function class, with +implemented “<” operator among the functions set (below we explain how it works +exactly; this is a key point in fast functions comparison).

+

Deferred – merging process can affect bodies of functions that are in +FnTree already. Obviously, such functions should be rechecked again. In this +case, we remove them from FnTree, and mark them to be rescanned, namely +put them into Deferred list.

+
+
runOnModule
+

The algorithm is pretty simple:

+
    +
  1. Put all module’s functions into the worklist.

  2. +
+

2. Scan worklist’s functions twice: first enumerate only strong functions and +then only weak ones:

+
+

2.1. Loop body: take a function from worklist (call it FCur) and try to +insert it into FnTree: check whether FCur is equal to one of functions +in FnTree. If there is an equal function in FnTree +(call it FExists): merge function FCur with FExists. Otherwise add +the function from the worklist to FnTree.

+
+

3. Once the worklist scanning and merging operations are complete, check the +Deferred list. If it is not empty, refill the worklist contents with the +Deferred list and redo step 2; if the Deferred list is empty, then exit +from the method.

+
+ +
+
+
+
+

Functions comparison

+

At first, let’s define how exactly we compare complex objects.

+

Complex object comparison (function, basic-block, etc) is mostly based on its +sub-object comparison results. It is similar to the next “tree” objects +comparison:

+
    +
  1. For two trees T1 and T2 we perform depth-first-traversal and have +two sequences as a product: “T1Items” and “T2Items”.

  2. +
  3. We then compare chains “T1Items” and “T2Items” in +the most-significant-item-first order. The result of items comparison +would be the result of T1 and T2 comparison itself.

  4. +
+
+

FunctionComparator::compare(void)

+

A brief look at the source code tells us that the comparison starts in the +“int FunctionComparator::compare(void)” method.

+

1. The first parts to be compared are the function’s attributes and some +properties that are outside the “attributes” term, but still could make the +function different without changing its body. This part of the comparison is +usually done within simple cmpNumbers or cmpFlags operations (e.g. +cmpFlags(F1->hasGC(), F2->hasGC())). Below is a full list of the function’s +properties to be compared at this stage:

+
+
    +
  • Attributes (those are returned by Function::getAttributes() +method).

  • +
  • GC, for equivalence, RHS and LHS should be both either without +GC or with the same one.

  • +
  • Section, just like a GC: RHS and LHS should be defined in the +same section.

  • +
  • Variable arguments. LHS and RHS should be both either with or +without var-args.

  • +
  • Calling convention should be the same.

  • +
+
+

2. Function type. Checked by FunctionComparator::cmpType(Type*, Type*) +method. It checks return type and parameters type; the method itself will be +described later.

+

3. Associate function formal parameters with each other. Then, when comparing function +bodies, if we see the usage of LHS’s i-th argument in LHS’s body, +we want to see the usage of RHS’s i-th argument at the same place in RHS’s +body; otherwise the functions are different. At this stage we grant the preference +to those we met later in the function body (the value we met first would be less). +This is done by the “FunctionComparator::cmpValues(const Value*, const Value*)” +method (it will be described a bit later).

+
    +
  4. Function body comparison. As it is written in the method comments:

  2. +
+

“We do a CFG-ordered walk since the actual ordering of the blocks in the linked +list is immaterial. Our walk starts at the entry block for both functions, then +takes each block from each terminator in order. As an artifact, this also means +that unreachable blocks are ignored.”

+

So, using this walk we get BBs from left and right in the same order, and +compare them by “FunctionComparator::compare(const BasicBlock*, const +BasicBlock*)” method.

+

We also associate BBs with each other, like we did it with function formal +arguments (see cmpValues method below).

+
+
+

FunctionComparator::cmpType

+

Consider how type comparison works.

+

1. Coerce pointer to integer. If left type is a pointer, try to coerce it to the +integer type. It could be done if its address space is 0, or if address spaces +are ignored at all. Do the same thing for the right type.

+

2. If left and right types are equal, return 0. Otherwise we need to give +preference to one of them. So proceed to the next step.

+

3. If the types are of different kinds (different type IDs), return the result of the type +IDs comparison, treating them as numbers (use the cmpNumbers operation).

+

4. If types are vectors or integers, return result of their pointers comparison, +comparing them as numbers.

+
    +
  5. Check whether type ID belongs to the next group (call it equivalent-group):

    +
      +
    • Void

    • +
    • Float

    • +
    • Double

    • +
    • X86_FP80

    • +
    • FP128

    • +
    • PPC_FP128

    • +
    • Label

    • +
    • Metadata.

    • +
    +

    If the ID belongs to the group above, return 0, since it’s enough to see that the +types have the same TypeID. No additional information is required.

    +
  2. +
+

6. Left and right are pointers. Return result of address space comparison +(numbers comparison).

+

7. Complex types (structures, arrays, etc.). Follow complex objects comparison +technique (see the very first paragraph of this chapter). Both left and +right are to be expanded and their element types will be checked the same +way. If we get -1 or 1 on some stage, return it. Otherwise return 0.

+

8. Steps 1-6 describe all the possible cases; if we passed steps 1-6 and didn’t +reach any conclusion, then invoke llvm_unreachable, since it’s quite an +unexpected case.

+
+
+

cmpValues(const Value*, const Value*)

+

Method that compares local values.

+

This method gives us an answer to a very curious question: whether we could +treat local values as equal, and which value is greater otherwise. It’s +better to start from example:

+

Consider the situation when we’re looking at the same place in left +function “FL” and in right function “FR”. Every part of left place is +equal to the corresponding part of right place, and (!) both parts use +Value instances, for example:

+
instr0 i32 %LV   ; left side, function FL
+instr0 i32 %RV   ; right side, function FR
+
+
+

So, now our conclusion depends on Value instances comparison.

+

The main purpose of this method is to determine relation between such values.

+

What can we expect from equal functions? At the same place, in functions +“FL” and “FR” we expect to see equal values, or values defined at +the same place in “FL” and “FR”.

+

Consider a small example here:

+
define void %f(i32 %pf0, i32 %pf1) {
+  instr0 i32 %pf0 instr1 i32 %pf1 instr2 i32 123
+}
+
+
+
define void %g(i32 %pg0, i32 %pg1) {
+  instr0 i32 %pg0 instr1 i32 %pg0 instr2 i32 123
+}
+
+
+

In this example, pf0 is associated with pg0, pf1 is associated with +pg1, and we also declare that pf0 < pf1, and thus pg0 < pf1.

+

Instructions with opcode “instr0” would be equal, since their types and +opcodes are equal, and values are associated.

+

Instructions with opcode “instr1” from f are greater than instructions +with opcode “instr1” from g; here we have equal types and opcodes, but +“pf1” is greater than “pg0”.

+

Instructions with opcode “instr2” are equal, because their opcodes and +types are equal, and the same constant is used as a value.

+
+

What we associate in cmpValues?

+
    +
  • Function arguments. i-th argument from left function associated with +i-th argument from right function.

  • +
  • BasicBlock instances. In basic-block enumeration loop we associate i-th +BasicBlock from the left function with i-th BasicBlock from the right +function.

  • +
  • Instructions.

  • +
  • Instruction operands. Note, we can meet Value here we have never seen +before. In this case it is not a function argument, nor BasicBlock, nor +Instruction. It is a global value. It is a constant, since it’s the only +supposed global here. The method also compares: Constants that are of the +same type and if right constant can be losslessly bit-casted to the left +one, then we also compare them.

  • +
+
+
+

How to implement cmpValues?

+

Association is a case of equality for us. We just treat such values as equal, +but, in general, we need to implement antisymmetric relation. As mentioned +above, to understand what is less, we can use order in which we +meet values. If both values have the same order in a function (met at the same +time), we then treat values as associated. Otherwise – it depends on who was +first.

+

Every time we run the top-level compare method, we initialize two identical +maps (one for the left side, another one for the right side):

+

map<Value, int> sn_mapL, sn_mapR;

+

The key of the map is the Value itself, the value – is its order (call it +serial number).

+

To add value V we need to perform the next procedure:

+

sn_map.insert(std::make_pair(V, sn_map.size()));

+

For the first Value, map will return 0, for the second Value map will +return 1, and so on.

+

We can then check whether left and right values met at the same time with +a simple comparison:

+

cmpNumbers(sn_mapL[Left], sn_mapR[Right]);

+

Of course, we can combine insertion and comparison:

+
std::pair<iterator, bool>
+  LeftRes = sn_mapL.insert(std::make_pair(Left, sn_mapL.size())), RightRes
+  = sn_mapR.insert(std::make_pair(Right, sn_mapR.size()));
+return cmpNumbers(LeftRes.first->second, RightRes.first->second);
+
+
+

Let’s look at how the whole method could be implemented.

+

1. We have to start with the bad news. Consider function self and +cross-referencing cases:

+
// self-reference
+unsigned fact0(unsigned n) { return n > 1 ? n * fact0(n-1) : 1; }
+unsigned fact1(unsigned n) { return n > 1 ? n * fact1(n-1) : 1; }
+
+// cross-reference
+unsigned ping(unsigned n) { return n != 0 ? pong(n-1) : 0; }
+unsigned pong(unsigned n) { return n != 0 ? ping(n-1) : 0; }
+
+
+
+

This comparison was implemented in the initial MergeFunctions pass +version. But, unfortunately, it is not transitive. And this is the only case +we can’t convert to a less-equal-greater comparison. It is a rare case, 4-5 +functions out of 10000 (checked in test-suite), and, we hope, the reader will +forgive us for such a sacrifice in order to get the O(log(N)) pass time.

+
+

2. If left/right Value is a constant, we have to compare them. Return 0 if it +is the same constant, or use cmpConstants method otherwise.

+

3. If left/right is an InlineAsm instance, return the result of the Value pointers +comparison.

+

4. Explicit association of L (left value) and R (right value). We need to +find out whether the values were met at the same time, and thus are associated. Otherwise, we +need a rule for when we treat L < R. Now it is easy: we just return +the result of the numbers comparison:

+
std::pair<iterator, bool>
+  LeftRes = sn_mapL.insert(std::make_pair(Left, sn_mapL.size())),
+  RightRes = sn_mapR.insert(std::make_pair(Right, sn_mapR.size()));
+if (LeftRes.first->second == RightRes.first->second) return 0;
+if (LeftRes.first->second < RightRes.first->second) return -1;
+return 1;
+
+
+

Now, when cmpValues returns 0, we can proceed with the comparison procedure. +Otherwise, if we get -1 or 1, we need to pass this result to the top level +and finish the comparison procedure.

+
+
+
+

cmpConstants

+

Performs constants comparison as follows:

+

1. Compare constant types using cmpType method. If the result is -1 or 1, +goto step 2, otherwise proceed to step 3.

+

2. If the types are different, we can still check whether the constants could be +losslessly bitcasted to each other. The following explanation is a modification of the +canLosslesslyBitCastTo method.

+
+

2.1 Check whether constants are of the first class types +(isFirstClassType check):

+

2.1.1. If both constants are not of the first class type: return result +of cmpType.

+

2.1.2. Otherwise, if left type is not of the first class, return -1. If +right type is not of the first class, return 1.

+

2.1.3. If both types are of the first class type, proceed to the next step +(2.1.3.1).

+

2.1.3.1. If types are vectors, compare their bitwidth using the +cmpNumbers. If result is not 0, return it.

+

2.1.3.2. Different types, but not vectors:

+
    +
  • if both of them are pointers, good for us, we can proceed to step 3.

  • +
  • if one of types is pointer, return result of isPointer flags +comparison (cmpFlags operation).

  • +
  • otherwise we have no methods to prove bitcastability, and thus return +result of types comparison (-1 or 1).

  • +
+
+

Steps below are for the case when types are equal, or case when constants are +bitcastable:

+

3. One of constants is a “null” value. Return the result of +cmpFlags(L->isNullValue, R->isNullValue) comparison.

+
    +
  4. Compare value IDs, and return the result if it is not 0:

  2. +
+
if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+  return Res;
+
+
+

5. Compare the contents of the constants. The comparison depends on the kind of +constants, but at this stage it is just a lexicographical comparison. Just see +how it was described at the beginning of the “Functions comparison” paragraph. +Mathematically, it is equal to the following case: we encode the left constant and the right +constant (in a way similar to what the bitcode writer does). Then we compare the left code +sequence and the right code sequence.

+
+
+

compare(const BasicBlock*, const BasicBlock*)

+

Compares two BasicBlock instances.

+

It enumerates instructions from left BB and right BB.

+

1. It assigns serial numbers to the left and right instructions, using +cmpValues method.

+

2. If one of left or right is GEP (GetElementPtr), then treat GEP as +greater than other instructions. If both instructions are GEPs use cmpGEP +method for comparison. If result is -1 or 1, pass it to the top-level +comparison (return it).

+
+

3.1. Compare operations. Call cmpOperation method. If result is -1 or +1, return it.

+

3.2. Compare number of operands, if result is -1 or 1, return it.

+

3.3. Compare operands themselves, use cmpValues method. Return result +if it is -1 or 1.

+

3.4. Compare type of operands, using cmpType method. Return result if +it is -1 or 1.

+

3.5. Proceed to the next instruction.

+
+
    +
  1. We can finish instruction enumeration in 3 cases:

    +

    4.1. We reached the end of both left and right basic-blocks. We didn’t +exit on steps 1-3, so contents are equal, return 0.

    +

    4.2. We have reached the end of the left basic-block. Return -1.

    +

    4.3. Return 1 (we reached the end of the right basic block).

    +
  2. +
+
+
+

cmpGEP

+

Compares two GEPs (getelementptr instructions).

+

It differs from regular operation comparison in only one respect: the possibility +to use the accumulateConstantOffset method.

+

So, if we get constant offset for both left and right GEPs, then compare it as +numbers, and return comparison result.

+

Otherwise treat it like a regular operation (see previous paragraph).

+
+
+

cmpOperation

+

Compares instruction opcodes and some important operation properties.

+
    +
  1. Compare opcodes, if it differs return the result.

  2. +
  3. Compare number of operands. If it differs – return the result.

  4. +
+

3. Compare operation types, use cmpType. All the same – if types are +different, return result.

+

4. Compare subclassOptionalData, get it with the getRawSubclassOptionalData +method, and compare it like numbers.

+
    +
  1. Compare operand types.

  2. +
+

6. For some particular instructions, check equivalence (relation in our case) of +some significant attributes. For example, we have to compare alignment for +load instructions.

+
+
+

O(log(N))

+

Methods described above implement order relationship. And latter, could be used +for nodes comparison in a binary tree. So we can organize functions set into +the binary tree and reduce the cost of lookup procedure from +O(N*N) to O(log(N)).

+
+
+
+

Merging process, mergeTwoFunctions

+

Once MergeFunctions detects that the current function (G) is equal to one that +was analyzed before (function F), it calls mergeTwoFunctions(Function*, +Function*).

+

The operation affects the FnTree contents in the following way: F will stay in +FnTree. G, being equal to F, will not be added to FnTree. Calls of +G will be replaced with something else. This changes the bodies of callers, so +functions that call G are put into the Deferred set, removed from +FnTree, and analyzed again.

+

The approach is next:

+

1. Most wished case: when we can use alias and both of F and G are weak. We +make both of them with aliases to the third strong function H. Actually H +is F. See below how it’s made (but it’s better to look straight into the +source code). Well, this is a case when we can just replace G with F +everywhere, we use replaceAllUsesWith operation here (RAUW).

+

2. F could not be overridden, while G could. It would be good to do the +next: after merging the places where overridable function were used, still use +overridable stub. So try to make G alias to F, or create overridable tail +call wrapper around F and replace G with that call.

+

3. Neither F nor G could be overridden. We can’t use RAUW. We can just +change the callers: call F instead of G. That’s what +replaceDirectCallers does.

+

Below is a detailed body description.

+
+

If “F” may be overridden

+

As follows from mayBeOverridden comments: “whether the definition of this +global may be replaced by something non-equivalent at link time”. If so, that’s +ok: we can use alias to F instead of G or change call instructions itself.

+
+

HasGlobalAliases, removeUsers

+

First consider the case when we have global aliases of one function name to +another. Our purpose is to make both of them aliases to a third strong +function. However, if we keep F alive and without major changes, we can leave it +in FnTree. We try to combine these two goals.

+

Do stub replacement of F itself with an alias to F.

+

1. Create stub function H, with the same name and attributes like function +F. It takes maximum alignment of F and G.

+

2. Replace all uses of function F with uses of function H. This is a two-step +procedure. First of all, we must take into account that all functions +from which F is called will be changed, since we change the call argument +(from F to H). Therefore we must review these caller functions again after +this procedure. We remove the callers from FnTree; the method named +removeUsers(F) does that (don’t confuse it with replaceAllUsesWith):

+
+

2.1. Inside removeUsers(Value* +V) we go through the all values that use value V (or F in our context). +If value is instruction, we go to function that holds this instruction and +mark it as to-be-analyzed-again (put to Deferred set), we also remove +caller from FnTree.

+

2.2. Now we can do the replacement: call F->replaceAllUsesWith(H).

+
+

3. H (that now “officially” plays F’s role) is replaced with alias to F. +Do the same with G: replace it with alias to F. So finally everywhere F +was used, we use H and it is alias to F, and everywhere G was used we +also have alias to F.

+
    +
  1. Set F linkage to private. Make it strong :-)

  2. +
+
+
+

No global aliases, replaceDirectCallers

+

If global aliases are not supported, we call replaceDirectCallers. It just +goes through all calls of G and replaces them with calls of F. If you look into +the method you will see that it scans all uses of G too, and if a use is a callee +(if the user is a call instruction and G is used as what is to be called), we replace +it with a use of F.

+
+
If “F” could not be overridden, fix it!
+

We call writeThunkOrAlias(Function *F, Function *G). Here we try to replace +G with alias to F first. The next conditions are essential:

+
    +
  • target should support global aliases,

  • +
  • the address itself of G should be not significant, not named and not +referenced anywhere,

  • +
  • function should come with external, local or weak linkage.

  • +
+

Otherwise we write thunk: some wrapper that has G’s interface and calls F, +so G could be replaced with this wrapper.

+

writeAlias

+

As follows from llvm reference:

+

“Aliases act as second name for the aliasee value”. So we just want to create +a second name for F and use it instead of G:

+
    +
  1. create global alias itself (GA),

  2. +
  3. adjust alignment of F so it must be maximum of current and G’s alignment;

  4. +
  5. replace uses of G:

    +

    3.1. first mark all callers of G as to-be-analyzed-again, using +removeUsers method (see chapter above),

    +

    3.2. call G->replaceAllUsesWith(GA).

    +
  6. +
  7. Get rid of G.

  8. +
+

writeThunk

+

As it is written in the method comments:

+

“Replace G with a simple tail call to bitcast(F). Also replace direct uses of G +with bitcast(F). Deletes G.”

+

In general it does the same as usual when we want to replace callee, except the +first point:

+

1. We generate a tail call wrapper around F, but with an interface that allows using +it instead of G.

+
    +
  1. “As-usual”: removeUsers and replaceAllUsesWith then.

  2. +
  3. Get rid of G.

  4. +
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MIRLangRef.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MIRLangRef.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MIRLangRef.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MIRLangRef.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,921 @@ + + + + + + + + + Machine IR (MIR) Format Reference Manual — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Machine IR (MIR) Format Reference Manual

+ +
+

Warning

+

This is a work in progress.

+
+
+

Introduction

+

This document is a reference manual for the Machine IR (MIR) serialization +format. MIR is a human readable serialization format that is used to represent +LLVM’s machine specific intermediate representation.

+

The MIR serialization format is designed to be used for testing the code +generation passes in LLVM.

+
+
+

Overview

+

The MIR serialization format uses a YAML container. YAML is a standard +data serialization language, and the full YAML language spec can be read at +yaml.org.

+

A MIR file is split up into a series of YAML documents. The first document +can contain an optional embedded LLVM IR module, and the rest of the documents +contain the serialized machine functions.

+
+
+

MIR Testing Guide

+

You can use the MIR format for testing in two different ways:

+
    +
  • You can write MIR tests that invoke a single code generation pass using the +-run-pass option in llc.

  • +
  • You can use llc’s -stop-after option with existing or new LLVM assembly +tests and check the MIR output of a specific code generation pass.

  • +
+
+

Testing Individual Code Generation Passes

+

The -run-pass option in llc allows you to create MIR tests that invoke just +a single code generation pass. When this option is used, llc will parse an +input MIR file, run the specified code generation pass(es), and output the +resulting MIR code.

+

You can generate an input MIR file for the test by using the -stop-after or +-stop-before option in llc. For example, if you would like to write a test +for the post register allocation pseudo instruction expansion pass, you can +specify the machine copy propagation pass in the -stop-after option, as it +runs just before the pass that we are trying to test:

+
+

llc -stop-after=machine-cp bug-trigger.ll > test.mir

+
+

If the same pass is run multiple times, a run index can be included +after the name with a comma.

+
+

llc -stop-after=dead-mi-elimination,1 bug-trigger.ll > test.mir

+
+

After generating the input MIR file, you’ll have to add a run line that uses +the -run-pass option to it. In order to test the post register allocation +pseudo instruction expansion pass on X86-64, a run line like the one shown +below can be used:

+
+

# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=postrapseudos | FileCheck %s

+
+

The MIR files are target dependent, so they have to be placed in the target +specific test directories (test/CodeGen/TARGETNAME). They also need to +specify a target triple or a target architecture either in the run line or in +the embedded LLVM IR module.

+
+

Simplifying MIR files

+

The MIR code coming out of -stop-after/-stop-before is very verbose; +Tests are more accessible and future proof when simplified:

+
    +
  • Use the -simplify-mir option with llc.

  • +
  • Machine function attributes often have default values or the test works just +as well with default values. Typical candidates for this are: alignment:, +exposesReturnsTwice, legalized, regBankSelected, selected. +The whole frameInfo section is often unnecessary if there is no special +frame usage in the function. tracksRegLiveness on the other hand is often +necessary for some passes that care about block livein lists.

  • +
  • The (global) liveins: list is typically only interesting for early +instruction selection passes and can be removed when testing later passes. +The per-block liveins: on the other hand are necessary if +tracksRegLiveness is true.

  • +
  • Branch probability data in block successors: lists can be dropped if the +test doesn’t depend on it. Example: +successors: %bb.1(0x40000000), %bb.2(0x40000000) can be replaced with +successors: %bb.1, %bb.2.

  • +
  • MIR code contains a whole IR module. This is necessary because there are +no equivalents in MIR for global variables, references to external functions, +function attributes, metadata, debug info. Instead some MIR data references +the IR constructs. You can often remove them if the test doesn’t depend on +them.

  • +
  • Alias Analysis is performed on IR values. These are referenced by memory +operands in MIR. Example: :: (load 8 from %ir.foobar, !alias.scope !9). +If the test doesn’t depend on (good) alias analysis the references can be +dropped: :: (load 8)

  • +
  • MIR blocks can reference IR blocks for debug printing, profile information +or debug locations. Example: bb.42.myblock in MIR references the IR block +myblock. It is usually possible to drop the .myblock reference and simply +use bb.42.

  • +
  • If there are no memory operands or blocks referencing the IR then the +IR function can be replaced by a parameterless dummy function like +define void @func() { ret void }.

  • +
  • It is possible to drop the whole IR section of the MIR file if it only +contains dummy functions (see above). The .mir loader will create the +IR functions automatically in this case.

  • +
+
+
+
+

Limitations

+

Currently the MIR format has several limitations in terms of which state it +can serialize:

+
    +
  • The target-specific state in the target-specific MachineFunctionInfo +subclasses isn’t serialized at the moment.

  • +
  • The target-specific MachineConstantPoolValue subclasses (in the ARM and +SystemZ backends) aren’t serialized at the moment.

  • +
  • The MCSymbol machine operands don’t support temporary or local symbols.

  • +
  • A lot of the state in MachineModuleInfo isn’t serialized - only the CFI +instructions and the variable debug information from MMI is serialized right +now.

  • +
+

These limitations impose restrictions on what you can test with the MIR format. +For now, tests that would like to test some behaviour that depends on the state +of temporary or local MCSymbol operands or the exception handling state in +MMI, can’t use the MIR format. As well as that, tests that test some behaviour +that depends on the state of the target specific MachineFunctionInfo or +MachineConstantPoolValue subclasses can’t use the MIR format at the moment.

+
+
+
+

High Level Structure

+
+

Embedded Module

+

When the first YAML document contains a YAML block literal string, the MIR +parser will treat this string as an LLVM assembly language string that +represents an embedded LLVM IR module. +Here is an example of a YAML document that contains an LLVM module:

+
define i32 @inc(i32* %x) {
+entry:
+  %0 = load i32, i32* %x
+  %1 = add i32 %0, 1
+  store i32 %1, i32* %x
+  ret i32 %1
+}
+
+
+
+
+

Machine Functions

+

The remaining YAML documents contain the machine functions. This is an example +of such YAML document:

+
---
+name:            inc
+tracksRegLiveness: true
+liveins:
+  - { reg: '$rdi' }
+callSites:
+  - { bb: 0, offset: 3, fwdArgRegs:
+      - { arg: 0, reg: '$edi' } }
+body: |
+  bb.0.entry:
+    liveins: $rdi
+
+    $eax = MOV32rm $rdi, 1, _, 0, _
+    $eax = INC32r killed $eax, implicit-def dead $eflags
+    MOV32mr killed $rdi, 1, _, 0, _, $eax
+    CALL64pcrel32 @foo <regmask...>
+    RETQ $eax
+...
+
+
+

The document above consists of attributes that represent the various +properties and data structures in a machine function.

+

The attribute name is required, and its value should be identical to the +name of a function that this machine function is based on.

+

The attribute body is a YAML block literal string. Its value represents +the function’s machine basic blocks and their machine instructions.

+

The attribute callSites is a representation of call site information which +keeps track of call instructions and registers used to transfer call arguments.

+
+
+
+

Machine Instructions Format Reference

+

The machine basic blocks and their instructions are represented using a custom, +human readable serialization language. This language is used in the +YAML block literal string that corresponds to the machine function’s body.

+

A source string that uses this language contains a list of machine basic +blocks, which are described in the section below.

+
+

Machine Basic Blocks

+

A machine basic block is defined in a single block definition source construct +that contains the block’s ID. +The example below defines two blocks that have an ID of zero and one:

+
bb.0:
+  <instructions>
+bb.1:
+  <instructions>
+
+
+

A machine basic block can also have a name. It should be specified after the ID +in the block’s definition:

+
bb.0.entry:       ; This block's name is "entry"
+   <instructions>
+
+
+

The block’s name should be identical to the name of the IR block that this +machine block is based on.

+
+

Block References

+

The machine basic blocks are identified by their ID numbers. Individual +blocks are referenced using the following syntax:

+
%bb.<id>
+
+
+

Example:

+
%bb.0
+
+
+

The following syntax is also supported, but the former syntax is preferred for +block references:

+
%bb.<id>[.<name>]
+
+
+

Example:

+
%bb.1.then
+
+
+
+
+

Successors

+

The machine basic block’s successors have to be specified before any of the +instructions:

+
bb.0.entry:
+  successors: %bb.1.then, %bb.2.else
+  <instructions>
+bb.1.then:
+  <instructions>
+bb.2.else:
+  <instructions>
+
+
+

The branch weights can be specified in brackets after the successor blocks. +The example below defines a block that has two successors with branch weights +of 32 and 16:

+
bb.0.entry:
+  successors: %bb.1.then(32), %bb.2.else(16)
+
+
+
+
+

Live In Registers

+

The machine basic block’s live in registers have to be specified before any of +the instructions:

+
bb.0.entry:
+  liveins: $edi, $esi
+
+
+

The list of live in registers and successors can be empty. The language also +allows multiple live in register and successor lists - they are combined into +one list by the parser.

+
+
+

Miscellaneous Attributes

+

The attributes IsAddressTaken, IsLandingPad and Alignment can be +specified in brackets after the block’s definition:

+
bb.0.entry (address-taken):
+  <instructions>
+bb.2.else (align 4):
+  <instructions>
+bb.3(landing-pad, align 4):
+  <instructions>
+
+
+

Alignment is specified in bytes, and must be a power of two.

+
+
+
+

Machine Instructions

+

A machine instruction is composed of a name, +machine operands, +instruction flags, and machine memory operands.

+

The instruction’s name is usually specified before the operands. The example +below shows an instance of the X86 RETQ instruction with a single machine +operand:

+
RETQ $eax
+
+
+

However, if the machine instruction has one or more explicitly defined register +operands, the instruction’s name has to be specified after them. The example +below shows an instance of the AArch64 LDPXpost instruction with three +defined register operands:

+
$sp, $fp, $lr = LDPXpost $sp, 2
+
+
+

The instruction names are serialized using the exact definitions from the +target’s *InstrInfo.td files, and they are case sensitive. This means that +similar instruction names like TSTri and tSTRi represent different +machine instructions.

+
+

Instruction Flags

+

The flag frame-setup or frame-destroy can be specified before the +instruction’s name:

+
$fp = frame-setup ADDXri $sp, 0, 0
+
+
+
$x21, $x20 = frame-destroy LDPXi $sp
+
+
+
+
+

Bundled Instructions

+

The syntax for bundled instructions is the following:

+
BUNDLE implicit-def $r0, implicit-def $r1, implicit $r2 {
+  $r0 = SOME_OP $r2
+  $r1 = ANOTHER_OP internal $r0
+}
+
+
+

The first instruction is often a bundle header. The instructions between { +and } are bundled with the first instruction.

+
+
+
+

Registers

+

Registers are one of the key primitives in the machine instructions +serialization language. They are primarily used in the +register machine operands, +but they can also be used in a number of other places, like the +basic block’s live in list.

+

The physical registers are identified by their name and by the ‘$’ prefix sigil. +They use the following syntax:

+
$<name>
+
+
+

The example below shows three X86 physical registers:

+
$eax
+$r15
+$eflags
+
+
+

The virtual registers are identified by their ID number and by the ‘%’ sigil. +They use the following syntax:

+
%<id>
+
+
+

Example:

+
%0
+
+
+

The null registers are represented using an underscore (‘_’). They can also be +represented using a ‘$noreg’ named register, although the former syntax +is preferred.

+
+
+

Machine Operands

+

There are seventeen different kinds of machine operands, and all of them can be +serialized.

+
+

Immediate Operands

+

The immediate machine operands are untyped, 64-bit signed integers. The +example below shows an instance of the X86 MOV32ri instruction that has an +immediate machine operand -42:

+
$eax = MOV32ri -42
+
+
+

An immediate operand is also used to represent a subregister index when the +machine instruction has one of the following opcodes:

+
    +
  • EXTRACT_SUBREG

  • +
  • INSERT_SUBREG

  • +
  • REG_SEQUENCE

  • +
  • SUBREG_TO_REG

  • +
+

In case this is true, the Machine Operand is printed according to the target.

+

For example:

+

In AArch64RegisterInfo.td:

+
def sub_32 : SubRegIndex<32>;
+
+
+

If the third operand is an immediate with the value 15 (target-dependent +value), based on the instruction’s opcode and the operand’s index the operand +will be printed as %subreg.sub_32:

+
%1:gpr64 = SUBREG_TO_REG 0, %0, %subreg.sub_32
+
+
+

For integers > 64bit, we use a special machine operand, MO_CImmediate, +which stores the immediate in a ConstantInt using an APInt (LLVM’s +arbitrary precision integers).

+
+
+

Register Operands

+

The register primitive is used to represent the register +machine operands. The register operands can also have optional +register flags, +a subregister index, +and a reference to the tied register operand. +The full syntax of a register operand is shown below:

+
[<flags>] <register> [ :<subregister-idx-name> ] [ (tied-def <tied-op>) ]
+
+
+

This example shows an instance of the X86 XOR32rr instruction that has +5 register operands with different register flags:

+
dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al
+
+
+
+
Register Flags
+

The table below shows all of the possible register flags along with the +corresponding internal llvm::RegState representation:

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Flag

Internal Value

implicit

RegState::Implicit

implicit-def

RegState::ImplicitDefine

def

RegState::Define

dead

RegState::Dead

killed

RegState::Kill

undef

RegState::Undef

internal

RegState::InternalRead

early-clobber

RegState::EarlyClobber

debug-use

RegState::Debug

renamable

RegState::Renamable

+
+
+
Subregister Indices
+

The register machine operands can reference a portion of a register by using +the subregister indices. The example below shows an instance of the COPY +pseudo instruction that uses the X86 sub_8bit subregister index to copy 8 +lower bits from the 32-bit virtual register 0 to the 8-bit virtual register 1:

+
%1 = COPY %0:sub_8bit
+
+
+

The names of the subregister indices are target specific, and are typically +defined in the target’s *RegisterInfo.td file.

+
+
+
+

Constant Pool Indices

+

A constant pool index (CPI) operand is printed using its index in the +function’s MachineConstantPool and an offset.

+

For example, a CPI with the index 1 and offset 8:

+
%1:gr64 = MOV64ri %const.1 + 8
+
+
+

For a CPI with the index 0 and offset -12:

+
%1:gr64 = MOV64ri %const.0 - 12
+
+
+

A constant pool entry is bound to a LLVM IR Constant or a target-specific +MachineConstantPoolValue. When serializing all the function’s constants the +following format is used:

+
constants:
+  - id:               <index>
+    value:            <value>
+    alignment:        <alignment>
+    isTargetSpecific: <target-specific>
+
+
+
+
where:
    +
  • <index> is a 32-bit unsigned integer;

  • +
  • <value> is a LLVM IR Constant;

  • +
  • <alignment> is a 32-bit unsigned integer specified in bytes, and must be +power of two;

  • +
  • <target-specific> is either true or false.

  • +
+
+
+

Example:

+
constants:
+  - id:               0
+    value:            'double 3.250000e+00'
+    alignment:        8
+  - id:               1
+    value:            'g-(LPC0+8)'
+    alignment:        4
+    isTargetSpecific: true
+
+
+
+
+

Global Value Operands

+

The global value machine operands reference the global values from the +embedded LLVM IR module. +The example below shows an instance of the X86 MOV64rm instruction that has +a global value operand named G:

+
$rax = MOV64rm $rip, 1, _, @G, _
+
+
+

The named global values are represented using an identifier with the ‘@’ prefix. +If the identifier doesn’t match the regular expression +[-a-zA-Z$._][-a-zA-Z$._0-9]*, then this identifier must be quoted.

+

The unnamed global values are represented using an unsigned numeric value with +the ‘@’ prefix, like in the following examples: @0, @989.

+
+
+

Target-dependent Index Operands

+

A target index operand is a target-specific index and an offset. The +target-specific index is printed using target-specific names and a positive or +negative offset.

+

For example, the amdgpu-constdata-start is associated with the index 0 +in the AMDGPU backend. So if we have a target index operand with the index 0 +and the offset 8:

+
$sgpr2 = S_ADD_U32 _, target-index(amdgpu-constdata-start) + 8, implicit-def _, implicit-def _
+
+
+
+
+

Jump-table Index Operands

+

A jump-table index operand with the index 0 is printed as follows:

+
tBR_JTr killed $r0, %jump-table.0
+
+
+

A machine jump-table entry contains a list of MachineBasicBlocks. When serializing all the function’s jump-table entries, the following format is used:

+
jumpTable:
+  kind:             <kind>
+  entries:
+    - id:             <index>
+      blocks:         [ <bbreference>, <bbreference>, ... ]
+
+
+

where <kind> is describing how the jump table is represented and emitted (plain address, relocations, PIC, etc.), and each <index> is a 32-bit unsigned integer and blocks contains a list of machine basic block references.

+

Example:

+
jumpTable:
+  kind:             inline
+  entries:
+    - id:             0
+      blocks:         [ '%bb.3', '%bb.9', '%bb.4.d3' ]
+    - id:             1
+      blocks:         [ '%bb.7', '%bb.7', '%bb.4.d3', '%bb.5' ]
+
+
+
+
+

External Symbol Operands

+

An external symbol operand is represented using an identifier with the & +prefix. The identifier is surrounded with double quotes (") and escaped if it has any +special non-printable characters in it.

+

Example:

+
CALL64pcrel32 &__stack_chk_fail, csr_64, implicit $rsp, implicit-def $rsp
+
+
+
+
+

MCSymbol Operands

+

A MCSymbol operand is holding a pointer to a MCSymbol. For the limitations +of this operand in MIR, see limitations.

+

The syntax is:

+
EH_LABEL <mcsymbol Ltmp1>
+
+
+
+
+

CFIIndex Operands

+

A CFI Index operand is holding an index into a per-function side-table, +MachineFunction::getFrameInstructions(), which references all the frame +instructions in a MachineFunction. A CFI_INSTRUCTION may look like it +contains multiple operands, but the only operand it contains is the CFI Index. +The other operands are tracked by the MCCFIInstruction object.

+

The syntax is:

+
CFI_INSTRUCTION offset $w30, -16
+
+
+

which may be emitted later in the MC layer as:

+
.cfi_offset w30, -16
+
+
+
+
+

IntrinsicID Operands

+

An Intrinsic ID operand contains a generic intrinsic ID or a target-specific ID.

+

The syntax for the returnaddress intrinsic is:

+
$x0 = COPY intrinsic(@llvm.returnaddress)
+
+
+
+
+

Predicate Operands

+

A Predicate operand contains an IR predicate from CmpInst::Predicate, like +ICMP_EQ, etc.

+

For an int eq predicate ICMP_EQ, the syntax is:

+
%2:gpr(s32) = G_ICMP intpred(eq), %0, %1
+
+
+
+
+

Comments

+

Machine operands can have C/C++ style comments, which are annotations enclosed +between /* and */ to improve readability of e.g. immediate operands. +In the example below, ARM instructions EOR and BCC and immediate operands +14 and 0 have been annotated with their condition codes (CC) +definitions, i.e. the always and eq condition codes:

+
dead renamable $r2, $cpsr = tEOR killed renamable $r2, renamable $r1, 14 /* CC::always */, $noreg
+t2Bcc %bb.4, 0 /* CC::eq */, killed $cpsr
+
+
+

As these annotations are comments, they are ignored by the MI parser. +Comments can be added or customized by overriding InstrInfo’s hook +createMIROperandComment().

+
+
+
+

Debug-Info constructs

+

Most of the debugging information in a MIR file is to be found in the metadata +of the embedded module. Within a machine function, that metadata is referred to +by various constructs to describe source locations and variable locations.

+
+

Source locations

+

Every MIR instruction may optionally have a trailing reference to a +DILocation metadata node, after all operands and symbols, but before +memory operands:

+
$rbp = MOV64rr $rdi, debug-location !12
+
+
+

The source location attachment is synonymous with the !dbg metadata +attachment in LLVM-IR. The absence of a source location attachment will be +represented by an empty DebugLoc object in the machine instruction.

+
+
+

Fixed variable locations

+

There are several ways of specifying variable locations. The simplest is +describing a variable that is permanently located on the stack. In the stack +or fixedStack attribute of the machine function, the variable, scope, and +any qualifying location modifier are provided:

+
- { id: 0, name: offset.addr, offset: -24, size: 8, alignment: 8, stack-id: default,
+    debug-info-variable: '!1', debug-info-expression: '!DIExpression()',
+    debug-info-location: '!2' }
+
+
+

Where:

+
    +
  • debug-info-variable identifies a DILocalVariable metadata node,

  • +
  • debug-info-expression adds qualifiers to the variable location,

  • +
  • debug-info-location identifies a DILocation metadata node.

  • +
+

These metadata attributes correspond to the operands of a llvm.dbg.declare +IR intrinsic, see the source level debugging +documentation.

+
+
+

Varying variable locations

+

Variables that are not always on the stack or change location are specified +with the DBG_VALUE meta machine instruction. It is synonymous with the +llvm.dbg.value IR intrinsic, and is written:

+
DBG_VALUE $rax, $noreg, !123, !DIExpression(), debug-location !456
+
+
+

The operands, respectively:

+
    +
  1. Identifies a machine location such as a register, immediate, or frame index,

  2. +
  3. Is either $noreg, or immediate value zero if an extra level of indirection is to be added to the first operand,

  4. +
  5. Identifies a DILocalVariable metadata node,

  6. +
  7. Specifies an expression qualifying the variable location, either inline or as a metadata node reference,

  8. +
+

The source location identifies the DILocation for the scope of the +variable. The second operand (IsIndirect) is deprecated and to be deleted. +All additional qualifiers for the variable location should be made through the +expression metadata.

+
+
+

Instruction referencing locations

+

This experimental feature aims to separate the specification of variable +values from the program point where a variable takes on that value. Changes +in variable value occur in the same manner as DBG_VALUE meta instructions +but using DBG_INSTR_REF. Variable values are identified by a pair of +instruction number and operand number. Consider the example below:

+
$rbp = MOV64ri 0, debug-instr-number 1, debug-location !12
+DBG_INSTR_REF 1, 0, !123, !DIExpression(), debug-location !456
+
+
+

Instruction numbers are directly attached to machine instructions with an +optional debug-instr-number attachment, before the optional +debug-location attachment. The value defined in $rbp in the code +above would be identified by the pair <1, 0>.

+

The first two operands of the DBG_INSTR_REF above record the instruction +and operand number <1, 0>, identifying the value defined by the MOV64ri. +The additional operands to DBG_INSTR_REF are identical to DBG_VALUE, +and the DBG_INSTR_REF's position records where the variable takes on the +designated value in the same way.

+

More information about how these constructs are used will appear on the source +level debugging page in due course, see also Source Level Debugging with LLVM and How to Update Debug Info: A Guide for LLVM Pass Authors.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MyFirstTypoFix.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MyFirstTypoFix.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/MyFirstTypoFix.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/MyFirstTypoFix.html 2021-09-19 16:16:38.000000000 +0000 @@ -0,0 +1,625 @@ + + + + + + + + + MyFirstTypoFix — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

MyFirstTypoFix

+ +
+

Introduction

+

This tutorial will guide you through the process of making a change to +LLVM, and contributing it back to the LLVM project. We’ll be making a +change to Clang, but the steps for other parts of LLVM are the same. +Even though the change we’ll be making is simple, we’re going to cover +steps like building LLVM, running the tests, and code review. This is +good practice, and you’ll be prepared for making larger changes.

+

We’ll assume you:

+
    +
  • know how to use an editor,

  • +
  • have basic C++ knowledge,

  • +
  • know how to install software on your system,

  • +
  • are comfortable with the command line,

  • +
  • have basic knowledge of git.

  • +
+
+

The change we’re making

+

Clang has a warning for infinite recursion:

+
$ echo "void foo() { foo(); }" > ~/test.cc
+$ clang -c -Wall ~/test.cc
+input.cc:1:14: warning: all paths through this function will call
+itself [-Winfinite-recursion]
+
+
+

This is clear enough, but not exactly catchy. Let’s improve the wording +a little:

+
input.cc:1:14: warning: to understand recursion, you must first
+understand recursion [-Winfinite-recursion]
+
+
+
+
+

Dependencies

+

We’re going to need some tools:

+
    +
  • git: to check out the LLVM source code,

  • +
  • a C++ compiler: to compile LLVM source code. You’ll want a recent +version +of Clang, GCC, or Visual Studio.

  • +
  • CMake: used to configure how LLVM should be built on your system,

  • +
  • ninja: runs the C++ compiler to (re)build specific parts of LLVM,

  • +
  • python: to run the LLVM tests,

  • +
  • arcanist: for uploading changes for review,

  • +
+

As an example, on Ubuntu:

+
$ sudo apt-get install git clang cmake ninja-build python arcanist subversion
+
+
+
+
+
+

Building LLVM

+
+

Checkout

+

The source code is stored on +Github in one large repository +(“the monorepo”).

+

It may take a while to download!

+
$ git clone https://github.com/llvm/llvm-project.git
+
+
+

This will create a directory “llvm-project” with all of the source +code. (Checking out anonymously is OK - pushing commits uses a different +mechanism, as we’ll see later.)

+
+
+

Configure your workspace

+

Before we can build the code, we must configure exactly how to build it +by running CMake. CMake combines information from three sources:

+
    +
  • explicit choices you make (is this a debug build?)

  • +
  • settings detected from your system (where are libraries installed?)

  • +
  • project structure (which files are part of ‘clang’?)

  • +
+

First, create a directory to build in. Usually, this is +llvm-project/build.

+
$ mkdir llvm-project/build
+$ cd llvm-project/build
+
+
+

Now, run CMake:

+
$ cmake -G Ninja ../llvm -DCMAKE_BUILD_TYPE=Release -DLLVM_ENABLE_PROJECTS=clang
+
+
+

If all goes well, you’ll see a lot of “performing test” lines, and +finally:

+
Configuring done
+Generating done
+Build files have been written to: /path/llvm-project/build
+
+
+

And you should see a build.ninja file.

+

Let’s break down that last command a little:

+
    +
  • -G Ninja: we’re going to use ninja to build; please create +build.ninja

  • +
  • ../llvm: this is the path to the source of the “main” LLVM +project

  • +
  • The two -D flags set CMake variables, which override +CMake/project defaults:

  • +
  • CMAKE_BUILD_TYPE=Release: build in optimized mode, which is +(surprisingly) the fastest option.

    +

    If you want to run under a debugger, you should use the default Debug +(which is totally unoptimized, and will lead to >10x slower test +runs) or RelWithDebInfo which is a halfway point. +CMAKE_BUILD_TYPE affects code generation only; assertions are +on by default regardless! LLVM_ENABLE_ASSERTIONS=Off disables +them.

    +
  • +
  • LLVM_ENABLE_PROJECTS=clang: this lists the LLVM subprojects +you are interested in building, in addition to LLVM itself. Multiple +projects can be listed, separated by semicolons, such as +“clang;lldb”. In this example, we’ll be making a change to Clang, so we +should build it.

  • +
+

Finally, create a symlink (or a copy) of +llvm-project/build/compile_commands.json into llvm-project/:

+
$ ln -s build/compile_commands.json ../
+
+
+

(This isn’t strictly necessary for building and testing, but allows +tools like clang-tidy, clang-query, and clangd to work in your source +tree).

+
+
+

Build and test

+

Finally, we can build the code! It’s important to do this first, to +ensure we’re in a good state before making changes. But what to build? +In ninja, you specify a target. If we just want to build the clang +binary, our target name is “clang” and we run:

+
$ ninja clang
+
+
+

The first time we build will be very slow - Clang + LLVM is a lot of +code. But incremental builds are fast: ninja will only rebuild the parts +that have changed. When it finally finishes you should have a working +clang binary. Try running:

+
$ bin/clang --version
+
+
+

There’s also a target for building and running all the clang tests:

+
$ ninja check-clang
+
+
+

This is a common pattern in LLVM: check-llvm is all the checks for core, +other projects have targets like check-lldb.

+
+
+
+

Making changes

+
+

Edit

+

We need to find the file containing the error message.

+
$ git grep "all paths through this function" ..
+../clang/include/clang/Basic/DiagnosticSemaKinds.td:  "all paths through this function will call itself">,
+
+
+

The string that appears in DiagnosticSemaKinds.td is the one that is +printed by Clang. *.td files define tables - in this case it’s a list +of warnings and errors clang can emit and their messages. Let’s update +the message in your favorite editor:

+
$ vi ../clang/include/clang/Basic/DiagnosticSemaKinds.td
+
+
+

Find the message (it should be under +warn_infinite_recursive_function). Change the message to “in order to +understand recursion, you must first understand recursion”.

+
+
+

Test again

+

To verify our change, we can build clang and manually check that it +works.

+
$ ninja clang
+$ bin/clang -Wall ~/test.cc
+
+**/path/test.cc:1:124:** **warning****: in order to understand recursion, you must
+first understand recursion [-Winfinite-recursion]**
+
+
+

We should also run the tests to make sure we didn’t break something.

+
$ ninja check-clang
+
+
+

Notice that it is much faster to build this time, but the tests take +just as long to run. Ninja doesn’t know which tests might be affected, +so it runs them all.

+
********************
+Testing Time: 408.84s
+********************
+Failing Tests (1):
+    Clang :: SemaCXX/warn-infinite-recursion.cpp
+
+
+

Well, that makes sense… and the test output suggests it’s looking for +the old string “call itself” and finding our new message instead.

+

Let’s fix it by updating the expectation in the test.

+
$ vi ../clang/test/SemaCXX/warn-infinite-recursion.cpp
+
+
+

Everywhere we see // expected-warning{{call itself}}, let’s replace it +with // expected-warning{{to understand recursion}}.

+

Now we could run all the tests again, but this is a slow way to +iterate on a change! Instead, let’s find a way to re-run just the +specific test. There are two main types of tests in LLVM:

+
    +
  • lit tests (e.g. SemaCXX/warn-infinite-recursion.cpp).

  • +
+

These are fancy shell scripts that run command-line tools and verify the +output. They live in files like +clang/test/FixIt/dereference-addressof.c. Re-run like this:

+
$ bin/llvm-lit -v ../clang/test/SemaCXX/warn-infinite-recursion.cpp
+
+
+
    +
  • unit tests (e.g. ToolingTests/ReplacementText.CanDeleteAllText)

  • +
+

These are C++ programs that call LLVM functions and verify the results. +They live in suites like ToolingTests. Re-run like this:

+
$ ninja ToolingTests && tools/clang/unittests/Tooling/ToolingTests
+--gtest_filter=ReplacementText.CanDeleteAllText
+
+
+
+
+

Commit locally

+

We’ll save the change to a local git branch. This lets us work on other +things while the change is being reviewed. Changes should have a +description, to explain to reviewers and future readers of the code why +the change was made.

+
$ git checkout -b myfirstpatch
+$ git commit -am "[Diagnostic] Clarify -Winfinite-recursion message"
+
+
+

Now we’re ready to send this change out into the world! By the way, +there is an unwritten convention of using a tag for your commit. Tags +usually represent modules that you intend to modify. If you don’t know +the tags for your modules, you can look at the commit history: +https://github.com/llvm/llvm-project/commits/main.

+
+
+
+

Code review

+
+

Finding a reviewer

+

Changes can be reviewed by anyone in the LLVM community who has commit +access. For larger and more complicated changes, it’s important that the +reviewer has experience with the area of LLVM and knows the design goals +well. The author of a change will often assign a specific reviewer (git +blame and git log can be useful to find one).

+

As our change is fairly simple, we’ll add the cfe-commits mailing list +as a subscriber; anyone who works on clang can likely pick up the +review. (For changes outside clang, llvm-commits is the usual list. See +http://lists.llvm.org/ for +all the *-commits mailing lists).

+
+
+

Uploading a change for review

+

LLVM code reviews happen at https://reviews.llvm.org. The web interface +is called Phabricator, and the code review part is Differential. You +should create a user account there for reviews (click “Log In” and then +“Register new account”).

+

Now you can upload your change for review:

+
$ arc diff HEAD^
+
+
+

This creates a review for your change, comparing your current commit +with the previous commit. You will be prompted to fill in the review +details. Your commit message is already there, so just add cfe-commits +under the “subscribers” section. It should print a code review URL: +https://reviews.llvm.org/D58291 You can always find your active reviews +on Phabricator under “My activity”.

+
+
+

Review process

+

When you upload a change for review, an email is sent to you, the +cfe-commits list, and anyone else subscribed to these kinds of changes. +Within a few days, someone should start the review. They may add +themselves as a reviewer, or simply start leaving comments. You’ll get +another email any time the review is updated. The details are in the +https://llvm.org/docs/CodeReview/.

+
+

Comments

+

The reviewer can leave comments on the change, and you can reply. Some +comments are attached to specific lines, and appear interleaved with the +code. You can either reply to these, or address them and mark them as +“done”. Note that in-line replies are not sent straight away! They +become “draft” comments and you must click “Submit” at the bottom of the +page.

+
+
+

Updating your change

+

If you make changes in response to a reviewer’s comments, simply run

+
$ arc diff
+
+
+

again to update the change and notify the reviewer. Typically this is a +good time to send any draft comments as well.

+
+
+

Accepting a revision

+

When the reviewer is happy with the change, they will Accept the +revision. They may leave some more minor comments that you should +address, but at this point the review is complete. It’s time to get it +committed!

+
+
+
+

Commit by proxy

+

As this is your first change, you won’t have access to commit it +yourself yet. The reviewer doesn’t know this, so you need to tell +them! Leave a message on the review like:

+
+

Thanks @somellvmdev. I don’t have commit access, can you land this +patch for me? Please use “My Name my@email” to commit the change.

+
+

The review will be updated when the change is committed.

+
+
+

Review expectations

+

In order to make LLVM a long-term sustainable effort, code needs to be +maintainable and well tested. Code reviews help to achieve that goal. +Especially for new contributors, that often means many rounds of reviews +and push-back on design decisions that do not fit well within the +overall architecture of the project.

+

For your first patches, this means:

+
    +
  • be kind, and expect reviewers to be kind in return - LLVM has a Code +of Conduct;

  • +
  • be patient - understanding how a new feature fits into the +architecture of the project is often a time consuming effort, and +people have to juggle this with other responsibilities in their +lives; ping the review once a week when there is no response;

  • +
  • if you can’t agree, generally the best way is to do what the reviewer +asks; we optimize for readability of the code, which the reviewer is +in a better position to judge; if this feels like it’s not the right +option, you can contact the cfe-dev mailing list to get more feedback +on the direction;

  • +
+
+
+
+

Commit access

+

Once you’ve contributed a handful of patches to LLVM, start to think +about getting commit access yourself. It’s probably a good idea if:

+
    +
  • you’ve landed 3-5 patches of larger scope than “fix a typo”

  • +
  • you’d be willing to review changes that are closely related to yours

  • +
  • you’d like to keep contributing to LLVM.

  • +
+
+

Getting commit access

+

LLVM uses Git for committing changes. The details are in the developer +policy +document.

+
+
+

With great power

+

Actually, this would be a great time to read the rest of the developer +policy, too. At minimum, +you need to be subscribed to the relevant commits list before landing +changes (e.g. llvm-commits@lists.llvm.org), as discussion often happens +there if a new patch causes problems.

+
+
+

Commit

+

Let’s say you have a change on a local git branch, reviewed and ready to +commit. Things to do first:

+
    +
  • if you used multiple fine-grained commits locally, squash them into a +single commit. LLVM prefers commits to match the code that was +reviewed. (If you created one commit and then used “arc diff”, you’re +fine)

  • +
  • rebase your patch against the latest LLVM code. LLVM uses a linear +history, so everything should be based on an up-to-date origin/main.

  • +
+
$ git pull --rebase https://github.com/llvm/llvm-project.git main
+
+
+
    +
  • ensure the patch looks correct.

  • +
+
$ git show
+
+
+
    +
  • run the tests one last time, for good luck

  • +
+

At this point git show should show a single commit on top of +origin/main.

+

Now you can push your commit with

+
$ git push https://github.com/llvm/llvm-project.git HEAD:main
+
+
+

You should see your change on +GitHub within +minutes.

+
+
+

Post-commit errors

+

Once your change is submitted it will be picked up by automated build +bots that will build and test your patch in a variety of configurations.

+

You can see all configurations and their current state in a waterfall +view at http://lab.llvm.org:8011/waterfall. The waterfall view is good +to get a general overview over the tested configurations and to see +which configurations have been broken for a while.

+

The console view at http://lab.llvm.org:8011/console helps to get a +better understanding of the build results of a specific patch. If you +want to follow along how your change is affecting the build bots, this +should be the first place to look at - the colored bubbles correspond +to projects in the waterfall.

+

If you see a broken build, do not despair - some build bots are +continuously broken; if your change broke the build, you will see a red +bubble in the console view, while an already broken build will show an +orange bubble. Of course, even when the build was already broken, a new +change might introduce a hidden new failure.

+
+
When you want to see more details how a specific build is broken, +click the red bubble.
+
If post-commit error logs confuse you, do not worry too much - +everybody on the project is aware that this is a bit unwieldy, so +expect people to jump in and help you understand what’s going on!
+
+

buildbots, overview of bots, getting error logs.

+
+
+

Reverts

+

if in doubt, revert and re-land.

+
+
+
+

Conclusion

+

llvm is a land of contrasts.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/NewPassManager.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/NewPassManager.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/NewPassManager.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/NewPassManager.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,548 @@ + + + + + + + + + Using the New Pass Manager — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Using the New Pass Manager

+ +
+

Overview

+

For an overview of the new pass manager, see the blog post.

+
+
+

Adding Passes to a Pass Manager

+

For how to write a new PM pass, see this page.

+

To add a pass to a new PM pass manager, the important thing is to match the +pass type and the pass manager type. For example, a FunctionPassManager +can only contain function passes:

+
FunctionPassManager FPM;
+// InstSimplifyPass is a function pass
+FPM.addPass(InstSimplifyPass());
+
+
+

If you want to add a loop pass that runs on all loops in a function to a +FunctionPassManager, the loop pass must be wrapped in a function pass +adaptor that goes through all the loops in the function and runs the loop +pass on each one.

+
FunctionPassManager FPM;
+// LoopRotatePass is a loop pass
+FPM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass()));
+
+
+

The IR hierarchy in terms of the new PM is Module -> (CGSCC ->) Function -> +Loop, where going through a CGSCC is optional.

+
FunctionPassManager FPM;
+// loop -> function
+FPM.addPass(createFunctionToLoopPassAdaptor(LoopFooPass()));
+
+CGSCCPassManager CGPM;
+// loop -> function -> cgscc
+CGPM.addPass(createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(LoopFooPass())));
+// function -> cgscc
+CGPM.addPass(createCGSCCToFunctionPassAdaptor(FunctionFooPass()));
+
+ModulePassManager MPM;
+// loop -> function -> module
+MPM.addPass(createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(LoopFooPass())));
+// function -> module
+MPM.addPass(createModuleToFunctionPassAdaptor(FunctionFooPass()));
+
+// loop -> function -> cgscc -> module
+MPM.addPass(createModuleToCGSCCPassAdaptor(createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor(LoopFooPass()))));
+// function -> cgscc -> module
+MPM.addPass(createModuleToCGSCCPassAdaptor(createCGSCCToFunctionPassAdaptor(FunctionFooPass())));
+
+
+

A pass manager of a specific IR unit is also a pass of that kind. For +example, a FunctionPassManager is a function pass, meaning it can be +added to a ModulePassManager:

+
ModulePassManager MPM;
+
+FunctionPassManager FPM;
+// InstSimplifyPass is a function pass
+FPM.addPass(InstSimplifyPass());
+
+MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+
+

Generally you want to group CGSCC/function/loop passes together in a pass +manager, as opposed to adding adaptors for each pass to the containing upper +level pass manager. For example,

+
ModulePassManager MPM;
+MPM.addPass(createModuleToFunctionPassAdaptor(FunctionPass1()));
+MPM.addPass(createModuleToFunctionPassAdaptor(FunctionPass2()));
+MPM.run();
+
+
+

will run FunctionPass1 on each function in a module, then run +FunctionPass2 on each function in the module. In contrast,

+
ModulePassManager MPM;
+
+FunctionPassManager FPM;
+FPM.addPass(FunctionPass1());
+FPM.addPass(FunctionPass2());
+
+MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+
+

will run FunctionPass1 and FunctionPass2 on the first function in a +module, then run both passes on the second function in the module, and so on. +This is better for cache locality around LLVM data structures. This similarly +applies for the other IR types, and in some cases can even affect the quality +of optimization. For example, running all loop passes on a loop may cause a +later loop to be able to be optimized more than if each loop pass were run +separately.

+
+
+

Inserting Passes into Default Pipelines

+

Rather than manually adding passes to a pass manager, the typical way of +creating a pass manager is to use a PassBuilder and call something like +PassBuilder::buildPerModuleDefaultPipeline() which creates a typical +pipeline for a given optimization level.

+

Sometimes either frontends or backends will want to inject passes into the +pipeline. For example, frontends may want to add instrumentation, and target +backends may want to add passes that lower custom intrinsics. For these +cases, PassBuilder exposes callbacks that allow injecting passes into +certain parts of the pipeline. For example,

+
PassBuilder PB;
+PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM,
+                                       PassBuilder::OptimizationLevel Level) {
+    MPM.addPass(FooPass());
+});
+
+
+

will add FooPass near the very beginning of the pipeline for pass +managers created by that PassBuilder. See the documentation for +PassBuilder for the various places that passes can be added.

+

If a PassBuilder has a corresponding TargetMachine for a backend, it +will call TargetMachine::registerPassBuilderCallbacks() to allow the +backend to inject passes into the pipeline. This is equivalent to the legacy +PM’s TargetMachine::adjustPassManager().

+

Clang’s BackendUtil.cpp shows examples of a frontend adding (mostly +sanitizer) passes to various parts of the pipeline. +AMDGPUTargetMachine::registerPassBuilderCallbacks() is an example of a +backend adding passes to various parts of the pipeline.

+
+
+

Using Analyses

+

LLVM provides many analyses that passes can use, such as a dominator tree. +Calculating these can be expensive, so the new pass manager has +infrastructure to cache analyses and reuse them when possible.

+

When a pass runs on some IR, it also receives an analysis manager which it can +query for analyses. Querying for an analysis will cause the manager to check if +it has already computed the result for the requested IR. If it already has and +the result is still valid, it will return that. Otherwise it will construct a +new result by calling the analysis’s run() method, cache it, and return it. +You can also ask the analysis manager to only return an analysis if it’s +already cached.

+

The analysis manager only provides analysis results for the same IR type as +what the pass runs on. For example, a function pass receives an analysis +manager that only provides function-level analyses. This works for many +passes which work on a fixed scope. However, some passes want to peek up or +down the IR hierarchy. For example, an SCC pass may want to look at function +analyses for the functions inside the SCC. Or it may want to look at some +immutable global analysis. In these cases, the analysis manager can provide a +proxy to an outer or inner level analysis manager. For example, to get a +FunctionAnalysisManager from a CGSCCAnalysisManager, you can call

+
FunctionAnalysisManager &FAM =
+    AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
+        .getManager();
+
+
+

and use FAM as a typical FunctionAnalysisManager that a function pass +would have access to. To get access to an outer level IR analysis, you can +call

+
const auto &MAMProxy =
+    AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG);
+FooAnalysisResult *AR = MAMProxy.getCachedResult<FooAnalysis>(M);
+
+
+

Getting direct access to an outer level IR analysis manager is not allowed. +This is to keep in mind potential future pass concurrency, for example +parallelizing function passes over different functions in a CGSCC or module. +Since passes can ask for a cached analysis result, allowing passes to trigger +outer level analysis computation could result in non-determinism if +concurrency was supported. Therefore a pass running on inner level IR cannot +change the state of outer level IR analyses. Another limitation is that outer +level IR analyses that are used must be immutable, or else they could be +invalidated by changes to inner level IR. Outer analyses unused by inner +passes can and often will be invalidated by changes to inner level IR. These +invalidations happen after the inner pass manager finishes, so accessing +mutable analyses would give invalid results.

+

The exception to the above is accessing function analyses in loop passes. +Loop passes inherently require modifying the function the loop is in, and +that includes some function analyses the loop analyses depend on. This +discounts future concurrency over separate loops in a function, but that’s a +tradeoff due to how tightly a loop and its function are coupled. To make sure +the function analyses loop passes use are valid, they are manually updated in +the loop passes to ensure that invalidation is not necessary. There is a set +of common function analyses that loop passes and analyses have access to +which is passed into loop passes as a LoopStandardAnalysisResults +parameter. Other function analyses are not accessible from loop passes.

+

As with any caching mechanism, we need some way to tell analysis managers +when results are no longer valid. Much of the analysis manager complexity +comes from trying to invalidate as few analysis results as possible to keep +compile times as low as possible.

+

There are two ways to deal with potentially invalid analysis results. One is +to simply force clear the results. This should generally only be used when +the IR that the result is keyed on becomes invalid. For example, a function +is deleted, or a CGSCC has become invalid due to call graph changes.

+

The typical way to invalidate analysis results is for a pass to declare what +types of analyses it preserves and what types it does not. When transforming +IR, a pass either has the option to update analyses alongside the IR +transformation, or tell the analysis manager that analyses are no longer +valid and should be invalidated. If a pass wants to keep some specific +analysis up to date, such as when updating it would be faster than +invalidating and recalculating it, the analysis itself may have methods to +update it for specific transformations, or there may be helper updaters like +DomTreeUpdater for a DominatorTree. Otherwise to mark some analysis +as no longer valid, the pass can return a PreservedAnalyses with the +proper analyses invalidated.

+
// We've made no transformations that can affect any analyses.
+return PreservedAnalyses::all();
+
+// We've made transformations and don't want to bother to update any analyses.
+return PreservedAnalyses::none();
+
+// We've specifically updated the dominator tree alongside any transformations, but other analysis results may be invalid.
+PreservedAnalyses PA;
+PA.preserve<DominatorAnalysis>();
+return PA;
+
+// We haven't made any control flow changes, any analyses that only care about the control flow are still valid.
+PreservedAnalyses PA;
+PA.preserveSet<CFGAnalyses>();
+return PA;
+
+
+

The pass manager will call the analysis manager’s invalidate() method +with the pass’s returned PreservedAnalyses. This can be also done +manually within the pass:

+
FooModulePass::run(Module& M, ModuleAnalysisManager& AM) {
+  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  // Invalidate all analysis results for function F
+  FAM.invalidate(F, PreservedAnalyses::none());
+
+  // Invalidate all analysis results
+  AM.invalidate(M, PreservedAnalyses::none());
+
+  ...
+}
+
+
+

This is especially important when a pass removes then adds a function. The +analysis manager may store a pointer to a function that has been deleted, and +if the pass creates a new function before invalidating analysis results, the +new function may be at the same address as the old one, causing invalid +cached results. This is also useful for being more precise about +invalidation. Selectively invalidating analysis results only for functions +modified in an SCC pass can allow more analysis results to remain. But except +for complex fine-grain invalidation with inner proxies, passes should +typically just return a proper PreservedAnalyses and let the pass manager +deal with proper invalidation.

+
+
+

Implementing Analysis Invalidation

+

By default, an analysis is invalidated if PreservedAnalyses says that +analyses on the IR unit it runs on are not preserved (see +AnalysisResultModel::invalidate()). An analysis can implement +invalidate() to be more conservative when it comes to invalidation. For +example,

+
bool FooAnalysisResult::invalidate(Function &F, const PreservedAnalyses &PA,
+                                   FunctionAnalysisManager::Invalidator &) {
+  auto PAC = PA.getChecker<FooAnalysis>();
+  // the default would be:
+  // return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>());
+  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()
+      || PAC.preservedSet<CFGAnalyses>());
+}
+
+
+

says that if the PreservedAnalyses specifically preserves +FooAnalysis, or if PreservedAnalyses preserves all analyses (implicit +in PAC.preserved()), or if PreservedAnalyses preserves all function +analyses, or PreservedAnalyses preserves all analyses that only care +about the CFG, the FooAnalysisResult should not be invalidated.

+

If an analysis is stateless and generally shouldn’t be invalidated, use the +following:

+
bool FooAnalysisResult::invalidate(Function &F, const PreservedAnalyses &PA,
+                                   FunctionAnalysisManager::Invalidator &) {
+  // Check whether the analysis has been explicitly invalidated. Otherwise, it's
+  // stateless and remains preserved.
+  auto PAC = PA.getChecker<FooAnalysis>();
+  return !PAC.preservedWhenStateless();
+}
+
+
+

If an analysis depends on other analyses, those analyses also need to be +checked if they are invalidated:

+
bool FooAnalysisResult::invalidate(Function &F, const PreservedAnalyses &PA,
+                                   FunctionAnalysisManager::Invalidator &Inv) {
+  auto PAC = PA.getChecker<FooAnalysis>();
+  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+    return true;
+
+  // Check transitive dependencies.
+  return Inv.invalidate<BarAnalysis>(F, PA) ||
+        Inv.invalidate<BazAnalysis>(F, PA);
+}
+
+
+

Combining invalidation and analysis manager proxies results in some +complexity. For example, when we invalidate all analyses in a module pass, +we have to make sure that we also invalidate function analyses accessible via +any existing inner proxies. The inner proxy’s invalidate() first checks +if the proxy itself should be invalidated. If so, that means the proxy may +contain pointers to IR that is no longer valid, meaning that the inner proxy +needs to completely clear all relevant analysis results. Otherwise the proxy +simply forwards the invalidation to the inner analysis manager.

+

Generally for outer proxies, analysis results from the outer analysis manager +should be immutable, so invalidation shouldn’t be a concern. However, it is +possible for some inner analysis to depend on some outer analysis, and when +the outer analysis is invalidated, we need to make sure that dependent inner +analyses are also invalidated. This actually happens with alias analysis +results. Alias analysis is a function-level analysis, but there are +module-level implementations of specific types of alias analysis. Currently +GlobalsAA is the only module-level alias analysis and it generally is not +invalidated so this is not so much of a concern. See +OuterAnalysisManagerProxy::Result::registerOuterAnalysisInvalidation() +for more details.

+
+
+

Invoking opt

+

To use the legacy pass manager:

+
$ opt -enable-new-pm=0 -pass1 -pass2 /tmp/a.ll -S
+
+
+

This will be removed once the legacy pass manager is deprecated and removed for +the optimization pipeline.

+

To use the new PM:

+
$ opt -passes='pass1,pass2' /tmp/a.ll -S
+
+
+

The new PM typically requires explicit pass nesting. For example, to run a +function pass, then a module pass, we need to wrap the function pass in a module +adaptor:

+
$ opt -passes='function(no-op-function),no-op-module' /tmp/a.ll -S
+
+
+

A more complete example, using -debug-pass-manager to show the execution +order:

+
$ opt -passes='no-op-module,cgscc(no-op-cgscc,function(no-op-function,loop(no-op-loop))),function(no-op-function,loop(no-op-loop))' /tmp/a.ll -S -debug-pass-manager
+
+
+

Improper nesting can lead to error messages such as

+
$ opt -passes='no-op-function,no-op-module' /tmp/a.ll -S
+opt: unknown function pass 'no-op-module'
+
+
+

The nesting is: module (-> cgscc) -> function -> loop, where the CGSCC nesting is optional.

+

There are a couple of special cases for easier typing:

+
    +
  • If the first pass is not a module pass, a pass manager of the first pass is +implicitly created

    +
      +
    • For example, the following are equivalent

    • +
    +
  • +
+
$ opt -passes='no-op-function,no-op-function' /tmp/a.ll -S
+$ opt -passes='function(no-op-function,no-op-function)' /tmp/a.ll -S
+
+
+
    +
  • If there is an adaptor for a pass that lets it fit in the previous pass +manager, that is implicitly created

    +
      +
    • For example, the following are equivalent

    • +
    +
  • +
+
$ opt -passes='no-op-function,no-op-loop' /tmp/a.ll -S
+$ opt -passes='no-op-function,loop(no-op-loop)' /tmp/a.ll -S
+
+
+

For a list of available passes and analyses, including the IR unit (module, +CGSCC, function, loop) they operate on, run

+
$ opt --print-passes
+
+
+

or take a look at PassRegistry.def.

+

To make sure an analysis named foo is available before a pass, add +require<foo> to the pass pipeline. This adds a pass that simply requests +that the analysis is run. This pass is also subject to proper nesting. For +example, to make sure some function analysis is already computed for all +functions before a module pass:

+
$ opt -passes='function(require<my-function-analysis>),my-module-pass' /tmp/a.ll -S
+
+
+
+
+

Status of the New and Legacy Pass Managers

+

LLVM currently contains two pass managers, the legacy PM and the new PM. The +optimization pipeline (aka the middle-end) works with both the legacy PM and +the new PM, whereas the backend target-dependent code generation only works +with the legacy PM.

+

For the optimization pipeline, the new PM is the default PM. The legacy PM is +available for the optimization pipeline either by setting the CMake flag +-DLLVM_ENABLE_NEW_PASS_MANAGER=OFF when building LLVM, or by +various compiler/linker flags, e.g. -flegacy-pass-manager for clang.

+

There will be efforts to deprecate and remove the legacy PM for the +optimization pipeline in the future.

+

Some IR passes are considered part of the backend codegen pipeline even if +they are LLVM IR passes (whereas all MIR passes are codegen passes). This +includes anything added via TargetPassConfig hooks, e.g. +TargetPassConfig::addCodeGenPrepare(). As mentioned before, passes added +in TargetMachine::adjustPassManager() are part of the optimization +pipeline, and should have a corresponding line in +TargetMachine::registerPassBuilderCallbacks().

+

Currently there are efforts to make the codegen pipeline work with the new +PM.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/NVPTXUsage.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/NVPTXUsage.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/NVPTXUsage.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/NVPTXUsage.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,1113 @@ + + + + + + + + + User Guide for NVPTX Back-end — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

User Guide for NVPTX Back-end

+ +
+

Introduction

+

To support GPU programming, the NVPTX back-end supports a subset of LLVM IR +along with a defined set of conventions used to represent GPU programming +concepts. This document provides an overview of the general usage of the back- +end, including a description of the conventions used and the set of accepted +LLVM IR.

+
+

Note

+

This document assumes a basic familiarity with CUDA and the PTX +assembly language. Information about the CUDA Driver API and the PTX assembly +language can be found in the CUDA documentation.

+
+
+
+

Conventions

+
+

Marking Functions as Kernels

+

In PTX, there are two types of functions: device functions, which are only +callable by device code, and kernel functions, which are callable by host +code. By default, the back-end will emit device functions. Metadata is used to +declare a function as a kernel function. This metadata is attached to the +nvvm.annotations named metadata object, and has the following format:

+
!0 = !{<function-ref>, !"kernel", i32 1}
+
+
+

The first parameter is a reference to the kernel function. The following +example shows a kernel function calling a device function in LLVM IR. The +function @my_kernel is callable from host code, but @my_fmad is not.

+
define float @my_fmad(float %x, float %y, float %z) {
+  %mul = fmul float %x, %y
+  %add = fadd float %mul, %z
+  ret float %add
+}
+
+define void @my_kernel(float* %ptr) {
+  %val = load float, float* %ptr
+  %ret = call float @my_fmad(float %val, float %val, float %val)
+  store float %ret, float* %ptr
+  ret void
+}
+
+!nvvm.annotations = !{!1}
+!1 = !{void (float*)* @my_kernel, !"kernel", i32 1}
+
+
+

When compiled, the PTX kernel functions are callable by host-side code.

+
+
+

Address Spaces

+

The NVPTX back-end uses the following address space mapping:

+
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + +

Address Space

Memory Space

0

Generic

1

Global

2

Internal Use

3

Shared

4

Constant

5

Local

+
+

Every global variable and pointer type is assigned to one of these address +spaces, with 0 being the default address space. Intrinsics are provided which +can be used to convert pointers between the generic and non-generic address +spaces.

+

As an example, the following IR will define an array @g that resides in +global device memory.

+
@g = internal addrspace(1) global [4 x i32] [ i32 0, i32 1, i32 2, i32 3 ]
+
+
+

LLVM IR functions can read and write to this array, and host-side code can +copy data to it by name with the CUDA Driver API.

+

Note that since address space 0 is the generic space, it is illegal to have +global variables in address space 0. Address space 0 is the default address +space in LLVM, so the addrspace(N) annotation is required for global +variables.

+
+
+

Triples

+

The NVPTX target uses the module triple to select between 32/64-bit code +generation and the driver-compiler interface to use. The triple architecture +can be one of nvptx (32-bit PTX) or nvptx64 (64-bit PTX). The +operating system should be one of cuda or nvcl, which determines the +interface used by the generated code to communicate with the driver. Most +users will want to use cuda as the operating system, which makes the +generated PTX compatible with the CUDA Driver API.

+

Example: 32-bit PTX for CUDA Driver API: nvptx-nvidia-cuda

+

Example: 64-bit PTX for CUDA Driver API: nvptx64-nvidia-cuda

+
+
+
+

NVPTX Intrinsics

+
+

Address Space Conversion

+
+

‘llvm.nvvm.ptr.*.to.gen’ Intrinsics

+
+
Syntax:
+

These are overloaded intrinsics. You can use these on any pointer types.

+
declare i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)*)
+declare i8* @llvm.nvvm.ptr.shared.to.gen.p0i8.p3i8(i8 addrspace(3)*)
+declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+declare i8* @llvm.nvvm.ptr.local.to.gen.p0i8.p5i8(i8 addrspace(5)*)
+
+
+
+
+
Overview:
+

The ‘llvm.nvvm.ptr.*.to.gen’ intrinsics convert a pointer in a non-generic +address space to a generic address space pointer.

+
+
+
Semantics:
+

These intrinsics modify the pointer value to be a valid generic address space +pointer.

+
+
+
+

‘llvm.nvvm.ptr.gen.to.*’ Intrinsics

+
+
Syntax:
+

These are overloaded intrinsics. You can use these on any pointer types.

+
declare i8 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8*)
+declare i8 addrspace(3)* @llvm.nvvm.ptr.gen.to.shared.p3i8.p0i8(i8*)
+declare i8 addrspace(4)* @llvm.nvvm.ptr.gen.to.constant.p4i8.p0i8(i8*)
+declare i8 addrspace(5)* @llvm.nvvm.ptr.gen.to.local.p5i8.p0i8(i8*)
+
+
+
+
+
Overview:
+

The ‘llvm.nvvm.ptr.gen.to.*’ intrinsics convert a pointer in the generic +address space to a pointer in the target address space. Note that these +intrinsics are only useful if the target address space of +the pointer is known. It is not legal to use address space conversion +intrinsics to convert a pointer from one non-generic address space to another +non-generic address space.

+
+
+
Semantics:
+

These intrinsics modify the pointer value to be a valid pointer in the target +non-generic address space.

+
+
+
+
+

Reading PTX Special Registers

+
+

llvm.nvvm.read.ptx.sreg.*

+
+
Syntax:
+
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y()
+declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z()
+declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+
+
+
+
+
Overview:
+

The ‘@llvm.nvvm.read.ptx.sreg.*’ intrinsics provide access to the PTX +special registers, in particular the kernel launch bounds. These registers +map in the following way to CUDA builtins:

+
+
++++ + + + + + + + + + + + + + + + + + + + +

CUDA Builtin

PTX Special Register Intrinsic

threadIdx

@llvm.nvvm.read.ptx.sreg.tid.*

blockIdx

@llvm.nvvm.read.ptx.sreg.ctaid.*

blockDim

@llvm.nvvm.read.ptx.sreg.ntid.*

gridDim

@llvm.nvvm.read.ptx.sreg.nctaid.*

+
+
+
+
+
+

Barriers

+
+

llvm.nvvm.barrier0

+
+
Syntax:
+
declare void @llvm.nvvm.barrier0()
+
+
+
+
+
Overview:
+

The ‘@llvm.nvvm.barrier0()’ intrinsic emits a PTX bar.sync 0 +instruction, equivalent to the __syncthreads() call in CUDA.

+
+
+
+
+

Other Intrinsics

+

For the full set of NVPTX intrinsics, please see the +include/llvm/IR/IntrinsicsNVVM.td file in the LLVM source tree.

+
+
+
+

Linking with Libdevice

+

The CUDA Toolkit comes with an LLVM bitcode library called libdevice that +implements many common mathematical functions. This library can be used as a +high-performance math library for any compilers using the LLVM NVPTX target. +The library can be found under nvvm/libdevice/ in the CUDA Toolkit and +there is a separate version for each compute architecture.

+

For a list of all math functions implemented in libdevice, see the +libdevice User’s Guide.

+

To accommodate various math-related compiler flags that can affect code +generation of libdevice code, the library code depends on a special LLVM IR +pass (NVVMReflect) to handle conditional compilation within LLVM IR. This +pass looks for calls to the @__nvvm_reflect function and replaces them +with constants based on the defined reflection parameters. Such conditional +code often follows a pattern:

+
float my_function(float a) {
+  if (__nvvm_reflect("FASTMATH"))
+    return my_function_fast(a);
+  else
+    return my_function_precise(a);
+}
+
+
+

The default value for all unspecified reflection parameters is zero.

+

The NVVMReflect pass should be executed early in the optimization +pipeline, immediately after the link stage. The internalize pass is also +recommended to remove unused math functions from the resulting PTX. For an +input IR module module.bc, the following compilation flow is recommended:

+
    +
  1. Save list of external functions in module.bc

  2. +
  3. Link module.bc with libdevice.compute_XX.YY.bc

  4. +
  5. Internalize all functions not in list from (1)

  6. +
  7. Eliminate all unused internal functions

  8. +
  9. Run NVVMReflect pass

  10. +
  11. Run standard optimization pipeline

  12. +
+
+

Note

+

linkonce and linkonce_odr linkage types are not suitable for the +libdevice functions, because it is possible to link two IR modules that have each been +linked against libdevice using different reflection variables.

+
+

Since the NVVMReflect pass replaces conditionals with constants, it will +often leave behind dead code of the form:

+
entry:
+  ..
+  br i1 true, label %foo, label %bar
+foo:
+  ..
+bar:
+  ; Dead code
+  ..
+
+
+

Therefore, it is recommended that NVVMReflect is executed early in the +optimization pipeline before dead-code elimination.

+

The NVPTX TargetMachine knows how to schedule NVVMReflect at the beginning +of your pass manager; just use the following code when setting up your pass +manager:

+
std::unique_ptr<TargetMachine> TM = ...;
+PassManagerBuilder PMBuilder(...);
+if (TM)
+  TM->adjustPassManager(PMBuilder);
+
+
+
+

Reflection Parameters

+

The libdevice library currently uses the following reflection parameters to +control code generation:

+ ++++ + + + + + + + + + + +

Flag

Description

__CUDA_FTZ=[0,1]

Use optimized code paths that flush subnormals to zero

+

The value of this flag is determined by the “nvvm-reflect-ftz” module flag. +The following sets the ftz flag to 1.

+
!llvm.module.flags = !{!0}
+!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
+
+
+

(i32 4 indicates that the value set here overrides the value in another +module we link with. See the Module Flags Metadata section of the LangRef (LangRef.html#module-flags-metadata) +for details.)

+
+
+
+

Executing PTX

+

The most common way to execute PTX assembly on a GPU device is to use the CUDA +Driver API. This API is a low-level interface to the GPU driver and allows for +JIT compilation of PTX code to native GPU machine code.

+

Initializing the Driver API:

+
CUdevice device;
+CUcontext context;
+
+// Initialize the driver API
+cuInit(0);
+// Get a handle to the first compute device
+cuDeviceGet(&device, 0);
+// Create a compute device context
+cuCtxCreate(&context, 0, device);
+
+
+

JIT compiling a PTX string to a device binary:

+
CUmodule module;
+CUfunction function;
+
+// JIT compile a null-terminated PTX string
+cuModuleLoadData(&module, (void*)PTXString);
+
+// Get a handle to the "myfunction" kernel function
+cuModuleGetFunction(&function, module, "myfunction");
+
+
+

For full examples of executing PTX assembly, please see the CUDA Samples distribution.

+
+
+

Common Issues

+
+

ptxas complains of undefined function: __nvvm_reflect

+

When linking with libdevice, the NVVMReflect pass must be used. See +Linking with Libdevice for more information.

+
+
+
+

Tutorial: A Simple Compute Kernel

+

To start, let us take a look at a simple compute kernel written directly in +LLVM IR. The kernel implements vector addition, where each thread computes one +element of the output vector C from the input vectors A and B. To make this +easier, we also assume that only a single CTA (thread block) will be launched, +and that it will be one dimensional.

+
+

The Kernel

+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Intrinsic to read X component of thread ID
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
+
+define void @kernel(float addrspace(1)* %A,
+                    float addrspace(1)* %B,
+                    float addrspace(1)* %C) {
+entry:
+  ; What is my ID?
+  %id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
+
+  ; Compute pointers into A, B, and C
+  %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
+  %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
+  %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
+
+  ; Read A, B
+  %valA = load float, float addrspace(1)* %ptrA, align 4
+  %valB = load float, float addrspace(1)* %ptrB, align 4
+
+  ; Compute C = A + B
+  %valC = fadd float %valA, %valB
+
+  ; Store back to C
+  store float %valC, float addrspace(1)* %ptrC, align 4
+
+  ret void
+}
+
+!nvvm.annotations = !{!0}
+!0 = !{void (float addrspace(1)*,
+             float addrspace(1)*,
+             float addrspace(1)*)* @kernel, !"kernel", i32 1}
+
+
+

We can use the LLVM llc tool to directly run the NVPTX code generator:

+
# llc -mcpu=sm_20 kernel.ll -o kernel.ptx
+
+
+
+

Note

+

If you want to generate 32-bit code, change p:64:64:64 to p:32:32:32 +in the module data layout string and use nvptx-nvidia-cuda as the +target triple.

+
+

The output we get from llc (as of LLVM 3.4):

+
//
+// Generated by LLVM NVPTX Back-End
+//
+
+.version 3.1
+.target sm_20
+.address_size 64
+
+  // .globl kernel
+                                        // @kernel
+.visible .entry kernel(
+  .param .u64 kernel_param_0,
+  .param .u64 kernel_param_1,
+  .param .u64 kernel_param_2
+)
+{
+  .reg .f32   %f<4>;
+  .reg .s32   %r<2>;
+  .reg .s64   %rl<8>;
+
+// %bb.0:                                // %entry
+  ld.param.u64    %rl1, [kernel_param_0];
+  mov.u32         %r1, %tid.x;
+  mul.wide.s32    %rl2, %r1, 4;
+  add.s64         %rl3, %rl1, %rl2;
+  ld.param.u64    %rl4, [kernel_param_1];
+  add.s64         %rl5, %rl4, %rl2;
+  ld.param.u64    %rl6, [kernel_param_2];
+  add.s64         %rl7, %rl6, %rl2;
+  ld.global.f32   %f1, [%rl3];
+  ld.global.f32   %f2, [%rl5];
+  add.f32         %f3, %f1, %f2;
+  st.global.f32   [%rl7], %f3;
+  ret;
+}
+
+
+
+
+

Dissecting the Kernel

+

Now let us dissect the LLVM IR that makes up this kernel.

+
+

Data Layout

+

The data layout string determines the size in bits of common data types, their +ABI alignment, and their storage size. For NVPTX, you should use one of the +following:

+

32-bit PTX:

+
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+

64-bit PTX:

+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+
+
+
+

Target Intrinsics

+

In this example, we use the @llvm.nvvm.read.ptx.sreg.tid.x intrinsic to +read the X component of the current thread’s ID, which corresponds to a read +of register %tid.x in PTX. The NVPTX back-end supports a large set of +intrinsics. A short list is shown below; please see +include/llvm/IR/IntrinsicsNVVM.td for the full list.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + +

Intrinsic

CUDA Equivalent

i32 @llvm.nvvm.read.ptx.sreg.tid.{x,y,z}

threadIdx.{x,y,z}

i32 @llvm.nvvm.read.ptx.sreg.ctaid.{x,y,z}

blockIdx.{x,y,z}

i32 @llvm.nvvm.read.ptx.sreg.ntid.{x,y,z}

blockDim.{x,y,z}

i32 @llvm.nvvm.read.ptx.sreg.nctaid.{x,y,z}

gridDim.{x,y,z}

void @llvm.nvvm.barrier0()

__syncthreads()

+
+
+

Address Spaces

+

You may have noticed that all of the pointer types in the LLVM IR example had +an explicit address space specifier. What is address space 1? NVIDIA GPU +devices (generally) have four types of memory:

+
    +
  • Global: Large, off-chip memory

  • +
  • Shared: Small, on-chip memory shared among all threads in a CTA

  • +
  • Local: Per-thread, private memory

  • +
  • Constant: Read-only memory shared across all threads

  • +
+

These different types of memory are represented in LLVM IR as address spaces. +There is also a fifth address space used by the NVPTX code generator that +corresponds to the “generic” address space. This address space can represent +addresses in any other address space (with a few exceptions). This allows +users to write IR functions that can load/store memory using the same +instructions. Intrinsics are provided to convert pointers between the generic +and non-generic address spaces.

+

See Address Spaces and NVPTX Intrinsics for more information.

+
+
+

Kernel Metadata

+

In PTX, a function can be either a kernel function (callable from the host +program), or a device function (callable only from GPU code). You can think +of kernel functions as entry-points in the GPU program. To mark an LLVM IR +function as a kernel function, we make use of special LLVM metadata. The +NVPTX back-end will look for a named metadata node called +nvvm.annotations. This named metadata must contain a list of metadata that +describe the IR. For our purposes, we need to declare a metadata node that +assigns the “kernel” attribute to the LLVM IR function that should be emitted +as a PTX kernel function. These metadata nodes take the form:

+
!{<function-ref>, !"kernel", i32 1}
+
+
+

For the previous example, we have:

+
!nvvm.annotations = !{!0}
+!0 = !{void (float addrspace(1)*,
+             float addrspace(1)*,
+             float addrspace(1)*)* @kernel, !"kernel", i32 1}
+
+
+

Here, we have a single metadata declaration in nvvm.annotations. This +metadata annotates our @kernel function with the kernel attribute.

+
+
+
+

Running the Kernel

+

Generating PTX from LLVM IR is all well and good, but how do we execute it on +a real GPU device? The CUDA Driver API provides a convenient mechanism for +loading and JIT compiling PTX to a native GPU device, and launching a kernel. +The API is similar to OpenCL. A simple example showing how to load and +execute our vector addition code is shown below. Note that for brevity this +code does not perform much error checking!

+
+

Note

+

You can also use the ptxas tool provided by the CUDA Toolkit to offline +compile PTX to machine code (SASS) for a specific GPU architecture. Such +binaries can be loaded by the CUDA Driver API in the same way as PTX. This +can be useful for reducing startup time by precompiling the PTX kernels.

+
+
#include <iostream>
+#include <fstream>
+#include <cassert>
+#include "cuda.h"
+
+
+void checkCudaErrors(CUresult err) {
+  assert(err == CUDA_SUCCESS);
+}
+
+/// main - Program entry point
+int main(int argc, char **argv) {
+  CUdevice    device;
+  CUmodule    cudaModule;
+  CUcontext   context;
+  CUfunction  function;
+  CUlinkState linker;
+  int         devCount;
+
+  // CUDA initialization
+  checkCudaErrors(cuInit(0));
+  checkCudaErrors(cuDeviceGetCount(&devCount));
+  checkCudaErrors(cuDeviceGet(&device, 0));
+
+  char name[128];
+  checkCudaErrors(cuDeviceGetName(name, 128, device));
+  std::cout << "Using CUDA Device [0]: " << name << "\n";
+
+  int devMajor, devMinor;
+  checkCudaErrors(cuDeviceComputeCapability(&devMajor, &devMinor, device));
+  std::cout << "Device Compute Capability: "
+            << devMajor << "." << devMinor << "\n";
+  if (devMajor < 2) {
+    std::cerr << "ERROR: Device 0 is not SM 2.0 or greater\n";
+    return 1;
+  }
+
+  std::ifstream t("kernel.ptx");
+  if (!t.is_open()) {
+    std::cerr << "kernel.ptx not found\n";
+    return 1;
+  }
+  std::string str((std::istreambuf_iterator<char>(t)),
+                    std::istreambuf_iterator<char>());
+
+  // Create driver context
+  checkCudaErrors(cuCtxCreate(&context, 0, device));
+
+  // Create module for object
+  checkCudaErrors(cuModuleLoadDataEx(&cudaModule, str.c_str(), 0, 0, 0));
+
+  // Get kernel function
+  checkCudaErrors(cuModuleGetFunction(&function, cudaModule, "kernel"));
+
+  // Device data
+  CUdeviceptr devBufferA;
+  CUdeviceptr devBufferB;
+  CUdeviceptr devBufferC;
+
+  checkCudaErrors(cuMemAlloc(&devBufferA, sizeof(float)*16));
+  checkCudaErrors(cuMemAlloc(&devBufferB, sizeof(float)*16));
+  checkCudaErrors(cuMemAlloc(&devBufferC, sizeof(float)*16));
+
+  float* hostA = new float[16];
+  float* hostB = new float[16];
+  float* hostC = new float[16];
+
+  // Populate input
+  for (unsigned i = 0; i != 16; ++i) {
+    hostA[i] = (float)i;
+    hostB[i] = (float)(2*i);
+    hostC[i] = 0.0f;
+  }
+
+  checkCudaErrors(cuMemcpyHtoD(devBufferA, &hostA[0], sizeof(float)*16));
+  checkCudaErrors(cuMemcpyHtoD(devBufferB, &hostB[0], sizeof(float)*16));
+
+
+  unsigned blockSizeX = 16;
+  unsigned blockSizeY = 1;
+  unsigned blockSizeZ = 1;
+  unsigned gridSizeX  = 1;
+  unsigned gridSizeY  = 1;
+  unsigned gridSizeZ  = 1;
+
+  // Kernel parameters
+  void *KernelParams[] = { &devBufferA, &devBufferB, &devBufferC };
+
+  std::cout << "Launching kernel\n";
+
+  // Kernel launch
+  checkCudaErrors(cuLaunchKernel(function, gridSizeX, gridSizeY, gridSizeZ,
+                                 blockSizeX, blockSizeY, blockSizeZ,
+                                 0, NULL, KernelParams, NULL));
+
+  // Retrieve device data
+  checkCudaErrors(cuMemcpyDtoH(&hostC[0], devBufferC, sizeof(float)*16));
+
+
+  std::cout << "Results:\n";
+  for (unsigned i = 0; i != 16; ++i) {
+    std::cout << hostA[i] << " + " << hostB[i] << " = " << hostC[i] << "\n";
+  }
+
+
+  // Clean up after ourselves
+  delete [] hostA;
+  delete [] hostB;
+  delete [] hostC;
+
+  // Clean-up
+  checkCudaErrors(cuMemFree(devBufferA));
+  checkCudaErrors(cuMemFree(devBufferB));
+  checkCudaErrors(cuMemFree(devBufferC));
+  checkCudaErrors(cuModuleUnload(cudaModule));
+  checkCudaErrors(cuCtxDestroy(context));
+
+  return 0;
+}
+
+
+

You will need to link with the CUDA driver and specify the path to cuda.h.

+
# clang++ sample.cpp -o sample -O2 -g -I/usr/local/cuda-5.5/include -lcuda
+
+
+

We don’t need to specify a path to libcuda.so since this is installed in a +system location by the driver, not the CUDA toolkit.

+

If everything goes as planned, you should see the following output when +running the compiled program:

+
Using CUDA Device [0]: GeForce GTX 680
+Device Compute Capability: 3.0
+Launching kernel
+Results:
+0 + 0 = 0
+1 + 2 = 3
+2 + 4 = 6
+3 + 6 = 9
+4 + 8 = 12
+5 + 10 = 15
+6 + 12 = 18
+7 + 14 = 21
+8 + 16 = 24
+9 + 18 = 27
+10 + 20 = 30
+11 + 22 = 33
+12 + 24 = 36
+13 + 26 = 39
+14 + 28 = 42
+15 + 30 = 45
+
+
+
+

Note

+

You will likely see a different device identifier based on your hardware.

+
+
+
+
+

Tutorial: Linking with Libdevice

+

In this tutorial, we show a simple example of linking LLVM IR with the +libdevice library. We will use the same kernel as the previous tutorial, +except that we will compute C = pow(A, B) instead of C = A + B. +Libdevice provides an __nv_powf function that we will use.

+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Intrinsic to read X component of thread ID
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
+; libdevice function
+declare float @__nv_powf(float, float)
+
+define void @kernel(float addrspace(1)* %A,
+                    float addrspace(1)* %B,
+                    float addrspace(1)* %C) {
+entry:
+  ; What is my ID?
+  %id = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() readnone nounwind
+
+  ; Compute pointers into A, B, and C
+  %ptrA = getelementptr float, float addrspace(1)* %A, i32 %id
+  %ptrB = getelementptr float, float addrspace(1)* %B, i32 %id
+  %ptrC = getelementptr float, float addrspace(1)* %C, i32 %id
+
+  ; Read A, B
+  %valA = load float, float addrspace(1)* %ptrA, align 4
+  %valB = load float, float addrspace(1)* %ptrB, align 4
+
+  ; Compute C = pow(A, B)
+  %valC = call float @__nv_powf(float %valA, float %valB)
+
+  ; Store back to C
+  store float %valC, float addrspace(1)* %ptrC, align 4
+
+  ret void
+}
+
+!nvvm.annotations = !{!0}
+!0 = !{void (float addrspace(1)*,
+             float addrspace(1)*,
+             float addrspace(1)*)* @kernel, !"kernel", i32 1}
+
+
+

To compile this kernel, we perform the following steps:

+
    +
  1. Link with libdevice

  2. +
  3. Internalize all but the public kernel function

  4. +
  5. Run NVVMReflect and set __CUDA_FTZ to 0

  6. +
  7. Optimize the linked module

  8. +
  9. Codegen the module

  10. +
+

These steps can be performed by the LLVM llvm-link, opt, and llc +tools. In a complete compiler, these steps can also be performed entirely +programmatically by setting up an appropriate pass configuration (see +Linking with Libdevice).

+
# llvm-link t2.bc libdevice.compute_20.10.bc -o t2.linked.bc
+# opt -internalize -internalize-public-api-list=kernel -nvvm-reflect-list=__CUDA_FTZ=0 -nvvm-reflect -O3 t2.linked.bc -o t2.opt.bc
+# llc -mcpu=sm_20 t2.opt.bc -o t2.ptx
+
+
+
+

Note

+

The -nvvm-reflect-list=__CUDA_FTZ=0 is not strictly required, as any +undefined variables will default to zero. It is shown here for evaluation +purposes.

+
+

This gives us the following PTX (excerpt):

+
//
+// Generated by LLVM NVPTX Back-End
+//
+
+.version 3.1
+.target sm_20
+.address_size 64
+
+  // .globl kernel
+                                        // @kernel
+.visible .entry kernel(
+  .param .u64 kernel_param_0,
+  .param .u64 kernel_param_1,
+  .param .u64 kernel_param_2
+)
+{
+  .reg .pred  %p<30>;
+  .reg .f32   %f<111>;
+  .reg .s32   %r<21>;
+  .reg .s64   %rl<8>;
+
+// %bb.0:                                // %entry
+  ld.param.u64  %rl2, [kernel_param_0];
+  mov.u32   %r3, %tid.x;
+  ld.param.u64  %rl3, [kernel_param_1];
+  mul.wide.s32  %rl4, %r3, 4;
+  add.s64   %rl5, %rl2, %rl4;
+  ld.param.u64  %rl6, [kernel_param_2];
+  add.s64   %rl7, %rl3, %rl4;
+  add.s64   %rl1, %rl6, %rl4;
+  ld.global.f32   %f1, [%rl5];
+  ld.global.f32   %f2, [%rl7];
+  setp.eq.f32 %p1, %f1, 0f3F800000;
+  setp.eq.f32 %p2, %f2, 0f00000000;
+  or.pred   %p3, %p1, %p2;
+  @%p3 bra  BB0_1;
+  bra.uni   BB0_2;
+BB0_1:
+  mov.f32   %f110, 0f3F800000;
+  st.global.f32   [%rl1], %f110;
+  ret;
+BB0_2:                                  // %__nv_isnanf.exit.i
+  abs.f32   %f4, %f1;
+  setp.gtu.f32  %p4, %f4, 0f7F800000;
+  @%p4 bra  BB0_4;
+// %bb.3:                                // %__nv_isnanf.exit5.i
+  abs.f32   %f5, %f2;
+  setp.le.f32 %p5, %f5, 0f7F800000;
+  @%p5 bra  BB0_5;
+BB0_4:                                  // %.critedge1.i
+  add.f32   %f110, %f1, %f2;
+  st.global.f32   [%rl1], %f110;
+  ret;
+BB0_5:                                  // %__nv_isinff.exit.i
+
+  ...
+
+BB0_26:                                 // %__nv_truncf.exit.i.i.i.i.i
+  mul.f32   %f90, %f107, 0f3FB8AA3B;
+  cvt.rzi.f32.f32 %f91, %f90;
+  mov.f32   %f92, 0fBF317200;
+  fma.rn.f32  %f93, %f91, %f92, %f107;
+  mov.f32   %f94, 0fB5BFBE8E;
+  fma.rn.f32  %f95, %f91, %f94, %f93;
+  mul.f32   %f89, %f95, 0f3FB8AA3B;
+  // inline asm
+  ex2.approx.ftz.f32 %f88,%f89;
+  // inline asm
+  add.f32   %f96, %f91, 0f00000000;
+  ex2.approx.f32  %f97, %f96;
+  mul.f32   %f98, %f88, %f97;
+  setp.lt.f32 %p15, %f107, 0fC2D20000;
+  selp.f32  %f99, 0f00000000, %f98, %p15;
+  setp.gt.f32 %p16, %f107, 0f42D20000;
+  selp.f32  %f110, 0f7F800000, %f99, %p16;
+  setp.eq.f32 %p17, %f110, 0f7F800000;
+  @%p17 bra   BB0_28;
+// %bb.27:
+  fma.rn.f32  %f110, %f110, %f108, %f110;
+BB0_28:                                 // %__internal_accurate_powf.exit.i
+  setp.lt.f32 %p18, %f1, 0f00000000;
+  setp.eq.f32 %p19, %f3, 0f3F800000;
+  and.pred    %p20, %p18, %p19;
+  @!%p20 bra  BB0_30;
+  bra.uni   BB0_29;
+BB0_29:
+  mov.b32    %r9, %f110;
+  xor.b32   %r10, %r9, -2147483648;
+  mov.b32    %f110, %r10;
+BB0_30:                                 // %__nv_powf.exit
+  st.global.f32   [%rl1], %f110;
+  ret;
+}
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/objects.inv and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/objects.inv differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/OpaquePointers.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/OpaquePointers.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/OpaquePointers.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/OpaquePointers.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,284 @@ + + + + + + + + + Opaque Pointers — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Opaque Pointers

+
+

The Opaque Pointer Type

+

Traditionally, LLVM IR pointer types have contained a pointee type. For example, +i32 * is a pointer that points to an i32 somewhere in memory. However, +due to a lack of pointee type semantics and various issues with having pointee +types, there is a desire to remove pointee types from pointers.

+

The opaque pointer type project aims to replace all pointer types containing +pointee types in LLVM with an opaque pointer type. The new pointer type is +tentatively represented textually as ptr.

+

Address spaces are still used to distinguish between different kinds of pointers +where the distinction is relevant for lowering (e.g. data vs function pointers +have different sizes on some architectures). Opaque pointers are not changing +anything related to address spaces and lowering. For more information, see +DataLayout.

+
+
+

Issues with explicit pointee types

+

LLVM IR pointers can be cast back and forth between pointers with different +pointee types. The pointee type does not necessarily actually represent the +actual underlying type in memory. In other words, the pointee type contains no +real semantics.

+

Lots of operations do not actually care about the underlying type. These +operations, typically intrinsics, usually end up taking an i8 *. This causes +lots of redundant no-op bitcasts in the IR to and from a pointer with a +different pointee type. The extra bitcasts take up space and require extra work +to look through in optimizations. And more bitcasts increase the chances of +incorrect bitcasts, especially with regard to address spaces.

+

Some instructions still need to know what type to treat the memory pointed to by +the pointer as. For example, a load needs to know how many bytes to load from +memory. In these cases, instructions themselves contain a type argument. For +example the load instruction from older versions of LLVM

+
load i64* %p
+
+
+

becomes

+
load i64, ptr %p
+
+
+

A nice analogous transition that happened earlier in LLVM is integer signedness. +There is no distinction between signed and unsigned integer types, rather the +integer operations themselves contain what to treat the integer as. Initially, +LLVM IR distinguished between unsigned and signed integer types. The transition +from manifesting signedness in types to instructions happened early on in LLVM’s +life to the betterment of LLVM IR.

+
+
+

I Still Need Pointee Types!

+

The frontend should already know what type each operation operates on based on +the input source code. However, some frontends like Clang may end up relying on +LLVM pointer pointee types to keep track of pointee types. The frontend needs to +keep track of frontend pointee types on its own.

+

For optimizations around frontend types, pointee types are not useful due to their +lack of semantics. Rather, since LLVM IR works on untyped memory, for a frontend +to tell LLVM about frontend types for the purposes of alias analysis, extra +metadata is added to the IR. For more information, see TBAA.

+

Some specific operations still need to know what type a pointer points to. For +the most part, this is codegen and ABI specific. For example, byval arguments are pointers, but backends need +to know the underlying type of the argument to properly lower it. In cases like +these, the attributes contain a type argument. For example,

+
call void @f(ptr byval(i32) %p)
+
+
+

signifies that %p as an argument should be lowered as an i32 passed +indirectly.

+

If you have use cases that this sort of fix doesn’t cover, please email +llvm-dev.

+
+
+

Transition Plan

+

LLVM currently has many places that depend on pointee types. Each dependency on +pointee types needs to be resolved in some way or another. This essentially +translates to figuring out how to remove all calls to +PointerType::getElementType and Type::getPointerElementType().

+

Making everything use opaque pointers in one huge commit is infeasible. This +needs to be done incrementally. The following steps need to be done, in no +particular order:

+
    +
  • Introduce the opaque pointer type

    +
      +
    • Already done

    • +
    +
  • +
  • Remove remaining in-tree users of pointee types

    +
      +
    • There are many miscellaneous uses that should be cleaned up individually

    • +
    • Some of the larger use cases are mentioned below

    • +
    +
  • +
  • Various ABI attributes and instructions that rely on pointee types need to be +modified to specify the type separately

    +
      +
    • This has already happened for all instructions like loads, stores, GEPs, +and various attributes like byval

    • +
    • More cases may be found as work continues

    • +
    +
  • +
  • Remove calls to and deprecate IRBuilder methods that rely on pointee types

    +
      +
    • For example, some of the IRBuilder::CreateGEP() methods use the pointer +operand’s pointee type to determine the GEP operand type

    • +
    • Some methods are already deprecated with LLVM_ATTRIBUTE_DEPRECATED, such +as some overloads of IRBuilder::CreateLoad()

    • +
    +
  • +
  • Allow bitcode auto-upgrade of legacy pointer type to the new opaque pointer +type (not to be turned on until ready)

    +
      +
    • To support legacy bitcode, such as legacy stores/loads, we need to track +pointee types for all values since legacy instructions may infer the types +from a pointer operand’s pointee type

    • +
    +
  • +
  • Migrate frontends to not keep track of frontend pointee types via LLVM pointer +pointee types

    +
      +
    • This is mostly Clang, see clang::CodeGen::Address::getElementType()

    • +
    +
  • +
  • Add an option to internally treat all pointer types as opaque pointers and see what +breaks, starting with LLVM tests, then run Clang over large codebases

    +
      +
    • We don’t want to start mass-updating tests until we’re fairly confident that opaque pointers won’t cause major issues

    • +
    +
  • +
  • Replace legacy pointer types in LLVM tests with opaque pointer types

  • +
+
+
+

Frontend Migration Steps

+

If you have your own frontend, there are a couple of things to do after opaque +pointer types fully work.

+
    +
  • Don’t rely on LLVM pointee types to keep track of frontend pointee types

  • +
  • Migrate away from LLVM IR instruction builders that rely on pointee types

    +
      +
    • For example, IRBuilder::CreateGEP() has multiple overloads; make sure to +use one where the source element type is explicitly passed in, not inferred +from the pointer operand pointee type

    • +
    +
  • +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/OptBisect.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/OptBisect.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/OptBisect.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/OptBisect.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,308 @@ + + + + + + + + + Using -opt-bisect-limit to debug optimization errors — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Using -opt-bisect-limit to debug optimization errors

+ +
+

Introduction

+

The -opt-bisect-limit option provides a way to disable all optimization passes +above a specified limit without modifying the way in which the Pass Managers +are populated. The intention of this option is to assist in tracking down +problems where incorrect transformations during optimization result in incorrect +run-time behavior.

+

This feature is implemented on an opt-in basis. Passes which can be safely +skipped while still allowing correct code generation call a function to +check the opt-bisect limit before performing optimizations. Passes which +either must be run or do not modify the IR do not perform this check and are +therefore never skipped. Generally, this means analysis passes, passes +that are run at CodeGenOpt::None and passes which are required for register +allocation.

+

The -opt-bisect-limit option can be used with any tool, including front ends +such as clang, that uses the core LLVM library for optimization and code +generation. The exact syntax for invoking the option is discussed below.

+

This feature is not intended to replace other debugging tools such as bugpoint. +Rather it provides an alternate course of action when reproducing the problem +requires a complex build infrastructure that would make using bugpoint +impractical or when reproducing the failure requires a sequence of +transformations that is difficult to replicate with tools like opt and llc.

+
+
+

Getting Started

+

The -opt-bisect-limit command line option can be passed directly to tools such +as opt, llc and lli. The syntax is as follows:

+
<tool name> [other options] -opt-bisect-limit=<limit>
+
+
+

If a value of -1 is used the tool will perform all optimizations but a message +will be printed to stderr for each optimization that could be skipped +indicating the index value that is associated with that optimization. To skip +optimizations, pass the value of the last optimization to be performed as the +opt-bisect-limit. All optimizations with a higher index value will be skipped.

+

In order to use the -opt-bisect-limit option with a driver that provides a +wrapper around the LLVM core library, an additional prefix option may be +required, as defined by the driver. For example, to use this option with +clang, the “-mllvm” prefix must be used. A typical clang invocation would look +like this:

+
clang -O2 -mllvm -opt-bisect-limit=256 my_file.c
+
+
+

The -opt-bisect-limit option may also be applied to link-time optimizations by +using a prefix to indicate that this is a plug-in option for the linker. The +following syntax will set a bisect limit for LTO transformations:

+
# When using lld, or ld64 (macOS)
+clang -flto -Wl,-mllvm,-opt-bisect-limit=256 my_file.o my_other_file.o
+# When using Gold
+clang -flto -Wl,-plugin-opt,-opt-bisect-limit=256 my_file.o my_other_file.o
+
+
+

LTO passes are run by a library instance invoked by the linker. Therefore any +passes run in the primary driver compilation phase are not affected by options +passed via ‘-Wl,-plugin-opt’ and LTO passes are not affected by options +passed to the driver-invoked LLVM invocation via ‘-mllvm’.

+
+
+

Bisection Index Values

+

The granularity of the optimizations associated with a single index value is +variable. Depending on how the optimization pass has been instrumented the +value may be associated with as much as all transformations that would have +been performed by an optimization pass on an IR unit for which it is invoked +(for instance, during a single call of runOnFunction for a FunctionPass) or as +little as a single transformation. The index values may also be nested so that +if an invocation of the pass is not skipped, individual transformations within +that invocation may still be skipped.

+

The order of the values assigned is guaranteed to remain stable and consistent +from one run to the next up to and including the value specified as the limit. +Above the limit value skipping of optimizations can cause a change in the +numbering, but because all optimizations above the limit are skipped this +is not a problem.

+

When an opt-bisect index value refers to an entire invocation of the run +function for a pass, the pass will query whether or not it should be skipped +each time it is invoked and each invocation will be assigned a unique value. +For example, if a FunctionPass is used with a module containing three functions +a different index value will be assigned to the pass for each of the functions +as the pass is run. The pass may be run on two functions but skipped for the +third.

+

If the pass internally performs operations on a smaller IR unit the pass must be +specifically instrumented to enable bisection at this finer level of granularity +(see below for details).

+
+
+

Example Usage

+
$ opt -O2 -o test-opt.bc -opt-bisect-limit=16 test.ll
+
+BISECT: running pass (1) Simplify the CFG on function (g)
+BISECT: running pass (2) SROA on function (g)
+BISECT: running pass (3) Early CSE on function (g)
+BISECT: running pass (4) Infer set function attributes on module (test.ll)
+BISECT: running pass (5) Interprocedural Sparse Conditional Constant Propagation on module (test.ll)
+BISECT: running pass (6) Global Variable Optimizer on module (test.ll)
+BISECT: running pass (7) Promote Memory to Register on function (g)
+BISECT: running pass (8) Dead Argument Elimination on module (test.ll)
+BISECT: running pass (9) Combine redundant instructions on function (g)
+BISECT: running pass (10) Simplify the CFG on function (g)
+BISECT: running pass (11) Remove unused exception handling info on SCC (<<null function>>)
+BISECT: running pass (12) Function Integration/Inlining on SCC (<<null function>>)
+BISECT: running pass (13) Deduce function attributes on SCC (<<null function>>)
+BISECT: running pass (14) Remove unused exception handling info on SCC (f)
+BISECT: running pass (15) Function Integration/Inlining on SCC (f)
+BISECT: running pass (16) Deduce function attributes on SCC (f)
+BISECT: NOT running pass (17) Remove unused exception handling info on SCC (g)
+BISECT: NOT running pass (18) Function Integration/Inlining on SCC (g)
+BISECT: NOT running pass (19) Deduce function attributes on SCC (g)
+BISECT: NOT running pass (20) SROA on function (g)
+BISECT: NOT running pass (21) Early CSE on function (g)
+BISECT: NOT running pass (22) Speculatively execute instructions if target has divergent branches on function (g)
+... etc. ...
+
+
+
+
+

Pass Skipping Implementation

+

The -opt-bisect-limit implementation depends on individual passes opting in to +the opt-bisect process. The OptBisect object that manages the process is +entirely passive and has no knowledge of how any pass is implemented. When a +pass is run, if the pass may be skipped, it should call the OptBisect object to +see if it should be skipped.

+

The OptBisect object is intended to be accessed through LLVMContext and each +Pass base class contains a helper function that abstracts the details in order +to make this check uniform across all passes. These helper functions are:

+
bool ModulePass::skipModule(Module &M);
+bool CallGraphSCCPass::skipSCC(CallGraphSCC &SCC);
+bool FunctionPass::skipFunction(const Function &F);
+bool LoopPass::skipLoop(const Loop *L);
+
+
+

A MachineFunctionPass should use FunctionPass::skipFunction() as such:

+
bool MyMachineFunctionPass::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+  // Otherwise, run the pass normally.
+}
+
+
+

In addition to checking with the OptBisect class to see if the pass should be +skipped, the skipFunction(), skipLoop() and skipBasicBlock() helper functions +also look for the presence of the “optnone” function attribute. The calling +pass will be unable to determine whether it is being skipped because the +“optnone” attribute is present or because the opt-bisect-limit has been +reached. This is desirable because the behavior should be the same in either +case.

+

The majority of LLVM passes which can be skipped have already been instrumented +in the manner described above. If you are adding a new pass or believe you +have found a pass which is not being included in the opt-bisect process but +should be, you can add it as described above.

+
+
+

Adding Finer Granularity

+

Once the pass in which an incorrect transformation is performed has been +determined, it may be useful to perform further analysis in order to determine +which specific transformation is causing the problem. Debug counters +can be used for this purpose.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ORCv2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ORCv2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ORCv2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ORCv2.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,897 @@ + + + + + + + + + ORC Design and Implementation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

ORC Design and Implementation

+ +
+

Introduction

+

This document aims to provide a high-level overview of the design and +implementation of the ORC JIT APIs. Except where otherwise stated, all discussion +refers to the modern ORCv2 APIs (available since LLVM 7). Clients wishing to +transition from ORCv1 should see Section Transitioning from ORCv1 to ORCv2.

+
+
+

Use-cases

+

ORC provides a modular API for building JIT compilers. There are a number +of use cases for such an API. For example:

+

1. The LLVM tutorials use a simple ORC-based JIT class to execute expressions +compiled from a toy language: Kaleidoscope.

+

2. The LLVM debugger, LLDB, uses a cross-compiling JIT for expression +evaluation. In this use case, cross compilation allows expressions compiled +in the debugger process to be executed on the debug target process, which may +be on a different device/architecture.

+

3. In high-performance JITs (e.g. JVMs, Julia) that want to make use of LLVM’s +optimizations within an existing JIT infrastructure.

+
    +
  1. In interpreters and REPLs, e.g. Cling (C++) and the Swift interpreter.

  2. +
+

By adopting a modular, library-based design we aim to make ORC useful in as many +of these contexts as possible.

+
+
+

Features

+

ORC provides the following features:

+
+
JIT-linking

ORC provides APIs to link relocatable object files (COFF, ELF, MachO) 1 +into a target process at runtime. The target process may be the same process +that contains the JIT session object and jit-linker, or may be another process +(even one running on a different machine or architecture) that communicates +with the JIT via RPC.

+
+
LLVM IR compilation

ORC provides off the shelf components (IRCompileLayer, SimpleCompiler, +ConcurrentIRCompiler) that make it easy to add LLVM IR to a JIT’d process.

+
+
Eager and lazy compilation

By default, ORC will compile symbols as soon as they are looked up in the JIT +session object (ExecutionSession). Compiling eagerly by default makes it +easy to use ORC as an in-memory compiler for an existing JIT (similar to how +MCJIT is commonly used). However ORC also provides built-in support for lazy +compilation via lazy-reexports (see Laziness).

+
+
Support for Custom Compilers and Program Representations

Clients can supply custom compilers for each symbol that they define in their +JIT session. ORC will run the user-supplied compiler when a definition of +a symbol is needed. ORC is actually fully language agnostic: LLVM IR is not +treated specially, and is supported via the same wrapper mechanism (the +MaterializationUnit class) that is used for custom compilers.

+
+
Concurrent JIT’d code and Concurrent Compilation

JIT’d code may be executed in multiple threads, may spawn new threads, and may +re-enter the ORC (e.g. to request lazy compilation) concurrently from multiple +threads. Compilers launched by ORC can run concurrently (provided the client +sets up an appropriate dispatcher). Built-in dependency tracking ensures that +ORC does not release pointers to JIT’d code or data until all dependencies +have also been JIT’d and they are safe to call or use.

+
+
Removable Code

Resources for JIT’d program representations

+
+
Orthogonality and Composability

Each of the features above can be used independently. It is possible to put +ORC components together to make a non-lazy, in-process, single threaded JIT +or a lazy, out-of-process, concurrent JIT, or anything in between.

+
+
+
+
+

LLJIT and LLLazyJIT

+

ORC provides two basic JIT classes off-the-shelf. These are useful both as +examples of how to assemble ORC components to make a JIT, and as replacements +for earlier LLVM JIT APIs (e.g. MCJIT).

+

The LLJIT class uses an IRCompileLayer and RTDyldObjectLinkingLayer to support +compilation of LLVM IR and linking of relocatable object files. All operations +are performed eagerly on symbol lookup (i.e. a symbol’s definition is compiled +as soon as you attempt to look up its address). LLJIT is a suitable replacement +for MCJIT in most cases (note: some more advanced features, e.g. +JITEventListeners are not supported yet).

+

The LLLazyJIT extends LLJIT and adds a CompileOnDemandLayer to enable lazy +compilation of LLVM IR. When an LLVM IR module is added via the addLazyIRModule +method, function bodies in that module will not be compiled until they are first +called. LLLazyJIT aims to provide a replacement of LLVM’s original (pre-MCJIT) +JIT API.

+

LLJIT and LLLazyJIT instances can be created using their respective builder +classes: LLJITBuilder and LLLazyJITBuilder. For example, assuming you have a +module M loaded on a ThreadSafeContext Ctx:

+
// Try to detect the host arch and construct an LLJIT instance.
+auto JIT = LLJITBuilder().create();
+
+// If we could not construct an instance, return an error.
+if (!JIT)
+  return JIT.takeError();
+
+// Add the module.
+if (auto Err = JIT->addIRModule(ThreadSafeModule(std::move(M), Ctx)))
+  return Err;
+
+// Look up the JIT'd code entry point.
+auto EntrySym = JIT->lookup("entry");
+if (!EntrySym)
+  return EntrySym.takeError();
+
+// Cast the entry point address to a function pointer.
+auto *Entry = (void(*)())EntrySym.getAddress();
+
+// Call into JIT'd code.
+Entry();
+
+
+

The builder classes provide a number of configuration options that can be +specified before the JIT instance is constructed. For example:

+
// Build an LLLazyJIT instance that uses four worker threads for compilation,
+// and jumps to a specific error handler (rather than null) on lazy compile
+// failures.
+
+void handleLazyCompileFailure() {
+  // JIT'd code will jump here if lazy compilation fails, giving us an
+  // opportunity to exit or throw an exception into JIT'd code.
+  throw JITFailed();
+}
+
+auto JIT = LLLazyJITBuilder()
+             .setNumCompileThreads(4)
+             .setLazyCompileFailureAddr(
+                 toJITTargetAddress(&handleLazyCompileFailure))
+             .create();
+
+// ...
+
+
+

For users wanting to get started with LLJIT a minimal example program can be +found at llvm/examples/HowToUseLLJIT.

+
+
+

Design Overview

+

ORC’s JIT program model aims to emulate the linking and symbol resolution +rules used by the static and dynamic linkers. This allows ORC to JIT +arbitrary LLVM IR, including IR produced by an ordinary static compiler (e.g. +clang) that uses constructs like symbol linkage and visibility, and weak 3 +and common symbol definitions.

+

To see how this works, imagine a program foo which links against a pair +of dynamic libraries: libA and libB. On the command line, building this +program might look like:

+
$ clang++ -shared -o libA.dylib a1.cpp a2.cpp
+$ clang++ -shared -o libB.dylib b1.cpp b2.cpp
+$ clang++ -o myapp myapp.cpp -L. -lA -lB
+$ ./myapp
+
+
+

In ORC, this would translate into API calls on a hypothetical CXXCompileLayer +(with error checking omitted for brevity) as:

+
ExecutionSession ES;
+RTDyldObjectLinkingLayer ObjLinkingLayer(
+    ES, []() { return std::make_unique<SectionMemoryManager>(); });
+CXXCompileLayer CXXLayer(ES, ObjLinkingLayer);
+
+// Create JITDylib "A" and add code to it using the CXX layer.
+auto &LibA = ES.createJITDylib("A");
+CXXLayer.add(LibA, MemoryBuffer::getFile("a1.cpp"));
+CXXLayer.add(LibA, MemoryBuffer::getFile("a2.cpp"));
+
+// Create JITDylib "B" and add code to it using the CXX layer.
+auto &LibB = ES.createJITDylib("B");
+CXXLayer.add(LibB, MemoryBuffer::getFile("b1.cpp"));
+CXXLayer.add(LibB, MemoryBuffer::getFile("b2.cpp"));
+
+// Create and specify the search order for the main JITDylib. This is
+// equivalent to a "links against" relationship in a command-line link.
+auto &MainJD = ES.createJITDylib("main");
+MainJD.addToLinkOrder(&LibA);
+MainJD.addToLinkOrder(&LibB);
+CXXLayer.add(MainJD, MemoryBuffer::getFile("main.cpp"));
+
+// Look up the JIT'd main, cast it to a function pointer, then call it.
+auto MainSym = ExitOnErr(ES.lookup({&MainJD}, "main"));
+auto *Main = (int(*)(int, char*[]))MainSym.getAddress();
+
+int Result = Main(...);
+
+
+

This example tells us nothing about how or when compilation will happen. +That will depend on the implementation of the hypothetical CXXCompileLayer. +The same linker-based symbol resolution rules will apply regardless of that +implementation, however. For example, if a1.cpp and a2.cpp both define a +function “foo” then ORCv2 will generate a duplicate definition error. On the +other hand, if a1.cpp and b1.cpp both define “foo” there is no error (different +dynamic libraries may define the same symbol). If main.cpp refers to “foo”, it +should bind to the definition in LibA rather than the one in LibB, since +main.cpp is part of the “main” dylib, and the main dylib links against LibA +before LibB.

+

Many JIT clients will have no need for this strict adherence to the usual +ahead-of-time linking rules, and should be able to get by just fine by putting +all of their code in a single JITDylib. However, clients who want to JIT code +for languages/projects that traditionally rely on ahead-of-time linking (e.g. +C++) will find that this feature makes life much easier.

+

Symbol lookup in ORC serves two other important functions, beyond providing +addresses for symbols: (1) It triggers compilation of the symbol(s) searched for +(if they have not been compiled already), and (2) it provides the +synchronization mechanism for concurrent compilation. The pseudo-code for the +lookup process is:

+
construct a query object from a query set and query handler
+lock the session
+lodge query against requested symbols, collect required materializers (if any)
+unlock the session
+dispatch materializers (if any)
+
+
+

In this context a materializer is something that provides a working definition +of a symbol upon request. Usually materializers are just wrappers for compilers, +but they may also wrap a jit-linker directly (if the program representation +backing the definitions is an object file), or may even be a class that writes +bits directly into memory (for example, if the definitions are +stubs). Materialization is the blanket term for any actions (compiling, linking, +splatting bits, registering with runtimes, etc.) that are required to generate a +symbol definition that is safe to call or access.

+

As each materializer completes its work it notifies the JITDylib, which in turn +notifies any query objects that are waiting on the newly materialized +definitions. Each query object maintains a count of the number of symbols that +it is still waiting on, and once this count reaches zero the query object calls +the query handler with a SymbolMap (a map of symbol names to addresses) +describing the result. If any symbol fails to materialize the query immediately +calls the query handler with an error.

+

The collected materialization units are sent to the ExecutionSession to be +dispatched, and the dispatch behavior can be set by the client. By default each +materializer is run on the calling thread. Clients are free to create new +threads to run materializers, or to send the work to a work queue for a thread +pool (this is what LLJIT/LLLazyJIT do).

+
+
+

Top Level APIs

+

Many of ORC’s top-level APIs are visible in the example above:

+
    +
  • ExecutionSession represents the JIT’d program and provides context for the +JIT: It contains the JITDylibs, error reporting mechanisms, and dispatches the +materializers.

  • +
  • JITDylibs provide the symbol tables.

  • +
  • Layers (ObjLinkingLayer and CXXLayer) are wrappers around compilers and +allow clients to add uncompiled program representations supported by those +compilers to JITDylibs.

  • +
+

Several other important APIs are used implicitly. JIT clients need not be aware +of them, but Layer authors will use them:

+
    +
  • MaterializationUnit - When XXXLayer::add is invoked it wraps the given +program representation (in this example, C++ source) in a MaterializationUnit, +which is then stored in the JITDylib. MaterializationUnits are responsible for +describing the definitions they provide, and for unwrapping the program +representation and passing it back to the layer when compilation is required +(this ownership shuffle makes writing thread-safe layers easier, since the +ownership of the program representation will be passed back on the stack, +rather than having to be fished out of a Layer member, which would require +synchronization).

  • +
  • MaterializationResponsibility - When a MaterializationUnit hands a program +representation back to the layer it comes with an associated +MaterializationResponsibility object. This object tracks the definitions +that must be materialized and provides a way to notify the JITDylib once they +are either successfully materialized or a failure occurs.

  • +
+
+
+

Absolute Symbols, Aliases, and Reexports

+

ORC makes it easy to define symbols with absolute addresses, or symbols that +are simply aliases of other symbols:

+
+

Absolute Symbols

+

Absolute symbols are symbols that map directly to addresses without requiring +further materialization, for example: “foo” = 0x1234. One use case for +absolute symbols is allowing resolution of process symbols. E.g.

+

With this mapping established code added to the JIT can refer to printf +symbolically rather than requiring the address of printf to be “baked in”. +This in turn allows cached versions of the JIT’d code (e.g. compiled objects) +to be re-used across JIT sessions as the JIT’d code no longer changes, only the +absolute symbol definition does.

+

For process and library symbols the DynamicLibrarySearchGenerator utility (See +How to Add Process and Library Symbols to JITDylibs) can be used to automatically build absolute +symbol mappings for you. However the absoluteSymbols function is still useful +for making non-global objects in your JIT visible to JIT’d code. For example, +imagine that your JIT standard library needs access to your JIT object to make +some calls. We could bake the address of your object into the library, but then +it would need to be recompiled for each session:

+

We can turn this into a symbolic reference in the JIT standard library:

+

And then make our JIT object visible to the JIT standard library with an +absolute symbol definition when the JIT is started:

+
+
+

Aliases and Reexports

+

Aliases and reexports allow you to define new symbols that map to existing +symbols. This can be useful for changing linkage relationships between symbols +across sessions without having to recompile code. For example, imagine that +JIT’d code has access to a log function, void log(const char*) for which +there are two implementations in the JIT standard library: log_fast and +log_detailed. Your JIT can choose which one of these definitions will be +used when the log symbol is referenced by setting up an alias at JIT startup +time:

+

The symbolAliases function allows you to define aliases within a single +JITDylib. The reexports function provides the same functionality, but +operates across JITDylib boundaries. E.g.

+

The reexports utility can be handy for composing a single JITDylib interface by +re-exporting symbols from several other JITDylibs.

+
+
+
+

Laziness

+

Laziness in ORC is provided by a utility called “lazy reexports”. A lazy +reexport is similar to a regular reexport or alias: It provides a new name for +an existing symbol. Unlike regular reexports however, lookups of lazy reexports +do not trigger immediate materialization of the reexported symbol. Instead, they +only trigger materialization of a function stub. This function stub is +initialized to point at a lazy call-through, which provides reentry into the +JIT. If the stub is called at runtime then the lazy call-through will look up +the reexported symbol (triggering materialization for it if necessary), update +the stub (to call directly to the reexported symbol on subsequent calls), and +then return via the reexported symbol. By re-using the existing symbol lookup +mechanism, lazy reexports inherit the same concurrency guarantees: calls to lazy +reexports can be made from multiple threads concurrently, and the reexported +symbol can be in any state of compilation (uncompiled, already in the process of +being compiled, or already compiled) and the call will succeed. This allows +laziness to be safely mixed with features like remote compilation, concurrent +compilation, concurrent JIT’d code, and speculative compilation.

+

There is one other key difference between regular reexports and lazy reexports +that some clients must be aware of: The address of a lazy reexport will be +different from the address of the reexported symbol (whereas a regular +reexport is guaranteed to have the same address as the reexported symbol). +Clients who care about pointer equality will generally want to use the address +of the reexport as the canonical address of the reexported symbol. This will +allow the address to be taken without forcing materialization of the reexport.

+

Usage example:

+

If JITDylib JD contains definitions for symbols foo_body and +bar_body, we can create lazy entry points Foo and Bar in JITDylib +JD2 by calling:

+
auto ReexportedFlags = JITSymbolFlags::Exported | JITSymbolFlags::Callable;
+JD2.define(
+  lazyReexports(CallThroughMgr, StubsMgr, JD,
+                SymbolAliasMap({
+                  { Mangle("foo"), { Mangle("foo_body"), ReexportedFlags } },
+                  { Mangle("bar"), { Mangle("bar_body"), ReexportedFlags } }
+                })));
+
+
+

A full example of how to use lazyReexports with the LLJIT class can be found at +llvm_project/llvm/examples/LLJITExamples/LLJITWithLazyReexports.

+
+ +
+

Transitioning from ORCv1 to ORCv2

+

Since LLVM 7.0, new ORC development work has focused on adding support for +concurrent JIT compilation. The new APIs (including new layer interfaces and +implementations, and new utilities) that support concurrency are collectively +referred to as ORCv2, and the original, non-concurrent layers and utilities +are now referred to as ORCv1.

+

The majority of the ORCv1 layers and utilities were renamed with a ‘Legacy’ +prefix in LLVM 8.0, and have deprecation warnings attached in LLVM 9.0. In LLVM +12.0 ORCv1 will be removed entirely.

+

Transitioning from ORCv1 to ORCv2 should be easy for most clients. Most of the +ORCv1 layers and utilities have ORCv2 counterparts 2 that can be directly +substituted. However there are some design differences between ORCv1 and ORCv2 +to be aware of:

+
+
    +
  1. ORCv2 fully adopts the JIT-as-linker model that began with MCJIT. Modules +(and other program representations, e.g. Object Files) are no longer added +directly to JIT classes or layers. Instead, they are added to JITDylib +instances by layers. The JITDylib determines where the definitions +reside, the layers determine how the definitions will be compiled. +Linkage relationships between JITDylibs determine how inter-module +references are resolved, and symbol resolvers are no longer used. See the +section Design Overview for more details.

    +

    Unless multiple JITDylibs are needed to model linkage relationships, ORCv1 +clients should place all code in a single JITDylib. +MCJIT clients should use LLJIT (see LLJIT and LLLazyJIT), and can place +code in LLJIT’s default created main JITDylib (See +LLJIT::getMainJITDylib()).

    +
  2. +
  3. All JIT stacks now need an ExecutionSession instance. ExecutionSession +manages the string pool, error reporting, synchronization, and symbol +lookup.

  4. +
  5. ORCv2 uses uniqued strings (SymbolStringPtr instances) rather than +string values in order to reduce memory overhead and improve lookup +performance. See the subsection How to manage symbol strings.

  6. +
  7. IR layers require ThreadSafeModule instances, rather than +std::unique_ptr<Module>s. ThreadSafeModule is a wrapper that ensures that +Modules that use the same LLVMContext are not accessed concurrently. +See How to use ThreadSafeModule and ThreadSafeContext.

  8. +
  9. Symbol lookup is no longer handled by layers. Instead, there is a +lookup method on JITDylib that takes a list of JITDylibs to scan.

    +
    ExecutionSession ES;
    +JITDylib &JD1 = ...;
    +JITDylib &JD2 = ...;
    +
    +auto Sym = ES.lookup({&JD1, &JD2}, ES.intern("_main"));
    +
    +
    +
  10. +
  11. Module removal is not yet supported. There is no equivalent of the +layer concept removeModule/removeObject methods. Work on resource tracking +and removal in ORCv2 is ongoing.

  12. +
+
+

For code examples and suggestions of how to use the ORCv2 APIs, please see +the section How-tos.

+
+
+

How-tos

+
+

How to manage symbol strings

+

Symbol strings in ORC are uniqued to improve lookup performance, reduce memory +overhead, and allow symbol names to function as efficient keys. To get the +unique SymbolStringPtr for a string value, call the +ExecutionSession::intern method:

+
+
ExecutionSession ES;
+/// ...
+auto MainSymbolName = ES.intern("main");
+
+
+
+

If you wish to perform lookup using the C/IR name of a symbol you will also +need to apply the platform linker-mangling before interning the string. On +Linux this mangling is a no-op, but on other platforms it usually involves +adding a prefix to the string (e.g. ‘_’ on Darwin). The mangling scheme is +based on the DataLayout for the target. Given a DataLayout and an +ExecutionSession, you can create a MangleAndInterner function object that +will perform both jobs for you:

+
+
ExecutionSession ES;
+const DataLayout &DL = ...;
+MangleAndInterner Mangle(ES, DL);
+
+// ...
+
+// Portable IR-symbol-name lookup:
+auto Sym = ES.lookup({&MainJD}, Mangle("main"));
+
+
+
+
+
+

How to create JITDylibs and set up linkage relationships

+

In ORC, all symbol definitions reside in JITDylibs. JITDylibs are created by +calling the ExecutionSession::createJITDylib method with a unique name:

+
+
ExecutionSession ES;
+auto &JD = ES.createJITDylib("libFoo.dylib");
+
+
+
+

The JITDylib is owned by the ExecutionSession instance and will be freed +when it is destroyed.

+
+
+

How to use ThreadSafeModule and ThreadSafeContext

+

ThreadSafeModule and ThreadSafeContext are wrappers around Modules and +LLVMContexts respectively. A ThreadSafeModule is a pair of a +std::unique_ptr<Module> and a (possibly shared) ThreadSafeContext value. A +ThreadSafeContext is a pair of a std::unique_ptr<LLVMContext> and a lock. +This design serves two purposes: providing a locking scheme and lifetime +management for LLVMContexts. The ThreadSafeContext may be locked to prevent +accidental concurrent access by two Modules that use the same LLVMContext. +The underlying LLVMContext is freed once all ThreadSafeContext values pointing +to it are destroyed, allowing the context memory to be reclaimed as soon as +the Modules referring to it are destroyed.

+

ThreadSafeContexts can be explicitly constructed from a +std::unique_ptr<LLVMContext>:

+
+
ThreadSafeContext TSCtx(std::make_unique<LLVMContext>());
+
+
+
+

ThreadSafeModules can be constructed from a pair of a std::unique_ptr<Module> +and a ThreadSafeContext value. ThreadSafeContext values may be shared between +multiple ThreadSafeModules:

+
+
ThreadSafeModule TSM1(
+  std::make_unique<Module>("M1", *TSCtx.getContext()), TSCtx);
+
+ThreadSafeModule TSM2(
+  std::make_unique<Module>("M2", *TSCtx.getContext()), TSCtx);
+
+
+
+

Before using a ThreadSafeContext, clients should ensure that either the context +is only accessible on the current thread, or that the context is locked. In the +example above (where the context is never locked) we rely on the fact that +TSM1, TSM2, and TSCtx are all created on one thread. If a context is +going to be shared between threads then it must be locked before accessing +or creating any Modules attached to it. E.g.

+
+
ThreadSafeContext TSCtx(std::make_unique<LLVMContext>());
+
+ThreadPool TP(NumThreads);
+JITStack J;
+
+for (auto &ModulePath : ModulePaths) {
+  TP.async(
+    [&]() {
+      auto Lock = TSCtx.getLock();
+      auto M = loadModuleOnContext(ModulePath, TSCtx.getContext());
+      J.addModule(ThreadSafeModule(std::move(M), TSCtx));
+    });
+}
+
+TP.wait();
+
+
+
+

To make exclusive access to Modules easier to manage the ThreadSafeModule class +provides a convenience function, withModuleDo, that implicitly (1) locks the +associated context, (2) runs a given function object, (3) unlocks the context, +and (4) returns the result generated by the function object. E.g.

+
+
ThreadSafeModule TSM = getModule(...);
+
+// Count the functions in the module:
+size_t NumFunctionsInModule =
+  TSM.withModuleDo(
+    [](Module &M) { // <- Context locked before entering lambda.
+      return M.size();
+    } // <- Context unlocked after leaving.
+  );
+
+
+
+

Clients wishing to maximize possibilities for concurrent compilation will want +to create every new ThreadSafeModule on a new ThreadSafeContext. For this +reason a convenience constructor for ThreadSafeModule is provided that implicitly +constructs a new ThreadSafeContext value from a std::unique_ptr<LLVMContext>:

+
+
// Maximize concurrency opportunities by loading every module on a
+// separate context.
+for (const auto &IRPath : IRPaths) {
+  auto Ctx = std::make_unique<LLVMContext>();
+  auto M = std::make_unique<Module>("M", *Ctx);
+  CompileLayer.add(MainJD, ThreadSafeModule(std::move(M), std::move(Ctx)));
+}
+
+
+
+

Clients who plan to run single-threaded may choose to save memory by loading +all modules on the same context:

+
+
// Save memory by using one context for all Modules:
+ThreadSafeContext TSCtx(std::make_unique<LLVMContext>());
+for (const auto &IRPath : IRPaths) {
+  ThreadSafeModule TSM(parsePath(IRPath, *TSCtx.getContext()), TSCtx);
+  CompileLayer.add(MainJD, std::move(TSM));
+}
+
+
+
+
+
+
+

How to Add Process and Library Symbols to the JITDylibs

+

JIT’d code typically needs access to symbols in the host program or in +supporting libraries. References to process symbols can be “baked in” to code +as it is compiled by turning external references into pre-resolved integer +constants, however this ties the JIT’d code to the current process’s virtual +memory layout (meaning that it can not be cached between runs) and makes +debugging lower level program representations difficult (as all external +references are opaque integer values). A better solution is to maintain symbolic +external references and let the JIT linker bind them for you at runtime. To +allow the JIT linker to find these external definitions their addresses must +be added to a JITDylib that the JIT’d definitions link against.

+

Adding definitions for external symbols could be done using the absoluteSymbols +function:

+
+
const DataLayout &DL = getDataLayout();
+MangleAndInterner Mangle(ES, DL);
+
+auto &JD = ES.createJITDylib("main");
+
+JD.define(
+  absoluteSymbols({
+    { Mangle("puts"), pointerToJITTargetAddress(&puts)},
+    { Mangle("gets"), pointerToJITTargetAddress(&gets)}
+  }));
+
+
+
+

Manually adding absolute symbols for a large or changing interface is cumbersome +however, so ORC provides an alternative to generate new definitions on demand: +definition generators. If a definition generator is attached to a JITDylib, +then any unsuccessful lookup on that JITDylib will fall back to calling the +definition generator, and the definition generator may choose to generate a new +definition for the missing symbols. Of particular use here is the +DynamicLibrarySearchGenerator utility. This can be used to reflect the whole +exported symbol set of the process or a specific dynamic library, or a subset +of either of these determined by a predicate.

+

For example, to load the whole interface of a runtime library:

+
+
const DataLayout &DL = getDataLayout();
+auto &JD = ES.createJITDylib("main");
+
+JD.addGenerator(DynamicLibrarySearchGenerator::Load("/path/to/lib",
+                                                    DL.getGlobalPrefix()));
+
+// IR added to JD can now link against all symbols exported by the library
+// at '/path/to/lib'.
+CompileLayer.add(JD, loadModule(...));
+
+
+
+

Or, to expose an allowed set of symbols from the main process:

+
+
const DataLayout &DL = getDataLayout();
+MangleAndInterner Mangle(ES, DL);
+
+auto &JD = ES.createJITDylib("main");
+
+DenseSet<SymbolStringPtr> AllowList({
+    Mangle("puts"),
+    Mangle("gets")
+  });
+
+// Use GetForCurrentProcess with a predicate function that checks the
+// allowed list.
+JD.addGenerator(
+  DynamicLibrarySearchGenerator::GetForCurrentProcess(
+    DL.getGlobalPrefix(),
+    [&](const SymbolStringPtr &S) { return AllowList.count(S); }));
+
+// IR added to JD can now link against any symbols exported by the process
+// and contained in the list.
+CompileLayer.add(JD, loadModule(...));
+
+
+
+
+
+

Roadmap

+

ORC is still undergoing active development. Some current and future works are +listed below.

+
+

Current Work

+
    +
  1. TargetProcessControl: Improvements to in-tree support for out-of-process +execution

    +

    The TargetProcessControl API provides various operations on the JIT +target process (the one which will execute the JIT’d code), including +memory allocation, memory writes, function execution, and process queries +(e.g. for the target triple). By targeting this API new components can be +developed which will work equally well for in-process and out-of-process +JITing.

    +
  2. +
  3. ORC RPC based TargetProcessControl implementation

    +

    An ORC RPC based implementation of the TargetProcessControl API is +currently under development to enable easy out-of-process JITing via +file descriptors / sockets.

    +
  4. +
  5. Core State Machine Cleanup

    +

    The core ORC state machine is currently implemented between JITDylib and +ExecutionSession. Methods are slowly being moved to ExecutionSession. This +will tidy up the code base, and also allow us to support asynchronous removal +of JITDylibs (in practice deleting an associated state object in +ExecutionSession and leaving the JITDylib instance in a defunct state until +all references to it have been released).

    +
  6. +
+
+
+

Near Future Work

+
    +
  1. ORC JIT Runtime Libraries

    +

    We need a runtime library for JIT’d code. This would include things like +TLS registration, reentry functions, registration code for language runtimes +(e.g. Objective C and Swift) and other JIT specific runtime code. This should +be built in a similar manner to compiler-rt (possibly even as part of it).

    +
  2. +
  3. Remote jit_dlopen / jit_dlclose

    +

    To more fully mimic the environment that static programs operate in we would +like JIT’d code to be able to “dlopen” and “dlclose” JITDylibs, running all of +their initializers/deinitializers on the current thread. This would require +support from the runtime library described above.

    +
  4. +
  5. Debugging support

    +

    ORC currently supports the GDBRegistrationListener API when using RuntimeDyld +as the underlying JIT linker. We will need a new solution for JITLink based +platforms.

    +
  6. +
+
+
+

Further Future Work

+
    +
  1. Speculative Compilation

    +

    ORC’s support for concurrent compilation allows us to easily enable +speculative JIT compilation: compilation of code that is not needed yet, +but which we have reason to believe will be needed in the future. This can be +used to hide compile latency and improve JIT throughput. A proof-of-concept +example of speculative compilation with ORC has already been developed (see +llvm/examples/SpeculativeJIT). Future work on this is likely to focus on +re-using and improving existing profiling support (currently used by PGO) to +feed speculation decisions, as well as built-in tools to simplify use of +speculative compilation.

    +
  2. +
+
+
1
+

Formats/architectures vary in terms of supported features. MachO and +ELF tend to have better support than COFF. Patches very welcome!

+
+
2
+

The LazyEmittingLayer, RemoteObjectClientLayer and +RemoteObjectServerLayer do not have counterparts in the new +system. In the case of LazyEmittingLayer it was simply no longer +needed: in ORCv2, deferring compilation until symbols are looked up is +the default. The removal of RemoteObjectClientLayer and +RemoteObjectServerLayer means that JIT stacks can no longer be split +across processes, however this functionality appears not to have been +used.

+
+
3
+

Weak definitions are currently handled correctly within dylibs, but if +multiple dylibs provide a weak definition of a symbol then each will end +up with its own definition (similar to how weak definitions are handled +in Windows DLLs). This will be fixed in the future.

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Packaging.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Packaging.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Packaging.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Packaging.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,214 @@ + + + + + + + + + Advice on Packaging LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Advice on Packaging LLVM

+ +
+

Overview

+

LLVM sets certain default configure options to make sure our developers don’t +break things for constrained platforms. These settings are not optimal for most +desktop systems, and we hope that packagers (e.g., Redhat, Debian, MacPorts, +etc.) will tweak them. This document lists settings we suggest you tweak.

+

LLVM’s API changes with each release, so users are likely to want, for example, +both LLVM-2.6 and LLVM-2.7 installed at the same time to support apps developed +against each.

+
+
+

Compile Flags

+

LLVM runs much more quickly when it’s optimized and assertions are removed. +However, such a build is currently incompatible with users who build without +defining NDEBUG, and the lack of assertions makes it hard to debug problems +in user code. We recommend allowing users to install both optimized and debug +versions of LLVM in parallel. The following configure flags are relevant:

+
+
--disable-assertions

Builds LLVM with NDEBUG defined. Changes the LLVM ABI. Also available +by setting DISABLE_ASSERTIONS=0|1 in make’s environment. This +defaults to enabled regardless of the optimization setting, but it slows +things down.

+
+
--enable-debug-symbols

Builds LLVM with -g. Also available by setting DEBUG_SYMBOLS=0|1 in +make’s environment. This defaults to disabled when optimizing, so you +should turn it back on to let users debug their programs.

+
+
--enable-optimized

(For git checkouts) Builds LLVM with -O2 and, by default, turns off +debug symbols. Also available by setting ENABLE_OPTIMIZED=0|1 in +make’s environment. This defaults to enabled when not in a +checkout.

+
+
+
+
+

C++ Features

+
+
RTTI

LLVM disables RTTI by default. Add REQUIRES_RTTI=1 to your environment +while running make to re-enable it. This will allow users to build with +RTTI enabled and still inherit from LLVM classes.

+
+
+
+
+

Shared Library

+

Configure with --enable-shared to build +libLLVM-<major>.<minor>.(so|dylib) and link the tools against it. This +saves lots of binary size at the cost of some startup time.

+
+
+

Dependencies

+
+
--enable-libffi

Depend on libffi to allow the LLVM +interpreter to call external functions.

+
+
+

--with-oprofile

+
+

Depend on libopagent (>=version 0.9.4) +to let the LLVM JIT tell oprofile about function addresses and line +numbers.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Passes.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Passes.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Passes.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Passes.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,1259 @@ + + + + + + + + + LLVM’s Analysis and Transform Passes — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM’s Analysis and Transform Passes

+
+ +
+
+

Introduction

+

This document serves as a high level summary of the optimization features that +LLVM provides. Optimizations are implemented as Passes that traverse some +portion of a program to either collect information or transform the program. +The table below divides the passes that LLVM provides into three categories. +Analysis passes compute information that other passes can use or for debugging +or program visualization purposes. Transform passes can use (or invalidate) +the analysis passes. Transform passes all mutate the program in some way. +Utility passes provide some utility but don’t otherwise fit categorization. +For example, passes to extract functions to bitcode or write a module to bitcode +are neither analysis nor transform passes. The table of contents above +provides a quick summary of each pass and links to the more complete pass +description later in the document.

+
+
+

Analysis Passes

+

This section describes the LLVM Analysis Passes.

+
+

-aa-eval: Exhaustive Alias Analysis Precision Evaluator

+

This is a simple N^2 alias analysis accuracy evaluator. Basically, for each +function in the program, it simply queries to see how the alias analysis +implementation answers alias queries between each pair of pointers in the +function.

+

This is inspired and adapted from code by: Naveen Neelakantam, Francesco +Spadini, and Wojciech Stryjewski.

+
+
+

-basic-aa: Basic Alias Analysis (stateless AA impl)

+

A basic alias analysis pass that implements identities (two different globals +cannot alias, etc), but does no stateful analysis.

+
+ +
+

-count-aa: Count Alias Analysis Query Responses

+

A pass which can be used to count how many alias queries are being made and how +the alias analysis implementation being used responds.

+
+
+

-da: Dependence Analysis

+

Dependence analysis framework, which is used to detect dependences in memory +accesses.

+
+
+

-debug-aa: AA use debugger

+

This simple pass checks alias analysis users to ensure that if they create a +new value, they do not query AA without informing it of the value. It acts as +a shim over any other AA pass you want.

+

Yes keeping track of every value in the program is expensive, but this is a +debugging pass.

+
+
+

-domfrontier: Dominance Frontier Construction

+

This pass is a simple dominator construction algorithm for finding forward +dominator frontiers.

+
+
+

-domtree: Dominator Tree Construction

+

This pass is a simple dominator construction algorithm for finding forward +dominators.

+
+
+

-dot-callgraph: Print Call Graph to “dot” file

+

This pass, only available in opt, prints the call graph into a .dot +graph. This graph can then be processed with the “dot” tool to convert it to +postscript or some other suitable format.

+
+
+

-dot-cfg: Print CFG of function to “dot” file

+

This pass, only available in opt, prints the control flow graph into a +.dot graph. This graph can then be processed with the dot tool +to convert it to postscript or some other suitable format. +Additionally the -cfg-func-name=<substring> option can be used to filter the +functions that are printed. All functions that contain the specified substring +will be printed.

+
+
+

-dot-cfg-only: Print CFG of function to “dot” file (with no function bodies)

+

This pass, only available in opt, prints the control flow graph into a +.dot graph, omitting the function bodies. This graph can then be processed +with the dot tool to convert it to postscript or some other suitable +format. +Additionally the -cfg-func-name=<substring> option can be used to filter the +functions that are printed. All functions that contain the specified substring +will be printed.

+
+
+

-dot-dom: Print dominance tree of function to “dot” file

+

This pass, only available in opt, prints the dominator tree into a .dot +graph. This graph can then be processed with the dot tool to +convert it to postscript or some other suitable format.

+
+
+

-dot-dom-only: Print dominance tree of function to “dot” file (with no function bodies)

+

This pass, only available in opt, prints the dominator tree into a .dot +graph, omitting the function bodies. This graph can then be processed with the +dot tool to convert it to postscript or some other suitable format.

+
+
+

-dot-postdom: Print postdominance tree of function to “dot” file

+

This pass, only available in opt, prints the post dominator tree into a +.dot graph. This graph can then be processed with the dot tool +to convert it to postscript or some other suitable format.

+
+
+

-dot-postdom-only: Print postdominance tree of function to “dot” file (with no function bodies)

+

This pass, only available in opt, prints the post dominator tree into a +.dot graph, omitting the function bodies. This graph can then be processed +with the dot tool to convert it to postscript or some other suitable +format.

+
+
+

-globalsmodref-aa: Simple mod/ref analysis for globals

+

This simple pass provides alias and mod/ref information for global values that +do not have their address taken, and keeps track of whether functions read or +write memory (are “pure”). For this simple (but very common) case, we can +provide pretty accurate and useful information.

+
+
+

-instcount: Counts the various types of Instructions

+

This pass collects the count of all instructions and reports them.

+
+
+

-intervals: Interval Partition Construction

+

This analysis calculates and represents the interval partition of a function, +or a preexisting interval partition.

+

In this way, the interval partition may be used to reduce a flow graph down to +its degenerate single node interval partition (unless it is irreducible).

+
+
+

-iv-users: Induction Variable Users

+

Bookkeeping for “interesting” users of expressions computed from induction +variables.

+
+
+

-lazy-value-info: Lazy Value Information Analysis

+

Interface for lazy computation of value constraint information.

+
+
+

-libcall-aa: LibCall Alias Analysis

+

LibCall Alias Analysis.

+
+
+

-lint: Statically lint-checks LLVM IR

+

This pass statically checks for common and easily-identified constructs which +produce undefined or likely unintended behavior in LLVM IR.

+

It is not a guarantee of correctness, in two ways. First, it isn’t +comprehensive. There are checks which could be done statically which are not +yet implemented. Some of these are indicated by TODO comments, but those +aren’t comprehensive either. Second, many conditions cannot be checked +statically. This pass does no dynamic instrumentation, so it can’t check for +all possible problems.

+

Another limitation is that it assumes all code will be executed. A store +through a null pointer in a basic block which is never reached is harmless, but +this pass will warn about it anyway.

+

Optimization passes may make conditions that this pass checks for more or less +obvious. If an optimization pass appears to be introducing a warning, it may +be that the optimization pass is merely exposing an existing condition in the +code.

+

This code may be run before instcombine. In many +cases, instcombine checks for the same kinds of things and turns instructions +with undefined behavior into unreachable (or equivalent). Because of this, +this pass makes some effort to look through bitcasts and so on.

+
+
+

-loops: Natural Loop Information

+

This analysis is used to identify natural loops and determine the loop depth of +various nodes of the CFG. Note that the loops identified may actually be +several natural loops that share the same header node… not just a single +natural loop.

+
+
+

-memdep: Memory Dependence Analysis

+

An analysis that determines, for a given memory operation, what preceding +memory operations it depends on. It builds on alias analysis information, and +tries to provide a lazy, caching interface to a common kind of alias +information query.

+
+
+

-module-debuginfo: Decodes module-level debug info

+

This pass decodes the debug info metadata in a module and prints in a +(sufficiently-prepared-) human-readable form.

+

For example, run this pass from opt along with the -analyze option, and +it’ll print to standard output.

+
+
+

-postdomfrontier: Post-Dominance Frontier Construction

+

This pass is a simple post-dominator construction algorithm for finding +post-dominator frontiers.

+
+
+

-postdomtree: Post-Dominator Tree Construction

+

This pass is a simple post-dominator construction algorithm for finding +post-dominators.

+
+ + + + + + + + + +
+

-regions: Detect single entry single exit regions

+

The RegionInfo pass detects single entry single exit regions in a function, +where a region is defined as any subgraph that is connected to the remaining +graph at only two spots. Furthermore, a hierarchical region tree is built.

+
+
+

-scalar-evolution: Scalar Evolution Analysis

+

The ScalarEvolution analysis can be used to analyze and categorize scalar +expressions in loops. It specializes in recognizing general induction +variables, representing them with the abstract and opaque SCEV class. +Given this analysis, trip counts of loops and other important properties can be +obtained.

+

This analysis is primarily useful for induction variable substitution and +strength reduction.

+
+
+

-scev-aa: ScalarEvolution-based Alias Analysis

+

Simple alias analysis implemented in terms of ScalarEvolution queries.

+

This differs from traditional loop dependence analysis in that it tests for +dependencies within a single iteration of a loop, rather than dependencies +between different iterations.

+

ScalarEvolution has a more complete understanding of pointer arithmetic +than BasicAliasAnalysis’ collection of ad-hoc analyses.

+
+
+

-stack-safety: Stack Safety Analysis

+

The StackSafety analysis can be used to determine if stack allocated +variables can be considered safe from memory access bugs.

+

This analysis’ primary purpose is to be used by sanitizers to avoid unnecessary +instrumentation of safe variables.

+
+
+

-targetdata: Target Data Layout

+

Provides other passes access to information on the size and alignment +required by the target ABI for various data types.

+
+
+
+

Transform Passes

+

This section describes the LLVM Transform Passes.

+
+

-adce: Aggressive Dead Code Elimination

+

ADCE aggressively tries to eliminate code. This pass is similar to DCE but it assumes that values are dead until proven otherwise. This +is similar to SCCP, except applied to the liveness of +values.

+
+
+

-always-inline: Inliner for always_inline functions

+

A custom inliner that handles only functions that are marked as “always +inline”.

+
+
+

-argpromotion: Promote ‘by reference’ arguments to scalars

+

This pass promotes “by reference” arguments to be “by value” arguments. In +practice, this means looking for internal functions that have pointer +arguments. If it can prove, through the use of alias analysis, that an +argument is only loaded, then it can pass the value into the function instead +of the address of the value. This can cause recursive simplification of code +and lead to the elimination of allocas (especially in C++ template code like +the STL).

+

This pass also handles aggregate arguments that are passed into a function, +scalarizing them if the elements of the aggregate are only loaded. Note that +it refuses to scalarize aggregates which would require passing in more than +three operands to the function, because passing thousands of operands for a +large array or structure is unprofitable!

+

Note that this transformation could also be done for arguments that are only +stored to (returning the value instead), but does not currently. This case +would be best handled when and if LLVM starts supporting multiple return values +from functions.

+
+
+

-bb-vectorize: Basic-Block Vectorization

+

This pass combines instructions inside basic blocks to form vector +instructions. It iterates over each basic block, attempting to pair compatible +instructions, repeating this process until no additional pairs are selected for +vectorization. When the outputs of some pair of compatible instructions are +used as inputs by some other pair of compatible instructions, those pairs are +part of a potential vectorization chain. Instruction pairs are only fused into +vector instructions when they are part of a chain longer than some threshold +length. Moreover, the pass attempts to find the best possible chain for each +pair of compatible instructions. These heuristics are intended to prevent +vectorization in cases where it would not yield a performance increase of the +resulting code.

+
+
+

-block-placement: Profile Guided Basic Block Placement

+

This pass is a very simple profile guided basic block placement algorithm. The +idea is to put frequently executed blocks together at the start of the function +and hopefully increase the number of fall-through conditional branches. If +there is no profile information for a particular function, this pass basically +orders blocks in depth-first order.

+
+
+

-break-crit-edges: Break critical edges in CFG

+

Break all of the critical edges in the CFG by inserting a dummy basic block. +It may be “required” by passes that cannot deal with critical edges. This +transformation obviously invalidates the CFG, but can update forward dominator +(set, immediate dominators, tree, and frontier) information.

+
+
+

-codegenprepare: Optimize for code generation

+

This pass munges the code in the input function to better prepare it for +SelectionDAG-based code generation. This works around limitations in its +basic-block-at-a-time approach. It should eventually be removed.

+
+
+

-constmerge: Merge Duplicate Global Constants

+

Merges duplicate global constants together into a single constant that is +shared. This is useful because some passes (e.g., TraceValues) insert a lot of +string constants into the program, regardless of whether or not an existing +string is available.

+
+
+

-dce: Dead Code Elimination

+

Dead code elimination is similar to dead instruction elimination, but it rechecks instructions that were used by removed +instructions to see if they are newly dead.

+
+
+

-deadargelim: Dead Argument Elimination

+

This pass deletes dead arguments from internal functions. Dead argument +elimination removes arguments which are directly dead, as well as arguments +only passed into function calls as dead arguments of other functions. This +pass also deletes dead return values in a similar way.

+

This pass is often useful as a cleanup pass to run after aggressive +interprocedural passes, which add possibly-dead arguments.

+
+
+

-deadtypeelim: Dead Type Elimination

+

This pass is used to clean up the output of GCC. It eliminates names for types +that are unused in the entire translation unit, using the find used types pass.

+
+
+

-die: Dead Instruction Elimination

+

Dead instruction elimination performs a single pass over the function, removing +instructions that are obviously dead.

+
+
+

-dse: Dead Store Elimination

+

A trivial dead store elimination that only considers basic-block local +redundant stores.

+
+
+

-function-attrs: Deduce function attributes

+

A simple interprocedural pass which walks the call-graph, looking for functions +which do not access or only read non-local memory, and marking them +readnone/readonly. In addition, it marks function arguments (of +pointer type) “nocapture” if a call to the function does not create any +copies of the pointer value that outlive the call. This more or less means +that the pointer is only dereferenced, and not returned from the function or +stored in a global. This pass is implemented as a bottom-up traversal of the +call-graph.

+
+
+

-globaldce: Dead Global Elimination

+

This transform is designed to eliminate unreachable internal globals from the +program. It uses an aggressive algorithm, searching out globals that are known +to be alive. After it finds all of the globals which are needed, it deletes +whatever is left over. This allows it to delete recursive chunks of the +program which are unreachable.

+
+
+

-globalopt: Global Variable Optimizer

+

This pass transforms simple global variables that never have their address +taken. If obviously true, it marks read/write globals as constant, deletes +variables only stored to, etc.

+
+
+

-gvn: Global Value Numbering

+

This pass performs global value numbering to eliminate fully and partially +redundant instructions. It also performs redundant load elimination.

+
+
+

-indvars: Canonicalize Induction Variables

+

This transformation analyzes and transforms the induction variables (and +computations derived from them) into simpler forms suitable for subsequent +analysis and transformation.

+

This transformation makes the following changes to each loop with an +identifiable induction variable:

+
    +
  • All loops are transformed to have a single canonical induction variable +which starts at zero and steps by one.

  • +
  • The canonical induction variable is guaranteed to be the first PHI node in +the loop header block.

  • +
  • Any pointer arithmetic recurrences are raised to use array subscripts.

  • +
+

If the trip count of a loop is computable, this pass also makes the following +changes:

+
    +
  • The exit condition for the loop is canonicalized to compare the induction +value against the exit value. This turns loops like:

    +
    for (i = 7; i*i < 1000; ++i)
    +
    +into
    +
    +
    +
    for (i = 0; i != 25; ++i)
    +
    +
    +
  • +
  • Any use outside of the loop of an expression derived from the indvar is +changed to compute the derived value outside of the loop, eliminating the +dependence on the exit value of the induction variable. If the only purpose +of the loop is to compute the exit value of some derived expression, this +transformation will make the loop dead.

  • +
+

This transformation should be followed by strength reduction after all of the +desired loop transformations have been performed. Additionally, on targets +where it is profitable, the loop could be transformed to count down to zero +(the “do loop” optimization).

+
+
+

-inline: Function Integration/Inlining

+

Bottom-up inlining of functions into callers.

+
+
+

-instcombine: Combine redundant instructions

+

Combine instructions to form fewer, simple instructions. This pass does not +modify the CFG. This pass is where algebraic simplification happens.

+

This pass combines things like:

+
%Y = add i32 %X, 1
+%Z = add i32 %Y, 1
+
+
+

into:

+
%Z = add i32 %X, 2
+
+
+

This is a simple worklist driven algorithm.

+

This pass guarantees that the following canonicalizations are performed on the +program:

+
    +
  1. If a binary operator has a constant operand, it is moved to the right-hand +side.

  2. +
  3. Bitwise operators with constant operands are always grouped so that shifts +are performed first, then ors, then ands, then xors.

  4. +
  5. Compare instructions are converted from <, >, ≤, or ≥ to += or ≠ if possible.

  6. +
  7. All cmp instructions on boolean values are replaced with logical +operations.

  8. +
  9. add X, X is represented as mul X, 2 ⇒ shl X, 1

  10. +
  11. Multiplies with a constant power-of-two argument are transformed into +shifts.

  12. +
  13. … etc.

  14. +
+

This pass can also simplify calls to specific well-known function calls (e.g. +runtime library functions). For example, a call exit(3) that occurs within +the main() function can be transformed into simply return 3. Whether or +not library calls are simplified is controlled by the +-function-attrs pass and LLVM’s knowledge of +library calls on different targets.

+
+
+

-aggressive-instcombine: Combine expression patterns

+

Combine expression patterns to form expressions with fewer, simple instructions. +This pass does not modify the CFG.

+

For example, this pass reduces the width of expressions post-dominated by TruncInst +to a smaller width when applicable.

+

It differs from the instcombine pass in that it contains pattern optimizations that +require higher complexity than O(1); thus, it should run fewer times than the +instcombine pass.

+
+
+

-internalize: Internalize Global Symbols

+

This pass loops over all of the functions in the input module, looking for a +main function. If a main function is found, all other functions and all global +variables with initializers are marked as internal.

+
+ +
+

-jump-threading: Jump Threading

+

Jump threading tries to find distinct threads of control flow running through a +basic block. This pass looks at blocks that have multiple predecessors and +multiple successors. If one or more of the predecessors of the block can be +proven to always cause a jump to one of the successors, we forward the edge +from the predecessor to the successor by duplicating the contents of this +block.

+

An example of when this can occur is code like this:

+
if () { ...
+  X = 4;
+}
+if (X < 3) {
+
+
+

In this case, the unconditional branch at the end of the first if can be +revectored to the false side of the second if.

+
+
+

-lcssa: Loop-Closed SSA Form Pass

+

This pass transforms loops by placing phi nodes at the end of the loops for all +values that are live across the loop boundary. For example, it turns the left +into the right code:

+
for (...)                for (...)
+    if (c)                   if (c)
+        X1 = ...                 X1 = ...
+    else                     else
+        X2 = ...                 X2 = ...
+    X3 = phi(X1, X2)         X3 = phi(X1, X2)
+... = X3 + 4              X4 = phi(X3)
+                            ... = X4 + 4
+
+
+

This is still valid LLVM; the extra phi nodes are purely redundant, and will be +trivially eliminated by InstCombine. The major benefit of this +transformation is that it makes many other loop optimizations, such as +LoopUnswitching, simpler. You can read more in the +loop terminology section for the LCSSA form.

+
+
+

-licm: Loop Invariant Code Motion

+

This pass performs loop invariant code motion, attempting to remove as much +code from the body of a loop as possible. It does this by either hoisting code +into the preheader block, or by sinking code to the exit blocks if it is safe. +This pass also promotes must-aliased memory locations in the loop to live in +registers, thus hoisting and sinking “invariant” loads and stores.

+

Hoisting operations out of loops is a canonicalization transform. It enables +and simplifies subsequent optimizations in the middle-end. Rematerialization +of hoisted instructions to reduce register pressure is the responsibility of +the back-end, which has more accurate information about register pressure and +also handles other optimizations than LICM that increase live-ranges.

+

This pass uses alias analysis for two purposes:

+
    +
  1. Moving loop invariant loads and calls out of loops. If we can determine +that a load or call inside of a loop never aliases anything stored to, we +can hoist it or sink it like any other instruction.

  2. +
  3. Scalar Promotion of Memory. If there is a store instruction inside of the +loop, we try to move the store to happen AFTER the loop instead of inside of +the loop. This can only happen if a few conditions are true:

    +
      +
    1. The pointer stored through is loop invariant.

    2. +
    3. There are no stores or loads in the loop which may alias the pointer. +There are no calls in the loop which mod/ref the pointer.

    4. +
    +

    If these conditions are true, we can promote the loads and stores in the +loop of the pointer to use a temporary alloca’d variable. We then use the +mem2reg functionality to construct the appropriate +SSA form for the variable.

    +
  4. +
+
+
+

-loop-deletion: Delete dead loops

+

This file implements the Dead Loop Deletion Pass. This pass is responsible for +eliminating loops with non-infinite computable trip counts that have no side +effects or volatile instructions, and do not contribute to the computation of +the function’s return value.

+
+
+

-loop-extract: Extract loops into new functions

+

A pass wrapper around the ExtractLoop() scalar transformation to extract +each top-level loop into its own new function. If the loop is the only loop +in a given function, it is not touched. This is a pass most useful for +debugging via bugpoint.

+
+
+

-loop-extract-single: Extract at most one loop into a new function

+

Similar to Extract loops into new functions, this +pass extracts one natural loop from the program into a function if it can. +This is used by bugpoint.

+
+
+

-loop-reduce: Loop Strength Reduction

+

This pass performs a strength reduction on array references inside loops that +have as one or more of their components the loop induction variable. This is +accomplished by creating a new value to hold the initial value of the array +access for the first iteration, and then creating a new GEP instruction in the +loop to increment the value by the appropriate amount.

+
+
+

-loop-rotate: Rotate Loops

+

A simple loop rotation transformation. A summary of it can be found in +Loop Terminology for Rotated Loops.

+
+
+

-loop-simplify: Canonicalize natural loops

+

This pass performs several transformations to transform natural loops into a +simpler form, which makes subsequent analyses and transformations simpler and +more effective. A summary of it can be found in +Loop Terminology, Loop Simplify Form.

+

Loop pre-header insertion guarantees that there is a single, non-critical entry +edge from outside of the loop to the loop header. This simplifies a number of +analyses and transformations, such as LICM.

+

Loop exit-block insertion guarantees that all exit blocks from the loop (blocks +which are outside of the loop that have predecessors inside of the loop) only +have predecessors from inside of the loop (and are thus dominated by the loop +header). This simplifies transformations such as store-sinking that are built +into LICM.

+

This pass also guarantees that loops will have exactly one backedge.

+

Note that the simplifycfg pass will clean up blocks +which are split out but end up being unnecessary, so usage of this pass should +not pessimize generated code.

+

This pass obviously modifies the CFG, but updates loop information and +dominator information.

+
+
+

-loop-unroll: Unroll loops

+

This pass implements a simple loop unroller. It works best when loops have +been canonicalized by the indvars pass, allowing it to +determine the trip counts of loops easily.

+
+
+

-loop-unroll-and-jam: Unroll and Jam loops

+

This pass implements a simple unroll and jam classical loop optimisation pass. +It transforms loops from:

+
for i.. i+= 1              for i.. i+= 4
+  for j..                    for j..
+    code(i, j)                 code(i, j)
+                               code(i+1, j)
+                               code(i+2, j)
+                               code(i+3, j)
+                           remainder loop
+
+
+

Which can be seen as unrolling the outer loop and “jamming” (fusing) the inner +loops into one. When variables or loads can be shared in the new inner loop, this +can lead to significant performance improvements. It uses +Dependence Analysis for proving the transformations are safe.

+
+
+

-loop-unswitch: Unswitch loops

+

This pass transforms loops that contain branches on loop-invariant conditions +to have multiple loops. For example, it turns the left into the right code:

+
for (...)                  if (lic)
+    A                          for (...)
+    if (lic)                       A; B; C
+        B                  else
+    C                          for (...)
+                                   A; C
+
+
+

This can increase the size of the code exponentially (doubling it every time a +loop is unswitched) so we only unswitch if the resultant code will be smaller +than a threshold.

+

This pass expects LICM to be run before it to hoist +invariant conditions out of the loop, to make the unswitching opportunity +obvious.

+
+
+

-loweratomic: Lower atomic intrinsics to non-atomic form

+

This pass lowers atomic intrinsics to non-atomic form for use in a known +non-preemptible environment.

+

The pass does not verify that the environment is non-preemptible (in general +this would require knowledge of the entire call graph of the program including +any libraries which may not be available in bitcode form); it simply lowers +every atomic intrinsic.

+
+
+

-lowerinvoke: Lower invokes to calls, for unwindless code generators

+

This transformation is designed for use by code generators which do not yet +support stack unwinding. This pass converts invoke instructions to +call instructions, so that any exception-handling landingpad blocks +become dead code (which can be removed by running the -simplifycfg pass +afterwards).

+
+
+

-lowerswitch: Lower SwitchInsts to branches

+

Rewrites switch instructions with a sequence of branches, which allows targets +to get away with not implementing the switch instruction until it is +convenient.

+
+
+

-mem2reg: Promote Memory to Register

+

This file promotes memory references to be register references. It promotes +alloca instructions which only have loads and stores as uses. An alloca is +transformed by using dominator frontiers to place phi nodes, then traversing +the function in depth-first order to rewrite loads and stores as appropriate. +This is just the standard SSA construction algorithm to construct “pruned” SSA +form.

+
+
+

-memcpyopt: MemCpy Optimization

+

This pass performs various transformations related to eliminating memcpy +calls, or transforming sets of stores into memsets.

+
+
+

-mergefunc: Merge Functions

+

This pass looks for equivalent functions that are mergeable and folds them.

+

A total ordering is introduced among the set of functions: we define a comparison +that answers, for every two functions, which of them is greater. This allows +functions to be arranged in a binary tree.

+

For every new function we check for an equivalent in the tree.

+

If an equivalent exists, we fold such functions. If both functions are overridable, +we move the functionality into a new internal function and leave two +overridable thunks to it.

+

If there is no equivalent, then we add this function to the tree.

+

The lookup routine has O(log(n)) complexity, while the whole merging process has +complexity of O(n*log(n)).

+

Read +this +article for more details.

+
+
+

-mergereturn: Unify function exit nodes

+

Ensure that functions have at most one ret instruction in them. +Additionally, it keeps track of which node is the new exit node of the CFG.

+
+
+

-partial-inliner: Partial Inliner

+

This pass performs partial inlining, typically by inlining an if statement +that surrounds the body of the function.

+
+
+

-prune-eh: Remove unused exception handling info

+

This file implements a simple interprocedural pass which walks the call-graph, +turning invoke instructions into call instructions if and only if the callee +cannot throw an exception. It implements this as a bottom-up traversal of the +call-graph.

+
+
+

-reassociate: Reassociate expressions

+

This pass reassociates commutative expressions in an order that is designed to +promote better constant propagation, GCSE, LICM, PRE, etc.

+

For example: 4 + (x + 5) ⇒ x + (4 + 5)

+

In the implementation of this algorithm, constants are assigned rank = 0, +function arguments are rank = 1, and other values are assigned ranks +corresponding to the reverse post order traversal of current function (starting +at 2), which effectively gives values in deep loops higher rank than values not +in loops.

+
+
+

-rel-lookup-table-converter: Relative lookup table converter

+

This pass converts lookup tables to PIC-friendly relative lookup tables.

+
+
+

-reg2mem: Demote all values to stack slots

+

This file demotes all registers to memory references. It is intended to be the +inverse of mem2reg. By converting to load +instructions, the only values live across basic blocks are alloca +instructions and load instructions before phi nodes. It is intended +that this should make CFG hacking much easier. To make later hacking easier, +the entry block is split into two, such that all introduced alloca +instructions (and nothing else) are in the entry block.

+
+
+

-sroa: Scalar Replacement of Aggregates

+

The well-known scalar replacement of aggregates transformation. This transform +breaks up alloca instructions of aggregate type (structure or array) into +individual alloca instructions for each member if possible. Then, if +possible, it transforms the individual alloca instructions into nice clean +scalar SSA form.

+
+
+

-sccp: Sparse Conditional Constant Propagation

+

Sparse conditional constant propagation and merging, which can be summarized +as:

+
    +
  • Assumes values are constant unless proven otherwise

  • +
  • Assumes BasicBlocks are dead unless proven otherwise

  • +
  • Proves values to be constant, and replaces them with constants

  • +
  • Proves conditional branches to be unconditional

  • +
+

Note that this pass has a habit of making definitions be dead. It is a good +idea to run a DCE pass sometime after running this pass.

+
+
+

-simplifycfg: Simplify the CFG

+

Performs dead code elimination and basic block merging. Specifically:

+
    +
  • Removes basic blocks with no predecessors.

  • +
  • Merges a basic block into its predecessor if there is only one and the +predecessor only has one successor.

  • +
  • Eliminates PHI nodes for basic blocks with a single predecessor.

  • +
  • Eliminates a basic block that only contains an unconditional branch.

  • +
+
+
+

-sink: Code sinking

+

This pass moves instructions into successor blocks, when possible, so that they +aren’t executed on paths where their results aren’t needed.

+
+
+

-strip: Strip all symbols from a module

+

Performs code stripping. This transformation can delete:

+
    +
  • names for virtual registers

  • +
  • symbols for internal globals and functions

  • +
  • debug information

  • +
+

Note that this transformation makes code much less readable, so it should only +be used in situations where the strip utility would be used, such as reducing +code size or making it harder to reverse engineer code.

+
+
+

-strip-dead-debug-info: Strip debug info for unused symbols

+

Performs code stripping. This transformation can delete:

+
    +
  • names for virtual registers

  • +
  • symbols for internal globals and functions

  • +
  • debug information

  • +
+

Note that this transformation makes code much less readable, so it should only +be used in situations where the strip utility would be used, such as reducing +code size or making it harder to reverse engineer code.

+
+
+

-strip-dead-prototypes: Strip Unused Function Prototypes

+

This pass loops over all of the functions in the input module, looking for dead +declarations and removes them. Dead declarations are declarations of functions +for which no implementation is available (i.e., declarations for unused library +functions).

+
+
+

-strip-debug-declare: Strip all llvm.dbg.declare intrinsics

+

This pass implements code stripping. Specifically, it can delete:

+
    +
  1. names for virtual registers

  2. +
  3. symbols for internal globals and functions

  4. +
  5. debug information

  6. +
+

Note that this transformation makes code much less readable, so it should only +be used in situations where the ‘strip’ utility would be used, such as reducing +code size or making it harder to reverse engineer code.

+
+
+

-strip-nondebug: Strip all symbols, except dbg symbols, from a module

+

This pass implements code stripping. Specifically, it can delete:

+
    +
  1. names for virtual registers

  2. +
  3. symbols for internal globals and functions

  4. +
  5. debug information

  6. +
+

Note that this transformation makes code much less readable, so it should only +be used in situations where the ‘strip’ utility would be used, such as reducing +code size or making it harder to reverse engineer code.

+
+
+

-tailcallelim: Tail Call Elimination

+

This file transforms calls of the current function (self recursion) followed by +a return instruction with a branch to the entry of the function, creating a +loop. This pass also implements the following extensions to the basic +algorithm:

+
    +
  1. Trivial instructions between the call and return do not prevent the +transformation from taking place, though currently the analysis cannot +support moving any really useful instructions (only dead ones).

  2. +
  3. This pass transforms functions that are prevented from being tail recursive +by an associative expression to use an accumulator variable, thus compiling +the typical naive factorial or fib implementation into efficient code.

  4. +
  5. TRE is performed if the function returns void, if the return returns the +result returned by the call, or if the function returns a run-time constant +on all exits from the function. It is possible, though unlikely, that the +return returns something else (like constant 0), and can still be TRE’d. It +can be TRE’d if all other return instructions in the function return the +exact same value.

  6. +
  7. If it can prove that callees do not access their caller stack frame, they +are marked as eligible for tail call elimination (by the code generator).

  8. +
+
+
+
+

Utility Passes

+

This section describes the LLVM Utility Passes.

+
+

-deadarghaX0r: Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)

+

Same as dead argument elimination, but deletes arguments to functions which are +external. This is only for use by bugpoint.

+
+
+

-extract-blocks: Extract Basic Blocks From Module (for bugpoint use)

+

This pass is used by bugpoint to extract all blocks from the module into their +own functions.

+
+
+

-instnamer: Assign names to anonymous instructions

+

This is a little utility pass that gives instructions names. This is mostly +useful when diffing the effect of an optimization because deleting an unnamed +instruction can change all other instruction numbering, making the diff very +noisy.

+
+
+

-verify: Module Verifier

+

Verifies LLVM IR code. This is useful to run after an optimization which is +undergoing testing. Note that llvm-as verifies its input before emitting +bitcode, and also that malformed bitcode is likely to make LLVM crash. All +language front-ends are therefore encouraged to verify their output before +performing optimizing transformations.

+
    +
  1. Both of a binary operator’s parameters are of the same type.

  2. +
  3. Verify that the indices of mem access instructions match other operands.

  4. +
  5. Verify that arithmetic and other things are only performed on first-class +types. Verify, for example, that shifts and logicals only happen on integrals.

  6. +
  7. All of the constants in a switch statement are of the correct type.

  8. +
  9. The code is in valid SSA form.

  10. +
  11. It is illegal to put a label into any other type (like a structure) or to +return one.

  12. +
  13. Only phi nodes can be self referential: %x = add i32 %x, %x is +invalid.

  14. +
  15. PHI nodes must have an entry for each predecessor, with no extras.

  16. +
  17. PHI nodes must be the first thing in a basic block, all grouped together.

  18. +
  19. PHI nodes must have at least one entry.

  20. +
  21. All basic blocks should only end with terminator insts, not contain them.

  22. +
  23. The entry node to a function must not have predecessors.

  24. +
  25. All Instructions must be embedded into a basic block.

  26. +
  27. Functions cannot take a void-typed parameter.

  28. +
  29. Verify that a function’s argument list agrees with its declared type.

  30. +
  31. It is illegal to specify a name for a void value.

  32. +
  33. It is illegal to have an internal global value with no initializer.

  34. +
  35. It is illegal to have a ret instruction that returns a value that does +not agree with the function return value type.

  36. +
  37. Function call argument types match the function prototype.

  38. +
  39. All other things that are tested by asserts spread about the code.

  40. +
+

Note that this does not provide full security verification (like Java), but +instead just tries to ensure that code is well-formed.

+
+
+

-view-cfg: View CFG of function

+

Displays the control flow graph using the GraphViz tool. +Additionally the -cfg-func-name=<substring> option can be used to filter the +functions that are displayed. All functions that contain the specified substring +will be displayed.

+
+
+

-view-cfg-only: View CFG of function (with no function bodies)

+

Displays the control flow graph using the GraphViz tool, but omitting function +bodies. +Additionally the -cfg-func-name=<substring> option can be used to filter the +functions that are displayed. All functions that contain the specified substring +will be displayed.

+
+
+

-view-dom: View dominance tree of function

+

Displays the dominator tree using the GraphViz tool.

+
+
+

-view-dom-only: View dominance tree of function (with no function bodies)

+

Displays the dominator tree using the GraphViz tool, but omitting function +bodies.

+
+
+

-view-postdom: View postdominance tree of function

+

Displays the post dominator tree using the GraphViz tool.

+
+
+

-view-postdom-only: View postdominance tree of function (with no function bodies)

+

Displays the post dominator tree using the GraphViz tool, but omitting function +bodies.

+
+
+

-transform-warning: Report missed forced transformations

+

Emits warnings about not yet applied forced transformations (e.g. from +#pragma omp simd).

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/CodeViewSymbols.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/CodeViewSymbols.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/CodeViewSymbols.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/CodeViewSymbols.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,470 @@ + + + + + + + + + CodeView Symbol Records — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

CodeView Symbol Records

+ +
+

Introduction

+

This document describes the usage and serialization format of the various +CodeView symbol records that LLVM understands. Like +CodeView Type Records, we describe only the important +types which are generated by modern C++ toolchains.

+
+
+

Record Categories

+

Symbol records share one major similarity with type records: +They start with the same record prefix, which we will not describe +again (refer to the previous link for a description). As a result of this, a sequence +of symbol records can be processed with largely the same code as that which processes +type records. There are several important differences between symbol and type records:

+
    +
  • Symbol records only appear in The PDB Public Symbol Stream, The PDB Global Symbol Stream, and +Module Info Streams.

  • +
  • Type records only appear in the TPI & IPI streams.

  • +
  • While types are referenced from other CodeView records via type indices, +symbol records are referenced by the byte offset of the record in the stream that it appears +in.

  • +
  • Types can reference types (via type indices), and symbols can reference both types (via type +indices) and symbols (via offsets), but types can never reference symbols.

  • +
  • There is no notion of Leaf Records and Member Records +as there are with types. Every symbol record describes its own length.

  • +
  • Certain special symbol records begin a “scope”. For these records, all following records +up until the next S_END record are “children” of this symbol record. For example, +given a symbol record which describes a certain function, all local variables of this +function would appear following the function up until the corresponding S_END record.

  • +
+

Finally, there are three general categories of symbol record, grouped by where they are legal +to appear in a PDB file. Public Symbols (which appear only in the +publics stream), Global Symbols (which appear only in the +globals stream) and module symbols (which appear in the +module info stream).

+
+

Public Symbols

+

Public symbols are the CodeView equivalent of DWARF .debug_pubnames. There +is one public symbol record for every function or variable in the program that +has a mangled name. The Publics Stream, which contains these +records, additionally contains a hash table that allows one to quickly locate a +record by mangled name.

+
+

S_PUB32 (0x110e)

+

There is only one type of public symbol, an S_PUB32 which describes a mangled +name, a flag indicating what kind of symbol it is (e.g. function, variable), and +the symbol’s address. The Section Map Substream of the +DBI Stream can be consulted to determine what module this address +corresponds to, and from there that module’s module debug stream +can be consulted to locate full information for the symbol with the given address.

+
+
+
+

Global Symbols

+

While there is one public symbol for every symbol in the +program with external linkage, there is one global symbol for every symbol in the +program with linkage (including internal linkage). As a result, global symbols do +not describe a mangled name or an address, since symbols with internal linkage +need not have any mangling at all, and also may not have an address. Thus, all +global symbols simply refer directly to the full symbol record via a module/offset +combination.

+

Similarly to public symbols, all global symbols are contained +in a single Globals Stream, which contains a hash table mapping +fully qualified name to the corresponding record in the globals stream (which as +mentioned, then contains information allowing one to locate the full record in the +corresponding module symbol stream).

+

Note that a consequence and limitation of this design is that program-wide lookup +by anything other than an exact textually matching fully-qualified name of whatever +the compiler decided to emit is impractical. This differs from DWARF, where even +though we don’t necessarily have O(1) lookup by basename within a given scope (including +O(1) scope, we at least have O(n) access within a given scope).

+
+

Important

+

Program-wide lookup of names by anything other than an exact textually matching fully +qualified name is not possible.

+
+ + + + + +
+ + +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/CodeViewTypes.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/CodeViewTypes.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/CodeViewTypes.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/CodeViewTypes.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,434 @@ + + + + + + + + + CodeView Type Records — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

CodeView Type Records

+ +
+

Introduction

+

This document describes the usage and serialization format of the various +CodeView type records that LLVM understands. This document does not describe +every single CodeView type record that is defined. In some cases, this is +because the records are clearly deprecated and can only appear in very old +software (e.g. the 16-bit types). In other cases, it is because the records +have never been observed in practice. This could be because they are only +generated for non-C++ code (e.g. Visual Basic, C#), or because they have been +made obsolete by newer records, or any number of other reasons. However, the +records we describe here should cover 99% of type records that one can expect +to encounter when dealing with modern C++ toolchains.

+
+
+

Record Categories

+

We can think of a sequence of CodeView type records as an array of variable length +leaf records. Each such record describes its own length as part of a fixed-size +header, as well as the kind of record it is. Leaf records are either padded to 4 +bytes (if this type stream appears in a TPI/IPI stream of a PDB) or not padded at +all (if this type stream appears in the .debug$T section of an object file). +Padding is implemented by inserting a decreasing sequence of padding records +that terminates with LF_PAD0.

+

The final category of record is a member record. One particular leaf type – +LF_FIELDLIST – contains a series of embedded records. While the outer +LF_FIELDLIST describes its length (like any other leaf record), the embedded +records – called member records – do not.

+
+

Leaf Records

+

All leaf records begin with the following 4 byte prefix:

+
struct RecordHeader {
+  uint16_t RecordLen;  // Record length, not including this 2 byte field.
+  uint16_t RecordKind; // Record kind enum.
+};
+
+
+
+

LF_POINTER (0x1002)

+

Usage: Describes a pointer to another type.

+

Layout:

+
.--------------------.-- +0
+|    Referent Type   |
+.--------------------.-- +4
+|     Attributes     |
+.--------------------.-- +8
+|  Member Ptr Info   |       Only present if |Attributes| indicates this is a member pointer.
+.--------------------.-- +E
+
+
+

Attributes is a bitfield with the following layout:

+
 .-----------------------------------------------------------------------------------------------------.
+ |     Unused                   |  Flags  |       Size       |   Modifiers   |  Mode   |      Kind     |
+ .-----------------------------------------------------------------------------------------------------.
+ |                              |         |                  |               |         |               |
+0x20                          +0x16     +0x13               +0xD            +0x8      +0x5            +0x0
+
+
+

where the various fields are defined by the following enums:

+
enum class PointerKind : uint8_t {
+  Near16 = 0x00,                // 16 bit pointer
+  Far16 = 0x01,                 // 16:16 far pointer
+  Huge16 = 0x02,                // 16:16 huge pointer
+  BasedOnSegment = 0x03,        // based on segment
+  BasedOnValue = 0x04,          // based on value of base
+  BasedOnSegmentValue = 0x05,   // based on segment value of base
+  BasedOnAddress = 0x06,        // based on address of base
+  BasedOnSegmentAddress = 0x07, // based on segment address of base
+  BasedOnType = 0x08,           // based on type
+  BasedOnSelf = 0x09,           // based on self
+  Near32 = 0x0a,                // 32 bit pointer
+  Far32 = 0x0b,                 // 16:32 pointer
+  Near64 = 0x0c                 // 64 bit pointer
+};
+enum class PointerMode : uint8_t {
+  Pointer = 0x00,                 // "normal" pointer
+  LValueReference = 0x01,         // "old" reference
+  PointerToDataMember = 0x02,     // pointer to data member
+  PointerToMemberFunction = 0x03, // pointer to member function
+  RValueReference = 0x04          // r-value reference
+};
+enum class PointerModifiers : uint8_t {
+  None = 0x00,                    // "normal" pointer
+  Flat32 = 0x01,                  // "flat" pointer
+  Volatile = 0x02,                // pointer is marked volatile
+  Const = 0x04,                   // pointer is marked const
+  Unaligned = 0x08,               // pointer is marked unaligned
+  Restrict = 0x10,                // pointer is marked restrict
+};
+enum class PointerFlags : uint8_t {
+  WinRTSmartPointer = 0x01,       // pointer is a WinRT smart pointer
+  LValueRefThisPointer = 0x02,    // pointer is a 'this' pointer of a member function with ref qualifier (e.g. void X::foo() &)
+  RValueRefThisPointer = 0x04     // pointer is a 'this' pointer of a member function with ref qualifier (e.g. void X::foo() &&)
+};
+
+
+

The Size field of the Attributes bitmask is a 1-byte value indicating the +pointer size. For example, a void* would have a size of either 4 or 8 depending +on the target architecture. On the other hand, if Mode indicates that this is +a pointer to member function or pointer to data member, then the size can be any +implementation defined number.

+

The Member Ptr Info field of the LF_POINTER record is only present if the +attributes indicate that this is a pointer to member.

+

Note that “plain” pointers to primitive types are not represented by LF_POINTER +records, they are indicated by special reserved TypeIndex values.

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/DbiStream.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/DbiStream.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/DbiStream.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/DbiStream.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,543 @@ + + + + + + + + + The PDB DBI (Debug Info) Stream — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The PDB DBI (Debug Info) Stream

+ +
+

Introduction

+

The PDB DBI Stream (Index 3) is one of the largest and most important streams +in a PDB file. It contains information about how the program was compiled, +(e.g. compilation flags, etc), the compilands (e.g. object files) that +were used to link together the program, the source files which were used +to build the program, as well as references to other streams that contain more +detailed information about each compiland, such as the CodeView symbol records +contained within each compiland and the source and line information for +functions and other symbols within each compiland.

+
+
+

Stream Header

+

At offset 0 of the DBI Stream is a header with the following layout:

+
struct DbiStreamHeader {
+  int32_t VersionSignature;
+  uint32_t VersionHeader;
+  uint32_t Age;
+  uint16_t GlobalStreamIndex;
+  uint16_t BuildNumber;
+  uint16_t PublicStreamIndex;
+  uint16_t PdbDllVersion;
+  uint16_t SymRecordStream;
+  uint16_t PdbDllRbld;
+  int32_t ModInfoSize;
+  int32_t SectionContributionSize;
+  int32_t SectionMapSize;
+  int32_t SourceInfoSize;
+  int32_t TypeServerMapSize;
+  uint32_t MFCTypeServerIndex;
+  int32_t OptionalDbgHeaderSize;
+  int32_t ECSubstreamSize;
+  uint16_t Flags;
+  uint16_t Machine;
+  uint32_t Padding;
+};
+
+
+
    +
  • VersionSignature - Unknown meaning. Appears to always be -1.

  • +
  • VersionHeader - A value from the following enum.

  • +
+
enum class DbiStreamVersion : uint32_t {
+  VC41 = 930803,
+  V50 = 19960307,
+  V60 = 19970606,
+  V70 = 19990903,
+  V110 = 20091201
+};
+
+
+

Similar to the PDB Stream, this value always appears to be +V70, and it is not clear what the other values are for.

+
    +
  • Age - The number of times the PDB has been written. Equal to the same +field from the PDB Stream header.

  • +
  • GlobalStreamIndex - The index of the Global Symbol Stream, +which contains CodeView symbol records for all global symbols. Actual records +are stored in the symbol record stream, and are referenced from this stream.

  • +
  • BuildNumber - A bitfield containing values representing the major and minor +version number of the toolchain (e.g. 12.0 for MSVC 2013) used to build the +program, with the following layout:

  • +
+
uint16_t MinorVersion : 8;
+uint16_t MajorVersion : 7;
+uint16_t NewVersionFormat : 1;
+
+
+

For the purposes of LLVM, we assume NewVersionFormat to be always true. +If it is false, the layout above does not apply and the reader should consult +the Microsoft Source Code for +further guidance.

+
    +
  • PublicStreamIndex - The index of the Public Symbol Stream, +which contains CodeView symbol records for all public symbols. Actual records +are stored in the symbol record stream, and are referenced from this stream.

  • +
  • PdbDllVersion - The version number of mspdbXXXX.dll used to produce this +PDB. Note this obviously does not apply for LLVM as LLVM does not use mspdb.dll.

  • +
  • SymRecordStream - The stream containing all CodeView symbol records used +by the program. This is used for deduplication, so that many different +compilands can refer to the same symbols without having to include the full record +content inside of each module stream.

  • +
  • PdbDllRbld - Unknown

  • +
  • MFCTypeServerIndex - The index of the MFC type server in the +Type Server Map Substream.

  • +
  • Flags - A bitfield with the following layout, containing various +information about how the program was built:

  • +
+
uint16_t WasIncrementallyLinked : 1;
+uint16_t ArePrivateSymbolsStripped : 1;
+uint16_t HasConflictingTypes : 1;
+uint16_t Reserved : 13;
+
+
+

The only one of these that is not self-explanatory is HasConflictingTypes. +Although undocumented, link.exe contains a hidden flag /DEBUG:CTYPES. +If it is passed to link.exe, this field will be set. Otherwise it will +not be set. It is unclear what this flag does, although it seems to have +subtle implications on the algorithm used to look up type records.

+
    +
  • Machine - A value from the CV_CPU_TYPE_e +enumeration. Common values are 0x8664 (x86-64) and 0x14C (x86).

  • +
+

Immediately after the fixed-size DBI Stream header are 7 variable-length +substreams. The following 7 fields of the DBI Stream header specify the +number of bytes of the corresponding substream. Each substream’s contents will +be described in detail below. The length of the entire +DBI Stream should equal 64 (the length of the header above) plus the value +of each of the following 7 fields.

+ +
+
+

Substreams

+
+

Module Info Substream

+

Begins at offset 0 immediately after the header. The +module info substream is an array of variable-length records, each one +describing a single module (e.g. object file) linked into the program. Each +record in the array has the format:

+
struct ModInfo {
+  uint32_t Unused1;
+  struct SectionContribEntry {
+    uint16_t Section;
+    char Padding1[2];
+    int32_t Offset;
+    int32_t Size;
+    uint32_t Characteristics;
+    uint16_t ModuleIndex;
+    char Padding2[2];
+    uint32_t DataCrc;
+    uint32_t RelocCrc;
+  } SectionContr;
+  uint16_t Flags;
+  uint16_t ModuleSymStream;
+  uint32_t SymByteSize;
+  uint32_t C11ByteSize;
+  uint32_t C13ByteSize;
+  uint16_t SourceFileCount;
+  char Padding[2];
+  uint32_t Unused2;
+  uint32_t SourceFileNameIndex;
+  uint32_t PdbFilePathNameIndex;
+  char ModuleName[];
+  char ObjFileName[];
+};
+
+
+
    +
  • SectionContr - Describes the properties of the section in the final binary +which contains the code and data from this module.

    +

    SectionContr.Characteristics corresponds to the Characteristics field +of the IMAGE_SECTION_HEADER +structure.

    +
  • +
  • Flags - A bitfield with the following format:

  • +
+
// ``true`` if this ModInfo has been written since reading the PDB.  This is
+// likely used to support incremental linking, so that the linker can decide
+// if it needs to commit changes to disk.
+uint16_t Dirty : 1;
+// ``true`` if EC information is present for this module. EC is presumed to
+// stand for "Edit & Continue", which LLVM does not support.  So this flag
+// will always be false.
+uint16_t EC : 1;
+uint16_t Unused : 6;
+// Type Server Index for this module.  This is assumed to be related to /Zi,
+// but as LLVM treats /Zi as /Z7, this field will always be invalid for LLVM
+// generated PDBs.
+uint16_t TSM : 8;
+
+
+
    +
  • ModuleSymStream - The index of the stream that contains symbol information +for this module. This includes CodeView symbol information as well as source +and line information. If this field is -1, then no additional debug info will +be present for this module (for example, this is what happens when you strip +private symbols from a PDB).

  • +
  • SymByteSize - The number of bytes of data from the stream identified by +ModuleSymStream that represent CodeView symbol records.

  • +
  • C11ByteSize - The number of bytes of data from the stream identified by +ModuleSymStream that represent C11-style CodeView line information.

  • +
  • C13ByteSize - The number of bytes of data from the stream identified by +ModuleSymStream that represent C13-style CodeView line information. At +most one of C11ByteSize and C13ByteSize will be non-zero. Modern PDBs +always use C13 instead of C11.

  • +
  • SourceFileCount - The number of source files that contributed to this +module during compilation.

  • +
  • SourceFileNameIndex - The offset in the names buffer of the primary +translation unit used to build this module. All PDB files observed to date +always have this value equal to 0.

  • +
  • PdbFilePathNameIndex - The offset in the names buffer of the PDB file +containing this module’s symbol information. This has only been observed +to be non-zero for the special * Linker * module.

  • +
  • ModuleName - The module name. This is usually either a full path to an +object file (either directly passed to link.exe or from an archive) or +a string of the form Import:<dll name>.

  • +
  • ObjFileName - The object file name. In the case of a module that is +passed directly to link.exe, this is the same as ModuleName. +In the case of a module that comes from an archive, this is usually the full +path to the archive.

  • +
+
+
+

Section Contribution Substream

+

Begins at offset 0 immediately after the Module Info Substream ends, +and consumes Header->SectionContributionSize bytes. This substream begins +with a single uint32_t which will be one of the following values:

+
enum class SectionContrSubstreamVersion : uint32_t {
+  Ver60 = 0xeffe0000 + 19970605,
+  V2 = 0xeffe0000 + 20140516
+};
+
+
+

Ver60 is the only value which has been observed in a PDB so far. Following +this is an array of fixed-length structures. If the version is Ver60, +it is an array of SectionContribEntry structures (this is the nested structure +from the ModInfo type). If the version is V2, it is an array of +SectionContribEntry2 structures, defined as follows:

+
struct SectionContribEntry2 {
+  SectionContribEntry SC;
+  uint32_t ISectCoff;
+};
+
+
+

The purpose of the second field is not well understood. The name implies that +it is the index of the COFF section, but this also describes the existing field +SectionContribEntry::Section.

+
+
+

Section Map Substream

+

Begins at offset 0 immediately after the Section Contribution Substream ends, +and consumes Header->SectionMapSize bytes. This substream begins with a 4 +byte header followed by an array of fixed-length records. The header and records +have the following layout:

+
struct SectionMapHeader {
+  uint16_t Count;    // Number of segment descriptors
+  uint16_t LogCount; // Number of logical segment descriptors
+};
+
+struct SectionMapEntry {
+  uint16_t Flags;         // See the SectionMapEntryFlags enum below.
+  uint16_t Ovl;           // Logical overlay number
+  uint16_t Group;         // Group index into descriptor array.
+  uint16_t Frame;
+  uint16_t SectionName;   // Byte index of segment / group name in string table, or 0xFFFF.
+  uint16_t ClassName;     // Byte index of class in string table, or 0xFFFF.
+  uint32_t Offset;        // Byte offset of the logical segment within physical segment.  If group is set in flags, this is the offset of the group.
+  uint32_t SectionLength; // Byte count of the segment or group.
+};
+
+enum class SectionMapEntryFlags : uint16_t {
+  Read = 1 << 0,              // Segment is readable.
+  Write = 1 << 1,             // Segment is writable.
+  Execute = 1 << 2,           // Segment is executable.
+  AddressIs32Bit = 1 << 3,    // Descriptor describes a 32-bit linear address.
+  IsSelector = 1 << 8,        // Frame represents a selector.
+  IsAbsoluteAddress = 1 << 9, // Frame represents an absolute address.
+  IsGroup = 1 << 10           // If set, descriptor represents a group.
+};
+
+
+

Many of these fields are not well understood, so will not be discussed further.

+
+
+

File Info Substream

+

Begins at offset 0 immediately after the Section Map Substream ends, +and consumes Header->SourceInfoSize bytes. This substream defines the mapping +from module to the source files that contribute to that module. Since multiple +modules can use the same source file (for example, a header file), this substream +uses a string table to store each unique file name only once, and then have each +module use offsets into the string table rather than embedding the string’s value +directly. The format of this substream is as follows:

+
struct FileInfoSubstream {
+  uint16_t NumModules;
+  uint16_t NumSourceFiles;
+
+  uint16_t ModIndices[NumModules];
+  uint16_t ModFileCounts[NumModules];
+  uint32_t FileNameOffsets[NumSourceFiles];
+  char NamesBuffer[][NumSourceFiles];
+};
+
+
+

NumModules - The number of modules for which source file information is +contained within this substream. Should match the corresponding value from the +DBI Stream header.

+

NumSourceFiles: In theory this is supposed to contain the number of source +files for which this substream contains information. But that would present a +problem in that the width of this field being 16-bits would prevent one from +having more than 64K source files in a program. In early versions of the file +format, this seems to have been the case. In order to support more than this, this +field is simply ignored, and the count is computed dynamically by summing up the values of +the ModFileCounts array (discussed below). In short, this value should be +ignored.

+

ModIndices - This array is present, but does not appear to be useful.

+

ModFileCounts - An array of NumModules integers, each one containing +the number of source files which contribute to the module at the specified index. +While each individual module is limited to 64K contributing source files, the +union of all modules’ source files may be greater than 64K. The real number of +source files is thus computed by summing this array. Note that summing this array +does not give the number of unique source files, only the total number of source +file contributions to modules.

+

FileNameOffsets - An array of NumSourceFiles integers (where NumSourceFiles +here refers to the 32-bit value obtained from summing ModFileCounts), where +each integer is an offset into NamesBuffer pointing to a null terminated string.

+

NamesBuffer - An array of null terminated strings containing the actual source +file names.

+
+
+

Type Server Map Substream

+

Begins at offset 0 immediately after the File Info Substream +ends, and consumes Header->TypeServerMapSize bytes. Neither the purpose +nor the layout of this substream is understood, although it is assumed to +be related somehow to the usage of /Zi and mspdbsrv.exe. This substream +will not be discussed further.

+
+
+

EC Substream

+

Begins at offset 0 immediately after the +Type Server Map Substream ends, and consumes +Header->ECSubstreamSize bytes. This is presumed to be related to Edit & +Continue support in MSVC. LLVM does not support Edit & Continue, so this +stream will not be discussed further.

+
+
+

Optional Debug Header Stream

+

Begins at offset 0 immediately after the EC Substream ends, and +consumes Header->OptionalDbgHeaderSize bytes. This field is an array of +stream indices (e.g. uint16_t’s), each of which identifies a stream +index in the larger MSF file which contains some additional debug information. +Each position of this array has a special meaning, allowing one to determine +what kind of debug information is at the referenced stream. 11 indices +are currently understood, although it’s possible there may be more. The +layout of each stream generally corresponds exactly to a particular type +of debug data directory from the PE/COFF file. The format of these fields +can be found in the Microsoft PE/COFF Specification. +If any of these fields is -1, it means the corresponding type of debug info is +not present in the PDB.

+

FPO Data - DbgStreamArray[0]. The data in the referenced stream is an +array of FPO_DATA structures. This contains the relocated contents of +any .debug$F section from any of the linker inputs.

+

Exception Data - DbgStreamArray[1]. The data in the referenced stream +is a debug data directory of type IMAGE_DEBUG_TYPE_EXCEPTION.

+

Fixup Data - DbgStreamArray[2]. The data in the referenced stream is a +debug data directory of type IMAGE_DEBUG_TYPE_FIXUP.

+

Omap To Src Data - DbgStreamArray[3]. The data in the referenced stream +is a debug data directory of type IMAGE_DEBUG_TYPE_OMAP_TO_SRC. This +is used for mapping addresses between instrumented and uninstrumented code.

+

Omap From Src Data - DbgStreamArray[4]. The data in the referenced stream +is a debug data directory of type IMAGE_DEBUG_TYPE_OMAP_FROM_SRC. This +is used for mapping addresses between instrumented and uninstrumented code.

+

Section Header Data - DbgStreamArray[5]. A dump of all section headers from +the original executable.

+

Token / RID Map - DbgStreamArray[6]. The layout of this stream is not +understood, but it is assumed to be a mapping from CLR Token to +CLR Record ID. Refer to ECMA 335 +for more information.

+

Xdata - DbgStreamArray[7]. A copy of the .xdata section from the +executable.

+

Pdata - DbgStreamArray[8]. This is assumed to be a copy of the .pdata +section from the executable, but that would make it identical to +DbgStreamArray[1]. The difference between these two indices is not well +understood.

+

New FPO Data - DbgStreamArray[9]. The data in the referenced stream is a +debug data directory of type IMAGE_DEBUG_TYPE_FPO. Note that this is different +from DbgStreamArray[0] in that .debug$F sections are only emitted by MASM. +Thus, it is possible for both to appear in the same PDB if both MASM object files +and cl object files are linked into the same program.

+

Original Section Header Data - DbgStreamArray[10]. Similar to +DbgStreamArray[5], but contains the section headers before any binary translation +has been performed. This can be used in conjunction with DbgStreamArray[3] +and DbgStreamArray[4] to map instrumented and uninstrumented addresses.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/GlobalStream.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/GlobalStream.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/GlobalStream.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/GlobalStream.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,144 @@ + + + + + + + + + The PDB Global Symbol Stream — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The PDB Global Symbol Stream

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/HashTable.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/HashTable.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/HashTable.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/HashTable.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,226 @@ + + + + + + + + + The PDB Serialized Hash Table Format — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The PDB Serialized Hash Table Format

+
+
+

Introduction

+

One of the design goals of the PDB format is to provide accelerated access to +debug information, and for this reason there are several occasions where hash +tables are serialized and embedded directly to the file, rather than requiring +a consumer to read a list of values and reconstruct the hash table on the fly.

+

The serialization format supports hash tables of arbitrarily large size and +capacity, as well as value types and hash functions. The only supported key +value type is a uint32. The only requirement is that the producer and consumer +agree on the hash function. As such, the hash function is not discussed +further in this document; it is assumed that for a particular instance of a PDB +file hash table, the appropriate hash function is being used.

+
+
+

On-Disk Format

+
.--------------------.-- +0
+|        Size        |
+.--------------------.-- +4
+|      Capacity      |
+.--------------------.-- +8
+| Present Bit Vector |
+.--------------------.-- +N
+| Deleted Bit Vector |
+.--------------------.-- +M                  ─╮
+|        Key         |                        │
+.--------------------.-- +M+4                 │
+|       Value        |                        │
+.--------------------.-- +M+4+sizeof(Value)   │
+         ...                                  ├─ |Capacity| Bucket entries
+.--------------------.                        │
+|        Key         |                        │
+.--------------------.                        │
+|       Value        |                        │
+.--------------------.                       ─╯
+
+
+
    +
  • Size - The number of values contained in the hash table.

  • +
  • Capacity - The number of buckets in the hash table. Producers should +maintain a load factor of no greater than 2/3*Capacity+1.

  • +
  • Present Bit Vector - A serialized bit vector which contains information +about which buckets have valid values. If the bucket has a value, the +corresponding bit will be set, and if the bucket doesn’t have a value (either +because the bucket is empty or because the value is a tombstone value) the bit +will be unset.

  • +
  • Deleted Bit Vector - A serialized bit vector which contains information +about which buckets have tombstone values. If the entry in this bucket is +deleted, the bit will be set, otherwise it will be unset.

  • +
  • Keys and Values - A list of Capacity hash buckets, where the first +entry is the key (always a uint32), and the second entry is the value. The +state of each bucket (valid, empty, deleted) can be determined by examining +the present and deleted bit vectors.

  • +
+
+
+

Present and Deleted Bit Vectors

+

The bit vectors indicating the status of each bucket are serialized as follows:

+
.--------------------.-- +0
+|     Word Count     |
+.--------------------.-- +4
+|        Word_0      |        ─╮
+.--------------------.-- +8    │
+|        Word_1      |         │
+.--------------------.-- +12   ├─ |Word Count| values
+         ...                   │
+.--------------------.         │
+|       Word_N       |         │
+.--------------------.        ─╯
+
+
+

The words, when viewed as a contiguous block of bytes, represent a bit vector +with the following layout:

+
  .------------.         .------------.------------.
+  |   Word_N   |   ...   |   Word_1   |   Word_0   |
+  .------------.         .------------.------------.
+  |            |         |            |            |
++N*32      +(N-1)*32    +64          +32          +0
+
+
+

where the k’th bit of this bit vector represents the status of the k’th bucket +in the hash table.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/index.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,405 @@ + + + + + + + + + The PDB File Format — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The PDB File Format

+ +
+

Introduction

+

PDB (Program Database) is a file format invented by Microsoft which contains +debug information that can be consumed by debuggers and other tools. Since +officially supported APIs exist on Windows for querying debug information from +PDBs even without the user understanding the internals of the file format, a +large ecosystem of tools has been built for Windows to consume this format. In +order for Clang to be able to generate programs that can interoperate with these +tools, it is necessary for us to generate PDB files ourselves.

+

At the same time, LLVM has a long history of being able to cross-compile from +any platform to any platform, and we wish for the same to be true here. So it +is necessary for us to understand the PDB file format at the byte-level so that +we can generate PDB files entirely on our own.

+

This manual describes what we know about the PDB file format today. The layout +of the file, the various streams contained within, the format of individual +records within, and more.

+

We would like to extend our heartfelt gratitude to Microsoft, without whom we +would not be where we are today. Much of the knowledge contained within this +manual was learned through reading code published by Microsoft on their GitHub +repo.

+
+
+

File Layout

+
+

Important

+

Unless otherwise specified, all numeric values are encoded in little endian. +If you see a type such as uint16_t or uint64_t going forward, always +assume it is little endian!

+
+
+
+
+

The MSF Container

+

A PDB file is an MSF (Multi-Stream Format) file. An MSF file is a “file system +within a file”. It contains multiple streams (aka files) which can represent +arbitrary data, and these streams are divided into blocks which may not +necessarily be contiguously laid out within the MSF container file. +Additionally, the MSF contains a stream directory (aka MFT) which describes how +the streams (files) are laid out within the MSF.

+

For more information about the MSF container format, stream directory, and +block layout, see The MSF File Format.

+
+
+

Streams

+

The PDB format contains a number of streams which describe various information +such as the types, symbols, source files, and compilands (e.g. object files) +of a program, as well as some additional streams containing hash tables that are +used by debuggers and other tools to provide fast lookup of records and types +by name, and various other information about how the program was compiled such +as the specific toolchain used, and more. A summary of streams contained in a +PDB file is as follows:

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Name

Stream Index

Contents

Old Directory

    +
  • Fixed Stream Index 0

  • +
+
    +
  • Previous MSF Stream Directory

  • +
+

PDB Stream

    +
  • Fixed Stream Index 1

  • +
+
    +
  • Basic File Information

  • +
  • Fields to match EXE to this PDB

  • +
  • Map of named streams to stream indices

  • +
+

TPI Stream

    +
  • Fixed Stream Index 2

  • +
+
    +
  • CodeView Type Records

  • +
  • Index of TPI Hash Stream

  • +
+

DBI Stream

    +
  • Fixed Stream Index 3

  • +
+
    +
  • Module/Compiland Information

  • +
  • Indices of individual module streams

  • +
  • Indices of public / global streams

  • +
  • Section Contribution Information

  • +
  • Source File Information

  • +
  • References to streams containing +FPO / PGO Data

  • +
+

IPI Stream

    +
  • Fixed Stream Index 4

  • +
+
    +
  • CodeView Type Records

  • +
  • Index of IPI Hash Stream

  • +
+

/LinkInfo

    +
  • Contained in PDB Stream +Named Stream map

  • +
+
    +
  • Unknown

  • +
+

/src/headerblock

    +
  • Contained in PDB Stream +Named Stream map

  • +
+
    +
  • Summary of embedded source file content +(e.g. natvis files)

  • +
+

/names

    +
  • Contained in PDB Stream +Named Stream map

  • +
+
    +
  • PDB-wide global string table used for +string de-duplication

  • +
+

Module Info Stream

    +
  • Contained in DBI Stream

  • +
  • One for each compiland

  • +
+
    +
  • CodeView Symbol Records for this module

  • +
  • Line Number Information

  • +
+

Public Stream

    +
  • Contained in DBI Stream

  • +
+
    +
  • Public (Exported) Symbol Records

  • +
  • Index of Public Hash Stream

  • +
+

Global Stream

    +
  • Contained in DBI Stream

  • +
+
    +
  • Single combined master symbol-table

  • +
  • Index of Global Hash Stream

  • +
+

TPI Hash Stream

    +
  • Contained in TPI Stream

  • +
+
    +
  • Hash table for looking up TPI records +by name

  • +
+

IPI Hash Stream

    +
  • Contained in IPI Stream

  • +
+
    +
  • Hash table for looking up IPI records +by name

  • +
+
+

More information about the structure of each of these can be found on the +following pages:

+
+
The PDB Info Stream (aka the PDB Stream)

Information about the PDB Info Stream and how it is used to match PDBs to EXEs.

+
+
The PDB TPI and IPI Streams

Information about the TPI stream and the CodeView records contained within.

+
+
The PDB DBI (Debug Info) Stream

Information about the DBI stream and relevant substreams including the +Module Substreams, source file information, and CodeView symbol records +contained within.

+
+
The Module Information Stream

Information about the Module Information Stream, of which there is one for +each compilation unit and the format of symbols contained within.

+
+
The PDB Public Symbol Stream

Information about the Public Symbol Stream.

+
+
The PDB Global Symbol Stream

Information about the Global Symbol Stream.

+
+
The PDB Serialized Hash Table Format

Information about the serialized hash table format used internally to +represent things such as the Named Stream Map and the Hash Adjusters in the +TPI/IPI Stream.

+
+
+
+
+
+

CodeView

+

CodeView is another format which comes into the picture. While MSF defines +the structure of the overall file, and PDB defines the set of streams that +appear within the MSF file and the format of those streams, CodeView defines +the format of symbol and type records that appear within specific streams. +Refer to the pages on CodeView Symbol Records and CodeView Type Records for +more information about the CodeView format.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/ModiStream.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/ModiStream.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/ModiStream.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/ModiStream.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,210 @@ + + + + + + + + + The Module Information Stream — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The Module Information Stream

+ +
+

Introduction

+

The Module Info Stream (henceforth referred to as the Modi stream) contains +information about a single module (object file, import library, etc.) that +contributes to the binary this PDB contains debug information about. There +is one modi stream for each module, and the mapping between modi stream index +and module is contained in the DBI Stream. The modi stream +for a single module contains line information for the compiland, as well as +all CodeView information for the symbols defined in the compiland. Finally, +there is a “global refs” substream which is not well understood.

+
+
+

Stream Layout

+

A modi stream is laid out as follows:

+
struct ModiStream {
+  uint32_t Signature;
+  uint8_t Symbols[SymbolSize-4];
+  uint8_t C11LineInfo[C11Size];
+  uint8_t C13LineInfo[C13Size];
+
+  uint32_t GlobalRefsSize;
+  uint8_t GlobalRefs[GlobalRefsSize];
+};
+
+
+
    +
  • Signature - Unknown. In practice only the value of 4 has been +observed. It is hypothesized that this value corresponds to the set of +CV_SIGNATURE_xx defines in cvinfo.h, with the value of 4 +meaning that this module has C13 line information (as opposed to C11 line +information). A corollary of this is that we expect to only ever see +C13 line info, and that we do not understand the format of C11 line info.

  • +
  • Symbols - The CodeView Symbol Substream. +SymbolSize is equal to the value of SymByteSize for the +corresponding module’s entry in the Module Info Substream of the DBI Stream.

  • +
  • C11LineInfo - A block containing CodeView line information in C11 +format. C11Size is equal to the value of C11ByteSize from the +Module Info Substream of the +DBI Stream. If this value is 0, then C11 line +information is not present. As mentioned previously, the format of +C11 line info is not understood and we assume all line information in modern PDBs +to be in C13 format.

  • +
  • C13LineInfo - A block containing CodeView line information in C13 +format. C13Size is equal to the value of C13ByteSize from the +Module Info Substream of the +DBI Stream. If this value is 0, then C13 line +information is not present.

  • +
  • GlobalRefs - The meaning of this substream is not understood.

  • +
+
+
+

The CodeView Symbol Substream

+

The CodeView Symbol Substream. This is an array of variable length +records describing the functions, variables, inlining information, +and other symbols defined in the compiland. The entire array consumes +SymbolSize-4 bytes. The format of a CodeView Symbol Record (and +thusly, an array of CodeView Symbol Records) is described in +CodeView Symbol Records.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/MsfFile.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/MsfFile.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/MsfFile.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/MsfFile.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,346 @@ + + + + + + + + + The MSF File Format — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The MSF File Format

+ +
+

File Layout

+

The MSF file format consists of the following components:

+
    +
  1. The Superblock

  2. +
  3. The Free Block Map (also known as Free Page Map, or FPM)

  4. +
  5. Data

  6. +
+

Each component is stored as an indexed block, the length of which is specified +in SuperBlock::BlockSize. The file consists of 1 or more iterations of the +following pattern (sometimes referred to as an “interval”):

+
    +
  1. 1 block of data

  2. +
  3. Free Block Map 1 (corresponds to SuperBlock::FreeBlockMapBlock 1)

  4. +
  5. Free Block Map 2 (corresponds to SuperBlock::FreeBlockMapBlock 2)

  6. +
  7. SuperBlock::BlockSize - 3 blocks of data

  8. +
+

In the first interval, the first data block is used to store +The Superblock.

+

The following diagram demonstrates the general layout of the file (| denotes +the end of an interval, and is for visualization purposes only):

+ ++++++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Block Index

0

1

2

3 - 4095

|

4096

4097

4098

4099 - 8191

|

Meaning

The Superblock

Free Block Map 1

Free Block Map 2

Data

|

Data

FPM1

FPM2

Data

|

+

The file may end after any block, including immediately after a FPM1.

+
+

Note

+

LLVM only supports 4096 byte blocks (sometimes referred to as the “BigMsf” +variant), so the rest of this document will assume a block size of 4096.

+
+
+
+

The Superblock

+

At file offset 0 in an MSF file is the MSF SuperBlock, which is laid out as +follows:

+
struct SuperBlock {
+  char FileMagic[sizeof(Magic)];
+  ulittle32_t BlockSize;
+  ulittle32_t FreeBlockMapBlock;
+  ulittle32_t NumBlocks;
+  ulittle32_t NumDirectoryBytes;
+  ulittle32_t Unknown;
+  ulittle32_t BlockMapAddr;
+};
+
+
+
    +
  • FileMagic - Must be equal to "Microsoft C / C++ MSF 7.00\\r\\n" +followed by the bytes 1A 44 53 00 00 00.

  • +
  • BlockSize - The block size of the internal file system. Valid values are +512, 1024, 2048, and 4096 bytes. Certain aspects of the MSF file layout vary +depending on the block sizes. For the purposes of LLVM, we handle only block +sizes of 4KiB, and all further discussion assumes a block size of 4KiB.

  • +
  • FreeBlockMapBlock - The index of a block within the file, at which begins +a bitfield representing the set of all blocks within the file which are “free” +(i.e. the data within that block is not used). See The Free Block Map +for more information. +Important: FreeBlockMapBlock can only be 1 or 2!

  • +
  • NumBlocks - The total number of blocks in the file. NumBlocks * +BlockSize should equal the size of the file on disk.

  • +
  • NumDirectoryBytes - The size of the stream directory, in bytes. The +stream directory contains information about each stream’s size and the set of +blocks that it occupies. It will be described in more detail later.

  • +
  • BlockMapAddr - The index of a block within the MSF file. At this block is +an array of ulittle32_t’s listing the blocks that the stream directory +resides on. For large MSF files, the stream directory (which describes the +block layout of each stream) may not fit entirely on a single block. As a +result, this extra layer of indirection is introduced, whereby this block +contains the list of blocks that the stream directory occupies, and the stream +directory itself can be stitched together accordingly. The number of +ulittle32_t’s in this array is given by ceil(NumDirectoryBytes / +BlockSize).

  • +
+
+
+

The Free Block Map

+

The Free Block Map (sometimes referred to as the Free Page Map, or FPM) is a +series of blocks which contains a bit flag for every block in the file. The +flag will be set to 0 if the block is in use, and 1 if the block is unused.

+

Each file contains two FPMs, one of which is active at any given time. This +feature is designed to support incremental and atomic updates of the underlying +MSF file. While writing to an MSF file, if the active FPM is FPM1, you can +write your new modified bitfield to FPM2, and vice versa. Only when you commit +the file to disk do you need to swap the value in the SuperBlock to point to +the new FreeBlockMapBlock.

+

The Free Block Maps are stored as a series of single blocks throughout the file +at intervals of BlockSize. Because each FPM block is of size BlockSize +bytes, it contains 8 times as many bits as an interval has blocks. This means +that the first block of each FPM refers to the first 8 intervals of the file +(the first 32768 blocks), the second block of each FPM refers to the next 8 +intervals, and so on. This results in far more FPM blocks being present than are +required, but in order to maintain backwards compatibility the format must stay +this way.

+
+
+

The Stream Directory

+

The Stream Directory is the root of all access to the other streams in an MSF +file. Beginning at byte 0 of the stream directory is the following structure:

+
struct StreamDirectory {
+  ulittle32_t NumStreams;
+  ulittle32_t StreamSizes[NumStreams];
+  ulittle32_t StreamBlocks[NumStreams][];
+};
+
+
+

And this structure occupies exactly SuperBlock->NumDirectoryBytes bytes. +Note that each of the last two arrays is of variable length, and in particular +that the second array is jagged.

+

Example: Suppose a hypothetical PDB file with a 4KiB block size, and 4 +streams of lengths {1000 bytes, 8000 bytes, 16000 bytes, 9000 bytes}.

+

Stream 0: ceil(1000 / 4096) = 1 block

+

Stream 1: ceil(8000 / 4096) = 2 blocks

+

Stream 2: ceil(16000 / 4096) = 4 blocks

+

Stream 3: ceil(9000 / 4096) = 3 blocks

+

In total, 10 blocks are used. Let’s see what the stream directory might look +like:

+
struct StreamDirectory {
+  ulittle32_t NumStreams = 4;
+  ulittle32_t StreamSizes[] = {1000, 8000, 16000, 9000};
+  ulittle32_t StreamBlocks[][] = {
+    {4},
+    {5, 6},
+    {11, 9, 7, 8},
+    {10, 15, 12}
+  };
+};
+
+
+

In total, this occupies 15 * 4 = 60 bytes, so +SuperBlock->NumDirectoryBytes would equal 60, and +SuperBlock->BlockMapAddr would be an array of one ulittle32_t, since +60 <= SuperBlock->BlockSize.

+

Note also that the streams are discontiguous, and that part of stream 3 is in the +middle of part of stream 2. You cannot assume anything about the layout of the +blocks!

+
+
+

Alignment and Block Boundaries

+

As may be clear by now, it is possible for a single field (whether it be a high +level record, a long string field, or even a single uint16) to begin and +end in separate blocks. For example, if the block size is 4096 bytes, and a +uint16 field begins at the last byte of the current block, then it would +need to end on the first byte of the next block. Since blocks are not +necessarily contiguously laid out in the file, this means that both the consumer +and the producer of an MSF file must be prepared to split data apart +accordingly. In the aforementioned example, because values are little endian, the low byte of the uint16 +would be written to the last byte of block N, and the high byte would be written +to the first byte of block N+1, which could be tens of thousands of bytes later +(or even earlier!) in the file, depending on what the stream directory says.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/PdbStream.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/PdbStream.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/PdbStream.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/PdbStream.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,307 @@ + + + + + + + + + The PDB Info Stream (aka the PDB Stream) — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The PDB Info Stream (aka the PDB Stream)

+ +
+

Stream Header

+

At offset 0 of the PDB Stream is a header with the following layout:

+
struct PdbStreamHeader {
+  ulittle32_t Version;
+  ulittle32_t Signature;
+  ulittle32_t Age;
+  Guid UniqueId;
+};
+
+
+
    +
  • Version - A Value from the following enum:

  • +
+
enum class PdbStreamVersion : uint32_t {
+  VC2 = 19941610,
+  VC4 = 19950623,
+  VC41 = 19950814,
+  VC50 = 19960307,
+  VC98 = 19970604,
+  VC70Dep = 19990604,
+  VC70 = 20000404,
+  VC80 = 20030901,
+  VC110 = 20091201,
+  VC140 = 20140508,
+};
+
+
+

While the meaning of this field appears to be obvious, in practice we have +never observed a value other than VC70, even with modern versions of +the toolchain, and it is unclear why the other values exist. It is assumed +that certain aspects of the PDB stream’s layout, and perhaps even that of +the other streams, will change if the value is something other than VC70.

+
    +
  • Signature - A 32-bit time-stamp generated with a call to time() at +the time the PDB file is written. Note that due to the inherent uniqueness +problems of using a timestamp with 1-second granularity, this field does not +really serve its intended purpose, and as such is typically ignored in favor +of the Guid field, described below.

  • +
  • Age - The number of times the PDB file has been written. This can be used +along with Guid to match the PDB to its corresponding executable.

  • +
  • Guid - A 128-bit identifier guaranteed to be unique across space and time. +In general, this can be thought of as the result of calling the Win32 API +UuidCreate, +although LLVM cannot rely on that, as it must work on non-Windows platforms.

  • +
+
+
+

Named Stream Map

+

Following the header is a serialized hash table whose key type is a string, and +whose value type is an integer. The existence of a mapping X -> Y means +that the stream with the name X has stream index Y in the underlying MSF +file. Note that not all streams are named (for example, the +TPI Stream has a fixed index and as such there is no need to +look up its index by name). In practice, there are usually only a small number +of named streams and these are enumerated in the table of streams in The PDB File Format. +A corollary of this is if a stream does have a name (and as such is in the named +stream map) then consulting the Named Stream Map is likely to be the only way to +discover the stream’s MSF stream index. Several important streams (such as the +global string table, which is called /names) can only be located this way, and +so it is important to both produce and consume this correctly as tools will not +function correctly without it.

+
+

Important

+

Some streams are located by fixed indices (e.g. the TPI Stream has index 2), but +other streams are located by fixed names (e.g. the string table is called +/names) and can only be located by consulting the Named Stream Map.

+
+

The on-disk layout of the Named Stream Map consists of 2 components. The first is +a buffer of string data prefixed by a 32-bit length. The second is a serialized +hash table whose key and value types are both uint32_t. The key is the offset +of a null-terminated string in the string data buffer specifying the name of the +stream, and the value is the MSF stream index of the stream with said name. +Note that although the key is an integer, the hash function used to find the right +bucket hashes the string at the corresponding offset in the string data buffer.

+

The on-disk layout of the serialized hash table is described at The PDB Serialized Hash Table Format.

+

Note that the entire Named Stream Map is not length-prefixed, so the only way to +get to the data following it is to de-serialize it in its entirety.

+
+
+

PDB Feature Codes

+

Following the Named Stream Map, and consuming all remaining bytes of the PDB +Stream is a list of values from the following enumeration:

+
enum class PdbRaw_FeatureSig : uint32_t {
+  VC110 = 20091201,
+  VC140 = 20140508,
+  NoTypeMerge = 0x4D544F4E,
+  MinimalDebugInfo = 0x494E494D,
+};
+
+
+

The meaning of these values is summarized by the following table:

+ ++++ + + + + + + + + + + + + + + + + + + + +

Flag

Meaning

VC110

    +
  • No other feature flags are present

  • +
  • PDB contains an IPI Stream

  • +
+

VC140

    +
  • Other feature flags may be present

  • +
  • PDB contains an IPI Stream

  • +
+

NoTypeMerge

    +
  • Presumably duplicate types can appear in the +TPI Stream, although it’s unclear why this +might happen.

  • +
+

MinimalDebugInfo

    +
  • Program was linked with /DEBUG:FASTLINK

  • +
  • There is no TPI / IPI stream, all type info +is contained in the original object files.

  • +
+
+
+
+

Matching a PDB to its executable

+

The linker is responsible for writing both the PDB and the final executable, and +as a result is the only entity capable of writing the information necessary to +match the PDB to the executable.

+

In order to accomplish this, the linker generates a guid for the PDB (or +re-uses the existing guid if it is linking incrementally) and increments the Age +field.

+

The executable is a PE/COFF file, and part of a PE/COFF file is the presence of a +number of “directories”. For our purposes here, we are interested in the “debug +directory”. The exact format of a debug directory is described by the +IMAGE_DEBUG_DIRECTORY structure. +For this particular case, the linker emits a debug directory of type +IMAGE_DEBUG_TYPE_CODEVIEW. The format of this record is defined in +llvm/DebugInfo/CodeView/CVDebugRecord.h, but it suffices to say here only +that it includes the same Guid and Age fields. At runtime, a +debugger or tool can scan the COFF executable image for the presence of +a debug directory of the correct type and verify that the Guid and Age match.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/PublicStream.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/PublicStream.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/PublicStream.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/PublicStream.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,144 @@ + + + + + + + + + The PDB Public Symbol Stream — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The PDB Public Symbol Stream

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/TpiStream.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/TpiStream.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/PDB/TpiStream.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/PDB/TpiStream.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,456 @@ + + + + + + + + + The PDB TPI and IPI Streams — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

The PDB TPI and IPI Streams

+ +
+

Introduction

+

The PDB TPI Stream (Index 2) and IPI Stream (Index 4) contain information about +all types used in the program. Each stream is organized as a header +followed by a list of CodeView Type Records. Types are +referenced from various streams and records throughout the PDB by their +type index. In general, the sequence of type records +following the header forms a topologically sorted DAG +(directed acyclic graph), which means that a type record B can only refer to +the type A if A.TypeIndex < B.TypeIndex. While there are rare cases where +this property will not hold (particularly when dealing with object files +compiled with MASM), an implementation should try very hard to make this +property hold, as it means the entire type graph can be constructed in a single +pass.

+
+

Important

+

Type records form a topologically sorted DAG (directed acyclic graph).

+
+
+
+

TPI vs IPI Stream

+

Recent versions of the PDB format (aka all versions covered by this document) +have 2 streams with identical layout, henceforth referred to as the TPI stream +and IPI stream. Subsequent contents of this document describing the on-disk +format apply equally whether it is for the TPI Stream or the IPI Stream. The +only difference between the two is in which CodeView records are allowed to +appear in each one, summarized by the following table:

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

TPI Stream

IPI Stream

LF_POINTER

LF_FUNC_ID

LF_MODIFIER

LF_MFUNC_ID

LF_PROCEDURE

LF_BUILDINFO

LF_MFUNCTION

LF_SUBSTR_LIST

LF_LABEL

LF_STRING_ID

LF_ARGLIST

LF_UDT_SRC_LINE

LF_FIELDLIST

LF_UDT_MOD_SRC_LINE

LF_ARRAY

LF_CLASS

LF_STRUCTURE

LF_INTERFACE

LF_UNION

LF_ENUM

LF_TYPESERVER2

LF_VFTABLE

LF_VTSHAPE

LF_BITFIELD

LF_METHODLIST

LF_PRECOMP

LF_ENDPRECOMP

+

The usage of these records is described in more detail in +CodeView Type Records.

+
+
+

Type Indices

+

A type index is a 32-bit integer that uniquely identifies a type inside of an +object file’s .debug$T section or a PDB file’s TPI or IPI stream. The +value of the type index for the first type record from the TPI stream is given +by the TypeIndexBegin member of the TPI Stream Header +although in practice this value is always equal to 0x1000 (4096).

+

Any type index with a high bit set is considered to come from the IPI stream, +although this appears to be more of a hack, and LLVM does not generate type +indices of this nature. They can, however, be observed in Microsoft PDBs +occasionally, so one should be prepared to handle them. Note that having the +high bit set is not a necessary condition to determine whether a type index +comes from the IPI stream, it is only sufficient.

+

Once the high bit is cleared, any type index >= TypeIndexBegin is presumed +to come from the appropriate stream, and any type index less than this is a +bitmask which can be decomposed as follows:

+
.---------------------------.------.----------.
+|           Unused          | Mode |   Kind   |
+'---------------------------'------'----------'
+|+32                        |+12   |+8        |+0
+
+
+
    +
  • Kind - A value from the following enum:

  • +
+
enum class SimpleTypeKind : uint32_t {
+  None = 0x0000,          // uncharacterized type (no type)
+  Void = 0x0003,          // void
+  NotTranslated = 0x0007, // type not translated by cvpack
+  HResult = 0x0008,       // OLE/COM HRESULT
+
+  SignedCharacter = 0x0010,   // 8 bit signed
+  UnsignedCharacter = 0x0020, // 8 bit unsigned
+  NarrowCharacter = 0x0070,   // really a char
+  WideCharacter = 0x0071,     // wide char
+  Character16 = 0x007a,       // char16_t
+  Character32 = 0x007b,       // char32_t
+
+  SByte = 0x0068,       // 8 bit signed int
+  Byte = 0x0069,        // 8 bit unsigned int
+  Int16Short = 0x0011,  // 16 bit signed
+  UInt16Short = 0x0021, // 16 bit unsigned
+  Int16 = 0x0072,       // 16 bit signed int
+  UInt16 = 0x0073,      // 16 bit unsigned int
+  Int32Long = 0x0012,   // 32 bit signed
+  UInt32Long = 0x0022,  // 32 bit unsigned
+  Int32 = 0x0074,       // 32 bit signed int
+  UInt32 = 0x0075,      // 32 bit unsigned int
+  Int64Quad = 0x0013,   // 64 bit signed
+  UInt64Quad = 0x0023,  // 64 bit unsigned
+  Int64 = 0x0076,       // 64 bit signed int
+  UInt64 = 0x0077,      // 64 bit unsigned int
+  Int128Oct = 0x0014,   // 128 bit signed int
+  UInt128Oct = 0x0024,  // 128 bit unsigned int
+  Int128 = 0x0078,      // 128 bit signed int
+  UInt128 = 0x0079,     // 128 bit unsigned int
+
+  Float16 = 0x0046,                 // 16 bit real
+  Float32 = 0x0040,                 // 32 bit real
+  Float32PartialPrecision = 0x0045, // 32 bit PP real
+  Float48 = 0x0044,                 // 48 bit real
+  Float64 = 0x0041,                 // 64 bit real
+  Float80 = 0x0042,                 // 80 bit real
+  Float128 = 0x0043,                // 128 bit real
+
+  Complex16 = 0x0056,                 // 16 bit complex
+  Complex32 = 0x0050,                 // 32 bit complex
+  Complex32PartialPrecision = 0x0055, // 32 bit PP complex
+  Complex48 = 0x0054,                 // 48 bit complex
+  Complex64 = 0x0051,                 // 64 bit complex
+  Complex80 = 0x0052,                 // 80 bit complex
+  Complex128 = 0x0053,                // 128 bit complex
+
+  Boolean8 = 0x0030,   // 8 bit boolean
+  Boolean16 = 0x0031,  // 16 bit boolean
+  Boolean32 = 0x0032,  // 32 bit boolean
+  Boolean64 = 0x0033,  // 64 bit boolean
+  Boolean128 = 0x0034, // 128 bit boolean
+};
+
+
+
    +
  • Mode - A value from the following enum:

  • +
+
enum class SimpleTypeMode : uint32_t {
+  Direct = 0,        // Not a pointer
+  NearPointer = 1,   // Near pointer
+  FarPointer = 2,    // Far pointer
+  HugePointer = 3,   // Huge pointer
+  NearPointer32 = 4, // 32 bit near pointer
+  FarPointer32 = 5,  // 32 bit far pointer
+  NearPointer64 = 6, // 64 bit near pointer
+  NearPointer128 = 7 // 128 bit near pointer
+};
+
+
+

Note that for pointers, the bitness is represented in the mode. So a void* +would have a type index with Mode=NearPointer32, Kind=Void if built for +32-bits but a type index with Mode=NearPointer64, Kind=Void if built for +64-bits.

+

By convention, the type index for std::nullptr_t is constructed the same +way as the type index for void*, but using the bitless enumeration value +NearPointer.

+
+
+

Stream Header

+

At offset 0 of the TPI Stream is a header with the following layout:

+
struct TpiStreamHeader {
+  uint32_t Version;
+  uint32_t HeaderSize;
+  uint32_t TypeIndexBegin;
+  uint32_t TypeIndexEnd;
+  uint32_t TypeRecordBytes;
+
+  uint16_t HashStreamIndex;
+  uint16_t HashAuxStreamIndex;
+  uint32_t HashKeySize;
+  uint32_t NumHashBuckets;
+
+  int32_t HashValueBufferOffset;
+  uint32_t HashValueBufferLength;
+
+  int32_t IndexOffsetBufferOffset;
+  uint32_t IndexOffsetBufferLength;
+
+  int32_t HashAdjBufferOffset;
+  uint32_t HashAdjBufferLength;
+};
+
+
+
    +
  • Version - A value from the following enum.

  • +
+
enum class TpiStreamVersion : uint32_t {
+  V40 = 19950410,
+  V41 = 19951122,
+  V50 = 19961031,
+  V70 = 19990903,
+  V80 = 20040203,
+};
+
+
+

Similar to the PDB Stream, this value always appears to be +V80, and no other values have been observed. It is assumed that should +another value be observed, the layout described by this document may not be +accurate.

+
    +
  • HeaderSize - sizeof(TpiStreamHeader)

  • +
  • TypeIndexBegin - The numeric value of the type index representing the +first type record in the TPI stream. This is usually the value 0x1000 as +type indices lower than this are reserved (see Type Indices for +a discussion of reserved type indices).

  • +
  • TypeIndexEnd - One greater than the numeric value of the type index +representing the last type record in the TPI stream. The total number of +type records in the TPI stream can be computed as TypeIndexEnd - +TypeIndexBegin.

  • +
  • TypeRecordBytes - The number of bytes of type record data following the +header.

  • +
  • HashStreamIndex - The index of a stream which contains a list of hashes +for every type record. This value may be -1, indicating that hash +information is not present. In practice a valid stream index is always +observed, so any producer implementation should be prepared to emit this +stream to ensure compatibility with tools which may expect it to be present.

  • +
  • HashAuxStreamIndex - Presumably the index of a stream which contains a +separate hash table, although this has not been observed in practice and it’s +unclear what it might be used for.

  • +
  • HashKeySize - The size of a hash value (usually 4 bytes).

  • +
  • NumHashBuckets - The number of buckets used to generate the hash values +in the aforementioned hash streams.

  • +
  • HashValueBufferOffset / HashValueBufferLength - The offset and size within +the TPI Hash Stream of the list of hash values. It should be assumed that +there are either 0 hash values, or a number equal to the number of type +records in the TPI stream (TypeIndexEnd - TypeIndexBegin). Thus, if +HashValueBufferLength is not equal to (TypeIndexEnd - TypeIndexBegin) * +HashKeySize we can consider the PDB malformed.

  • +
  • IndexOffsetBufferOffset / IndexOffsetBufferLength - The offset and size +within the TPI Hash Stream of the Type Index Offsets Buffer. This is a list +of pairs of uint32_t’s where the first value is a Type Index and the second value is the offset in the type record data of +the type with this index. This can be used to do a binary search followed by +a linear search to get O(log n) lookup by type index.

  • +
  • HashAdjBufferOffset / HashAdjBufferLength - The offset and size within +the TPI hash stream of a serialized hash table whose keys are the hash values +in the hash value buffer and whose values are type indices. This appears to +be useful in incremental linking scenarios, so that if a type is modified an +entry can be created mapping the old hash value to the new type index so that +a PDB file consumer can always have the most up to date version of the type +without forcing the incremental linker to garbage collect and update +references that point to the old version to now point to the new version. +The layout of this hash table is described in The PDB Serialized Hash Table Format.

  • +
+
+
+

CodeView Type Record List

+

Following the header, there are TypeRecordBytes bytes of data that +represent a variable length array of CodeView type records. The number of such records (e.g. the length of the array) +can be determined by computing the value Header.TypeIndexEnd - +Header.TypeIndexBegin.

+

O(log(n)) access is provided by way of the Type Index Offsets array (if +present) described previously.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Phabricator.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Phabricator.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Phabricator.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Phabricator.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,417 @@ + + + + + + + + + Code Reviews with Phabricator — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Code Reviews with Phabricator

+ +

If you prefer to use a web user interface for code reviews, you can now submit +your patches for Clang and LLVM at LLVM’s Phabricator instance.

+

While Phabricator is a useful tool for some, the relevant -commits mailing list +is the system of record for all LLVM code review. The mailing list should be +added as a subscriber on all reviews, and Phabricator users should be prepared +to respond to free-form comments in mail sent to the commits list.

+
+

Sign up

+

To get started with Phabricator, navigate to https://reviews.llvm.org and +click the power icon in the top right. You can register with a GitHub account, +a Google account, or you can create your own profile.

+

Make sure that the email address registered with Phabricator is subscribed +to the relevant -commits mailing list. If you are not subscribed to the commit +list, all mail sent by Phabricator on your behalf will be held for moderation.

+

Note that if you use your git user name as Phabricator user name, +Phabricator will automatically connect your submits to your Phabricator user in +the Code Repository Browser.

+
+
+

Requesting a review via the command line

+

Phabricator has a tool called Arcanist to upload patches from +the command line. To get you set up, follow the +Arcanist Quick Start instructions.

+

You can learn more about how to use arc to interact with +Phabricator in the Arcanist User Guide. +The basic way of creating a revision for the current commit in your local +repository is to run:

+
arc diff HEAD~
+
+
+

Sometimes you may want to create a draft revision to show a proof of concept +or for experimental purposes. In that case you can use the --draft option. It +will either create a new draft revision or convert the original revision to a +draft revision depending on your local branch status. The good part is: it will not +send mail to the llvm-commits mailing list, patch reviewers, or any other subscribers, +but buildbot will still run on every patch update:

+
arc diff --draft HEAD~
+
+
+

If you later update your commit message, you need to add the --verbatim +option to have arc update the description on Phabricator:

+
arc diff --edit --verbatim
+
+
+
+
+

Requesting a review via the web interface

+

The tool to create and review patches in Phabricator is called +Differential.

+

Note that you can upload patches created through git, but using arc on the +command line (see previous section) is preferred: it adds more metadata to +Phabricator which are useful for the pre-merge testing system and for +propagating attribution on commits when someone else has to push it for you.

+

To make reviews easier, please always include as much context as +possible with your diff! Don’t worry, Phabricator +will automatically send a diff with a smaller context in the review +email, but having the full file in the web interface will help the +reviewer understand your code.

+

To get a full diff, use one of the following commands (or just use Arcanist +to upload your patch):

+
    +
  • git show HEAD -U999999 > mypatch.patch

  • +
  • git diff -U999999 @{u} > mypatch.patch

  • +
  • git diff HEAD~1 -U999999 > mypatch.patch

  • +
+

Before uploading your patch, please make sure it is formatted properly, as +described in How to Submit a Patch.

+

To upload a new patch:

+
    +
  • Click Differential.

  • +
  • Click + Create Diff.

  • +
  • Paste the text diff or browse to the patch file. Click Create Diff.

  • +
  • Leave this first Repository field blank. (We’ll fill in the Repository +later, when sending the review.)

  • +
  • Leave the drop down on Create a new Revision… and click Continue.

  • +
  • Enter a descriptive title and summary. The title and summary are usually +in the form of a commit message.

  • +
  • Add reviewers (see below for advice). (If you set the Repository field +correctly, llvm-commits or cfe-commits will be subscribed automatically; +otherwise, you will have to manually subscribe them.)

  • +
  • In the Repository field, enter “rG LLVM Github Monorepo”.

  • +
  • Click Save.

  • +
+

To submit an updated patch:

+
    +
  • Click Differential.

  • +
  • Click + Create Diff.

  • +
  • Paste the updated diff or browse to the updated patch file. Click Create Diff.

  • +
  • Select the review you want to update from the Attach To dropdown and click +Continue.

  • +
  • Leave the Repository field blank. (We previously filled out the Repository +for the review request.)

  • +
  • Add comments about the changes in the new diff. Click Save.

  • +
+

Choosing reviewers: You typically pick one or two people as initial reviewers. +This choice is not crucial, because you are merely suggesting and not requiring +them to participate. Many people will see the email notification on cfe-commits +or llvm-commits, and if the subject line suggests the patch is something they +should look at, they will.

+
+
+

Finding potential reviewers

+

Here are a couple of ways to pick the initial reviewer(s):

+
    +
  • Use git blame and the commit log to find names of people who have +recently modified the same area of code that you are modifying.

  • +
  • Look in CODE_OWNERS.TXT to see who might be responsible for that area.

  • +
  • If you’ve discussed the change on a dev list, the people who participated +might be appropriate reviewers.

  • +
+

Even if you think the code owner is the busiest person in the world, it’s still +okay to put them as a reviewer. Being the code owner means they have accepted +responsibility for making sure the review happens.

+
+
+

Reviewing code with Phabricator

+

Phabricator allows you to add inline comments as well as overall comments +to a revision. To add an inline comment, select the lines of code you want +to comment on by clicking and dragging the line numbers in the diff pane. +When you have added all your comments, scroll to the bottom of the page and +click the Submit button.

+

You can add overall comments in the text box at the bottom of the page. +When you’re done, click the Submit button.

+

Phabricator has many useful features, for example allowing you to select +diffs between different versions of the patch as it was reviewed in the +Revision Update History. Most features are self descriptive - explore, and +if you have a question, drop by on #llvm in IRC to get help.

+

Note that as e-mail is the system of reference for code reviews, and some +people prefer it over a web interface, we do not generate automated mail +when a review changes state, for example by clicking “Accept Revision” in +the web interface. Thus, please type LGTM into the comment box to accept +a change from Phabricator.

+
+
+

Pre-merge testing

+

The pre-merge tests are a continuous integration (CI) workflow. The workflow +checks the patches uploaded to Phabricator before a user merges them to the main +branch - thus the term pre-merge testing.

+

When a user uploads a patch to Phabricator, Phabricator triggers the checks and +then displays the results. This way bugs in a patch are contained during the +code review stage and do not pollute the main branch.

+

If you notice issues or have an idea on how to improve pre-merge checks, please +create a new issue +or give a ❤️ to an existing one.

+
+

Requirements

+

To get a patch on Phabricator tested, the build server must be able to apply the +patch to the checked out git repository. Please make sure that either:

+
    +
  • You set a git hash as sourceControlBaseRevision in Phabricator which is +available on the GitHub repository,

  • +
  • or you define the dependencies of your patch in Phabricator,

  • +
  • or your patch can be applied to the main branch.

  • +
+

Only then can the build server apply the patch locally and run the builds and +tests.

+
+
+

Accessing build results

+

Phabricator will automatically trigger a build for every new patch you upload or +modify. Phabricator shows the build results at the top of the entry. Clicking on +the links (in the red box) will show more details:

+
+
_images/Phabricator_premerge_results.png +
+

The CI will compile and run tests, run clang-format and clang-tidy on lines +changed.

+

If a unit test failed, this is shown below the build status. You can also expand +the unit test to see the details:

+
+
_images/Phabricator_premerge_unit_tests.png +
+
+
+
+

Committing a change

+

Once a patch has been reviewed and approved on Phabricator it can then be +committed to trunk. If you do not have commit access, someone has to +commit the change for you (with attribution). It is sufficient to add +a comment to the approved review indicating you cannot commit the patch +yourself. If you have commit access, there are multiple workflows to commit the +change. Whichever method you follow it is recommended that your commit message +ends with the line:

+
Differential Revision: <URL>
+
+
+

where <URL> is the URL for the code review, starting with +https://reviews.llvm.org/.

+

This allows people reading the version history to see the review for +context. This also allows Phabricator to detect the commit, close the +review, and add a link from the review to the commit.

+

Note that if you use the Arcanist tool the Differential Revision line will +be added automatically. If you don’t want to use Arcanist, you can add the +Differential Revision line (as the last line) to the commit message +yourself.

+

Using the Arcanist tool can simplify the process of committing reviewed code as +it will retrieve reviewers, the Differential Revision, etc from the review +and place it in the commit message. You may also commit an accepted change +directly using git push, per the section in the getting started +guide.

+

Note that if you commit the change without using Arcanist and forget to add the +Differential Revision line to your commit message then it is recommended +that you close the review manually. In the web UI, under “Leap Into Action” put +the git revision number in the Comment, set the Action to “Close Revision” and +click Submit. Note the review must have been Accepted first.

+
+

Committing someone’s change from Phabricator

+

On a clean Git repository on an up to date main branch run the +following (where <Revision> is the Phabricator review number):

+
arc patch D<Revision>
+
+
+

This will create a new branch called arcpatch-D<Revision> based on the +current main and will create a commit corresponding to D<Revision> with a +commit message derived from information in the Phabricator review.

+

Check you are happy with the commit message and amend it if necessary. +For example, ensure the ‘Author’ property of the commit is set to the original author. +You can use a command to correct the author property if it is incorrect:

+
git commit --amend --author="John Doe <jdoe@llvm.org>"
+
+
+

Then, make sure the commit is up-to-date, and commit it. This can be done by running +the following:

+
git pull --rebase https://github.com/llvm/llvm-project.git main
+git show # Ensure the patch looks correct.
+ninja check-$whatever # Rerun the appropriate tests if needed.
+git push https://github.com/llvm/llvm-project.git HEAD:main
+
+
+
+
+
+

Abandoning a change

+

If you decide you should not commit the patch, you should explicitly abandon +the review so that reviewers don’t think it is still open. In the web UI, +scroll to the bottom of the page where normally you would enter an overall +comment. In the drop-down Action list, which defaults to “Comment,” you should +select “Abandon Revision” and then enter a comment explaining why. Click the +Submit button to finish closing the review.

+
+
+

Status

+

Please let us know whether you like it and what could be improved! We’re still +working on setting up a bug tracker, but you can email klimek-at-google-dot-com +and chandlerc-at-gmail-dot-com and CC the llvm-dev mailing list with questions +until then. We also could use help implementing improvements. This sadly is +really painful and hard because the Phabricator codebase is in PHP and not as +testable as you might like. However, we’ve put exactly what we’re deploying up +on an llvm-reviews GitHub project where folks can hack on it and post pull +requests. We’re looking into what the right long-term hosting for this is, but +note that it is a derivative of an existing open source project, and so not +trivially a good fit for an official LLVM project.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ProgrammersManual.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ProgrammersManual.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ProgrammersManual.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ProgrammersManual.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,3645 @@ + + + + + + + + + LLVM Programmer’s Manual — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Programmer’s Manual

+
+ +
+
+

Warning

+

This is always a work in progress.

+
+
+

Introduction

+

This document is meant to highlight some of the important classes and interfaces +available in the LLVM source-base. This manual is not intended to explain what +LLVM is, how it works, and what LLVM code looks like. It assumes that you know +the basics of LLVM and are interested in writing transformations or otherwise +analyzing or manipulating the code.

+

This document should get you oriented so that you can find your way in the +continuously growing source code that makes up the LLVM infrastructure. Note +that this manual is not intended to serve as a replacement for reading the +source code, so if you think there should be a method in one of these classes to +do something, but it’s not listed, check the source. Links to the doxygen sources are provided to make this as easy as +possible.

+

The first section of this document describes general information that is useful +to know when working in the LLVM infrastructure, and the second describes the +Core LLVM classes. In the future this manual will be extended with information +describing how to use extension libraries, such as dominator information, CFG +traversal routines, and useful utilities like the InstVisitor (doxygen) template.

+
+
+

General Information

+

This section contains general information that is useful if you are working in +the LLVM source-base, but that isn’t specific to any particular API.

+
+

The C++ Standard Template Library

+

LLVM makes heavy use of the C++ Standard Template Library (STL), perhaps much +more than you are used to, or have seen before. Because of this, you might want +to do a little background reading in the techniques used and capabilities of the +library. There are many good pages that discuss the STL, and several books on +the subject that you can get, so it will not be discussed in this document.

+

Here are some useful links:

+
    +
  1. cppreference.com - an excellent +reference for the STL and other parts of the standard C++ library.

  2. +
  3. C++ In a Nutshell - This is an O’Reilly +book in the making. It has a decent Standard Library Reference that rivals +Dinkumware’s, and is unfortunately no longer free since the book has been +published.

  4. +
  5. C++ Frequently Asked Questions.

  6. +
  7. SGI’s STL Programmer’s Guide - Contains a +useful Introduction to the STL.

  8. +
  9. Bjarne Stroustrup’s C++ Page.

  10. +
  11. Bruce Eckel’s Thinking in C++, 2nd ed. Volume 2 Revision 4.0 +(even better, get the book).

  12. +
+

You are also encouraged to take a look at the LLVM Coding Standards guide which focuses on how to write maintainable code more +than where to put your curly braces.

+
+ +
+
+

Important and useful LLVM APIs

+

Here we highlight some LLVM APIs that are generally useful and good to know +about when writing transformations.

+
+

The isa<>, cast<> and dyn_cast<> templates

+

The LLVM source-base makes extensive use of a custom form of RTTI. These +templates have many similarities to the C++ dynamic_cast<> operator, but +they don’t have some drawbacks (primarily stemming from the fact that +dynamic_cast<> only works on classes that have a v-table). Because they are +used so often, you must know what they do and how they work. All of these +templates are defined in the llvm/Support/Casting.h (doxygen) file (note that you very +rarely have to include this file directly).

+
+
isa<>:

The isa<> operator works exactly like the Java “instanceof” operator. +It returns true or false depending on whether a reference or pointer points to +an instance of the specified class. This can be very useful for constraint +checking of various sorts (example below).

+
+
cast<>:

The cast<> operator is a “checked cast” operation. It converts a pointer +or reference from a base class to a derived class, causing an assertion +failure if it is not really an instance of the right type. This should be +used in cases where you have some information that makes you believe that +something is of the right type. An example of the isa<> and cast<> +template is:

+
static bool isLoopInvariant(const Value *V, const Loop *L) {
+  if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V))
+    return true;
+
+  // Otherwise, it must be an instruction...
+  return !L->contains(cast<Instruction>(V)->getParent());
+}
+
+
+

Note that you should not use an isa<> test followed by a cast<>; +for that, use the dyn_cast<> operator.

+
+
dyn_cast<>:

The dyn_cast<> operator is a “checking cast” operation. It checks to see +if the operand is of the specified type, and if so, returns a pointer to it +(this operator does not work with references). If the operand is not of the +correct type, a null pointer is returned. Thus, this works very much like +the dynamic_cast<> operator in C++, and should be used in the same +circumstances. Typically, the dyn_cast<> operator is used in an if +statement or some other flow control statement like this:

+
if (auto *AI = dyn_cast<AllocationInst>(Val)) {
+  // ...
+}
+
+
+

This form of the if statement effectively combines together a call to +isa<> and a call to cast<> into one statement, which is very +convenient.

+

Note that the dyn_cast<> operator, like C++’s dynamic_cast<> or Java’s +instanceof operator, can be abused. In particular, you should not use big +chained if/then/else blocks to check for lots of different variants of +classes. If you find yourself wanting to do this, it is much cleaner and more +efficient to use the InstVisitor class to dispatch over the instruction +type directly.

+
+
isa_and_nonnull<>:

The isa_and_nonnull<> operator works just like the isa<> operator, +except that it allows for a null pointer as an argument (for which it then +returns false). This can sometimes be useful, allowing you to combine several +null checks into one.

+
+
cast_or_null<>:

The cast_or_null<> operator works just like the cast<> operator, +except that it allows for a null pointer as an argument (which it then +propagates). This can sometimes be useful, allowing you to combine several +null checks into one.

+
+
dyn_cast_or_null<>:

The dyn_cast_or_null<> operator works just like the dyn_cast<> +operator, except that it allows for a null pointer as an argument (which it +then propagates). This can sometimes be useful, allowing you to combine +several null checks into one.

+
+
+

These six templates can be used with any classes, whether they have a v-table +or not. If you want to add support for these templates, see the document +How to set up LLVM-style RTTI for your class hierarchy.

+
+
+

Passing strings (the StringRef and Twine classes)

+

Although LLVM generally does not do much string manipulation, we do have several +important APIs which take strings. Two important examples are the Value class +– which has names for instructions, functions, etc. – and the StringMap +class which is used extensively in LLVM and Clang.

+

These are generic classes, and they need to be able to accept strings which may +have embedded null characters. Therefore, they cannot simply take a const +char *, and taking a const std::string& requires clients to perform a heap +allocation which is usually unnecessary. Instead, many LLVM APIs use a +StringRef or a const Twine& for passing strings efficiently.

+
+

The StringRef class

+

The StringRef data type represents a reference to a constant string (a +character array and a length) and supports the common operations available on +std::string, but does not require heap allocation.

+

It can be implicitly constructed using a C style null-terminated string, an +std::string, or explicitly with a character pointer and length. For +example, the StringRef find function is declared as:

+
iterator find(StringRef Key);
+
+
+

and clients can call it using any one of:

+
Map.find("foo");                 // Lookup "foo"
+Map.find(std::string("bar"));    // Lookup "bar"
+Map.find(StringRef("\0baz", 4)); // Lookup "\0baz"
+
+
+

Similarly, APIs which need to return a string may return a StringRef +instance, which can be used directly or converted to an std::string using +the str member function. See llvm/ADT/StringRef.h (doxygen) for more +information.

+

You should rarely use the StringRef class directly: because it contains +pointers to external memory, it is not generally safe to store an instance of the +class (unless you know that the external storage will not be freed). +StringRef is small and pervasive enough in LLVM that it should always be +passed by value.

+
+
+

The Twine class

+

The Twine (doxygen) +class is an efficient way for APIs to accept concatenated strings. For example, +a common LLVM paradigm is to name one instruction based on the name of another +instruction with a suffix, for example:

+
New = CmpInst::Create(..., SO->getName() + ".cmp");
+
+
+

The Twine class is effectively a lightweight rope which points to +temporary (stack allocated) objects. Twines can be implicitly constructed as +the result of the plus operator applied to strings (i.e., a C string, an +std::string, or a StringRef). The twine delays the actual concatenation +of strings until it is actually required, at which point it can be efficiently +rendered directly into a character array. This avoids unnecessary heap +allocation involved in constructing the temporary results of string +concatenation. See llvm/ADT/Twine.h (doxygen) and here +for more information.

+

As with a StringRef, Twine objects point to external memory and should +almost never be stored or mentioned directly. They are intended solely for use +when defining a function which should be able to efficiently accept concatenated +strings.

+
+
+
+

Formatting strings (the formatv function)

+

While LLVM doesn’t necessarily do a lot of string manipulation and parsing, it +does do a lot of string formatting. From diagnostic messages, to llvm tool +outputs such as llvm-readobj to printing verbose disassembly listings and +LLDB runtime logging, the need for string formatting is pervasive.

+

The formatv is similar in spirit to printf, but uses a different syntax +which borrows heavily from Python and C#. Unlike printf it deduces the type +to be formatted at compile time, so it does not need a format specifier such as +%d. This reduces the mental overhead of trying to construct portable format +strings, especially for platform-specific types like size_t or pointer types. +Unlike both printf and Python, it additionally fails to compile if LLVM does +not know how to format the type. These two properties ensure that the function +is both safer and simpler to use than traditional formatting methods such as +the printf family of functions.

+
+

Simple formatting

+

A call to formatv involves a single format string consisting of 0 or more +replacement sequences, followed by a variable length list of replacement values. +A replacement sequence is a string of the form {N[[,align]:style]}.

+

N refers to the 0-based index of the argument from the list of replacement +values. Note that this means it is possible to reference the same parameter +multiple times, possibly with different style and/or alignment options, in any order.

+

align is an optional string specifying the width of the field to format +the value into, and the alignment of the value within the field. It is specified as +an optional alignment style followed by a positive integral field width. The +alignment style can be one of the characters - (left align), = (center align), +or + (right align). The default is right aligned.

+

style is an optional string consisting of a type-specific specifier that controls the +formatting of the value. For example, to format a floating point value as a percentage, +you can use the style option P.

+
+
+

Custom formatting

+

There are two ways to customize the formatting behavior for a type.

+
    +
  1. Provide a template specialization of llvm::format_provider<T> for your +type T with the appropriate static format method.

  2. +
+
+
namespace llvm {
+  template<>
+  struct format_provider<MyFooBar> {
+    static void format(const MyFooBar &V, raw_ostream &Stream, StringRef Style) {
+      // Do whatever is necessary to format `V` into `Stream`
+    }
+  };
+  void foo() {
+    MyFooBar X;
+    std::string S = formatv("{0}", X);
+  }
+}
+
+
+

This is a useful extensibility mechanism for adding support for formatting your own +custom types with your own custom Style options. But it does not help when you want +to extend the mechanism for formatting a type that the library already knows how to +format. For that, we need something else.

+
+
    +
  1. Provide a format adapter inheriting from llvm::FormatAdapter<T>.

  2. +
+
+
namespace anything {
+  struct format_int_custom : public llvm::FormatAdapter<int> {
+    explicit format_int_custom(int N) : llvm::FormatAdapter<int>(N) {}
+    void format(llvm::raw_ostream &Stream, StringRef Style) override {
+      // Do whatever is necessary to format ``this->Item`` into ``Stream``
+    }
+  };
+}
+namespace llvm {
+  void foo() {
+    std::string S = formatv("{0}", anything::format_int_custom(42));
+  }
+}
+
+
+

If the type is detected to be derived from FormatAdapter<T>, formatv +will call the +format method on the argument passing in the specified style. This allows +one to provide custom formatting of any type, including one which already has +a builtin format provider.

+
+
+
+

formatv Examples

+

Below is an incomplete set of examples demonstrating +the usage of formatv. More information can be found by reading the +doxygen documentation or by looking at the unit test suite.

+
std::string S;
+// Simple formatting of basic types and implicit string conversion.
+S = formatv("{0} ({1:P})", 7, 0.35);  // S == "7 (35.00%)"
+
+// Out-of-order referencing and multi-referencing
+outs() << formatv("{0} {2} {1} {0}", 1, "test", 3); // prints "1 3 test 1"
+
+// Left, right, and center alignment
+S = formatv("{0,7}",  'a');  // S == "      a";
+S = formatv("{0,-7}", 'a');  // S == "a      ";
+S = formatv("{0,=7}", 'a');  // S == "   a   ";
+S = formatv("{0,+7}", 'a');  // S == "      a";
+
+// Custom styles
+S = formatv("{0:N} - {0:x} - {1:E}", 12345, 123908342); // S == "12,345 - 0x3039 - 1.24E8"
+
+// Adapters
+S = formatv("{0}", fmt_align(42, AlignStyle::Center, 7));  // S == "  42   "
+S = formatv("{0}", fmt_repeat("hi", 3)); // S == "hihihi"
+S = formatv("{0}", fmt_pad("hi", 2, 6)); // S == "  hi      "
+
+// Ranges
+std::vector<int> V = {8, 9, 10};
+S = formatv("{0}", make_range(V.begin(), V.end())); // S == "8, 9, 10"
+S = formatv("{0:$[+]}", make_range(V.begin(), V.end())); // S == "8+9+10"
+S = formatv("{0:$[ + ]@[x]}", make_range(V.begin(), V.end())); // S == "0x8 + 0x9 + 0xA"
+
+
+
+
+
+

Error handling

+

Proper error handling helps us identify bugs in our code, and helps end-users +understand errors in their tool usage. Errors fall into two broad categories: +programmatic and recoverable, with different strategies for handling and +reporting.

+
+

Programmatic Errors

+

Programmatic errors are violations of program invariants or API contracts, and +represent bugs within the program itself. Our aim is to document invariants, and +to abort quickly at the point of failure (providing some basic diagnostic) when +invariants are broken at runtime.

+

The fundamental tools for handling programmatic errors are assertions and the +llvm_unreachable function. Assertions are used to express invariant conditions, +and should include a message describing the invariant:

+
assert(isPhysReg(R) && "All virt regs should have been allocated already.");
+
+
+

The llvm_unreachable function can be used to document areas of control flow +that should never be entered if the program invariants hold:

+
enum { Foo, Bar, Baz } X = foo();
+
+switch (X) {
+  case Foo: /* Handle Foo */; break;
+  case Bar: /* Handle Bar */; break;
+  default:
+    llvm_unreachable("X should be Foo or Bar here");
+}
+
+
+
+
+

Recoverable Errors

+

Recoverable errors represent an error in the program’s environment, for example +a resource failure (a missing file, a dropped network connection, etc.), or +malformed input. These errors should be detected and communicated to a level of +the program where they can be handled appropriately. Handling the error may be +as simple as reporting the issue to the user, or it may involve attempts at +recovery.

+
+

Note

+

While it would be ideal to use this error handling scheme throughout +LLVM, there are places where this hasn’t been practical to apply. In +situations where you absolutely must emit a non-programmatic error and +the Error model isn’t workable you can call report_fatal_error, +which will call installed error handlers, print a message, and abort the +program. The use of report_fatal_error in this case is discouraged.

+
+

Recoverable errors are modeled using LLVM’s Error scheme. This scheme +represents errors using function return values, similar to classic C integer +error codes, or C++’s std::error_code. However, the Error class is +actually a lightweight wrapper for user-defined error types, allowing arbitrary +information to be attached to describe the error. This is similar to the way C++ +exceptions allow throwing of user-defined types.

+

Success values are created by calling Error::success(), e.g.:

+
Error foo() {
+  // Do something.
+  // Return success.
+  return Error::success();
+}
+
+
+

Success values are very cheap to construct and return - they have minimal +impact on program performance.

+

Failure values are constructed using make_error<T>, where T is any class +that inherits from the ErrorInfo utility, e.g.:

+
class BadFileFormat : public ErrorInfo<BadFileFormat> {
+public:
+  static char ID;
+  std::string Path;
+
+  BadFileFormat(StringRef Path) : Path(Path.str()) {}
+
+  void log(raw_ostream &OS) const override {
+    OS << Path << " is malformed";
+  }
+
+  std::error_code convertToErrorCode() const override {
+    return make_error_code(object_error::parse_failed);
+  }
+};
+
+char BadFileFormat::ID; // This should be declared in the C++ file.
+
+Error printFormattedFile(StringRef Path) {
+  if (<file format is invalid>)
+    return make_error<BadFileFormat>(Path);
+  // print file contents.
+  return Error::success();
+}
+
+
+

Error values can be implicitly converted to bool: true for error, false for +success, enabling the following idiom:

+
Error mayFail();
+
+Error foo() {
+  if (auto Err = mayFail())
+    return Err;
+  // Success! We can proceed.
+  ...
+
+
+

For functions that can fail but need to return a value the Expected<T> +utility can be used. Values of this type can be constructed with either a +T, or an Error. Expected<T> values are also implicitly convertible to +boolean, but with the opposite convention to Error: true for success, false +for error. If success, the T value can be accessed via the dereference +operator. If failure, the Error value can be extracted using the +takeError() method. Idiomatic usage looks like:

+
Expected<FormattedFile> openFormattedFile(StringRef Path) {
+  // If badly formatted, return an error.
+  if (auto Err = checkFormat(Path))
+    return std::move(Err);
+  // Otherwise return a FormattedFile instance.
+  return FormattedFile(Path);
+}
+
+Error processFormattedFile(StringRef Path) {
+  // Try to open a formatted file
+  if (auto FileOrErr = openFormattedFile(Path)) {
+    // On success, grab a reference to the file and continue.
+    auto &File = *FileOrErr;
+    ...
+  } else
+    // On error, extract the Error value and return it.
+    return FileOrErr.takeError();
+}
+
+
+

If an Expected<T> value is in success mode then the takeError() method +will return a success value. Using this fact, the above function can be +rewritten as:

+
Error processFormattedFile(StringRef Path) {
+  // Try to open a formatted file
+  auto FileOrErr = openFormattedFile(Path);
+  if (auto Err = FileOrErr.takeError())
+    // On error, extract the Error value and return it.
+    return Err;
+  // On success, grab a reference to the file and continue.
+  auto &File = *FileOrErr;
+  ...
+}
+
+
+

This second form is often more readable for functions that involve multiple +Expected<T> values as it limits the indentation required.

+

All Error instances, whether success or failure, must be either checked or +moved from (via std::move or a return) before they are destructed. +Accidentally discarding an unchecked error will cause a program abort at the +point where the unchecked value’s destructor is run, making it easy to identify +and fix violations of this rule.

+

Success values are considered checked once they have been tested (by invoking +the boolean conversion operator):

+
if (auto Err = mayFail(...))
+  return Err; // Failure value - move error to caller.
+
+// Safe to continue: Err was checked.
+
+
+

In contrast, the following code will always cause an abort, even if mayFail +returns a success value:

+
mayFail();
+// Program will always abort here, even if mayFail() returns Success, since
+// the value is not checked.
+
+
+

Failure values are considered checked once a handler for the error type has +been activated:

+
handleErrors(
+  processFormattedFile(...),
+  [](const BadFileFormat &BFF) {
+    report("Unable to process " + BFF.Path + ": bad format");
+  },
+  [](const FileNotFound &FNF) {
+    report("File not found " + FNF.Path);
+  });
+
+
+

The handleErrors function takes an error as its first argument, followed by +a variadic list of “handlers”, each of which must be a callable type (a +function, lambda, or class with a call operator) with one argument. The +handleErrors function will visit each handler in the sequence and check its +argument type against the dynamic type of the error, running the first handler +that matches. This is the same decision process that is used to decide which catch +clause to run for a C++ exception.

+

Since the list of handlers passed to handleErrors may not cover every error +type that can occur, the handleErrors function also returns an Error value +that must be checked or propagated. If the error value that is passed to +handleErrors does not match any of the handlers it will be returned from +handleErrors. Idiomatic use of handleErrors thus looks like:

+
if (auto Err =
+      handleErrors(
+        processFormattedFile(...),
+        [](const BadFileFormat &BFF) {
+          report("Unable to process " + BFF.Path + ": bad format");
+        },
+        [](const FileNotFound &FNF) {
+          report("File not found " + FNF.Path);
+        }))
+  return Err;
+
+
+

In cases where you truly know that the handler list is exhaustive the +handleAllErrors function can be used instead. This is identical to +handleErrors except that it will terminate the program if an unhandled +error is passed in, and can therefore return void. The handleAllErrors +function should generally be avoided: the introduction of a new error type +elsewhere in the program can easily turn a formerly exhaustive list of errors +into a non-exhaustive list, risking unexpected program termination. Where +possible, use handleErrors and propagate unknown errors up the stack instead.

+

For tool code, where errors can be handled by printing an error message then +exiting with an error code, the ExitOnError utility +may be a better choice than handleErrors, as it simplifies control flow when +calling fallible functions.

+

In situations where it is known that a particular call to a fallible function +will always succeed (for example, a call to a function that can only fail on a +subset of inputs with an input that is known to be safe) the +cantFail functions can be used to remove the error type, +simplifying control flow.

+
+
StringError
+

Many kinds of errors have no recovery strategy; the only action that can be +taken is to report them to the user so that the user can attempt to fix the +environment. In this case representing the error as a string makes perfect +sense. LLVM provides the StringError class for this purpose. It takes two +arguments: a string error message, and an equivalent std::error_code for +interoperability. It also provides a createStringError function to simplify +common usage of this class:

+
// These two lines of code are equivalent:
+make_error<StringError>("Bad executable", errc::executable_format_error);
+createStringError(errc::executable_format_error, "Bad executable");
+
+
+

If you’re certain that the error you’re building will never need to be converted +to a std::error_code you can use the inconvertibleErrorCode() function:

+
createStringError(inconvertibleErrorCode(), "Bad executable");
+
+
+

This should be done only after careful consideration. If any attempt is made to +convert this error to a std::error_code it will trigger immediate program +termination. Unless you are certain that your errors will not need +interoperability you should look for an existing std::error_code that you +can convert to, and even (as painful as it is) consider introducing a new one as +a stopgap measure.

+

createStringError can take printf style format specifiers to provide a +formatted message:

+
createStringError(errc::executable_format_error,
+                  "Bad executable: %s", FileName);
+
+
+
+
+
Interoperability with std::error_code and ErrorOr
+

Many existing LLVM APIs use std::error_code and its partner ErrorOr<T> +(which plays the same role as Expected<T>, but wraps a std::error_code +rather than an Error). The infectious nature of error types means that an +attempt to change one of these functions to return Error or Expected<T> +instead often results in an avalanche of changes to callers, callers of callers, +and so on. (The first such attempt, returning an Error from +MachOObjectFile’s constructor, was abandoned after the diff reached 3000 lines, +impacted half a dozen libraries, and was still growing).

+

To solve this problem, the Error/std::error_code interoperability requirement was +introduced. Two pairs of functions allow any Error value to be converted to a +std::error_code, any Expected<T> to be converted to an ErrorOr<T>, and vice +versa:

+
std::error_code errorToErrorCode(Error Err);
+Error errorCodeToError(std::error_code EC);
+
+template <typename T> ErrorOr<T> expectedToErrorOr(Expected<T> TOrErr);
+template <typename T> Expected<T> errorOrToExpected(ErrorOr<T> TOrEC);
+
+
+

Using these APIs it is easy to make surgical patches that update individual +functions from std::error_code to Error, and from ErrorOr<T> to +Expected<T>.

+
+
+
Returning Errors from error handlers
+

Error recovery attempts may themselves fail. For that reason, handleErrors +actually recognises three different forms of handler signature:

+
// Error must be handled, no new errors produced:
+void(UserDefinedError &E);
+
+// Error must be handled, new errors can be produced:
+Error(UserDefinedError &E);
+
+// Original error can be inspected, then re-wrapped and returned (or a new
+// error can be produced):
+Error(std::unique_ptr<UserDefinedError> E);
+
+
+

Any error returned from a handler will be returned from the handleErrors +function so that it can be handled itself, or propagated up the stack.

+
+
+
Using ExitOnError to simplify tool code
+

Library code should never call exit for a recoverable error; however, in tool +code (especially command line tools) this can be a reasonable approach. Calling +exit upon encountering an error dramatically simplifies control flow as the +error no longer needs to be propagated up the stack. This allows code to be +written in straight-line style, as long as each fallible call is wrapped in a +check and call to exit. The ExitOnError class supports this pattern by +providing call operators that inspect Error values, stripping the error away +in the success case and logging to stderr then exiting in the failure case.

+

To use this class, declare a global ExitOnError variable in your program:

+
ExitOnError ExitOnErr;
+
+
+

Calls to fallible functions can then be wrapped with a call to ExitOnErr, +turning them into non-failing calls:

+
Error mayFail();
+Expected<int> mayFail2();
+
+void foo() {
+  ExitOnErr(mayFail());
+  int X = ExitOnErr(mayFail2());
+}
+
+
+

On failure, the error’s log message will be written to stderr, optionally +preceded by a string “banner” that can be set by calling the setBanner method. A +mapping can also be supplied from Error values to exit codes using the +setExitCodeMapper method:

+
int main(int argc, char *argv[]) {
+  ExitOnErr.setBanner(std::string(argv[0]) + " error:");
+  ExitOnErr.setExitCodeMapper(
+    [](const Error &Err) {
+      if (Err.isA<BadFileFormat>())
+        return 2;
+      return 1;
+    });
+
+
+

Use ExitOnError in your tool code where possible as it can greatly improve +readability.

+
+
+
Using cantFail to simplify safe callsites
+

Some functions may only fail for a subset of their inputs, so calls using known +safe inputs can be assumed to succeed.

+

The cantFail functions encapsulate this by wrapping an assertion that their +argument is a success value and, in the case of Expected<T>, unwrapping the +T value:

+
Error onlyFailsForSomeXValues(int X);
+Expected<int> onlyFailsForSomeXValues2(int X);
+
+void foo() {
+  cantFail(onlyFailsForSomeXValues(KnownSafeValue));
+  int Y = cantFail(onlyFailsForSomeXValues2(KnownSafeValue));
+  ...
+}
+
+
+

Like the ExitOnError utility, cantFail simplifies control flow. Their treatment +of error cases is very different, however: where ExitOnError is guaranteed to +terminate the program on an error input, cantFail simply asserts that the result +is success. In debug builds this will result in an assertion failure if an error +is encountered. In release builds the behavior of cantFail for failure values is +undefined. As such, care must be taken in the use of cantFail: clients must be +certain that a cantFail wrapped call really cannot fail with the given +arguments.

+

Use of the cantFail functions should be rare in library code, but they are +likely to be of more use in tool and unit-test code where inputs and/or +mocked-up classes or functions may be known to be safe.

+
+
+
Fallible constructors
+

Some classes require resource acquisition or other complex initialization that +can fail during construction. Unfortunately constructors can’t return errors, +and having clients test objects after they’re constructed to ensure that they’re +valid is error prone as it’s all too easy to forget the test. To work around +this, use the named constructor idiom and return an Expected<T>:

+
class Foo {
+public:
+
+  static Expected<Foo> Create(Resource R1, Resource R2) {
+    Error Err = Error::success();
+    Foo F(R1, R2, Err);
+    if (Err)
+      return std::move(Err);
+    return std::move(F);
+  }
+
+private:
+
+  Foo(Resource R1, Resource R2, Error &Err) {
+    ErrorAsOutParameter EAO(&Err);
+    if (auto Err2 = R1.acquire()) {
+      Err = std::move(Err2);
+      return;
+    }
+    Err = R2.acquire();
+  }
+};
+
+
+

Here, the named constructor passes an Error by reference into the actual +constructor, which the constructor can then use to return errors. The +ErrorAsOutParameter utility sets the Error value’s checked flag on entry +to the constructor so that the error can be assigned to, then resets it on exit +to force the client (the named constructor) to check the error.

+

By using this idiom, clients attempting to construct a Foo receive either a +well-formed Foo or an Error, never an object in an invalid state.

+
+
+
Propagating and consuming errors based on types
+

In some contexts, certain types of error are known to be benign. For example, +when walking an archive, some clients may be happy to skip over badly formatted +object files rather than terminating the walk immediately. Skipping badly +formatted objects could be achieved using an elaborate handler method, but the +Error.h header provides two utilities that make this idiom much cleaner: the +type inspection method, isA, and the consumeError function:

+
Error walkArchive(Archive A) {
+  for (unsigned I = 0; I != A.numMembers(); ++I) {
+    auto ChildOrErr = A.getMember(I);
+    if (auto Err = ChildOrErr.takeError()) {
+      if (Err.isA<BadFileFormat>())
+        consumeError(std::move(Err));
+      else
+        return Err;
+    }
+    auto &Child = *ChildOrErr;
+    // Use Child
+    ...
+  }
+  return Error::success();
+}
+
+
+
+
+
Concatenating Errors with joinErrors
+

In the archive walking example above BadFileFormat errors are simply +consumed and ignored. If the client had wanted to report these errors after +completing the walk over the archive they could use the joinErrors utility:

+
Error walkArchive(Archive A) {
+  Error DeferredErrs = Error::success();
+  for (unsigned I = 0; I != A.numMembers(); ++I) {
+    auto ChildOrErr = A.getMember(I);
+    if (auto Err = ChildOrErr.takeError())
+      if (Err.isA<BadFileFormat>())
+        DeferredErrs = joinErrors(std::move(DeferredErrs), std::move(Err));
+      else
+        return Err;
+    auto &Child = *ChildOrErr;
+    // Use Child
+    ...
+  }
+  return DeferredErrs;
+}
+
+
+

The joinErrors routine builds a special error type called ErrorList, +which holds a list of user defined errors. The handleErrors routine +recognizes this type and will attempt to handle each of the contained errors in +order. If all contained errors can be handled, handleErrors will return +Error::success(), otherwise handleErrors will concatenate the remaining +errors and return the resulting ErrorList.

+
+
+
Building fallible iterators and iterator ranges
+

The archive walking examples above retrieve archive members by index, however +this requires considerable boiler-plate for iteration and error checking. We can +clean this up by using the “fallible iterator” pattern, which supports the +following natural iteration idiom for fallible containers like Archive:

+
Error Err = Error::success();
+for (auto &Child : Ar->children(Err)) {
+  // Use Child - only enter the loop when it's valid
+
+  // Allow early exit from the loop body, since we know that Err is success
+  // when we're inside the loop.
+  if (BailOutOn(Child))
+    return;
+
+  ...
+}
+// Check Err after the loop to ensure it didn't break due to an error.
+if (Err)
+  return Err;
+
+
+

To enable this idiom, iterators over fallible containers are written in a +natural style, with their ++ and -- operators replaced with fallible +Error inc() and Error dec() functions. E.g.:

+
class FallibleChildIterator {
+public:
+  FallibleChildIterator(Archive &A, unsigned ChildIdx);
+  Archive::Child &operator*();
+  friend bool operator==(const ArchiveIterator &LHS,
+                         const ArchiveIterator &RHS);
+
+  // operator++/operator-- replaced with fallible increment / decrement:
+  Error inc() {
+    if (!A.childValid(ChildIdx + 1))
+      return make_error<BadArchiveMember>(...);
+    ++ChildIdx;
+    return Error::success();
+  }
+
+  Error dec() { ... }
+};
+
+
+

Instances of this kind of fallible iterator interface are then wrapped with the +fallible_iterator utility which provides operator++ and operator--, +returning any errors via a reference passed in to the wrapper at construction +time. The fallible_iterator wrapper takes care of (a) jumping to the end of the +range on error, and (b) marking the error as checked whenever an iterator is +compared to end and found to be unequal (in particular: this marks the +error as checked throughout the body of a range-based for loop), enabling early +exit from the loop without redundant error checking.

+

Instances of the fallible iterator interface (e.g. FallibleChildIterator above) +are wrapped using the make_fallible_itr and make_fallible_end +functions. E.g.:

+
class Archive {
+public:
+  using child_iterator = fallible_iterator<FallibleChildIterator>;
+
+  child_iterator child_begin(Error &Err) {
+    return make_fallible_itr(FallibleChildIterator(*this, 0), Err);
+  }
+
+  child_iterator child_end() {
+    return make_fallible_end(FallibleChildIterator(*this, size()));
+  }
+
+  iterator_range<child_iterator> children(Error &Err) {
+    return make_range(child_begin(Err), child_end());
+  }
+};
+
+
+

Using the fallible_iterator utility allows for both natural construction of +fallible iterators (using failing inc and dec operations) and +relatively natural use of C++ iterator/loop idioms.

+

More information on Error and its related utilities can be found in the +Error.h header file.

+
+
+
+
+

Passing functions and other callable objects

+

Sometimes you may want a function to be passed a callback object. In order to +support lambda expressions and other function objects, you should not use the +traditional C approach of taking a function pointer and an opaque cookie:

+
void takeCallback(bool (*Callback)(Function *, void *), void *Cookie);
+
+
+

Instead, use one of the following approaches:

+
+

Function template

+

If you don’t mind putting the definition of your function into a header file, +make it a function template that is templated on the callable type.

+
template<typename Callable>
+void takeCallback(Callable Callback) {
+  Callback(1, 2, 3);
+}
+
+
+
+
+

The function_ref class template

+

The function_ref +(doxygen) class +template represents a reference to a callable object, templated over the type +of the callable. This is a good choice for passing a callback to a function, +if you don’t need to hold onto the callback after the function returns. In this +way, function_ref is to std::function as StringRef is to +std::string.

+

function_ref<Ret(Param1, Param2, ...)> can be implicitly constructed from +any callable object that can be called with arguments of type Param1, +Param2, …, and returns a value that can be converted to type Ret. +For example:

+
void visitBasicBlocks(Function *F, function_ref<bool (BasicBlock*)> Callback) {
+  for (BasicBlock &BB : *F)
+    if (Callback(&BB))
+      return;
+}
+
+
+

can be called using:

+
visitBasicBlocks(F, [&](BasicBlock *BB) {
+  if (process(BB))
+    return isEmpty(BB);
+  return false;
+});
+
+
+

Note that a function_ref object contains pointers to external memory, so it +is not generally safe to store an instance of the class (unless you know that +the external storage will not be freed). If you need this ability, consider +using std::function. function_ref is small enough that it should always +be passed by value.

+
+
+
+

The LLVM_DEBUG() macro and -debug option

+

Often when working on your pass you will put a bunch of debugging printouts and +other code into your pass. After you get it working, you want to remove it, but +you may need it again in the future (to work out new bugs that you run across).

+

Naturally, because of this, you don’t want to delete the debug printouts, but +you don’t want them to always be noisy. A standard compromise is to comment +them out, allowing you to enable them if you need them in the future.

+

The llvm/Support/Debug.h (doxygen) file provides a macro named +LLVM_DEBUG() that is a much nicer solution to this problem. Basically, you can +put arbitrary code into the argument of the LLVM_DEBUG macro, and it is only +executed if ‘opt’ (or any other tool) is run with the ‘-debug’ command +line argument:

+
LLVM_DEBUG(dbgs() << "I am here!\n");
+
+
+

Then you can run your pass like this:

+
$ opt < a.bc > /dev/null -mypass
+<no output>
+$ opt < a.bc > /dev/null -mypass -debug
+I am here!
+
+
+

Using the LLVM_DEBUG() macro instead of a home-brewed solution allows you to not +have to create “yet another” command line option for the debug output for your +pass. Note that LLVM_DEBUG() macros are disabled for non-asserts builds, so they +do not cause a performance impact at all (for the same reason, they should also +not contain side-effects!).

+

One additional nice thing about the LLVM_DEBUG() macro is that you can enable or +disable it directly in gdb. Just use “set DebugFlag=0” or “set +DebugFlag=1” from gdb if the program is running. If the program hasn’t +been started yet, you can always just run it with -debug.

+
+

Fine grained debug info with DEBUG_TYPE and the -debug-only option

+

Sometimes you may find yourself in a situation where enabling -debug just +turns on too much information (such as when working on the code generator). +If you want to enable debug information with more fine-grained control, you +should define the DEBUG_TYPE macro and use the -debug-only option as +follows:

+
#define DEBUG_TYPE "foo"
+LLVM_DEBUG(dbgs() << "'foo' debug type\n");
+#undef  DEBUG_TYPE
+#define DEBUG_TYPE "bar"
+LLVM_DEBUG(dbgs() << "'bar' debug type\n");
+#undef  DEBUG_TYPE
+
+
+

Then you can run your pass like this:

+
$ opt < a.bc > /dev/null -mypass
+<no output>
+$ opt < a.bc > /dev/null -mypass -debug
+'foo' debug type
+'bar' debug type
+$ opt < a.bc > /dev/null -mypass -debug-only=foo
+'foo' debug type
+$ opt < a.bc > /dev/null -mypass -debug-only=bar
+'bar' debug type
+$ opt < a.bc > /dev/null -mypass -debug-only=foo,bar
+'foo' debug type
+'bar' debug type
+
+
+

Of course, in practice, you should only set DEBUG_TYPE at the top of a file, +to specify the debug type for the entire module. Be careful that you only do +this after including Debug.h and not around any #include of headers. Also, you +should use names more meaningful than “foo” and “bar”, because there is no +system in place to ensure that names do not conflict. If two different modules +use the same string, they will both be turned on when the name is specified. +This allows, for example, all debug information for instruction scheduling to be +enabled with -debug-only=InstrSched, even if the source lives in multiple +files. The name must not include a comma (,) as that is used to separate the +arguments of the -debug-only option.

+

For performance reasons, -debug-only is not available in optimized builds +(--enable-optimized) of LLVM.

+

The DEBUG_WITH_TYPE macro is also available for situations where you would +like to set DEBUG_TYPE, but only for one specific DEBUG statement. It +takes an additional first parameter, which is the type to use. For example, the +preceding example could be written as:

+
DEBUG_WITH_TYPE("foo", dbgs() << "'foo' debug type\n");
+DEBUG_WITH_TYPE("bar", dbgs() << "'bar' debug type\n");
+
+
+
+
+
+

The Statistic class & -stats option

+

The llvm/ADT/Statistic.h (doxygen) file provides a class +named Statistic that is used as a unified way to keep track of what the LLVM +compiler is doing and how effective various optimizations are. It is useful to +see what optimizations are contributing to making a particular program run +faster.

+

Often you may run your pass on some big program, and you’re interested to see +how many times it makes a certain transformation. Although you can do this with +hand inspection, or some ad-hoc method, this is a real pain and not very useful +for big programs. Using the Statistic class makes it very easy to keep +track of this information, and the calculated information is presented in a +uniform manner with the rest of the passes being executed.

+

There are many examples of Statistic uses, but the basics of using it are as +follows:

+

Define your statistic like this:

+
#define DEBUG_TYPE "mypassname"   // This goes after any #includes.
+STATISTIC(NumXForms, "The # of times I did stuff");
+
+
+

The STATISTIC macro defines a static variable, whose name is specified by +the first argument. The pass name is taken from the DEBUG_TYPE macro, and +the description is taken from the second argument. The variable defined +(“NumXForms” in this case) acts like an unsigned integer.

+

Whenever you make a transformation, bump the counter:

+
++NumXForms;   // I did stuff!
+
+
+

That’s all you have to do. To get ‘opt’ to print out the statistics +gathered, use the ‘-stats’ option:

+
$ opt -stats -mypassname < program.bc > /dev/null
+... statistics output ...
+
+
+

Note that in order to use the ‘-stats’ option, LLVM must be +compiled with assertions enabled.

+

When running opt on a C file from the SPEC benchmark suite, it gives a +report that looks like this:

+
  7646 bitcodewriter   - Number of normal instructions
+   725 bitcodewriter   - Number of oversized instructions
+129996 bitcodewriter   - Number of bitcode bytes written
+  2817 raise           - Number of insts DCEd or constprop'd
+  3213 raise           - Number of cast-of-self removed
+  5046 raise           - Number of expression trees converted
+    75 raise           - Number of other getelementptr's formed
+   138 raise           - Number of load/store peepholes
+    42 deadtypeelim    - Number of unused typenames removed from symtab
+   392 funcresolve     - Number of varargs functions resolved
+    27 globaldce       - Number of global variables removed
+     2 adce            - Number of basic blocks removed
+   134 cee             - Number of branches revectored
+    49 cee             - Number of setcc instruction eliminated
+   532 gcse            - Number of loads removed
+  2919 gcse            - Number of instructions removed
+    86 indvars         - Number of canonical indvars added
+    87 indvars         - Number of aux indvars removed
+    25 instcombine     - Number of dead inst eliminate
+   434 instcombine     - Number of insts combined
+   248 licm            - Number of load insts hoisted
+  1298 licm            - Number of insts hoisted to a loop pre-header
+     3 licm            - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
+    75 mem2reg         - Number of alloca's promoted
+  1444 cfgsimplify     - Number of blocks simplified
+
+
+

Obviously, with so many optimizations, having a unified framework for this stuff +is very nice. Making your pass fit well into the framework makes it more +maintainable and useful.

+
+
+

Adding debug counters to aid in debugging your code

+

Sometimes, when writing new passes, or trying to track down bugs, it +is useful to be able to control whether certain things in your pass +happen or not. For example, there are times the minimization tooling +can only easily give you large testcases. You would like to narrow +your bug down to a specific transformation happening or not happening, +automatically, using bisection. This is where debug counters help. +They provide a framework for making parts of your code only execute a +certain number of times.

+

The llvm/Support/DebugCounter.h (doxygen) file +provides a class named DebugCounter that can be used to create +command line counter options that control execution of parts of your code.

+

Define your DebugCounter like this:

+
DEBUG_COUNTER(DeleteAnInstruction, "passname-delete-instruction",
+              "Controls which instructions get deleted");
+
+
+

The DEBUG_COUNTER macro defines a static variable, whose name +is specified by the first argument. The name of the counter +(which is used on the command line) is specified by the second +argument, and the description used in the help is specified by the +third argument.

+

Whatever code you want to control, use DebugCounter::shouldExecute to control it.

+
if (DebugCounter::shouldExecute(DeleteAnInstruction))
+  I->eraseFromParent();
+
+
+

That’s all you have to do. Now, using opt, you can control when this code triggers using +the ‘--debug-counter’ option. There are two counters provided, skip and count. +skip is the number of times to skip execution of the codepath. count is the number +of times, once we are done skipping, to execute the codepath.

+
$ opt --debug-counter=passname-delete-instruction-skip=1,passname-delete-instruction-count=2 -passname
+
+
+

This will skip the above code the first time we hit it, then execute it twice, then skip the rest of the executions.

+

So if executed on the following code:

+
%1 = add i32 %a, %b
+%2 = add i32 %a, %b
+%3 = add i32 %a, %b
+%4 = add i32 %a, %b
+
+
+

It would delete instructions %2 and %3.

+

A utility is provided in utils/bisect-skip-count to binary search +skip and count arguments. It can be used to automatically minimize the +skip and count for a debug-counter variable.

+
+
+

Viewing graphs while debugging code

+

Several of the important data structures in LLVM are graphs: for example CFGs +made out of LLVM BasicBlocks, CFGs made out of LLVM +MachineBasicBlocks, and Instruction Selection +DAGs. In many cases, while debugging various parts of the +compiler, it is nice to instantly visualize these graphs.

+

LLVM provides several callbacks that are available in a debug build to do +exactly that. If you call the Function::viewCFG() method, for example, the +current LLVM tool will pop up a window containing the CFG for the function where +each basic block is a node in the graph, and each node contains the instructions +in the block. Similarly, there also exists Function::viewCFGOnly() (does +not include the instructions), the MachineFunction::viewCFG() and +MachineFunction::viewCFGOnly(), and the SelectionDAG::viewGraph() +methods. Within GDB, for example, you can usually use something like call +DAG.viewGraph() to pop up a window. Alternatively, you can sprinkle calls to +these functions in your code in places you want to debug.

+

Getting this to work requires a small amount of setup. On Unix systems +with X11, install the graphviz toolkit, and make +sure ‘dot’ and ‘gv’ are in your path. If you are running on macOS, download +and install the macOS Graphviz program and add +/Applications/Graphviz.app/Contents/MacOS/ (or wherever you install it) to +your path. The programs need not be present when configuring, building or +running LLVM and can simply be installed when needed during an active debug +session.

+

SelectionDAG has been extended to make it easier to locate interesting +nodes in large complex graphs. From gdb, if you call DAG.setGraphColor(node, +"color"), then the next call DAG.viewGraph() would highlight the node in +the specified color (choices of colors can be found at colors.) More complex node attributes +can be provided with call DAG.setGraphAttrs(node, "attributes") (choices can +be found at Graph attributes.) +If you want to restart and clear all the current graph attributes, then you can +call DAG.clearGraphAttrs().

+

Note that graph visualization features are compiled out of Release builds to +reduce file size. This means that you need a Debug+Asserts or Release+Asserts +build to use these features.

+
+
+
+

Picking the Right Data Structure for a Task

+

LLVM has a plethora of data structures in the llvm/ADT/ directory, and we +commonly use STL data structures. This section describes the trade-offs you +should consider when you pick one.

+

The first step is a choose your own adventure: do you want a sequential +container, a set-like container, or a map-like container? The most important +thing when choosing a container is the algorithmic properties of how you plan to +access the container. Based on that, you should use:

+
    +
  • a map-like container if you need efficient look-up of a +value based on another value. Map-like containers also support efficient +queries for containment (whether a key is in the map). Map-like containers +generally do not support efficient reverse mapping (values to keys). If you +need that, use two maps. Some map-like containers also support efficient +iteration through the keys in sorted order. Map-like containers are the most +expensive sort; only use them if you need one of these capabilities.

  • +
  • a set-like container if you need to put a bunch of stuff into +a container that automatically eliminates duplicates. Some set-like +containers support efficient iteration through the elements in sorted order. +Set-like containers are more expensive than sequential containers.

  • +
  • a sequential container provides the most efficient way +to add elements and keeps track of the order they are added to the collection. +They permit duplicates and support efficient iteration, but do not support +efficient look-up based on a key.

  • +
  • a string container is a specialized sequential container or +reference structure that is used for character or byte arrays.

  • +
  • a bit container provides an efficient way to store and +perform set operations on sets of numeric id’s, while automatically +eliminating duplicates. Bit containers require a maximum of 1 bit for each +identifier you want to store.

  • +
+

Once the proper category of container is determined, you can fine tune the +memory use, constant factors, and cache behaviors of access by intelligently +picking a member of the category. Note that constant factors and cache behavior +can be a big deal. If you have a vector that usually only contains a few +elements (but could contain many), for example, it’s much better to use +SmallVector than vector. Doing so +avoids (relatively) expensive malloc/free calls, which dwarf the cost of adding +the elements to the container.

+
+

Sequential Containers (std::vector, std::list, etc)

+

There are a variety of sequential containers available for you, based on your +needs. Pick the first in this section that will do what you want.

+
+

llvm/ADT/ArrayRef.h

+

The llvm::ArrayRef class is the preferred class to use in an interface that +accepts a sequential list of elements in memory and just reads from them. By +taking an ArrayRef, the API can be passed a fixed size array, an +std::vector, an llvm::SmallVector and anything else that is contiguous +in memory.

+
+
+

Fixed Size Arrays

+

Fixed size arrays are very simple and very fast. They are good if you know +exactly how many elements you have, or you have a (low) upper bound on how many +you have.

+
+
+

Heap Allocated Arrays

+

Heap allocated arrays (new[] + delete[]) are also simple. They are good +if the number of elements is variable, if you know how many elements you will +need before the array is allocated, and if the array is usually large (if not, +consider a SmallVector). The cost of a heap allocated +array is the cost of the new/delete (aka malloc/free). Also note that if you +are allocating an array of a type with a constructor, the constructor and +destructors will be run for every element in the array (re-sizable vectors only +construct those elements actually used).

+
+
+

llvm/ADT/TinyPtrVector.h

+

TinyPtrVector<Type> is a highly specialized collection class that is +optimized to avoid allocation in the case when a vector has zero or one +elements. It has two major restrictions: 1) it can only hold values of pointer +type, and 2) it cannot hold a null pointer.

+

Since this container is highly specialized, it is rarely used.

+
+
+

llvm/ADT/SmallVector.h

+

SmallVector<Type, N> is a simple class that looks and smells just like +vector<Type>: it supports efficient iteration, lays out elements in memory +order (so you can do pointer arithmetic between elements), supports efficient +push_back/pop_back operations, supports efficient random access to its elements, +etc.

+

The main advantage of SmallVector is that it allocates space for some number of +elements (N) in the object itself. Because of this, if the SmallVector is +dynamically smaller than N, no malloc is performed. This can be a big win in +cases where the malloc/free call is far more expensive than the code that +fiddles around with the elements.

+

This is good for vectors that are “usually small” (e.g. the number of +predecessors/successors of a block is usually less than 8). On the other hand, +this makes the size of the SmallVector itself large, so you don’t want to +allocate lots of them (doing so will waste a lot of space). As such, +SmallVectors are most useful when on the stack.

+

In the absence of a well-motivated choice for the number of +inlined elements N, it is recommended to use SmallVector<T> (that is, +omitting the N). This will choose a default number of +inlined elements reasonable for allocation on the stack (for example, trying +to keep sizeof(SmallVector<T>) around 64 bytes).

+

SmallVector also provides a nice portable and efficient replacement for +alloca.

+

SmallVector has grown a few other minor advantages over std::vector, causing +SmallVector<Type, 0> to be preferred over std::vector<Type>.

+
    +
  1. std::vector is exception-safe, and some implementations have pessimizations +that copy elements when SmallVector would move them.

  2. +
  3. SmallVector understands std::is_trivially_copyable<Type> and uses realloc aggressively.

  4. +
  5. Many LLVM APIs take a SmallVectorImpl as an out parameter (see the note +below).

  6. +
  7. SmallVector with N equal to 0 is smaller than std::vector on 64-bit +platforms, since it uses unsigned (instead of void*) for its size +and capacity.

  8. +
+
+

Note

+

Prefer to use ArrayRef<T> or SmallVectorImpl<T> as a parameter type.

+

It’s rarely appropriate to use SmallVector<T, N> as a parameter type. +If an API only reads from the vector, it should use ArrayRef. Even if an API updates the vector the “small size” is +unlikely to be relevant; such an API should use the SmallVectorImpl<T> +class, which is the “vector header” (and methods) without the elements +allocated after it. Note that SmallVector<T, N> inherits from +SmallVectorImpl<T> so the conversion is implicit and costs nothing. E.g.

+
// DISCOURAGED: Clients cannot pass e.g. raw arrays.
+hardcodedContiguousStorage(const SmallVectorImpl<Foo> &In);
+// ENCOURAGED: Clients can pass any contiguous storage of Foo.
+allowsAnyContiguousStorage(ArrayRef<Foo> In);
+
+void someFunc1() {
+  Foo Vec[] = { /* ... */ };
+  hardcodedContiguousStorage(Vec); // Error.
+  allowsAnyContiguousStorage(Vec); // Works.
+}
+
+// DISCOURAGED: Clients cannot pass e.g. SmallVector<Foo, 8>.
+hardcodedSmallSize(SmallVector<Foo, 2> &Out);
+// ENCOURAGED: Clients can pass any SmallVector<Foo, N>.
+allowsAnySmallSize(SmallVectorImpl<Foo> &Out);
+
+void someFunc2() {
+  SmallVector<Foo, 8> Vec;
+  hardcodedSmallSize(Vec); // Error.
+  allowsAnySmallSize(Vec); // Works.
+}
+
+
+

Even though it has “Impl” in the name, SmallVectorImpl is widely used +and is no longer “private to the implementation”. A name like +SmallVectorHeader might be more appropriate.

+
+
+
+

<vector>

+

std::vector<T> is well loved and respected. However, SmallVector<T, 0> +is often a better option due to the advantages listed above. std::vector is +still useful when you need to store more than UINT32_MAX elements or when +interfacing with code that expects vectors :).

+

One worthwhile note about std::vector: avoid code like this:

+
for ( ... ) {
+   std::vector<foo> V;
+   // make use of V.
+}
+
+
+

Instead, write this as:

+
std::vector<foo> V;
+for ( ... ) {
+   // make use of V.
+   V.clear();
+}
+
+
+

Doing so will save (at least) one heap allocation and free per iteration of the +loop.

+
+
+

<deque>

+

std::deque is, in some senses, a generalized version of std::vector. +Like std::vector, it provides constant time random access and other similar +properties, but it also provides efficient access to the front of the list. It +does not guarantee contiguity of elements within memory.

+

In exchange for this extra flexibility, std::deque has significantly higher +constant factor costs than std::vector. If possible, use std::vector or +something cheaper.

+
+
+

<list>

+

std::list is an extremely inefficient class that is rarely useful. It +performs a heap allocation for every element inserted into it, thus having an +extremely high constant factor, particularly for small data types. +std::list also only supports bidirectional iteration, not random access +iteration.

+

In exchange for this high cost, std::list supports efficient access to both ends +of the list (like std::deque, but unlike std::vector or +SmallVector). In addition, the iterator invalidation characteristics of +std::list are stronger than those of a vector class: inserting or removing an +element into the list does not invalidate iterators or pointers to other elements +in the list.

+
+
+

llvm/ADT/ilist.h

+

ilist<T> implements an ‘intrusive’ doubly-linked list. It is intrusive, +because it requires the element to store and provide access to the prev/next +pointers for the list.

+

ilist has the same drawbacks as std::list, and additionally requires an +ilist_traits implementation for the element type, but it provides some novel +characteristics. In particular, it can efficiently store polymorphic objects, +the traits class is informed when an element is inserted or removed from the +list, and ilists are guaranteed to support a constant-time splice +operation.

+

These properties are exactly what we want for things like Instructions and +basic blocks, which is why these are implemented with ilists.

+

Related classes of interest are explained in the following subsections:

+ +
+
+

llvm/ADT/PackedVector.h

+

Useful for storing a vector of values using only a small number of bits for each +value. Apart from the standard operations of a vector-like container, it can +also perform an ‘or’ set operation.

+

For example:

+
enum State {
+    None = 0x0,
+    FirstCondition = 0x1,
+    SecondCondition = 0x2,
+    Both = 0x3
+};
+
+State get() {
+    PackedVector<State, 2> Vec1;
+    Vec1.push_back(FirstCondition);
+
+    PackedVector<State, 2> Vec2;
+    Vec2.push_back(SecondCondition);
+
+    Vec1 |= Vec2;
+    return Vec1[0]; // returns 'Both'.
+}
+
+
+
+
+

ilist_traits

+

ilist_traits<T> is ilist<T>’s customization mechanism. iplist<T> +(and consequently ilist<T>) publicly derive from this traits class.

+
+
+

iplist

+

iplist<T> is ilist<T>’s base and as such supports a slightly narrower +interface. Notably, inserters from T& are absent.

+

ilist_traits<T> is a public base of this class and can be used for a wide +variety of customizations.

+
+
+

llvm/ADT/ilist_node.h

+

ilist_node<T> implements the forward and backward links that are expected +by the ilist<T> (and analogous containers) in the default manner.

+

ilist_node<T>s are meant to be embedded in the node type T, usually +T publicly derives from ilist_node<T>.

+
+
+

Sentinels

+

ilists have another specialty that must be considered. To be a good +citizen in the C++ ecosystem, it needs to support the standard container +operations, such as begin and end iterators, etc. Also, the +operator-- must work correctly on the end iterator in the case of +non-empty ilists.

+

The only sensible solution to this problem is to allocate a so-called sentinel +along with the intrusive list, which serves as the end iterator, providing +the back-link to the last element. However conforming to the C++ convention it +is illegal to operator++ beyond the sentinel and it also must not be +dereferenced.

+

These constraints allow for some implementation freedom to the ilist how to +allocate and store the sentinel. The corresponding policy is dictated by +ilist_traits<T>. By default a T gets heap-allocated whenever the need +for a sentinel arises.

+

While the default policy is sufficient in most cases, it may break down when +T does not provide a default constructor. Also, in the case of many +instances of ilists, the memory overhead of the associated sentinels is +wasted. To alleviate the situation with numerous and voluminous +T-sentinels, sometimes a trick is employed, leading to ghostly sentinels.

+

Ghostly sentinels are obtained by specially-crafted ilist_traits<T> which +superpose the sentinel with the ilist instance in memory. Pointer +arithmetic is used to obtain the sentinel, which is relative to the ilist’s +this pointer. The ilist is augmented by an extra pointer, which serves +as the back-link of the sentinel. This is the only field in the ghostly +sentinel which can be legally accessed.

+
+
+

Other Sequential Container options

+

Other STL containers are available, such as std::string.

+

There are also various STL adapter classes such as std::queue, +std::priority_queue, std::stack, etc. These provide simplified access +to an underlying container but don’t affect the cost of the container itself.

+
+
+
+

String-like containers

+

There are a variety of ways to pass around and use strings in C and C++, and +LLVM adds a few new options to choose from. Pick the first option on this list +that will do what you need; they are ordered according to their relative cost.

+

Note that it is generally preferred to not pass strings around as const +char*’s. These have a number of problems, including the fact that they +cannot represent embedded nul (“\0”) characters, and do not have a length +available efficiently. The general replacement for ‘const char*’ is +StringRef.

+

For more information on choosing string containers for APIs, please see +Passing Strings.

+
+

llvm/ADT/StringRef.h

+

The StringRef class is a simple value class that contains a pointer to a +character and a length, and is quite related to the ArrayRef class (but specialized for arrays of characters). Because +StringRef carries a length with it, it safely handles strings with embedded nul +characters in it, getting the length does not require a strlen call, and it even +has very convenient APIs for slicing and dicing the character range that it +represents.

+

StringRef is ideal for passing simple strings around that are known to be live, +either because they are C string literals, std::string, a C array, or a +SmallVector. Each of these cases has an efficient implicit conversion to +StringRef, which doesn’t result in a dynamic strlen being executed.

+

StringRef has a few major limitations which make more powerful string containers +useful:

+
    +
  1. You cannot directly convert a StringRef to a ‘const char*’ because there is +no way to add a trailing nul (unlike the .c_str() method on various stronger +classes).

  2. +
  3. StringRef doesn’t own or keep alive the underlying string bytes. +As such it can easily lead to dangling pointers, and is not suitable for +embedding in datastructures in most cases (instead, use an std::string or +something like that).

  4. +
  5. For the same reason, StringRef cannot be used as the return value of a +method if the method “computes” the result string. Instead, use std::string.

  6. +
  7. StringRef’s do not allow you to mutate the pointed-to string bytes and it +doesn’t allow you to insert or remove bytes from the range. For editing +operations like this, it interoperates with the Twine +class.

  8. +
+

Because of its strengths and limitations, it is very common for a function to +take a StringRef and for a method on an object to return a StringRef that points +into some string that it owns.

+
+
+

llvm/ADT/Twine.h

+

The Twine class is used as an intermediary datatype for APIs that want to take a +string that can be constructed inline with a series of concatenations. Twine +works by forming recursive instances of the Twine datatype (a simple value +object) on the stack as temporary objects, linking them together into a tree +which is then linearized when the Twine is consumed. Twine is only safe to use +as the argument to a function, and should always be a const reference, e.g.:

+
void foo(const Twine &T);
+...
+StringRef X = ...
+unsigned i = ...
+foo(X + "." + Twine(i));
+
+
+

This example forms a string like “blarg.42” by concatenating the values +together, and does not form intermediate strings containing “blarg” or “blarg.”.

+

Because Twine is constructed with temporary objects on the stack, and because +these instances are destroyed at the end of the current statement, it is an +inherently dangerous API. For example, this simple variant contains undefined +behavior and will probably crash:

+
void foo(const Twine &T);
+...
+StringRef X = ...
+unsigned i = ...
+const Twine &Tmp = X + "." + Twine(i);
+foo(Tmp);
+
+
+

… because the temporaries are destroyed before the call. That said, Twine’s +are much more efficient than intermediate std::string temporaries, and they work +really well with StringRef. Just be aware of their limitations.

+
+
+

llvm/ADT/SmallString.h

+

SmallString is a subclass of SmallVector that adds some +convenience APIs like += that takes StringRef’s. SmallString avoids allocating +memory in the case when the preallocated space is enough to hold its data, and +it falls back to general heap allocation when required. Since it owns its data, +it is very safe to use and supports full mutation of the string.

+

Like SmallVector’s, the big downside to SmallString is their sizeof. While they +are optimized for small strings, they themselves are not particularly small. +This means that they work great for temporary scratch buffers on the stack, but +should not generally be put into the heap: it is very rare to see a SmallString +as the member of a frequently-allocated heap data structure or returned +by-value.

+
+
+

std::string

+

The standard C++ std::string class is a very general class that (like +SmallString) owns its underlying data. sizeof(std::string) is very reasonable +so it can be embedded into heap data structures and returned by-value. On the +other hand, std::string is highly inefficient for inline editing (e.g. +concatenating a bunch of stuff together) and because it is provided by the +standard library, its performance characteristics depend a lot on the host +standard library (e.g. libc++ and MSVC provide a highly optimized string class, +GCC contains a really slow implementation).

+

The major disadvantage of std::string is that almost every operation that makes +them larger can allocate memory, which is slow. As such, it is better to use +SmallVector or Twine as a scratch buffer, but then use std::string to persist +the result.

+
+
+
+

Set-Like Containers (std::set, SmallSet, SetVector, etc)

+

Set-like containers are useful when you need to canonicalize multiple values +into a single representation. There are several different choices for how to do +this, providing various trade-offs.

+
+

A sorted ‘vector’

+

If you intend to insert a lot of elements, then do a lot of queries, a great +approach is to use an std::vector (or other sequential container) with +std::sort+std::unique to remove duplicates. This approach works really well if +your usage pattern has these two distinct phases (insert then query), and can be +coupled with a good choice of sequential container.

+

This combination provides several nice properties: the result data is +contiguous in memory (good for cache locality), has few allocations, is easy to +address (iterators in the final vector are just indices or pointers), and can be +efficiently queried with a standard binary search (e.g. +std::lower_bound; if you want the whole range of elements comparing +equal, use std::equal_range).

+
+
+

llvm/ADT/SmallSet.h

+

If you have a set-like data structure that is usually small and whose elements +are reasonably small, a SmallSet<Type, N> is a good choice. This set has +space for N elements in place (thus, if the set is dynamically smaller than N, +no malloc traffic is required) and accesses them with a simple linear search. +When the set grows beyond N elements, it allocates a more expensive +representation that guarantees efficient access (for most types, it falls back +to std::set, but for pointers it uses something far better, +SmallPtrSet).

+

The magic of this class is that it handles small sets extremely efficiently, but +gracefully handles extremely large sets without loss of efficiency.

+
+
+

llvm/ADT/SmallPtrSet.h

+

SmallPtrSet has all the advantages of SmallSet (and a SmallSet of +pointers is transparently implemented with a SmallPtrSet). If more than N +insertions are performed, a single quadratically probed hash table is allocated +and grows as needed, providing extremely efficient access (constant time +insertion/deleting/queries with low constant factors) and is very stingy with +malloc traffic.

+

Note that, unlike std::set, the iterators of SmallPtrSet +are invalidated whenever an insertion occurs. Also, the values visited by the +iterators are not visited in sorted order.

+
+
+

llvm/ADT/StringSet.h

+

StringSet is a thin wrapper around StringMap<char>, +and it allows efficient storage and retrieval of unique strings.

+

Functionally analogous to SmallSet<StringRef>, StringSet also supports +iteration. (The iterator dereferences to a StringMapEntry<char>, so you +need to call i->getKey() to access the item of the StringSet.) On the +other hand, StringSet doesn’t support range-insertion and +copy-construction, which SmallSet and SmallPtrSet do support.

+
+
+

llvm/ADT/DenseSet.h

+

DenseSet is a simple quadratically probed hash table. It excels at supporting +small values: it uses a single allocation to hold all of the pairs that are +currently inserted in the set. DenseSet is a great way to unique small values +that are not simple pointers (use SmallPtrSet for +pointers). Note that DenseSet has the same requirements for the value type that +DenseMap has.

+
+
+

llvm/ADT/SparseSet.h

+

SparseSet holds a small number of objects identified by unsigned keys of +moderate size. It uses a lot of memory, but provides operations that are almost +as fast as a vector. Typical keys are physical registers, virtual registers, or +numbered basic blocks.

+

SparseSet is useful for algorithms that need very fast clear/find/insert/erase +and fast iteration over small sets. It is not intended for building composite +data structures.

+
+
+

llvm/ADT/SparseMultiSet.h

+

SparseMultiSet adds multiset behavior to SparseSet, while retaining SparseSet’s +desirable attributes. Like SparseSet, it typically uses a lot of memory, but +provides operations that are almost as fast as a vector. Typical keys are +physical registers, virtual registers, or numbered basic blocks.

+

SparseMultiSet is useful for algorithms that need very fast +clear/find/insert/erase of the entire collection, and iteration over sets of +elements sharing a key. It is often a more efficient choice than using composite +data structures (e.g. vector-of-vectors, map-of-vectors). It is not intended for +building composite data structures.

+
+
+

llvm/ADT/FoldingSet.h

+

FoldingSet is an aggregate class that is really good at uniquing +expensive-to-create or polymorphic objects. It is a combination of a chained +hash table with intrusive links (uniqued objects are required to inherit from +FoldingSetNode) that uses SmallVector as part of its ID +process.

+

Consider a case where you want to implement a “getOrCreateFoo” method for a +complex object (for example, a node in the code generator). The client has a +description of what it wants to generate (it knows the opcode and all the +operands), but we don’t want to ‘new’ a node, then try inserting it into a set +only to find out it already exists, at which point we would have to delete it +and return the node that already exists.

+

To support this style of client, FoldingSet performs a query with a +FoldingSetNodeID (which wraps SmallVector) that can be used to describe the +element that we want to query for. The query either returns the element +matching the ID or it returns an opaque ID that indicates where insertion should +take place. Construction of the ID usually does not require heap traffic.

+

Because FoldingSet uses intrusive links, it can support polymorphic objects in +the set (for example, you can have SDNode instances mixed with LoadSDNodes). +Because the elements are individually allocated, pointers to the elements are +stable: inserting or removing elements does not invalidate any pointers to other +elements.

+
+
+

<set>

+

std::set is a reasonable all-around set class, which is decent at many +things but great at nothing. std::set allocates memory for each element +inserted (thus it is very malloc intensive) and typically stores three pointers +per element in the set (thus adding a large amount of per-element space +overhead). It offers guaranteed log(n) performance, which is not particularly +fast from a complexity standpoint (particularly if the elements of the set are +expensive to compare, like strings), and has extremely high constant factors for +lookup, insertion and removal.

+

The advantages of std::set are that its iterators are stable (deleting or +inserting an element from the set does not affect iterators or pointers to other +elements) and that iteration over the set is guaranteed to be in sorted order. +If the elements in the set are large, then the relative overhead of the pointers +and malloc traffic is not a big deal, but if the elements of the set are small, +std::set is almost never a good choice.

+
+
+

llvm/ADT/SetVector.h

+

LLVM’s SetVector<Type> is an adapter class that combines your choice of a +set-like container along with a Sequential Container. The +important property that this provides is efficient insertion with uniquing +(duplicate elements are ignored) with iteration support. It implements this by +inserting elements into both a set-like container and the sequential container, +using the set-like container for uniquing and the sequential container for +iteration.

+

The difference between SetVector and other sets is that the order of iteration +is guaranteed to match the order of insertion into the SetVector. This property +is really important for things like sets of pointers. Because pointer values +are non-deterministic (e.g. vary across runs of the program on different +machines), iterating over the pointers in the set will not be in a well-defined +order.

+

The drawback of SetVector is that it requires twice as much space as a normal +set and has the sum of constant factors from the set-like container and the +sequential container that it uses. Use it only if you need to iterate over +the elements in a deterministic order. SetVector is also expensive to delete +elements out of (linear time), unless you use its “pop_back” method, which is +faster.

+

SetVector is an adapter class that defaults to using std::vector and a +size 16 SmallSet for the underlying containers, so it is quite expensive. +However, "llvm/ADT/SetVector.h" also provides a SmallSetVector class, +which defaults to using a SmallVector and SmallSet of a specified size. +If you use this, and if your sets are dynamically smaller than N, you will +save a lot of heap traffic.

+
+
+

llvm/ADT/UniqueVector.h

+

UniqueVector is similar to SetVector but it retains a +unique ID for each element inserted into the set. It internally contains a map +and a vector, and it assigns a unique ID for each value inserted into the set.

+

UniqueVector is very expensive: its cost is the sum of the cost of maintaining +both the map and vector, it has high complexity, high constant factors, and +produces a lot of malloc traffic. It should be avoided.

+
+
+

llvm/ADT/ImmutableSet.h

+

ImmutableSet is an immutable (functional) set implementation based on an AVL +tree. Adding or removing elements is done through a Factory object and results +in the creation of a new ImmutableSet object. If an ImmutableSet already exists +with the given contents, then the existing one is returned; equality is compared +with a FoldingSetNodeID. The time and space complexity of add or remove +operations is logarithmic in the size of the original set.

+

There is no method for returning an element of the set; you can only check for +membership.

+
+
+

Other Set-Like Container Options

+

The STL provides several other options, such as std::multiset and the various +“hash_set” like containers (whether from C++ TR1 or from the SGI library). We +never use hash_set and unordered_set because they are generally very expensive +(each insertion requires a malloc) and very non-portable.

+

std::multiset is useful if you’re not interested in elimination of duplicates, +but has all the drawbacks of std::set. A sorted vector +(where you don’t delete duplicate entries) or some other approach is almost +always better.

+
+
+
+

Map-Like Containers (std::map, DenseMap, etc)

+

Map-like containers are useful when you want to associate data to a key. As +usual, there are a lot of different ways to do this. :)

+
+

A sorted ‘vector’

+

If your usage pattern follows a strict insert-then-query approach, you can +trivially use the same approach as sorted vectors for set-like containers. The only difference is that your query function (which +uses std::lower_bound to get efficient log(n) lookup) should only compare the +key, not both the key and value. This yields the same advantages as sorted +vectors for sets.

+
+
+

llvm/ADT/StringMap.h

+

Strings are commonly used as keys in maps, and they are difficult to support +efficiently: they are variable length, inefficient to hash and compare when +long, expensive to copy, etc. StringMap is a specialized container designed to +cope with these issues. It supports mapping an arbitrary range of bytes to an +arbitrary other object.

+

The StringMap implementation uses a quadratically-probed hash table, where the +buckets store a pointer to the heap allocated entries (and some other stuff). +The entries in the map must be heap allocated because the strings are variable +length. The string data (key) and the element object (value) are stored in the +same allocation with the string data immediately after the element object. +This container guarantees the “(char*)(&Value+1)” points to the key string +for a value.

+

The StringMap is very fast for several reasons: quadratic probing is very cache +efficient for lookups, the hash value of strings in buckets is not recomputed +when looking up an element, StringMap rarely has to touch the memory for +unrelated objects when looking up a value (even when hash collisions happen), +hash table growth does not recompute the hash values for strings already in the +table, and each pair in the map is stored in a single allocation (the string data +is stored in the same allocation as the Value of a pair).

+

StringMap also provides query methods that take byte ranges, so it only ever +copies a string if a value is inserted into the table.

+

StringMap iteration order, however, is not guaranteed to be deterministic, so +any uses which require that should instead use a std::map.

+
+
+

llvm/ADT/IndexedMap.h

+

IndexedMap is a specialized container for mapping small dense integers (or +values that can be mapped to small dense integers) to some other type. It is +internally implemented as a vector with a mapping function that maps the keys +to the dense integer range.

+

This is useful for cases like virtual registers in the LLVM code generator: they +have a dense mapping that is offset by a compile-time constant (the first +virtual register ID).

+
+
+

llvm/ADT/DenseMap.h

+

DenseMap is a simple quadratically probed hash table. It excels at supporting +small keys and values: it uses a single allocation to hold all of the pairs +that are currently inserted in the map. DenseMap is a great way to map +pointers to pointers, or map other small types to each other.

+

There are several aspects of DenseMap that you should be aware of, however. +The iterators in a DenseMap are invalidated whenever an insertion occurs, +unlike map. Also, because DenseMap allocates space for a large number of +key/value pairs (it starts with 64 by default), it will waste a lot of space if +your keys or values are large. Finally, you must implement a partial +specialization of DenseMapInfo for the key that you want, if it isn’t already +supported. This is required to tell DenseMap about two special marker values +(which can never be inserted into the map) that it needs internally.

+

DenseMap’s find_as() method supports lookup operations using an alternate key +type. This is useful in cases where the normal key type is expensive to +construct, but cheap to compare against. The DenseMapInfo is responsible for +defining the appropriate comparison and hashing methods for each alternate key +type used.

+
+
+

llvm/IR/ValueMap.h

+

ValueMap is a wrapper around a DenseMap mapping +Value*s (or subclasses) to another type. When a Value is deleted or +RAUW’ed, ValueMap will update itself so the new version of the key is mapped to +the same value, just as if the key were a WeakVH. You can configure exactly how +this happens, and what else happens on these two events, by passing a Config +parameter to the ValueMap template.

+
+
+

llvm/ADT/IntervalMap.h

+

IntervalMap is a compact map for small keys and values. It maps key intervals +instead of single keys, and it will automatically coalesce adjacent intervals. +When the map only contains a few intervals, they are stored in the map object +itself to avoid allocations.

+

The IntervalMap iterators are quite big, so they should not be passed around as +STL iterators. The heavyweight iterators allow a smaller data structure.

+
+
+

<map>

+

std::map has similar characteristics to std::set: it uses a +single allocation per pair inserted into the map, it offers log(n) lookup with +an extremely large constant factor, imposes a space penalty of 3 pointers per +pair in the map, etc.

+

std::map is most useful when your keys or values are very large, if you need to +iterate over the collection in sorted order, or if you need stable iterators +into the map (i.e. they don’t get invalidated if an insertion or deletion of +another element takes place).

+
+
+

llvm/ADT/MapVector.h

+

MapVector<KeyT,ValueT> provides a subset of the DenseMap interface. The +main difference is that the iteration order is guaranteed to be the insertion +order, making it an easy (but somewhat expensive) solution for non-deterministic +iteration over maps of pointers.

+

It is implemented by mapping from key to an index in a vector of key,value +pairs. This provides fast lookup and iteration, but has two main drawbacks: +the key is stored twice and removing elements takes linear time. If it is +necessary to remove elements, it’s best to remove them in bulk using +remove_if().

+
+
+

llvm/ADT/IntEqClasses.h

+

IntEqClasses provides a compact representation of equivalence classes of small +integers. Initially, each integer in the range 0..n-1 has its own equivalence +class. Classes can be joined by passing two class representatives to the +join(a, b) method. Two integers are in the same class when findLeader() returns +the same representative.

+

Once all equivalence classes are formed, the map can be compressed so each +integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m +is the total number of equivalence classes. The map must be uncompressed before +it can be edited again.

+
+
+

llvm/ADT/ImmutableMap.h

+

ImmutableMap is an immutable (functional) map implementation based on an AVL +tree. Adding or removing elements is done through a Factory object and results +in the creation of a new ImmutableMap object. If an ImmutableMap already exists +with the given key set, then the existing one is returned; equality is compared +with a FoldingSetNodeID. The time and space complexity of add or remove +operations is logarithmic in the size of the original map.

+
+
+

Other Map-Like Container Options

+

The STL provides several other options, such as std::multimap and the various +“hash_map” like containers (whether from C++ TR1 or from the SGI library). We +never use hash_map and unordered_map because they are generally very expensive +(each insertion requires a malloc) and very non-portable.

+

std::multimap is useful if you want to map a key to multiple values, but has all +the drawbacks of std::map. A sorted vector or some other approach is almost +always better.

+
+
+
+

Bit storage containers (BitVector, SparseBitVector, CoalescingBitVector)

+

There are three bit storage containers, and choosing when to use each is +relatively straightforward.

+

One additional option is std::vector<bool>: we discourage its use for two +reasons: 1) the implementation in many common compilers (e.g. commonly +available versions of GCC) is extremely inefficient and 2) the C++ standards +committee is likely to deprecate this container and/or change it significantly +somehow. In any case, please don’t use it.

+
+

BitVector

+

The BitVector container provides a dynamic size set of bits for manipulation. +It supports individual bit setting/testing, as well as set operations. The set +operations take time O(size of bitvector), but operations are performed one word +at a time, instead of one bit at a time. This makes the BitVector very fast for +set operations compared to other containers. Use the BitVector when you expect +the number of set bits to be high (i.e. a dense set).

+
+
+

SmallBitVector

+

The SmallBitVector container provides the same interface as BitVector, but it is +optimized for the case where only a small number of bits, less than 25 or so, +are needed. It also transparently supports larger bit counts, but slightly less +efficiently than a plain BitVector, so SmallBitVector should only be used when +larger counts are rare.

+

At this time, SmallBitVector does not support set operations (and, or, xor), and +its operator[] does not provide an assignable lvalue.

+
+
+

SparseBitVector

+

The SparseBitVector container is much like BitVector, with one major difference: +only the bits that are set are stored. This makes the SparseBitVector much +more space efficient than BitVector when the set is sparse, as well as making +set operations O(number of set bits) instead of O(size of universe). The +downside to the SparseBitVector is that setting and testing of random bits is +O(N), and on large SparseBitVectors, this can be slower than BitVector. In our +implementation, setting or testing bits in sorted order (either forwards or +reverse) is O(1) worst case. Testing and setting bits within 128 bits (depends +on size) of the current bit is also O(1). As a general statement, +testing/setting bits in a SparseBitVector is O(distance away from last set bit).

+
+
+

CoalescingBitVector

+

The CoalescingBitVector container is similar in principle to a SparseBitVector, +but is optimized to represent large contiguous ranges of set bits compactly. It +does this by coalescing contiguous ranges of set bits into intervals. Searching +for a bit in a CoalescingBitVector is O(log(gaps between contiguous ranges)).

+

CoalescingBitVector is a better choice than BitVector when gaps between ranges +of set bits are large. It’s a better choice than SparseBitVector when find() +operations must have fast, predictable performance. However, it’s not a good +choice for representing sets which have lots of very short ranges. E.g. the set +{2*x : x in [0, n)} would be a pathological input.

+
+
+
+
+

Debugging

+

A handful of GDB pretty printers are +provided for some of the core LLVM libraries. To use them, execute the +following (or add it to your ~/.gdbinit):

+
source /path/to/llvm/src/utils/gdb-scripts/prettyprinters.py
+
+
+

It also might be handy to enable the print pretty option to +avoid data structures being printed as a big block of text.

+
+
+

Helpful Hints for Common Operations

+

This section describes how to perform some very simple transformations of LLVM +code. This is meant to give examples of common idioms used, showing the +practical side of LLVM transformations.

+

Because this is a “how-to” section, you should also read about the main classes +that you will be working with. The Core LLVM Class Hierarchy Reference contains details and descriptions of the main classes that you +should know about.

+
+

Basic Inspection and Traversal Routines

+

The LLVM compiler infrastructure has many different data structures that may be +traversed. Following the example of the C++ standard template library, the +techniques used to traverse these various data structures are all basically the +same. For an enumerable sequence of values, the XXXbegin() function (or +method) returns an iterator to the start of the sequence, the XXXend() +function returns an iterator pointing to one past the last valid element of the +sequence, and there is some XXXiterator data type that is common between the +two operations.

+

Because the pattern for iteration is common across many different aspects of the +program representation, the standard template library algorithms may be used on +them, and it is easier to remember how to iterate. First we show a few common +examples of the data structures that need to be traversed. Other data +structures are traversed in very similar ways.

+
+

Iterating over the BasicBlock in a Function

+

It’s quite common to have a Function instance that you’d like to transform +in some way; in particular, you’d like to manipulate its BasicBlocks. To +facilitate this, you’ll need to iterate over all of the BasicBlocks that +constitute the Function. The following is an example that prints the name +of a BasicBlock and the number of Instructions it contains:

+
Function &Func = ...
+for (BasicBlock &BB : Func)
+  // Print out the name of the basic block if it has one, and then the
+  // number of instructions that it contains
+  errs() << "Basic block (name=" << BB.getName() << ") has "
+             << BB.size() << " instructions.\n";
+
+
+
+
+

Iterating over the Instruction in a BasicBlock

+

Just like when dealing with BasicBlocks in Functions, it’s easy to +iterate over the individual instructions that make up BasicBlocks. Here’s +a code snippet that prints out each instruction in a BasicBlock:

+
BasicBlock& BB = ...
+for (Instruction &I : BB)
+   // The next statement works since operator<<(ostream&,...)
+   // is overloaded for Instruction&
+   errs() << I << "\n";
+
+
+

However, this isn’t really the best way to print out the contents of a +BasicBlock! Since the ostream operators are overloaded for virtually +anything you’ll care about, you could have just invoked the print routine on the +basic block itself: errs() << BB << "\n";.

+
+
+

Iterating over the Instruction in a Function

+

If you’re finding that you commonly iterate over a Function’s +BasicBlocks and then that BasicBlock’s Instructions, +InstIterator should be used instead. You’ll need to include +llvm/IR/InstIterator.h (doxygen) and then instantiate +InstIterators explicitly in your code. Here’s a small example that shows +how to dump all instructions in a function to the standard error stream:

+
#include "llvm/IR/InstIterator.h"
+
+// F is a pointer to a Function instance
+for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+  errs() << *I << "\n";
+
+
+

Easy, isn’t it? You can also use InstIterators to fill a work list with +its initial contents. For example, if you wanted to initialize a work list to +contain all instructions in a Function F, all you would need to do is +something like:

+
std::set<Instruction*> worklist;
+// or better yet, SmallPtrSet<Instruction*, 64> worklist;
+
+for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+  worklist.insert(&*I);
+
+
+

The STL set worklist would now contain all instructions in the Function +pointed to by F.

+
+
+

Turning an iterator into a class pointer (and vice-versa)

+

Sometimes, it’ll be useful to grab a reference (or pointer) to a class instance +when all you’ve got at hand is an iterator. Well, extracting a reference or a +pointer from an iterator is very straight-forward. Assuming that i is a +BasicBlock::iterator and j is a BasicBlock::const_iterator:

+
Instruction& inst = *i;   // Grab reference to instruction reference
+Instruction* pinst = &*i; // Grab pointer to instruction reference
+const Instruction& inst = *j;
+
+
+

However, the iterators you’ll be working with in the LLVM framework are special: +they will automatically convert to a ptr-to-instance type whenever they need to. +Instead of dereferencing the iterator and then taking the address of the result, +you can simply assign the iterator to the proper pointer type and you get the +dereference and address-of operation as a result of the assignment (behind the +scenes, this is a result of overloading casting mechanisms). Thus the second +line of the last example,

+
Instruction *pinst = &*i;
+
+
+

is semantically equivalent to

+
Instruction *pinst = i;
+
+
+

It’s also possible to turn a class pointer into the corresponding iterator, and +this is a constant time operation (very efficient). The following code snippet +illustrates use of the conversion constructors provided by LLVM iterators. By +using these, you can explicitly grab the iterator of something without actually +obtaining it via iteration over some structure:

+
void printNextInstruction(Instruction* inst) {
+  BasicBlock::iterator it(inst);
+  ++it; // After this line, it refers to the instruction after *inst
+  if (it != inst->getParent()->end()) errs() << *it << "\n";
+}
+
+
+

Unfortunately, these implicit conversions come at a cost; they prevent these +iterators from conforming to standard iterator conventions, and thus from being +usable with standard algorithms and containers. For example, they prevent the +following code, where B is a BasicBlock, from compiling:

+
llvm::SmallVector<llvm::Instruction *, 16>(B->begin(), B->end());
+
+
+

Because of this, these implicit conversions may be removed some day, and +operator* changed to return a pointer instead of a reference.

+
+
+

Finding call sites: a slightly more complex example

+

Say that you’re writing a FunctionPass and would like to count all the locations +in the entire module (that is, across every Function) where a certain +function (i.e., some Function *) is already in scope. As you’ll learn +later, you may want to use an InstVisitor to accomplish this in a much more +straight-forward manner, but this example will allow us to explore how you’d do +it if you didn’t have InstVisitor around. In pseudo-code, this is what we +want to do:

+
initialize callCounter to zero
+for each Function f in the Module
+  for each BasicBlock b in f
+    for each Instruction i in b
+      if (i a Call and calls the given function)
+        increment callCounter
+
+
+

And the actual code is (remember, because we’re writing a FunctionPass, our +FunctionPass-derived class simply has to override the runOnFunction +method):

+
Function* targetFunc = ...;
+
+class OurFunctionPass : public FunctionPass {
+  public:
+    OurFunctionPass(): callCounter(0) { }
+
+    virtual runOnFunction(Function& F) {
+      for (BasicBlock &B : F) {
+        for (Instruction &I: B) {
+          if (auto *CB = dyn_cast<CallBase>(&I)) {
+            // We know we've encountered some kind of call instruction (call,
+            // invoke, or callbr), so we need to determine if it's a call to
+            // the function pointed to by m_func or not.
+            if (CB->getCalledFunction() == targetFunc)
+              ++callCounter;
+          }
+        }
+      }
+    }
+
+  private:
+    unsigned callCounter;
+};
+
+
+
+
+

Iterating over def-use & use-def chains

+

Frequently, we might have an instance of the Value class (doxygen) and we want to determine +which Users use the Value. The list of all Users of a particular +Value is called a def-use chain. For example, let’s say we have a +Function* named F to a particular function foo. Finding all of the +instructions that use foo is as simple as iterating over the def-use +chain of F:

+
Function *F = ...;
+
+for (User *U : F->users()) {
+  if (Instruction *Inst = dyn_cast<Instruction>(U)) {
+    errs() << "F is used in instruction:\n";
+    errs() << *Inst << "\n";
+  }
+
+
+

Alternatively, it’s common to have an instance of the User Class (doxygen) and need to know what +Values are used by it. The list of all Values used by a User is +known as a use-def chain. Instances of class Instruction are common +User s, so we might want to iterate over all of the values that a particular +instruction uses (that is, the operands of the particular Instruction):

+
Instruction *pi = ...;
+
+for (Use &U : pi->operands()) {
+  Value *v = U.get();
+  // ...
+}
+
+
+

Declaring objects as const is an important tool of enforcing mutation free +algorithms (such as analyses, etc.). For this purpose, the above iterators come in +constant flavors as Value::const_use_iterator and +Value::const_op_iterator. They automatically arise when calling +use/op_begin() on const Value*s or const User*s respectively. +Upon dereferencing, they return const Use*s. Otherwise the above patterns +remain unchanged.

+
+
+

Iterating over predecessors & successors of blocks

+

Iterating over the predecessors and successors of a block is quite easy with the +routines defined in "llvm/IR/CFG.h". Just use code like this to +iterate over all predecessors of BB:

+
#include "llvm/IR/CFG.h"
+BasicBlock *BB = ...;
+
+for (BasicBlock *Pred : predecessors(BB)) {
+  // ...
+}
+
+
+

Similarly, to iterate over successors use successors.

+
+
+
+

Making simple changes

+

There are some primitive transformation operations present in the LLVM +infrastructure that are worth knowing about. When performing transformations, +it’s fairly common to manipulate the contents of basic blocks. This section +describes some of the common methods for doing so and gives example code.

+
+

Creating and inserting new Instructions

+

Instantiating Instructions

+

Creation of Instructions is straight-forward: simply call the constructor +for the kind of instruction to instantiate and provide the necessary parameters. +For example, an AllocaInst only requires a (const-ptr-to) Type. Thus:

+
auto *ai = new AllocaInst(Type::Int32Ty);
+
+
+

will create an AllocaInst instance that represents the allocation of one +integer in the current stack frame, at run time. Each Instruction subclass +is likely to have varying default parameters which change the semantics of the +instruction, so refer to the doxygen documentation for the subclass of +Instruction that +you’re interested in instantiating.

+

Naming values

+

It is very useful to name the values of instructions when you’re able to, as +this facilitates the debugging of your transformations. If you end up looking +at generated LLVM machine code, you definitely want to have logical names +associated with the results of instructions! By supplying a value for the +Name (default) parameter of the Instruction constructor, you associate a +logical name with the result of the instruction’s execution at run time. For +example, say that I’m writing a transformation that dynamically allocates space +for an integer on the stack, and that integer is going to be used as some kind +of index by some other code. To accomplish this, I place an AllocaInst at +the first point in the first BasicBlock of some Function, and I’m +intending to use it within the same Function. I might do:

+
auto *pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
+
+
+

where indexLoc is now the logical name of the instruction’s execution value, +which is a pointer to an integer on the run time stack.

+

Inserting instructions

+

There are essentially three ways to insert an Instruction into an existing +sequence of instructions that form a BasicBlock:

+
    +
  • Insertion into an explicit instruction list

    +

    Given a BasicBlock* pb, an Instruction* pi within that BasicBlock, +and a newly-created instruction we wish to insert before *pi, we do the +following:

    +
    BasicBlock *pb = ...;
    +Instruction *pi = ...;
    +auto *newInst = new Instruction(...);
    +
    +pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
    +
    +
    +

    Appending to the end of a BasicBlock is so common that the Instruction +class and Instruction-derived classes provide constructors which take a +pointer to a BasicBlock to be appended to. For example, code that looked +like:

    +
    BasicBlock *pb = ...;
    +auto *newInst = new Instruction(...);
    +
    +pb->getInstList().push_back(newInst); // Appends newInst to pb
    +
    +
    +

    becomes:

    +
    BasicBlock *pb = ...;
    +auto *newInst = new Instruction(..., pb);
    +
    +
    +

    which is much cleaner, especially if you are creating long instruction +streams.

    +
  • +
  • Insertion into an implicit instruction list

    +

    Instruction instances that are already in BasicBlocks are implicitly +associated with an existing instruction list: the instruction list of the +enclosing basic block. Thus, we could have accomplished the same thing as the +above code without being given a BasicBlock by doing:

    +
    Instruction *pi = ...;
    +auto *newInst = new Instruction(...);
    +
    +pi->getParent()->getInstList().insert(pi, newInst);
    +
    +
    +

    In fact, this sequence of steps occurs so frequently that the Instruction +class and Instruction-derived classes provide constructors which take (as +a default parameter) a pointer to an Instruction which the newly-created +Instruction should precede. That is, Instruction constructors are +capable of inserting the newly-created instance into the BasicBlock of a +provided instruction, immediately before that instruction. Using an +Instruction constructor with an insertBefore (default) parameter, the +above code becomes:

    +
    Instruction* pi = ...;
    +auto *newInst = new Instruction(..., pi);
    +
    +
    +

    which is much cleaner, especially if you’re creating a lot of instructions and +adding them to BasicBlocks.

    +
  • +
  • Insertion using an instance of IRBuilder

    +

    Inserting several Instructions can be quite laborious using the previous +methods. The IRBuilder is a convenience class that can be used to add +several instructions to the end of a BasicBlock or before a particular +Instruction. It also supports constant folding and renaming named +registers (see IRBuilder’s template arguments).

    +

    The example below demonstrates a very simple use of the IRBuilder where +three instructions are inserted before the instruction pi. The first two +instructions are Call instructions and the third instruction multiplies the return +values of the two calls.

    +
    Instruction *pi = ...;
    +IRBuilder<> Builder(pi);
    +CallInst* callOne = Builder.CreateCall(...);
    +CallInst* callTwo = Builder.CreateCall(...);
    +Value* result = Builder.CreateMul(callOne, callTwo);
    +
    +
    +

    The example below is similar to the above example except that the created +IRBuilder inserts instructions at the end of the BasicBlock pb.

    +
    BasicBlock *pb = ...;
    +IRBuilder<> Builder(pb);
    +CallInst* callOne = Builder.CreateCall(...);
    +CallInst* callTwo = Builder.CreateCall(...);
    +Value* result = Builder.CreateMul(callOne, callTwo);
    +
    +
    +

    See Kaleidoscope Tutorial for a practical use of the IRBuilder.

    +
  • +
+
+
+

Deleting Instructions

+

Deleting an instruction from an existing sequence of instructions that form a +BasicBlock is very straight-forward: just call the instruction’s +eraseFromParent() method. For example:

+
Instruction *I = .. ;
+I->eraseFromParent();
+
+
+

This unlinks the instruction from its containing basic block and deletes it. If +you’d just like to unlink the instruction from its containing basic block but +not delete it, you can use the removeFromParent() method.

+
+
+

Replacing an Instruction with another Value

+
+
Replacing individual instructions
+

Including “llvm/Transforms/Utils/BasicBlockUtils.h” permits use of two +very useful replace functions: ReplaceInstWithValue and +ReplaceInstWithInst.

+
+
+
Deleting Instructions
+
    +
  • ReplaceInstWithValue

    +

    This function replaces all uses of a given instruction with a value, and then +removes the original instruction. The following example illustrates the +replacement of the result of a particular AllocaInst that allocates memory +for a single integer with a null pointer to an integer.

    +
    AllocaInst* instToReplace = ...;
    +BasicBlock::iterator ii(instToReplace);
    +
    +ReplaceInstWithValue(instToReplace->getParent()->getInstList(), ii,
    +                     Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty)));
    +
    +
    +
  • +
  • ReplaceInstWithInst

    +

    This function replaces a particular instruction with another instruction, +inserting the new instruction into the basic block at the location where the +old instruction was, and replacing any uses of the old instruction with the +new instruction. The following example illustrates the replacement of one +AllocaInst with another.

    +
    AllocaInst* instToReplace = ...;
    +BasicBlock::iterator ii(instToReplace);
    +
    +ReplaceInstWithInst(instToReplace->getParent()->getInstList(), ii,
    +                    new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt"));
    +
    +
    +
  • +
+
+
+
Replacing multiple uses of Users and Values
+

You can use Value::replaceAllUsesWith and User::replaceUsesOfWith to +change more than one use at a time. See the doxygen documentation for the +Value Class and User Class, respectively, for more +information.

+
+
+
+

Deleting GlobalVariables

+

Deleting a global variable from a module is just as easy as deleting an +Instruction. First, you must have a pointer to the global variable that you +wish to delete. You use this pointer to erase it from its parent, the module. +For example:

+
GlobalVariable *GV = .. ;
+
+GV->eraseFromParent();
+
+
+
+
+
+
+

Threads and LLVM

+

This section describes the interaction of the LLVM APIs with multithreading, +both on the part of client applications, and in the JIT, in the hosted +application.

+

Note that LLVM’s support for multithreading is still relatively young. Up +through version 2.5, the execution of threaded hosted applications was +supported, but not threaded client access to the APIs. While this use case is +now supported, clients must adhere to the guidelines specified below to ensure +proper operation in multithreaded mode.

+

Note that, on Unix-like platforms, LLVM requires the presence of GCC’s atomic +intrinsics in order to support threaded operation. If you need a +multithreading-capable LLVM on a platform without a suitably modern system +compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and +using the resultant compiler to build a copy of LLVM with multithreading +support.

+
+

Ending Execution with llvm_shutdown()

+

When you are done using the LLVM APIs, you should call llvm_shutdown() to +deallocate memory used for internal structures.

+
+
+

Lazy Initialization with ManagedStatic

+

ManagedStatic is a utility class in LLVM used to implement static +initialization of static resources, such as the global type tables. In a +single-threaded environment, it implements a simple lazy initialization scheme. +When LLVM is compiled with support for multi-threading, however, it uses +double-checked locking to implement thread-safe lazy initialization.

+
+
+

Achieving Isolation with LLVMContext

+

LLVMContext is an opaque class in the LLVM API which clients can use to +operate multiple, isolated instances of LLVM concurrently within the same +address space. For instance, in a hypothetical compile-server, the compilation +of an individual translation unit is conceptually independent from all the +others, and it would be desirable to be able to compile incoming translation +units concurrently on independent server threads. Fortunately, LLVMContext +exists to enable just this kind of scenario!

+

Conceptually, LLVMContext provides isolation. Every LLVM entity +(Modules, Values, Types, Constants, etc.) in LLVM’s +in-memory IR belongs to an LLVMContext. Entities in different contexts +cannot interact with each other: Modules in different contexts cannot be +linked together, Functions cannot be added to Modules in different +contexts, etc. What this means is that it is safe to compile on multiple +threads simultaneously, as long as no two threads operate on entities within the +same context.

+

In practice, very few places in the API require the explicit specification of a +LLVMContext, other than the Type creation/lookup APIs. Because every +Type carries a reference to its owning context, most other entities can +determine what context they belong to by looking at their own Type. If you +are adding new entities to LLVM IR, please try to maintain this interface +design.

+
+
+

Threads and the JIT

+

LLVM’s “eager” JIT compiler is safe to use in threaded programs. Multiple +threads can call ExecutionEngine::getPointerToFunction() or +ExecutionEngine::runFunction() concurrently, and multiple threads can run +code output by the JIT concurrently. The user must still ensure that only one +thread accesses IR in a given LLVMContext while another thread might be +modifying it. One way to do that is to always hold the JIT lock while accessing +IR outside the JIT (the JIT modifies the IR by adding CallbackVHs). +Another way is to only call getPointerToFunction() from the +LLVMContext’s thread.

+

When the JIT is configured to compile lazily (using +ExecutionEngine::DisableLazyCompilation(false)), there is currently a race +condition in updating call sites +after a function is lazily-jitted. It’s still possible to use the lazy JIT in a +threaded program if you ensure that only one thread at a time can call any +particular lazy stub and that the JIT lock guards any IR access, but we suggest +using only the eager JIT in threaded programs.

+
+
+
+

Advanced Topics

+

This section describes some of the advanced or obscure APIs that most clients +do not need to be aware of. These APIs tend to manage the inner workings of the +LLVM system, and only need to be accessed in unusual circumstances.

+
+

The ValueSymbolTable class

+

The ValueSymbolTable (doxygen) class provides +a symbol table that the Function and Module classes use for +naming value definitions. The symbol table can provide a name for any Value.

+

Note that the SymbolTable class should not be directly accessed by most +clients. It should only be used when iteration over the symbol table names +themselves is required, which is very special purpose. Note that not all LLVM +Values have names, and those without names (i.e. they have an empty name) do +not exist in the symbol table.

+

Symbol tables support iteration over the values in the symbol table with +begin/end/iterator and support querying to see if a specific name is in the +symbol table (with lookup). The ValueSymbolTable class exposes no +public mutator methods; instead, simply call setName on a value, which will +autoinsert it into the appropriate symbol table.

+
+
+

The User and owned Use classes’ memory layout

+

The User (doxygen) +class provides a basis for expressing the ownership of User towards other +Value instances. The +Use (doxygen) helper +class is employed to do the bookkeeping and to facilitate O(1) addition and +removal.

+
+

Interaction and relationship between User and Use objects

+

A subclass of User can choose between incorporating its Use objects or +refer to them out-of-line by means of a pointer. A mixed variant (some Use +s inline others hung off) is impractical and breaks the invariant that the +Use objects belonging to the same User form a contiguous array.

+

We have 2 different layouts in the User (sub)classes:

+
    +
  • Layout a)

    +

    The Use object(s) are inside (resp. at fixed offset) of the User +object and there are a fixed number of them.

    +
  • +
  • Layout b)

    +

    The Use object(s) are referenced by a pointer to an array from the +User object and there may be a variable number of them.

    +
  • +
+

As of v2.4 each layout still possesses a direct pointer to the start of the +array of Uses. Though not mandatory for layout a), we stick to this +redundancy for the sake of simplicity. The User object also stores the +number of Use objects it has. (Theoretically this information can also be +calculated given the scheme presented below.)

+

Special forms of allocation operators (operator new) enforce the following +memory layouts:

+
    +
  • Layout a) is modelled by prepending the User object by the Use[] +array.

    +
    ...---.---.---.---.-------...
    +  | P | P | P | P | User
    +'''---'---'---'---'-------'''
    +
    +
    +
  • +
  • Layout b) is modelled by pointing at the Use[] array.

    +
    .-------...
    +| User
    +'-------'''
    +    |
    +    v
    +    .---.---.---.---...
    +    | P | P | P | P |
    +    '---'---'---'---'''
    +
    +
    +
  • +
+

(In the above figures ‘P’ stands for the Use** that is stored in +each Use object in the member Use::Prev)

+
+
+
+

Designing Type Hierarchies and Polymorphic Interfaces

+

There are two different design patterns that tend to result in the use of +virtual dispatch for methods in a type hierarchy in C++ programs. The first is +a genuine type hierarchy where different types in the hierarchy model +a specific subset of the functionality and semantics, and these types nest +strictly within each other. Good examples of this can be seen in the Value +or Type type hierarchies.

+

A second is the desire to dispatch dynamically across a collection of +polymorphic interface implementations. This latter use case can be modeled with +virtual dispatch and inheritance by defining an abstract interface base class +which all implementations derive from and override. However, this +implementation strategy forces an “is-a” relationship to exist that is not +actually meaningful. There is often not some nested hierarchy of useful +generalizations which code might interact with and move up and down. Instead, +there is a singular interface which is dispatched across a range of +implementations.

+

The preferred implementation strategy for the second use case is that of +generic programming (sometimes called “compile-time duck typing” or “static +polymorphism”). For example, a template over some type parameter T can be +instantiated across any particular implementation that conforms to the +interface or concept. A good example here is the highly generic properties of +any type which models a node in a directed graph. LLVM models these primarily +through templates and generic programming. Such templates include the +LoopInfoBase and DominatorTreeBase. When this type of polymorphism +truly needs dynamic dispatch you can generalize it using a technique +called concept-based polymorphism. This pattern emulates the interfaces and +behaviors of templates using a very limited form of virtual dispatch for type +erasure inside its implementation. You can find examples of this technique in +the PassManager.h system, and there is a more detailed introduction to it +by Sean Parent in several of his talks and papers:

+
    +
  1. Inheritance Is The Base Class of Evil +- The GoingNative 2013 talk describing this technique, and probably the best +place to start.

  2. +
  3. Value Semantics and Concepts-based Polymorphism - The C++Now! 2012 talk +describing this technique in more detail.

  4. +
  5. Sean Parent’s Papers and Presentations +- A GitHub project full of links to slides, video, and sometimes code.

  6. +
+

When deciding between creating a type hierarchy (with either tagged or virtual +dispatch) and using templates or concepts-based polymorphism, consider whether +there is some refinement of an abstract base class which is a semantically +meaningful type on an interface boundary. If anything more refined than the +root abstract interface is meaningless to talk about as a partial extension of +the semantic model, then your use case likely fits better with polymorphism and +you should avoid using virtual dispatch. However, there may be some exigent +circumstances that require one technique or the other to be used.

+

If you do need to introduce a type hierarchy, we prefer to use explicitly +closed type hierarchies with manual tagged dispatch and/or RTTI rather than the +open inheritance model and virtual dispatch that is more common in C++ code. +This is because LLVM rarely encourages library consumers to extend its core +types, and leverages the closed and tag-dispatched nature of its hierarchies to +generate significantly more efficient code. We have also found that a large +amount of our usage of type hierarchies fits better with tag-based pattern +matching rather than dynamic dispatch across a common interface. Within LLVM we +have built custom helpers to facilitate this design. See this document’s +section on isa and dyn_cast and our detailed document which describes how you can implement this +pattern for use with the LLVM helpers.

+
+
+

ABI Breaking Checks

+

Checks and asserts that alter the LLVM C++ ABI are predicated on the +preprocessor symbol LLVM_ENABLE_ABI_BREAKING_CHECKS – LLVM +libraries built with LLVM_ENABLE_ABI_BREAKING_CHECKS are not ABI +compatible with LLVM libraries built without it defined. By default, +turning on assertions also turns on LLVM_ENABLE_ABI_BREAKING_CHECKS +so a default +Asserts build is not ABI compatible with a +default -Asserts build. Clients that want ABI compatibility +between +Asserts and -Asserts builds should use the CMake build system +to set LLVM_ENABLE_ABI_BREAKING_CHECKS independently +of LLVM_ENABLE_ASSERTIONS.

+
+
+
+

The Core LLVM Class Hierarchy Reference

+

#include "llvm/IR/Type.h"

+

header source: Type.h

+

doxygen info: Type Classes

+

The Core LLVM classes are the primary means of representing the program being +inspected or transformed. The core LLVM classes are defined in header files in +the include/llvm/IR directory, and implemented in the lib/IR +directory. It’s worth noting that, for historical reasons, this library is +called libLLVMCore.so, not libLLVMIR.so as you might expect.

+
+

The Type class and Derived Types

+

Type is a superclass of all type classes. Every Value has a Type. +Type cannot be instantiated directly but only through its subclasses. +Certain primitive types (VoidType, LabelType, FloatType and +DoubleType) have hidden subclasses. They are hidden because they offer no +useful functionality beyond what the Type class offers except to distinguish +themselves from other subclasses of Type.

+

All other types are subclasses of DerivedType. Types can be named, but this +is not a requirement. There exists exactly one instance of a given shape at any +one time. This allows type equality to be performed with address equality of +the Type Instance. That is, given two Type* values, the types are identical +if the pointers are identical.

+
+

Important Public Methods

+
    +
  • bool isIntegerTy() const: Returns true for any integer type.

  • +
  • bool isFloatingPointTy(): Return true if this is one of the five +floating point types.

  • +
  • bool isSized(): Return true if the type has known size. Things +that don’t have a size are abstract types, labels and void.

  • +
+
+
+

Important Derived Types

+
+
IntegerType

Subclass of DerivedType that represents integer types of any bit width. Any +bit width between IntegerType::MIN_INT_BITS (1) and +IntegerType::MAX_INT_BITS (~8 million) can be represented.

+
    +
  • static const IntegerType* get(unsigned NumBits): get an integer +type of a specific bit width.

  • +
  • unsigned getBitWidth() const: Get the bit width of an integer type.

  • +
+
+
SequentialType

This is subclassed by ArrayType and VectorType.

+
    +
  • const Type * getElementType() const: Returns the type of each +of the elements in the sequential type.

  • +
  • uint64_t getNumElements() const: Returns the number of elements +in the sequential type.

  • +
+
+
ArrayType

This is a subclass of SequentialType and defines the interface for array +types.

+
+
PointerType

Subclass of Type for pointer types.

+
+
VectorType

Subclass of SequentialType for vector types. A vector type is similar to an +ArrayType but is distinguished because it is a first class type whereas +ArrayType is not. Vector types are used for vector operations and are usually +small vectors of an integer or floating point type.

+
+
StructType

Subclass of DerivedType for struct types.

+
+
+
+
FunctionType

Subclass of DerivedType for function types.

+
    +
  • bool isVarArg() const: Returns true if it’s a vararg function.

  • +
  • const Type * getReturnType() const: Returns the return type of the +function.

  • +
  • const Type * getParamType (unsigned i): Returns the type of the ith +parameter.

  • +
  • const unsigned getNumParams() const: Returns the number of formal +parameters.

  • +
+
+
+
+
+
+

The Module class

+

#include "llvm/IR/Module.h"

+

header source: Module.h

+

doxygen info: Module Class

+

The Module class represents the top level structure present in LLVM +programs. An LLVM module is effectively either a translation unit of the +original program or a combination of several translation units merged by the +linker. The Module class keeps track of a list of Functions, a list of GlobalVariables, and a SymbolTable. +Additionally, it contains a few helpful member functions that try to make common +operations easy.

+
+

Important Public Members of the Module class

+
    +
  • Module::Module(std::string name = "")

    +

    Constructing a Module is easy. You can optionally provide a name for it +(probably based on the name of the translation unit).

    +
  • +
  • +
    Module::iterator - Typedef for function list iterator
    +
    Module::const_iterator - Typedef for const_iterator.
    +
    begin(), end(), size(), empty()
    +
    +

    These are forwarding methods that make it easy to access the contents of a +Module object’s Function list.

    +
  • +
  • Module::FunctionListType &getFunctionList()

    +

    Returns the list of Functions. This is necessary to use +when you need to update the list or perform a complex action that doesn’t have +a forwarding method.

    +
  • +
+
+
    +
  • +
    Module::global_iterator - Typedef for global variable list iterator
    +
    Module::const_global_iterator - Typedef for const_iterator.
    +
    global_begin(), global_end(), global_size(), global_empty()
    +
    +

    These are forwarding methods that make it easy to access the contents of a +Module object’s GlobalVariable list.

    +
  • +
  • Module::GlobalListType &getGlobalList()

    +

    Returns the list of GlobalVariables. This is necessary to use when you +need to update the list or perform a complex action that doesn’t have a +forwarding method.

    +
  • +
+
+
    +
  • SymbolTable *getSymbolTable()

    +

    Return a reference to the SymbolTable for this Module.

    +
  • +
+
+
    +
  • Function *getFunction(StringRef Name) const

    +

    Look up the specified function in the Module SymbolTable. If it does not +exist, return null.

    +
  • +
  • FunctionCallee getOrInsertFunction(const std::string &Name, +const FunctionType *T)

    +

    Look up the specified function in the Module SymbolTable. If +it does not exist, add an external declaration for the function and +return it. Note that the function signature already present may not +match the requested signature. Thus, in order to enable the common +usage of passing the result directly to EmitCall, the return type is +a struct of {FunctionType *T, Constant *FunctionPtr}, rather +than simply the Function* with potentially an unexpected +signature.

    +
  • +
  • std::string getTypeName(const Type *Ty)

    +

    If there is at least one entry in the SymbolTable for the specified Type, +return it. Otherwise return the empty string.

    +
  • +
  • bool addTypeName(const std::string &Name, const Type *Ty)

    +

    Insert an entry in the SymbolTable mapping Name to Ty. If there is +already an entry for this name, true is returned and the SymbolTable is not +modified.

    +
  • +
+
+
+
+

The Value class

+

#include "llvm/IR/Value.h"

+

header source: Value.h

+

doxygen info: Value Class

+

The Value class is the most important class in the LLVM Source base. It +represents a typed value that may be used (among other things) as an operand to +an instruction. There are many different types of Values, such as +Constants, Arguments. Even Instructions and Functions are Values.

+

A particular Value may be used many times in the LLVM representation for a +program. For example, an incoming argument to a function (represented with an +instance of the Argument class) is “used” by every instruction in the function +that references the argument. To keep track of this relationship, the Value +class keeps a list of all of the Users that are using it (the User class +is a base class for all nodes in the LLVM graph that can refer to Values). +This use list is how LLVM represents def-use information in the program, and is +accessible through the use_* methods, shown below.

+

Because LLVM is a typed representation, every LLVM Value is typed, and this +Type is available through the getType() method. In addition, all LLVM +values can be named. The “name” of the Value is a symbolic string printed +in the LLVM code:

+
%foo = add i32 1, 2
+
+
+

The name of this instruction is “foo”. NOTE that the name of any value may +be missing (an empty string), so names should ONLY be used for debugging +(making the source code easier to read, debugging printouts), they should not be +used to keep track of values or map between them. For this purpose, use a +std::map of pointers to the Value itself instead.

+

One important aspect of LLVM is that there is no distinction between an SSA +variable and the operation that produces it. Because of this, any reference to +the value produced by an instruction (or the value available as an incoming +argument, for example) is represented as a direct pointer to the instance of the +class that represents this value. Although this may take some getting used to, +it simplifies the representation and makes it easier to manipulate.

+
+

Important Public Members of the Value class

+
    +
  • +
    Value::use_iterator - Typedef for iterator over the use-list
    +
    Value::const_use_iterator - Typedef for const_iterator over the +use-list
    +
    unsigned use_size() - Returns the number of users of the value.
    +
    bool use_empty() - Returns true if there are no users.
    +
    use_iterator use_begin() - Get an iterator to the start of the +use-list.
    +
    use_iterator use_end() - Get an iterator to the end of the use-list.
    +
    User *use_back() - Returns the last element in the list.
    +
    +

    These methods are the interface to access the def-use information in LLVM. +As with all other iterators in LLVM, the naming conventions follow the +conventions defined by the STL.

    +
  • +
  • Type *getType() const +This method returns the Type of the Value.

  • +
  • +
    bool hasName() const
    +
    std::string getName() const
    +
    void setName(const std::string &Name)
    +
    +

    This family of methods is used to access and assign a name to a Value; be +aware of the precaution above.

    +
  • +
  • void replaceAllUsesWith(Value *V)

    +

    This method traverses the use list of a Value changing all Users of the +current value to refer to “V” instead. For example, if you detect that an +instruction always produces a constant value (for example through constant +folding), you can replace all uses of the instruction with the constant like +this:

    +
    Inst->replaceAllUsesWith(ConstVal);
    +
    +
    +
  • +
+
+
+
+

The User class

+

#include "llvm/IR/User.h"

+

header source: User.h

+

doxygen info: User Class

+

Superclass: Value

+

The User class is the common base class of all LLVM nodes that may refer to +Values. It exposes a list of “Operands” that are all of the Values +that the User is referring to. The User class itself is a subclass of +Value.

+

The operands of a User point directly to the LLVM Value that it refers +to. Because LLVM uses Static Single Assignment (SSA) form, there can only be +one definition referred to, allowing this direct connection. This connection +provides the use-def information in LLVM.

+
+

Important Public Members of the User class

+

The User class exposes the operand list in two ways: through an index access +interface and through an iterator based interface.

+
    +
  • +
    Value *getOperand(unsigned i)
    +
    unsigned getNumOperands()
    +
    +

    These two methods expose the operands of the User in a convenient form for +direct access.

    +
  • +
  • +
    User::op_iterator - Typedef for iterator over the operand list
    +
    op_iterator op_begin() - Get an iterator to the start of the operand +list.
    +
    op_iterator op_end() - Get an iterator to the end of the operand list.
    +
    +

    Together, these methods make up the iterator based interface to the operands +of a User.

    +
  • +
+
+
+
+

The Instruction class

+

#include "llvm/IR/Instruction.h"

+

header source: Instruction.h

+

doxygen info: Instruction Class

+

Superclasses: User, Value

+

The Instruction class is the common base class for all LLVM instructions. +It provides only a few methods, but is a very commonly used class. The primary +data tracked by the Instruction class itself is the opcode (instruction +type) and the parent BasicBlock the Instruction is embedded into. To +represent a specific type of instruction, one of many subclasses of +Instruction is used.

+

Because the Instruction class subclasses the User class, its operands can +be accessed in the same way as for other Users (with the +getOperand()/getNumOperands() and op_begin()/op_end() methods). +An important file for the Instruction class is the llvm/Instruction.def +file. This file contains some meta-data about the various different types of +instructions in LLVM. It describes the enum values that are used as opcodes +(for example Instruction::Add and Instruction::ICmp), as well as the +concrete sub-classes of Instruction that implement the instruction (for +example BinaryOperator and CmpInst). Unfortunately, the use of macros in this +file confuses doxygen, so these enum values don’t show up correctly in the +doxygen output.

+
+

Important Subclasses of the Instruction class

+
    +
  • BinaryOperator

    +

    This subclass represents all two-operand instructions whose operands must be +the same type, except for the comparison instructions.

    +
  • +
+
    +
  • CastInst +This subclass is the parent of the 12 casting instructions. It provides +common operations on cast instructions.

  • +
+
    +
  • CmpInst

    +

    This subclass represents the two comparison instructions, +ICmpInst (integer operands), and +FCmpInst (floating point operands).

    +
  • +
+
+
+

Important Public Members of the Instruction class

+
    +
  • BasicBlock *getParent()

    +

    Returns the BasicBlock that this +Instruction is embedded into.

    +
  • +
  • bool mayWriteToMemory()

    +

    Returns true if the instruction writes to memory, i.e. it is a call, +free, invoke, or store.

    +
  • +
  • unsigned getOpcode()

    +

    Returns the opcode for the Instruction.

    +
  • +
  • Instruction *clone() const

    +

    Returns another instance of the specified instruction, identical in all ways +to the original except that the instruction has no parent (i.e. it’s not +embedded into a BasicBlock), and it has no name.

    +
  • +
+
+
+
+

The Constant class and subclasses

+

Constant represents a base class for different types of constants. It is +subclassed by ConstantInt, ConstantArray, etc. for representing the various +types of Constants. GlobalValue is also a subclass, which represents the +address of a global variable or function.

+
+

Important Subclasses of Constant

+
    +
  • ConstantInt : This subclass of Constant represents an integer constant of +any width.

    +
      +
    • const APInt& getValue() const: Returns the underlying +value of this constant, an APInt value.

    • +
    • int64_t getSExtValue() const: Converts the underlying APInt value to an +int64_t via sign extension. If the value (not the bit width) of the APInt +is too large to fit in an int64_t, an assertion will result. For this +reason, use of this method is discouraged.

    • +
    • uint64_t getZExtValue() const: Converts the underlying APInt value +to a uint64_t via zero extension. If the value (not the bit width) of the +APInt is too large to fit in a uint64_t, an assertion will result. For this +reason, use of this method is discouraged.

    • +
    • static ConstantInt* get(const APInt& Val): Returns the ConstantInt +object that represents the value provided by Val. The type is implied +as the IntegerType that corresponds to the bit width of Val.

    • +
    • static ConstantInt* get(const Type *Ty, uint64_t Val): Returns the +ConstantInt object that represents the value provided by Val for integer +type Ty.

    • +
    +
  • +
  • ConstantFP : This class represents a floating point constant.

    +
      +
    • double getValue() const: Returns the underlying value of this constant.

    • +
    +
  • +
  • ConstantArray : This represents a constant array.

    +
      +
    • const std::vector<Use> &getValues() const: Returns a vector of +component constants that make up this array.

    • +
    +
  • +
  • ConstantStruct : This represents a constant struct.

    +
      +
    • const std::vector<Use> &getValues() const: Returns a vector of +component constants that make up this struct.

    • +
    +
  • +
  • GlobalValue : This represents either a global variable or a function. In +either case, the value is a constant fixed address (after linking).

  • +
+
+
+
+

The GlobalValue class

+

#include "llvm/IR/GlobalValue.h"

+

header source: GlobalValue.h

+

doxygen info: GlobalValue Class

+

Superclasses: Constant, User, Value

+

Global values ( GlobalVariables or Functions) are the +only LLVM values that are visible in the bodies of all Functions. Because they are visible at global scope, they are also +subject to linking with other globals defined in different translation units. +To control the linking process, GlobalValues know their linkage rules. +Specifically, GlobalValues know whether they have internal or external +linkage, as defined by the LinkageTypes enumeration.

+

If a GlobalValue has internal linkage (equivalent to being static in C), +it is not visible to code outside the current translation unit, and does not +participate in linking. If it has external linkage, it is visible to external +code, and does participate in linking. In addition to linkage information, +GlobalValues keep track of which Module they are currently part of.

+

Because GlobalValues are memory objects, they are always referred to by +their address. As such, the Type of a global is always a pointer to its +contents. It is important to remember this when using the GetElementPtrInst +instruction because this pointer must be dereferenced first. For example, if +you have a GlobalVariable (a subclass of GlobalValue) that is an array +of 24 ints, type [24 x i32], then the GlobalVariable is a pointer to +that array. Although the address of the first element of this array and the +value of the GlobalVariable are the same, they have different types. The +GlobalVariable’s type is [24 x i32]. The first element’s type is +i32. Because of this, accessing a global value requires you to dereference +the pointer with GetElementPtrInst first, then its elements can be accessed. +This is explained in the LLVM Language Reference Manual.

+
+

Important Public Members of the GlobalValue class

+
    +
  • +
    bool hasInternalLinkage() const
    +
    bool hasExternalLinkage() const
    +
    void setInternalLinkage(bool HasInternalLinkage)
    +
    +

    These methods manipulate the linkage characteristics of the GlobalValue.

    +
  • +
  • Module *getParent()

    +

    This returns the Module that the +GlobalValue is currently embedded into.

    +
  • +
+
+
+
+

The Function class

+

#include "llvm/IR/Function.h"

+

header source: Function.h

+

doxygen info: Function Class

+

Superclasses: GlobalValue, Constant, User, Value

+

The Function class represents a single procedure in LLVM. It is actually +one of the more complex classes in the LLVM hierarchy because it must keep track +of a large amount of data. The Function class keeps track of a list of +BasicBlocks, a list of formal Arguments, and a SymbolTable.

+

The list of BasicBlocks is the most commonly used part of Function +objects. The list imposes an implicit ordering of the blocks in the function, +which indicates how the code will be laid out by the backend. Additionally, the +first BasicBlock is the implicit entry node for the Function. It is not +legal in LLVM to explicitly branch to this initial block. There are no implicit +exit nodes, and in fact there may be multiple exit nodes from a single +Function. If the BasicBlock list is empty, this indicates that the +Function is actually a function declaration: the actual body of the function +hasn’t been linked in yet.

+

In addition to a list of BasicBlocks, the Function class also keeps track +of the list of formal Arguments that the function receives. This container +manages the lifetime of the Argument nodes, just like the BasicBlock list does +for the BasicBlocks.

+

The SymbolTable is a very rarely used LLVM feature that is only used when you +have to look up a value by name. Aside from that, the SymbolTable is used +internally to make sure that there are not conflicts between the names of +Instructions, BasicBlocks, or Arguments in the function body.

+

Note that Function is a GlobalValue and therefore also a Constant. The +value of the function is its address (after linking) which is guaranteed to be +constant.

+
+

Important Public Members of the Function

+
    +
  • Function(const FunctionType *Ty, LinkageTypes Linkage, +const std::string &N = "", Module* Parent = 0)

    +

    Constructor used when you need to create new Functions to add to the +program. The constructor must specify the type of the function to create and +what type of linkage the function should have. The FunctionType argument +specifies the formal arguments and return value for the function. The same +FunctionType value can be used to create multiple functions. The Parent +argument specifies the Module in which the function is defined. If this +argument is provided, the function will automatically be inserted into that +module’s list of functions.

    +
  • +
  • bool isDeclaration()

    +

    Return whether or not the Function has a body defined. If the function is +“external”, it does not have a body, and thus must be resolved by linking with +a function defined in a different translation unit.

    +
  • +
  • +
    Function::iterator - Typedef for basic block list iterator
    +
    Function::const_iterator - Typedef for const_iterator.
    +
    begin(), end(), size(), empty()
    +
    +

    These are forwarding methods that make it easy to access the contents of a +Function object’s BasicBlock list.

    +
  • +
  • Function::BasicBlockListType &getBasicBlockList()

    +

    Returns the list of BasicBlocks. This is necessary to use when you need to +update the list or perform a complex action that doesn’t have a forwarding +method.

    +
  • +
  • +
    Function::arg_iterator - Typedef for the argument list iterator
    +
    Function::const_arg_iterator - Typedef for const_iterator.
    +
    arg_begin(), arg_end(), arg_size(), arg_empty()
    +
    +

    These are forwarding methods that make it easy to access the contents of a +Function object’s Argument list.

    +
  • +
  • Function::ArgumentListType &getArgumentList()

    +

    Returns the list of Arguments. This is necessary to use when you need to +update the list or perform a complex action that doesn’t have a forwarding +method.

    +
  • +
  • BasicBlock &getEntryBlock()

    +

    Returns the entry BasicBlock for the function. Because the entry block +for the function is always the first block, this returns the first block of +the Function.

    +
  • +
  • +
    Type *getReturnType()
    +
    FunctionType *getFunctionType()
    +
    +

    This traverses the Type of the Function and returns the return type of +the function, or the FunctionType of the actual function.

    +
  • +
  • SymbolTable *getSymbolTable()

    +

    Return a pointer to the SymbolTable for this Function.

    +
  • +
+
+
+
+

The GlobalVariable class

+

#include "llvm/IR/GlobalVariable.h"

+

header source: GlobalVariable.h

+

doxygen info: GlobalVariable Class

+

Superclasses: GlobalValue, Constant, User, Value

+

Global variables are represented with the (surprise surprise) GlobalVariable +class. Like functions, GlobalVariables are also subclasses of +GlobalValue, and as such are always referenced by their address (global values +must live in memory, so their “name” refers to their constant address). See +GlobalValue for more on this. Global variables may have an initial value +(which must be a Constant), and if they have an initializer, they may be marked +as “constant” themselves (indicating that their contents never change at +runtime).

+
+

Important Public Members of the GlobalVariable class

+
    +
  • GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes &Linkage, +Constant *Initializer = 0, const std::string &Name = "", Module* Parent = 0)

    +

    Create a new global variable of the specified type. If isConstant is true +then the global variable will be marked as unchanging for the program. The +Linkage parameter specifies the type of linkage (internal, external, weak, +linkonce, appending) for the variable. If the linkage is InternalLinkage, +WeakAnyLinkage, WeakODRLinkage, LinkOnceAnyLinkage or LinkOnceODRLinkage, then +the resultant global variable will have internal linkage. AppendingLinkage +concatenates together all instances (in different translation units) of the +variable into a single variable but is only applicable to arrays. See the +LLVM Language Reference for further details +on linkage types. Optionally an initializer, a name, and the module to put +the variable into may be specified for the global variable as well.

    +
  • +
  • bool isConstant() const

    +

    Returns true if this is a global variable that is known not to be modified at +runtime.

    +
  • +
  • bool hasInitializer()

    +

    Returns true if this GlobalVariable has an initializer.

    +
  • +
  • Constant *getInitializer()

    +

    Returns the initial value for a GlobalVariable. It is not legal to call +this method if there is no initializer.

    +
  • +
+
+
+
+

The BasicBlock class

+

#include "llvm/IR/BasicBlock.h"

+

header source: BasicBlock.h

+

doxygen info: BasicBlock Class

+

Superclass: Value

+

This class represents a single entry single exit section of the code, commonly +known as a basic block by the compiler community. The BasicBlock class +maintains a list of Instructions, which form the body of the block. Matching +the language definition, the last element of this list of instructions is always +a terminator instruction.

+

In addition to tracking the list of instructions that make up the block, the +BasicBlock class also keeps track of the Function that +it is embedded into.

+

Note that BasicBlocks themselves are Values, because they are +referenced by instructions like branches and can go in the switch tables. +BasicBlocks have type label.

+
+

Important Public Members of the BasicBlock class

+
    +
  • BasicBlock(const std::string &Name = "", Function *Parent = 0)

    +

    The BasicBlock constructor is used to create new basic blocks for +insertion into a function. The constructor optionally takes a name for the +new block, and a Function to insert it into. If the +Parent parameter is specified, the new BasicBlock is automatically +inserted at the end of the specified Function, if not +specified, the BasicBlock must be manually inserted into the Function.

    +
  • +
  • +
    BasicBlock::iterator - Typedef for instruction list iterator
    +
    BasicBlock::const_iterator - Typedef for const_iterator.
    +
    begin(), end(), front(), back(), +size(), empty() +STL-style functions for accessing the instruction list.
    +
    +

    These methods and typedefs are forwarding functions that have the same +semantics as the standard library methods of the same names. These methods +expose the underlying instruction list of a basic block in a way that is easy +to manipulate. To get the full complement of container operations (including +operations to update the list), you must use the getInstList() method.

    +
  • +
  • BasicBlock::InstListType &getInstList()

    +

    This method is used to get access to the underlying container that actually +holds the Instructions. This method must be used when there isn’t a +forwarding function in the BasicBlock class for the operation that you +would like to perform. Because there are no forwarding functions for +“updating” operations, you need to use this if you want to update the contents +of a BasicBlock.

    +
  • +
  • Function *getParent()

    +

    Returns a pointer to the Function the block is embedded into, +or a null pointer if it is homeless.

    +
  • +
  • Instruction *getTerminator()

    +

    Returns a pointer to the terminator instruction that appears at the end of the +BasicBlock. If there is no terminator instruction, or if the last +instruction in the block is not a terminator, then a null pointer is returned.

    +
  • +
+
+
+
+

The Argument class

+

This subclass of Value defines the interface for incoming formal arguments to a +function. A Function maintains a list of its formal arguments. An argument has +a pointer to the parent Function.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Projects.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Projects.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Projects.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Projects.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,394 @@ + + + + + + + + + Creating an LLVM Project — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Creating an LLVM Project

+ +
+

Overview

+

The LLVM build system is designed to facilitate the building of third party +projects that use LLVM header files, libraries, and tools. In order to use +these facilities, a Makefile from a project must do the following things:

+
    +
  • Set make variables. There are several variables that a Makefile needs +to set to use the LLVM build system:

    +
      +
    • PROJECT_NAME - The name by which your project is known.

    • +
    • LLVM_SRC_ROOT - The root of the LLVM source tree.

    • +
    • LLVM_OBJ_ROOT - The root of the LLVM object tree.

    • +
    • PROJ_SRC_ROOT - The root of the project’s source tree.

    • +
    • PROJ_OBJ_ROOT - The root of the project’s object tree.

    • +
    • PROJ_INSTALL_ROOT - The root installation directory.

    • +
    • LEVEL - The relative path from the current directory to the +project’s root ($PROJ_OBJ_ROOT).

    • +
    +
  • +
  • Include Makefile.config from $(LLVM_OBJ_ROOT).

  • +
  • Include Makefile.rules from $(LLVM_SRC_ROOT).

  • +
+

There are two ways that you can set all of these variables:

+
    +
  • You can write your own Makefiles which hard-code these values.

  • +
  • You can use the pre-made LLVM sample project. This sample project includes +Makefiles, a configure script that can be used to configure the location +of LLVM, and the ability to support multiple object directories from a single +source directory.

  • +
+

If you want to devise your own build system, studying other projects and LLVM +Makefiles will probably provide enough information on how to write your own +Makefiles.

+
+
+

Source Tree Layout

+

In order to use the LLVM build system, you will want to organize your source +code so that it can benefit from the build system’s features. Mainly, you want +your source tree layout to look similar to the LLVM source tree layout.

+

Underneath your top level directory, you should have the following directories:

+

lib

+
+

This subdirectory should contain all of your library source code. For each +library that you build, you will have one directory in lib that will +contain that library’s source code.

+

Libraries can be object files, archives, or dynamic libraries. The lib +directory is just a convenient place for libraries as it places them all in +a directory from which they can be linked later.

+
+

include

+
+

This subdirectory should contain any header files that are global to your +project. By global, we mean that they are used by more than one library or +executable of your project.

+

By placing your header files in include, they will be found +automatically by the LLVM build system. For example, if you have a file +include/jazz/note.h, then your source files can include it simply with +#include "jazz/note.h".

+
+

tools

+
+

This subdirectory should contain all of your source code for executables. +For each program that you build, you will have one directory in tools +that will contain that program’s source code.

+
+

test

+
+

This subdirectory should contain tests that verify that your code works +correctly. Automated tests are especially useful.

+

Currently, the LLVM build system provides basic support for tests. The LLVM +system provides the following:

+
+
    +
  • LLVM contains regression tests in llvm/test. These tests are run by the +Lit testing tool. This test procedure uses RUN +lines in the actual test case to determine how to run the test. See the +LLVM Testing Infrastructure Guide for more details.

  • +
  • LLVM contains an optional package called llvm-test, which provides +benchmarks and programs that are known to compile with the Clang front +end. You can use these programs to test your code, gather statistical +information, and compare it to the current LLVM performance statistics.

    +

    Currently, there is no way to hook your tests directly into the llvm/test +testing harness. You will simply need to find a way to use the source +provided within that directory on your own.

    +
  • +
+

Typically, you will want to build your lib directory first followed by your +tools directory.

+
+
+

Writing LLVM Style Makefiles

+

The LLVM build system provides a convenient way to build libraries and +executables. Most of your project Makefiles will only need to define a few +variables. Below is a list of the variables one can set and what they can +do:

+
+

Required Variables

+

LEVEL

+
+

This variable is the relative path from this Makefile to the top +directory of your project’s source code. For example, if your source code +is in /tmp/src, then the Makefile in /tmp/src/jump/high +would set LEVEL to "../..".

+
+
+
+

Variables for Building Subdirectories

+

DIRS

+
+

This is a space separated list of subdirectories that should be built. They +will be built, one at a time, in the order specified.

+
+

PARALLEL_DIRS

+
+

This is a list of directories that can be built in parallel. These will be +built after the directories in DIRS have been built.

+
+

OPTIONAL_DIRS

+
+

This is a list of directories that can be built if they exist, but will not +cause an error if they do not exist. They are built serially in the order +in which they are listed.

+
+
+
+

Variables for Building Libraries

+

LIBRARYNAME

+
+

This variable contains the base name of the library that will be built. For +example, to build a library named libsample.a, LIBRARYNAME should +be set to sample.

+
+

BUILD_ARCHIVE

+
+

By default, a library is a .o file that is linked directly into a +program. To build an archive (also known as a static library), set the +BUILD_ARCHIVE variable.

+
+

SHARED_LIBRARY

+
+

If SHARED_LIBRARY is defined in your Makefile, a shared (or dynamic) +library will be built.

+
+
+
+

Variables for Building Programs

+

TOOLNAME

+
+

This variable contains the name of the program that will be built. For +example, to build an executable named sample, TOOLNAME should be set +to sample.

+
+

USEDLIBS

+
+

This variable holds a space separated list of libraries that should be +linked into the program. These libraries must be libraries that come from +your lib directory. The libraries must be specified without their +lib prefix. For example, to link libsample.a, you would set +USEDLIBS to sample.a.

+

Note that this works only for statically linked libraries.

+
+

LLVMLIBS

+
+

This variable holds a space separated list of libraries that should be +linked into the program. These libraries must be LLVM libraries. The +libraries must be specified without their lib prefix. For example, to +link with a driver that performs an IR transformation you might set +LLVMLIBS to this minimal set of libraries LLVMSupport.a LLVMCore.a +LLVMBitReader.a LLVMAsmParser.a LLVMAnalysis.a LLVMTransformUtils.a +LLVMScalarOpts.a LLVMTarget.a.

+

Note that this works only for statically linked libraries. LLVM is split +into a large number of static libraries, and the list of libraries you +require may be much longer than the list above. To see a full list of +libraries use: llvm-config --libs all. Using LINK_COMPONENTS as +described below, obviates the need to set LLVMLIBS.

+
+

LINK_COMPONENTS

+
+

This variable holds a space separated list of components that the LLVM +Makefiles pass to the llvm-config tool to generate a link line for +the program. For example, to link with all LLVM libraries use +LINK_COMPONENTS = all.

+
+

LIBS

+
+

To link dynamic libraries, add -l<library base name> to the LIBS +variable. The LLVM build system will look in the same places for dynamic +libraries as it does for static libraries.

+

For example, to link libsample.so, you would have the following line in +your Makefile:

+
+
LIBS += -lsample
+
+
+
+
+

Note that LIBS must occur in the Makefile after the inclusion of +Makefile.common.

+
+
+

Miscellaneous Variables

+

CFLAGS & CPPFLAGS

+
+

These variables can be used to add options to the C and C++ compiler, +respectively. They are typically used to add options that tell the compiler +the location of additional directories to search for header files.

+

It is highly suggested that you append to CFLAGS and CPPFLAGS as +opposed to overwriting them. The master Makefiles may already have +useful options in them that you may not want to overwrite.

+
+
+
+
+

Placement of Object Code

+

The final location of built libraries and executables will depend upon whether +you do a Debug, Release, or Profile build.

+

Libraries

+
+

All libraries (static and dynamic) will be stored in +PROJ_OBJ_ROOT/<type>/lib, where type is Debug, Release, or +Profile for a debug, optimized, or profiled build, respectively.

+
+

Executables

+
+

All executables will be stored in PROJ_OBJ_ROOT/<type>/bin, where type +is Debug, Release, or Profile for a debug, optimized, or +profiled build, respectively.

+
+
+
+

Further Help

+

If you have any questions or need any help creating an LLVM project, the LLVM +team would be more than happy to help. You can always post your questions to +the LLVM Developers Mailing List.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/GitHubMove.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/GitHubMove.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/GitHubMove.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/GitHubMove.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,1147 @@ + + + + + + + + + Moving LLVM Projects to GitHub — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Moving LLVM Projects to GitHub

+
+

Current Status

+

We are planning to complete the transition to GitHub by Oct 21, 2019. See +the GitHub migration status page +for the latest updates and instructions for how to migrate your workflows.

+
+
+

Introduction

+

This is a proposal to move our current revision control system from our own +hosted Subversion to GitHub. Below are the financial and technical arguments as +to why we are proposing such a move and how people (and validation +infrastructure) will continue to work with a Git-based LLVM.

+
+
+

What This Proposal is Not About

+

Changing the development policy.

+

This proposal relates only to moving the hosting of our source-code repository +from SVN hosted on our own servers to Git hosted on GitHub. We are not proposing +using GitHub’s issue tracker, pull-requests, or code-review.

+

Contributors will continue to earn commit access on demand under the Developer +Policy, except that a GitHub account will be required instead of SVN +username/password-hash.

+
+
+

Why Git, and Why GitHub?

+
+

Why Move At All?

+

This discussion began because we currently host our own Subversion server +and Git mirror on a voluntary basis. The LLVM Foundation sponsors the server and +provides limited support, but there is only so much it can do.

+

Volunteers are not sysadmins themselves, but compiler engineers that happen +to know a thing or two about hosting servers. We also don’t have 24/7 support, +and we sometimes wake up to see that continuous integration is broken because +the SVN server is either down or unresponsive.

+

We should take advantage of one of the services out there (GitHub, GitLab, +and BitBucket, among others) that offer better service (24/7 stability, disk +space, Git server, code browsing, forking facilities, etc) for free.

+
+
+

Why Git?

+

Many new coders nowadays start with Git, and a lot of people have never used +SVN, CVS, or anything else. Websites like GitHub have changed the landscape +of open source contributions, reducing the cost of first contribution and +fostering collaboration.

+

Git is also the version control many LLVM developers use. Despite the +sources being stored in a SVN server, these developers are already using Git +through the Git-SVN integration.

+

Git allows you to:

+
    +
  • Commit, squash, merge, and fork locally without touching the remote server.

  • +
  • Maintain local branches, enabling multiple threads of development.

  • +
  • Collaborate on these branches (e.g. through your own fork of llvm on GitHub).

  • +
  • Inspect the repository history (blame, log, bisect) without Internet access.

  • +
  • Maintain remote forks and branches on Git hosting services and +integrate back to the main repository.

  • +
+

In addition, because Git seems to be replacing many OSS projects’ version +control systems, there are many tools that are built over Git. +Future tooling may support Git first (if not only).

+
+
+

Why GitHub?

+

GitHub, like GitLab and BitBucket, provides free code hosting for open source +projects. Any of these could replace the code-hosting infrastructure that we +have today.

+

These services also have a dedicated team to monitor, migrate, improve and +distribute the contents of the repositories depending on region and load.

+

GitHub has one important advantage over GitLab and +BitBucket: it offers read-write SVN access to the repository +(https://github.com/blog/626-announcing-svn-support). +This would enable people to continue working post-migration as though our code +were still canonically in an SVN repository.

+

In addition, there are already multiple LLVM mirrors on GitHub, indicating that +part of our community has already settled there.

+
+
+

On Managing Revision Numbers with Git

+

The current SVN repository hosts all the LLVM sub-projects alongside each other. +A single revision number (e.g. r123456) thus identifies a consistent version of +all LLVM sub-projects.

+

Git does not use a sequential integer revision number but instead uses a hash to +identify each commit.

+

The loss of a sequential integer revision number has been a sticking point in +past discussions about Git:

+
    +
  • “The ‘branch’ I most care about is mainline, and losing the ability to say +‘fixed in r1234’ (with some sort of monotonically increasing number) would +be a tragic loss.” [LattnerRevNum]

  • +
  • “I like those results sorted by time and the chronology should be obvious, but +timestamps are incredibly cumbersome and make it difficult to verify that a +given checkout matches a given set of results.” [TrickRevNum]

  • +
  • “There is still the major regression with unreadable version numbers. +Given the amount of Bugzilla traffic with ‘Fixed in…’, that’s a +non-trivial issue.” [JSonnRevNum]

  • +
  • “Sequential IDs are important for LNT and llvmlab bisection tool.” [MatthewsRevNum].

  • +
+

However, Git can emulate this increasing revision number: +git rev-list --count <commit-hash>. This identifier is unique only +within a single branch, but this means the tuple (num, branch-name) uniquely +identifies a commit.

+

We can thus use this revision number to ensure that e.g. clang -v reports a +user-friendly revision number (e.g. master-12345 or 4.0-5321), addressing +the objections raised above with respect to this aspect of Git.

+
+
+

What About Branches and Merges?

+

In contrast to SVN, Git makes branching easy. Git’s commit history is +represented as a DAG, a departure from SVN’s linear history. However, we propose +to mandate making merge commits illegal in our canonical Git repository.

+

Unfortunately, GitHub does not support server side hooks to enforce such a +policy. We must rely on the community to avoid pushing merge commits.

+

GitHub offers a feature called Status Checks: a branch protected by +status checks requires commits to be explicitly allowed before the push can happen. +We could supply a pre-push hook on the client side that would run and check the +history, before allowing the commit being pushed [statuschecks]. +However this solution would be somewhat fragile (how do you update a script +installed on every developer machine?) and prevents SVN access to the +repository.

+
+
+

What About Commit Emails?

+

We will need a new bot to send emails for each commit. This proposal leaves the +email format unchanged besides the commit URL.

+
+
+
+

Straw Man Migration Plan

+
+

Step #1 : Before The Move

+
    +
  1. Update docs to mention the move, so people are aware of what is going on.

  2. +
  3. Set up a read-only version of the GitHub project, mirroring our current SVN +repository.

  4. +
  5. Add the required bots to implement the commit emails, as well as the +umbrella repository update (if the multirepo is selected) or the read-only +Git views for the sub-projects (if the monorepo is selected).

  6. +
+
+
+

Step #2 : Git Move

+
    +
  1. Update the buildbots to pick up updates and commits from the GitHub +repository. Not all bots have to migrate at this point, but it’ll help +provide infrastructure testing.

  2. +
  3. Update Phabricator to pick up commits from the GitHub repository.

  4. +
  5. LNT and llvmlab have to be updated: they rely on a unique monotonically +increasing integer across branches [MatthewsRevNum].

  6. +
  7. Instruct downstream integrators to pick up commits from the GitHub +repository.

  8. +
  9. Review and prepare an update for the LLVM documentation.

  10. +
+

Until this point nothing has changed for developers, it will just +boil down to a lot of work for buildbot and other infrastructure +owners.

+

The migration will pause here until all dependencies have cleared, and all +problems have been solved.

+
+
+

Step #3: Write Access Move

+
    +
  1. Collect developers’ GitHub account information, and add them to the project.

  2. +
  3. Switch the SVN repository to read-only and allow pushes to the GitHub repository.

  4. +
  5. Update the documentation.

  6. +
  7. Mirror Git to SVN.

  8. +
+
+
+

Step #4 : Post Move

+
    +
  1. Archive the SVN repository.

  2. +
  3. Update links on the LLVM website pointing to viewvc/klaus/phab etc. to +point to GitHub instead.

  4. +
+
+
+
+

GitHub Repository Description

+
+

Monorepo

+

The LLVM git repository hosted at https://github.com/llvm/llvm-project contains all +sub-projects in a single source tree. It is often referred to as a monorepo and +mimics an export of the current SVN repository, with each sub-project having its +own top-level directory. Not all sub-projects are used for building toolchains. +For example, www/ and test-suite/ are not part of the monorepo.

+

Putting all sub-projects in a single checkout makes cross-project refactoring +naturally simple:

+
+
    +
  • New sub-projects can be trivially split out for better reuse and/or layering +(e.g., to allow libSupport and/or LIT to be used by runtimes without adding a +dependency on LLVM).

  • +
  • Changing an API in LLVM and upgrading the sub-projects will always be done in +a single commit, designing away a common source of temporary build breakage.

  • +
  • Moving code across sub-projects (during refactoring for instance) in a single +commit enables accurate git blame when tracking code change history.

  • +
  • Tooling based on git grep works natively across sub-projects, making it +easier to find refactoring opportunities across projects (for example reusing a +datastructure initially in LLDB by moving it into libSupport).

  • +
  • Having all the sources present encourages maintaining the other sub-projects +when changing API.

  • +
+
+

Finally, the monorepo maintains the property of the existing SVN repository that +the sub-projects move synchronously, and a single revision number (or commit +hash) identifies the state of the development across all projects.

+
+

Building a single sub-project

+

Even though there is a single source tree, you are not required to build +all sub-projects together. It is trivial to configure builds for a single +sub-project.

+

For example:

+
mkdir build && cd build
+# Configure only LLVM (default)
+cmake path/to/monorepo
+# Configure LLVM and lld
+cmake path/to/monorepo -DLLVM_ENABLE_PROJECTS=lld
+# Configure LLVM and clang
+cmake path/to/monorepo -DLLVM_ENABLE_PROJECTS=clang
+
+
+
+
+
+

Outstanding Questions

+
+

Read-only sub-project mirrors

+

With the Monorepo, it is undecided whether the existing single-subproject +mirrors (e.g. https://git.llvm.org/git/compiler-rt.git) will continue to +be maintained.

+
+
+

Read/write SVN bridge

+

GitHub supports a read/write SVN bridge for its repositories. However, +there have been issues with this bridge working correctly in the past, +so it’s not clear if this is something that will be supported going forward.

+
+
+
+

Monorepo Drawbacks

+
+
    +
  • Using the monolithic repository may add overhead for those contributing to a +standalone sub-project, particularly on runtimes like libcxx and compiler-rt +that don’t rely on LLVM; currently, a fresh clone of libcxx is only 15MB (vs. +1GB for the monorepo), and the commit rate of LLVM may cause more frequent +git push collisions when upstreaming. Affected contributors may be able to +use the SVN bridge or the single-subproject Git mirrors. However, it’s +undecided if these projects will continue to be maintained.

  • +
  • Using the monolithic repository may add overhead for those integrating a +standalone sub-project, even if they aren’t contributing to it, due to the +same disk space concern as the point above. The availability of the +sub-project Git mirrors would address this.

  • +
  • Preservation of the existing read/write SVN-based workflows relies on the +GitHub SVN bridge, which is an extra dependency. Maintaining this locks us +into GitHub and could restrict future workflow changes.

  • +
+
+ +
+
+
+

Workflow Before/After

+

This section goes through a few examples of workflows, intended to illustrate +how end-users or developers would interact with the repository for +various use-cases.

+
+

Checkout/Clone a Single Project, with Commit Access

+
+

Currently

+
# direct SVN checkout
+svn co https://user@llvm.org/svn/llvm-project/llvm/trunk llvm
+# or using the read-only Git view, with git-svn
+git clone https://llvm.org/git/llvm.git
+cd llvm
+git svn init https://llvm.org/svn/llvm-project/llvm/trunk --username=<username>
+git config svn-remote.svn.fetch :refs/remotes/origin/master
+git svn rebase -l  # -l avoids fetching ahead of the git mirror.
+
+
+

Commits are performed using svn commit or with the sequence git commit and +git svn dcommit.

+
+
+

Monorepo Variant

+

With the monorepo variant, there are a few options, depending on your +constraints. First, you could just clone the full repository:

+

git clone https://github.com/llvm/llvm-project.git

+

At this point you have every sub-project (llvm, clang, lld, lldb, …), which +doesn’t imply you have to build all of them. You +can still build only compiler-rt for instance. In this way it’s not different +from someone who would check out all the projects with SVN today.

+

If you want to avoid checking out all the sources, you can hide the other +directories using a Git sparse checkout:

+
git config core.sparseCheckout true
+echo /compiler-rt > .git/info/sparse-checkout
+git read-tree -mu HEAD
+
+
+

The data for all sub-projects is still in your .git directory, but in your +checkout, you only see compiler-rt. +Before you push, you’ll need to fetch and rebase (git pull --rebase) as +usual.

+

Note that when you fetch you’ll likely pull in changes to sub-projects you don’t +care about. If you are using sparse checkout, the files from other projects +won’t appear on your disk. The only effect is that your commit hash changes.

+

You can check whether the changes in the last fetch are relevant to your commit +by running:

+
git log origin/master@{1}..origin/master -- libcxx
+
+
+

This command can be hidden in a script so that git llvmpush would perform all +these steps, fail only if such a dependent change exists, and show immediately +the change that prevented the push. An immediate repeat of the command would +(almost) certainly result in a successful push. +Note that today with SVN or git-svn, this step is not possible since the +“rebase” implicitly happens while committing (unless a conflict occurs).

+
+
+
+

Checkout/Clone Multiple Projects, with Commit Access

+

Let’s look at how to assemble llvm+clang+libcxx at a given revision.

+
+

Currently

+
svn co https://llvm.org/svn/llvm-project/llvm/trunk llvm -r $REVISION
+cd llvm/tools
+svn co https://llvm.org/svn/llvm-project/clang/trunk clang -r $REVISION
+cd ../projects
+svn co https://llvm.org/svn/llvm-project/libcxx/trunk libcxx -r $REVISION
+
+
+

Or using git-svn:

+
git clone https://llvm.org/git/llvm.git
+cd llvm/
+git svn init https://llvm.org/svn/llvm-project/llvm/trunk --username=<username>
+git config svn-remote.svn.fetch :refs/remotes/origin/master
+git svn rebase -l
+git checkout `git svn find-rev -B r258109`
+cd tools
+git clone https://llvm.org/git/clang.git
+cd clang/
+git svn init https://llvm.org/svn/llvm-project/clang/trunk --username=<username>
+git config svn-remote.svn.fetch :refs/remotes/origin/master
+git svn rebase -l
+git checkout `git svn find-rev -B r258109`
+cd ../../projects/
+git clone https://llvm.org/git/libcxx.git
+cd libcxx
+git svn init https://llvm.org/svn/llvm-project/libcxx/trunk --username=<username>
+git config svn-remote.svn.fetch :refs/remotes/origin/master
+git svn rebase -l
+git checkout `git svn find-rev -B r258109`
+
+
+

Note that the list would be longer with more sub-projects.

+
+
+

Monorepo Variant

+

The repository natively contains the source for every sub-project at the right +revision, which makes this straightforward:

+
git clone https://github.com/llvm/llvm-project.git
+cd llvm-project
+git checkout $REVISION
+
+
+

As before, at this point clang, llvm, and libcxx are stored in directories +alongside each other.

+
+
+
+

Commit an API Change in LLVM and Update the Sub-projects

+

Today this is possible, even though not common (at least not documented) for +subversion users and for git-svn users. For example, few Git users try to update +LLD or Clang in the same commit as they change an LLVM API.

+

The multirepo variant does not address this: one would have to commit and push +separately in every individual repository. It would be possible to establish a +protocol whereby users add a special token to their commit messages that causes +the umbrella repo’s updater bot to group all of them into a single revision.

+

The monorepo variant handles this natively.

+
+
+

Branching/Stashing/Updating for Local Development or Experiments

+
+

Currently

+

SVN does not allow this use case, but developers that are currently using +git-svn can do it. Let’s look at what this means in practice when dealing with +multiple sub-projects.

+

To update the repository to tip of trunk:

+
git pull
+cd tools/clang
+git pull
+cd ../../projects/libcxx
+git pull
+
+
+

To create a new branch:

+
git checkout -b MyBranch
+cd tools/clang
+git checkout -b MyBranch
+cd ../../projects/libcxx
+git checkout -b MyBranch
+
+
+

To switch branches:

+
git checkout AnotherBranch
+cd tools/clang
+git checkout AnotherBranch
+cd ../../projects/libcxx
+git checkout AnotherBranch
+
+
+
+
+

Monorepo Variant

+

Regular Git commands are sufficient, because everything is in a single +repository:

+

To update the repository to tip of trunk:

+
git pull
+
+
+

To create a new branch:

+
git checkout -b MyBranch
+
+
+

To switch branches:

+
git checkout AnotherBranch
+
+
+
+
+
+

Bisecting

+

Assuming a developer is looking for a bug in clang (or lld, or lldb, …).

+
+

Currently

+

SVN does not have builtin bisection support, but the single revision across +sub-projects makes it possible to script around.

+

Using the existing Git read-only view of the repositories, it is possible to use +the native Git bisection script over the llvm repository, and use some scripting +to synchronize the clang repository to match the llvm revision.

+
+
+

Monorepo Variant

+

Bisecting on the monorepo is straightforward, and very similar to the above, +except that the bisection script does not need to include the +git submodule update step.

+

The same example, finding which commit introduces a regression where clang-3.9 +crashes but clang-3.8 passes, will look like:

+
git bisect start releases/3.9.x releases/3.8.x
+git bisect run ./bisect_script.sh
+
+
+

With the bisect_script.sh script being:

+
#!/bin/sh
+cd $BUILD_DIR
+
+ninja clang || exit 125   # an exit code of 125 asks "git bisect"
+                          # to "skip" the current commit
+
+./bin/clang some_crash_test.cpp
+
+
+

Also, since the monorepo handles commits that update multiple projects at once, you’re +less likely to encounter a build failure where one commit changes an API in LLVM and +another later one “fixes” the build in clang.

+
+
+
+
+

Moving Local Branches to the Monorepo

+

Suppose you have been developing against the existing LLVM git +mirrors. You have one or more git branches that you want to migrate +to the “final monorepo”.

+

The simplest way to migrate such branches is with the +migrate-downstream-fork.py tool at +https://github.com/jyknight/llvm-git-migration.

+
+

Basic migration

+

Basic instructions for migrate-downstream-fork.py are in the +Python script and are expanded on below to a more general recipe:

+
# Make a repository which will become your final local mirror of the
+# monorepo.
+mkdir my-monorepo
+git -C my-monorepo init
+
+# Add a remote to the monorepo.
+git -C my-monorepo remote add upstream/monorepo https://github.com/llvm/llvm-project.git
+
+# Add remotes for each git mirror you use, from upstream as well as
+# your local mirror.  All projects are listed here but you need only
+# import those for which you have local branches.
+my_projects=( clang
+              clang-tools-extra
+              compiler-rt
+              debuginfo-tests
+              libcxx
+              libcxxabi
+              libunwind
+              lld
+              lldb
+              llvm
+              openmp
+              polly )
+for p in ${my_projects[@]}; do
+  git -C my-monorepo remote add upstream/split/${p} https://github.com/llvm-mirror/${p}.git
+  git -C my-monorepo remote add local/split/${p} https://my.local.mirror.org/${p}.git
+done
+
+# Pull in all the commits.
+git -C my-monorepo fetch --all
+
+# Run migrate-downstream-fork to rewrite local branches on top of
+# the upstream monorepo.
+(
+   cd my-monorepo
+   migrate-downstream-fork.py \
+     refs/remotes/local \
+     refs/tags \
+     --new-repo-prefix=refs/remotes/upstream/monorepo \
+     --old-repo-prefix=refs/remotes/upstream/split \
+     --source-kind=split \
+     --revmap-out=monorepo-map.txt
+)
+
+# Octopus-merge the resulting local split histories to unify them.
+
+# Assumes local work on local split mirrors is on master (and
+# upstream is presumably represented by some other branch like
+# upstream/master).
+my_local_branch="master"
+
+git -C my-monorepo branch --no-track local/octopus/${my_local_branch} \
+  $(git -C my-monorepo merge-base refs/remotes/upstream/monorepo/master \
+                                  refs/remotes/local/split/llvm/${my_local_branch})
+git -C my-monorepo checkout local/octopus/${my_local_branch}
+
+subproject_branches=()
+for p in ${my_projects[@]}; do
+  subproject_branch=${p}/local/monorepo/${my_local_branch}
+  git -C my-monorepo branch ${subproject_branch} \
+    refs/remotes/local/split/${p}/${my_local_branch}
+  if [[ "${p}" != "llvm" ]]; then
+    subproject_branches+=( ${subproject_branch} )
+  fi
+done
+
+git -C my-monorepo merge ${subproject_branches[@]}
+
+for p in ${my_projects[@]}; do
+  subproject_branch=${p}/local/monorepo/${my_local_branch}
+  git -C my-monorepo branch -d ${subproject_branch}
+done
+
+# Create local branches for upstream monorepo branches.
+for ref in $(git -C my-monorepo for-each-ref --format="%(refname)" \
+                 refs/remotes/upstream/monorepo); do
+  upstream_branch=${ref#refs/remotes/upstream/monorepo/}
+  git -C my-monorepo branch upstream/${upstream_branch} ${ref}
+done
+
+
+

The above gets you to a state like the following:

+
U1 - U2 - U3 <- upstream/master
+  \   \    \
+   \   \    - Llld1 - Llld2 -
+    \   \                    \
+     \   - Lclang1 - Lclang2-- Lmerge <- local/octopus/master
+      \                      /
+       - Lllvm1 - Lllvm2-----
+
+
+

Each branched component has its branch rewritten on top of the +monorepo and all components are unified by a giant octopus merge.

+

If additional active local branches need to be preserved, the above +operations following the assignment to my_local_branch should be +done for each branch. Ref paths will need to be updated to map the +local branch to the corresponding upstream branch. If local branches +have no corresponding upstream branch, then the creation of +local/octopus/<local branch> need not use git-merge-base to +pinpoint its root commit; it may simply be branched from the +appropriate component branch (say, llvm/local_release_X).

+
+
+

Zipping local history

+

The octopus merge is suboptimal for many cases, because walking back +through the history of one component leaves the other components fixed +at a history that likely makes things unbuildable.

+

Some downstream users track the order commits were made to subprojects +with some kind of “umbrella” project that imports the project git +mirrors as submodules, similar to the multirepo umbrella proposed +above. Such an umbrella repository looks something like this:

+
 UM1 ---- UM2 -- UM3 -- UM4 ---- UM5 ---- UM6 ---- UM7 ---- UM8 <- master
+ |        |             |        |        |        |        |
+Lllvm1   Llld1         Lclang1  Lclang2  Lllvm2   Llld2     Lmyproj1
+
+
+

The vertical bars represent submodule updates to a particular local +commit in the project mirror. UM3 in this case is a commit of +some local umbrella repository state that is not a submodule update, +perhaps a README or project build script update. Commit UM8 +updates a submodule of local project myproj.

+

The tool zip-downstream-fork.py at +https://github.com/greened/llvm-git-migration/tree/zip can be used to +convert the umbrella history into a monorepo-based history with +commits in the order implied by submodule updates:

+
U1 - U2 - U3 <- upstream/master
+ \    \    \
+  \    -----\---------------                                    local/zip--.
+   \         \              \                                               |
+  - Lllvm1 - Llld1 - UM3 -  Lclang1 - Lclang2 - Lllvm2 - Llld2 - Lmyproj1 <-'
+
+
+

The U* commits represent upstream commits to the monorepo master +branch. Each submodule update in the local UM* commits brought in +a subproject tree at some local commit. The trees in the L*1 +commits represent merges from upstream. These result in edges from +the U* commits to their corresponding rewritten L*1 commits. +The L*2 commits did not do any merges from upstream.

+

Note that the merge from U2 to Lclang1 appears redundant, but +if, say, U3 changed some files in upstream clang, the Lclang1 +commit appearing after the Llld1 commit would actually represent a +clang tree earlier in the upstream clang history. We want the +local/zip branch to accurately represent the state of our umbrella +history and so the edge U2 -> Lclang1 is a visual reminder of what +clang’s tree actually looks like in Lclang1.

+

Even so, the edge U3 -> Llld1 could be problematic for future +merges from upstream. git will think that we’ve already merged from +U3, and we have, except for the state of the clang tree. One +possible mitigation strategy is to manually diff clang between U2 +and U3 and apply those updates to local/zip. Another, +possibly simpler strategy is to freeze local work on downstream +branches and merge all submodules from the latest upstream before +running zip-downstream-fork.py. If downstream merged each project +from upstream in lockstep without any intervening local commits, then +things should be fine without any special action. We anticipate this +to be the common case.

+

The tree for Lclang1 outside of clang will represent the state of +things at U3 since all of the upstream projects not participating +in the umbrella history should be in a state respecting the commit +U3. The trees for llvm and lld should correctly represent commits +Lllvm1 and Llld1, respectively.

+

Commit UM3 changed files not related to submodules and we need +somewhere to put them. It is not safe in general to put them in the +monorepo root directory because they may conflict with files in the +monorepo. Let’s assume we want them in a directory local in the +monorepo.

+

Example 1: Umbrella looks like the monorepo

+

For this example, we’ll assume that each subproject appears in its own +top-level directory in the umbrella, just as they do in the monorepo. +Let’s also assume that we want the files in directory myproj to +appear in local/myproj.

+

Given the above run of migrate-downstream-fork.py, a recipe to +create the zipped history is below:

+
# Import any non-LLVM repositories the umbrella references.
+git -C my-monorepo remote add localrepo \
+                              https://my.local.mirror.org/localrepo.git
+git -C my-monorepo fetch localrepo
+
+subprojects=( clang clang-tools-extra compiler-rt debuginfo-tests libclc
+              libcxx libcxxabi libunwind lld lldb llgo llvm openmp
+              parallel-libs polly pstl )
+
+# Import histories for upstream split projects (this was probably
+# already done for the ``migrate-downstream-fork.py`` run).
+for project in ${subprojects[@]}; do
+  git -C my-monorepo remote add upstream/split/${project} \
+                 https://github.com/llvm-mirror/${project}.git
+  git -C my-monorepo fetch upstream/split/${project}
+done
+
+# Import histories for downstream split projects (this was probably
+# already done for the ``migrate-downstream-fork.py`` run).
+for project in ${subprojects[@]}; do
+  git -C my-monorepo remote add local/split/${project} \
+                 https://my.local.mirror.org/${project}.git
+  git -C my-monorepo fetch local/split/${project}
+done
+
+# Import umbrella history.
+git -C my-monorepo remote add umbrella \
+                              https://my.local.mirror.org/umbrella.git
+git -C my-monorepo fetch umbrella
+
+# Put myproj in local/myproj
+echo "myproj local/myproj" > my-monorepo/submodule-map.txt
+
+# Rewrite history
+(
+  cd my-monorepo
+  zip-downstream-fork.py \
+    refs/remotes/umbrella \
+    --new-repo-prefix=refs/remotes/upstream/monorepo \
+    --old-repo-prefix=refs/remotes/upstream/split \
+    --revmap-in=monorepo-map.txt \
+    --revmap-out=zip-map.txt \
+    --subdir=local \
+    --submodule-map=submodule-map.txt \
+    --update-tags
+ )
+
+ # Create the zip branch (assuming umbrella master is wanted).
+ git -C my-monorepo branch --no-track local/zip/master refs/remotes/umbrella/master
+
+
+

Note that if the umbrella has submodules to non-LLVM repositories, +zip-downstream-fork.py needs to know about them to be able to +rewrite commits. That is why the first step above is to fetch commits +from such repositories.

+

With --update-tags the tool will migrate annotated tags pointing +to submodule commits that were inlined into the zipped history. If +the umbrella pulled in an upstream commit that happened to have a tag +pointing to it, that tag will be migrated, which is almost certainly +not what is wanted. The tag can always be moved back to its original +commit after rewriting, or the --update-tags option may be +discarded and any local tags would then be migrated manually.

+

Example 2: Nested sources layout

+

The tool handles nested submodules (e.g. llvm is a submodule in +umbrella and clang is a submodule in llvm). The file +submodule-map.txt is a list of pairs, one per line. The first +pair item describes the path to a submodule in the umbrella +repository. The second pair item describes the path where trees for +that submodule should be written in the zipped history.

+

Let’s say your umbrella repository is actually the llvm repository and +it has submodules in the “nested sources” layout (clang in +tools/clang, etc.). Let’s also say projects/myproj is a submodule +pointing to some downstream repository. The submodule map file should +look like this (we still want myproj mapped the same way as +previously):

+
tools/clang clang
+tools/clang/tools/extra clang-tools-extra
+projects/compiler-rt compiler-rt
+projects/debuginfo-tests debuginfo-tests
+projects/libclc libclc
+projects/libcxx libcxx
+projects/libcxxabi libcxxabi
+projects/libunwind libunwind
+tools/lld lld
+tools/lldb lldb
+projects/openmp openmp
+tools/polly polly
+projects/myproj local/myproj
+
+
+

If a submodule path does not appear in the map, the tool assumes it +should be placed in the same place in the monorepo. That means if you +use the “nested sources” layout in your umbrella, you must provide +map entries for all of the projects in your umbrella (except llvm). +Otherwise trees from submodule updates will appear underneath llvm in +the zipped history.

+

Because llvm is itself the umbrella, we use --subdir to write its +content into llvm in the zipped history:

+
# Import any non-LLVM repositories the umbrella references.
+git -C my-monorepo remote add localrepo \
+                              https://my.local.mirror.org/localrepo.git
+git -C my-monorepo fetch localrepo
+
+subprojects=( clang clang-tools-extra compiler-rt debuginfo-tests libclc
+              libcxx libcxxabi libunwind lld lldb llgo llvm openmp
+              parallel-libs polly pstl )
+
+# Import histories for upstream split projects (this was probably
+# already done for the ``migrate-downstream-fork.py`` run).
+for project in ${subprojects[@]}; do
+  git -C my-monorepo remote add upstream/split/${project} \
+                 https://github.com/llvm-mirror/${project}.git
+  git -C my-monorepo fetch upstream/split/${project}
+done
+
+# Import histories for downstream split projects (this was probably
+# already done for the ``migrate-downstream-fork.py`` run).
+for project in ${subprojects[@]}; do
+  git -C my-monorepo remote add local/split/${project} \
+                 https://my.local.mirror.org/${project}.git
+  git -C my-monorepo fetch local/split/${project}
+done
+
+# Import umbrella history.  We want this under a different refspec
+# so zip-downstream-fork.py knows what it is.
+git -C my-monorepo remote add umbrella \
+                               https://my.local.mirror.org/llvm.git
+git -C my-monorepo fetch umbrella
+
+# Create the submodule map.
+echo "tools/clang clang" > my-monorepo/submodule-map.txt
+echo "tools/clang/tools/extra clang-tools-extra" >> my-monorepo/submodule-map.txt
+echo "projects/compiler-rt compiler-rt" >> my-monorepo/submodule-map.txt
+echo "projects/debuginfo-tests debuginfo-tests" >> my-monorepo/submodule-map.txt
+echo "projects/libclc libclc" >> my-monorepo/submodule-map.txt
+echo "projects/libcxx libcxx" >> my-monorepo/submodule-map.txt
+echo "projects/libcxxabi libcxxabi" >> my-monorepo/submodule-map.txt
+echo "projects/libunwind libunwind" >> my-monorepo/submodule-map.txt
+echo "tools/lld lld" >> my-monorepo/submodule-map.txt
+echo "tools/lldb lldb" >> my-monorepo/submodule-map.txt
+echo "projects/openmp openmp" >> my-monorepo/submodule-map.txt
+echo "tools/polly polly" >> my-monorepo/submodule-map.txt
+echo "projects/myproj local/myproj" >> my-monorepo/submodule-map.txt
+
+# Rewrite history
+(
+  cd my-monorepo
+  zip-downstream-fork.py \
+    refs/remotes/umbrella \
+    --new-repo-prefix=refs/remotes/upstream/monorepo \
+    --old-repo-prefix=refs/remotes/upstream/split \
+    --revmap-in=monorepo-map.txt \
+    --revmap-out=zip-map.txt \
+    --subdir=llvm \
+    --submodule-map=submodule-map.txt \
+    --update-tags
+ )
+
+ # Create the zip branch (assuming umbrella master is wanted).
+ git -C my-monorepo branch --no-track local/zip/master refs/remotes/umbrella/master
+
+
+

Comments at the top of zip-downstream-fork.py describe in more +detail how the tool works and various implications of its operation.

+
+
+

Importing local repositories

+

You may have additional repositories that integrate with the LLVM +ecosystem, essentially extending it with new tools. If such +repositories are tightly coupled with LLVM, it may make sense to +import them into your local mirror of the monorepo.

+

If such repositories participated in the umbrella repository used +during the zipping process above, they will automatically be added to +the monorepo. For downstream repositories that don’t participate in +an umbrella setup, the import-downstream-repo.py tool at +https://github.com/greened/llvm-git-migration/tree/import can help with +getting them into the monorepo. A recipe follows:

+
# Import downstream repo history into the monorepo.
+git -C my-monorepo remote add myrepo https://my.local.mirror.org/myrepo.git
+git -C my-monorepo fetch myrepo
+
+my_local_tags=( refs/tags/release
+                refs/tags/hotfix )
+
+(
+  cd my-monorepo
+  import-downstream-repo.py \
+    refs/remotes/myrepo \
+    ${my_local_tags[@]} \
+    --new-repo-prefix=refs/remotes/upstream/monorepo \
+    --subdir=myrepo \
+    --tag-prefix="myrepo-"
+ )
+
+ # Preserve release branches.
+ for ref in $(git -C my-monorepo for-each-ref --format="%(refname)" \
+                refs/remotes/myrepo/release); do
+   branch=${ref#refs/remotes/myrepo/}
+   git -C my-monorepo branch --no-track myrepo/${branch} ${ref}
+ done
+
+ # Preserve master.
+ git -C my-monorepo branch --no-track myrepo/master refs/remotes/myrepo/master
+
+ # Merge master.
+ git -C my-monorepo checkout local/zip/master  # Or local/octopus/master
+ git -C my-monorepo merge myrepo/master
+
+
+

You may want to merge other corresponding branches, for example +myrepo release branches if they were in lockstep with LLVM project +releases.

+

--tag-prefix tells import-downstream-repo.py to rename +annotated tags with the given prefix. Due to limitations with +fast_filter_branch.py, unannotated tags cannot be renamed +(fast_filter_branch.py considers them branches, not tags). Since +the upstream monorepo had its tags rewritten with an “llvmorg-” +prefix, name conflicts should not be an issue. --tag-prefix can +be used to more clearly indicate which tags correspond to various +imported repositories.

+

Given this repository history:

+
R1 - R2 - R3 <- master
+     ^
+     |
+  release/1
+
+
+

The above recipe results in a history like this:

+
U1 - U2 - U3 <- upstream/master
+ \    \    \
+  \    -----\---------------                                         local/zip--.
+   \         \              \                                                    |
+  - Lllvm1 - Llld1 - UM3 -  Lclang1 - Lclang2 - Lllvm2 - Llld2 - Lmyproj1 - M1 <-'
+                                                                           /
+                                                               R1 - R2 - R3  <-.
+                                                                    ^           |
+                                                                    |           |
+                                                             myrepo-release/1   |
+                                                                                |
+                                                                 myrepo/master--'
+
+
+

Commits R1, R2 and R3 have trees that only contain blobs +from myrepo. If you require commits from myrepo to be +interleaved with commits on local project branches (for example, +interleaved with llvm1, llvm2, etc. above) and myrepo doesn’t +appear in an umbrella repository, a new tool will need to be +developed. Creating such a tool would involve:

+
    +
  1. Modifying fast_filter_branch.py to optionally take a +revlist directly rather than generating it itself

  2. +
  3. Creating a tool to generate an interleaved ordering of local +commits based on some criteria (zip-downstream-fork.py uses the +umbrella history as its criterion)

  4. +
  5. Generating such an ordering and feeding it to +fast_filter_branch.py as a revlist

  6. +
+

Some care will also likely need to be taken to handle merge commits, +to ensure the parents of such commits migrate correctly.

+
+
+

Scrubbing the Local Monorepo

+

Once all of the migrating, zipping and importing is done, it’s time to +clean up. The python tools use git-fast-import which leaves a lot +of cruft around and we want to shrink our new monorepo mirror as much +as possible. Here is one way to do it:

+
git -C my-monorepo checkout master
+
+# Delete branches we no longer need.  Do this for any other branches
+# you merged above.
+git -C my-monorepo branch -D local/zip/master || true
+git -C my-monorepo branch -D local/octopus/master || true
+
+# Remove remotes.
+git -C my-monorepo remote remove upstream/monorepo
+
+for p in ${my_projects[@]}; do
+  git -C my-monorepo remote remove upstream/split/${p}
+  git -C my-monorepo remote remove local/split/${p}
+done
+
+git -C my-monorepo remote remove localrepo
+git -C my-monorepo remote remove umbrella
+git -C my-monorepo remote remove myrepo
+
+# Add anything else here you don't need.  refs/tags/release is
+# listed below assuming tags have been rewritten with a local prefix.
+# If not, remove it from this list.
+refs_to_clean=(
+  refs/original
+  refs/remotes
+  refs/tags/backups
+  refs/tags/release
+)
+
+git -C my-monorepo for-each-ref --format="%(refname)" ${refs_to_clean[@]} |
+  xargs -n1 --no-run-if-empty git -C my-monorepo update-ref -d
+
+git -C my-monorepo reflog expire --all --expire=now
+
+# fast_filter_branch.py might have gc running in the background.
+while ! git -C my-monorepo \
+  -c gc.reflogExpire=0 \
+  -c gc.reflogExpireUnreachable=0 \
+  -c gc.rerereresolved=0 \
+  -c gc.rerereunresolved=0 \
+  -c gc.pruneExpire=now \
+  gc --prune=now; do
+  continue
+done
+
+# Takes a LOOOONG time!
+git -C my-monorepo repack -A -d -f --depth=250 --window=250
+
+git -C my-monorepo prune-packed
+git -C my-monorepo prune
+
+
+

You should now have a trim monorepo. Upload it to your git server and +happy hacking!

+
+
+ +
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/LLVMLibC.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/LLVMLibC.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/LLVMLibC.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/LLVMLibC.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,265 @@ + + + + + + + + + “llvm-libc” C Standard Library — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

“llvm-libc” C Standard Library

+ +
+

Introduction

+

This is a proposal to start llvm-libc, an implementation of the +C standard library targeting C17 and above, as part of the LLVM project. +llvm-libc will also provide platform specific extensions as relevant. +For example, on Linux it also provides pthreads, librt and other POSIX +extension libraries.

+
+
+

Features

+

llvm-libc will be developed to have a certain minimum set of features:

+
    +
  • C17 and upwards conformant.

  • +
  • A modular libc with individual pieces implemented in the “as a +library” philosophy of the LLVM project.

  • +
  • Ability to layer this libc over the system libc if possible and desired +for a platform.

  • +
  • Provide C symbols as specified by the standards, but take advantage +and use C++ language facilities for the core implementation.

  • +
  • Provides POSIX extensions on POSIX compliant platforms.

  • +
  • Provides system-specific extensions as appropriate. For example, +provides the Linux API on Linux.

  • +
  • Vendor extensions if and only if necessary.

  • +
  • Designed and developed from the start to work with LLVM tooling and +testing like fuzz testing and sanitizer-supported testing.

  • +
  • ABI independent implementation as far as possible.

  • +
  • Use source based implementations as far as possible rather than +assembly. Will try to fix the compiler rather than use assembly +language workarounds.

  • +
  • Extensive unit testing and standards conformance testing. If relevant +and possible, differential testing: We want to be able +to test llvm-libc against another battle-tested libc. This is +essentially to understand how we differ from other libcs. Also if +relevant and possible, test against the testsuite of another +battle-tested libc implementation.

  • +
+
+
+

Why a new C Standard Library?

+

Implementing a libc is no small task and is not to be taken lightly. A +natural question to ask is, “why a new implementation of the C +standard library?” There is no single answer to this question, but +some of the major reasons are as follows:

+
    +
  • Most libc implementations are monolithic. It is a non-trivial +porting task to pick and choose only the pieces relevant to one’s +platform. The llvm-libc will be developed with sufficient modularity to +make picking and choosing a straightforward task.

  • +
  • Most libc implementations break when built with sanitizer specific +compiler options. The llvm-libc will be developed from the start to +work with those specialized compiler options.

  • +
  • The llvm-libc will be developed to support and employ fuzz testing +from the start.

  • +
  • Most libc implementations use a good amount of assembly language, +and assume specific ABIs (may be platform dependent). With the llvm-libc +implementation, we want to use normal source code as much as possible so +that compiler-based changes to the ABI are easy. Moreover, as part of the +LLVM project, we want to use this opportunity to fix performance related +compiler bugs rather than using assembly workarounds.

  • +
  • A large hole in the LLVM toolchain will be plugged with llvm-libc. +With the broad platform expertise in the LLVM community, and the +strong license and project structure, we think that llvm-libc will +be more tunable and robust, without sacrificing the simplicity and +accessibility typical of the LLVM project.

  • +
+
+
+

Platform Support

+

We envision that llvm-libc will support a variety of platforms in the coming +years. Interested parties are encouraged to participate in the design and +implementation, and add support for their favorite platforms.

+
+
+

ABI Compatibility

+

As llvm-libc is new, it will not offer ABI stability in the initial stages. +However, as we’ve heard from other LLVM contributors that they are interested +in having ABI stability, llvm-libc code will be written in a manner which is +amenable to ABI stability. We are looking for contributors interested in +driving the design in this space to help us define what exactly ABI +stability means for llvm-libc.

+
+
+

Layering Over Another libc

+

When meaningful and practically possible on a platform, llvm-libc will be +developed in a fashion that it will be possible to layer it over the system +libc. This does not mean that one can mix llvm-libc with the system-libc. Also, +it does not mean that layering is the only way to use llvm-libc. What it +means is that, llvm-libc can optionally be packaged in a way that it can +delegate parts of the functionality to the system-libc. The delegation happens +internal to llvm-libc and is invisible to the users. From the user’s point of +view, they only call into llvm-libc.

+

There are a few problems one needs to be mindful of when implementing such a +delegation scheme in llvm-libc. Examples of such problems are:

+

1. One cannot mix data structures from llvm-libc with those from the +system-libc. A translation from one set of data structures to the other should +happen internal to llvm-libc. +2. The delegation mechanism has to be implemented over a related set of +functions. For example, one cannot delegate just the fopen function to the +system-libc. One will have to delegate all FILE related functions to the +system-libc.

+
+
+

Current Status

+

llvm-libc development is still in the planning phase.

+
+
+

Build Bots

+

Once the development starts, there will be llvm-libc focused builders added to +the LLVM BuildBot.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/TestSuite.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/TestSuite.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/TestSuite.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/TestSuite.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,492 @@ + + + + + + + + + Test-Suite Extensions — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Test-Suite Extensions

+ +
+

Abstract

+

These are ideas for additional programs, benchmarks, applications and +algorithms that could be added to the LLVM Test-Suite. +The test-suite could be much larger than it is now, which would help us +detect compiler errors (crashes, miscompiles) during development.

+

Most probably, the reason why the programs below have not been added to +the test-suite yet is that nobody has found time to do it. But there +might be other issues as well, such as

+
+
    +
  • +
    Licensing (Support can still be added as external module,

    like for the SPEC benchmarks)

    +
    +
    +
  • +
  • +
    Language (in particular, there is no official LLVM frontend

    for FORTRAN yet)

    +
    +
    +
  • +
  • +
    Parallelism (currently, all programs in test-suite use

    one thread only)

    +
    +
    +
  • +
+
+
+
+

Benchmarks

+
+

SPEC CPU 2017

+

https://www.spec.org/cpu2017/

+

The following have not been included yet because they contain Fortran +code.

+

In case of cactuBSSN only a small portion is Fortran. The host’s +Fortran compiler could be used for these parts.

+

Note that CMake’s Ninja generator has difficulties with Fortran. See the +CMake documentation +for details.

+
+
    +
  • 503.bwaves_r/603.bwaves_s

  • +
  • 507.cactuBSSN_r

  • +
  • 521.wrf_r/621.wrf_s

  • +
  • 527.cam4_r/627.cam4_s

  • +
  • 628.pop2_s

  • +
  • 548.exchange2_r/648.exchange2_s

  • +
  • 549.fotonik3d_r/649.fotonik3d_s

  • +
  • 554.roms_r/654.roms_s

  • +
+
+
+
+

SPEC OMP2012

+

https://www.spec.org/omp2012/

+
+
    +
  • 350.md

  • +
  • 351.bwaves

  • +
  • 352.nab

  • +
  • 357.bt331

  • +
  • 358.botsalgn

  • +
  • 359.botsspar

  • +
  • 360.ilbdc

  • +
  • 362.fma3d

  • +
  • 363.swim

  • +
  • 367.imagick

  • +
  • 370.mgrid331

  • +
  • 371.applu331

  • +
  • 372.smithwa

  • +
  • 376.kdtree

  • +
+
+
+ +
+

OpenMP 4.x SIMD Benchmarks

+

https://github.com/flwende/simd_benchmarks

+
+ + + +
+

A Benchmark for the C/C++ Standard Library

+

https://github.com/hiraditya/std-benchmark

+
+
+

OpenBenchmarking.org CPU / Processor Suite

+

https://openbenchmarking.org/suite/pts/cpu

+

This is a subset of the +Phoronix Test Suite +and is itself a collection of benchmark suites

+
+ + +
+

Rodinia

+

http://lava.cs.virginia.edu/Rodinia/download_links.htm

+

Rodinia has already been partially included in +MultiSource/Benchmarks/Rodinia. Benchmarks still missing are:

+
+
    +
  • streamcluster

  • +
  • particlefilter

  • +
  • nw

  • +
  • nn

  • +
  • myocyte

  • +
  • mummergpu

  • +
  • lud

  • +
  • leukocyte

  • +
  • lavaMD

  • +
  • kmeans

  • +
  • hotspot3D

  • +
  • heartwall

  • +
  • cfd

  • +
  • bfs

  • +
  • b+tree

  • +
+
+
+ + +
+

Graph500 reference implementations

+

https://github.com/graph500/graph500/tree/v2-spec

+
+
+

NAS Parallel Benchmarks

+

https://www.nas.nasa.gov/publications/npb.html

+

The official benchmark is written in Fortran, but an unofficial +C-translation is available as well: +https://github.com/benchmark-subsetting/NPB3.0-omp-C

+
+
+

DARPA HPCS SSCA#2 C/OpenMP reference implementation

+

http://www.highproductivity.org/SSCABmks.htm

+

This web site does not exist any more, but there seems to be a copy of +some of the benchmarks +https://github.com/gtcasl/hpc-benchmarks/tree/master/SSCA2v2.2

+
+ + +
+

PolyBench

+

https://sourceforge.net/projects/polybench/

+

A modified version of Polybench 3.2 is already presented in +SingleSource/Benchmarks/Polybench. A newer version 4.2.1 is available.

+
+ +
+

RAJA Performance Suite

+

https://github.com/LLNL/RAJAPerf

+
+
+

CORAL-2 Benchmarks

+

https://asc.llnl.gov/coral-2-benchmarks/

+

Many of its programs have already been integrated in +MultiSource/Benchmarks/DOE-ProxyApps-C and +MultiSource/Benchmarks/DOE-ProxyApps-C++.

+
+
    +
  • Nekbone

  • +
  • QMCPack

  • +
  • LAMMPS

  • +
  • Kripke

  • +
  • Quicksilver

  • +
  • PENNANT

  • +
  • Big Data Analytic Suite

  • +
  • Deep Learning Suite

  • +
  • Stream

  • +
  • Stride

  • +
  • ML/DL micro-benchmark

  • +
  • Pynamic

  • +
  • ACME

  • +
  • VPIC

  • +
  • Laghos

  • +
  • Parallel Integer Sort

  • +
  • Havoq

  • +
+
+
+ + + + +
+
+

Applications/Libraries

+ + + + + +
+

rawspeed

+

https://github.com/darktable-org/rawspeed

+

Its test dataset is 756 MB in size, which is too large to be included +into the test-suite repository.

+
+ +
+
+

Generic Algorithms

+
+

Image processing

+
+

Resampling

+
+
    +
  • Bilinear

  • +
  • Bicubic

  • +
  • Lanczos

  • +
+
+
+
+

Dither

+
+
    +
  • Threshold

  • +
  • Random

  • +
  • Halftone

  • +
  • Bayer

  • +
  • Floyd-Steinberg

  • +
  • Jarvis

  • +
  • Stucki

  • +
  • Burkes

  • +
  • Sierra

  • +
  • Atkinson

  • +
  • Gradient-based

  • +
+
+
+
+

Feature detection

+
+
    +
  • Harris

  • +
  • Histogram of Oriented Gradients

  • +
+
+
+
+

Color conversion

+
+
    +
  • RGB to grayscale

  • +
  • HSL to RGB

  • +
+
+
+
+
+

Graph

+
+

Search Algorithms

+
+
    +
  • Breadth-First-Search

  • +
  • Depth-First-Search

  • +
  • Dijkstra’s algorithm

  • +
  • A-Star

  • +
+
+
+
+

Spanning Tree

+
+
    +
  • Kruskal’s algorithm

  • +
  • Prim’s algorithm

  • +
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/VariableNames.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/VariableNames.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/VariableNames.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/VariableNames.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,686 @@ + + + + + + + + + Variable Names Plan — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Variable Names Plan

+ +

This plan is provisional. It is not agreed upon. It is written with the +intention of capturing the desires and concerns of the LLVM community, and +forming them into a plan that can be agreed upon. +The original author is somewhat naïve in the ways of LLVM so there will +inevitably be some details that are flawed. You can help - you can edit this +page (preferably with a Phabricator review for larger changes) or reply to the +Request For Comments thread.

+
+

Too Long; Didn’t Read

+

Improve the readability of LLVM code.

+
+
+

Introduction

+

The current variable naming rule +states:

+
+

Variable names should be nouns (as they represent state). The name should be +camel case, and start with an upper case letter (e.g. Leader or Boats).

+
+

This rule is the same as that for type names. This is a problem because the +type name cannot be reused for a variable name *. LLVM developers tend to +work around this by either prepending The to the type name:

+
Triple TheTriple;
+
+
+

… or more commonly use an acronym, despite the coding standard stating “Avoid +abbreviations unless they are well known”:

+
Triple T;
+
+
+

The proliferation of acronyms leads to hard-to-read code such as this:

+
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
+                       &LVL, &CM);
+
+
+

Many other coding guidelines [LLDB] [Google] [WebKit] [Qt] [Rust] [Swift] +[Python] require that variable names begin with a lower case letter in contrast +to class names which begin with a capital letter. This convention means that the +most readable variable name also requires the least thought:

+
Triple triple;
+
+
+

There is some agreement that the current rule is broken [LattnerAgree] +[ArsenaultAgree] [RobinsonAgree] and that acronyms are an obstacle to reading +new code [MalyutinDistinguish] [CarruthAcronym] [PicusAcronym]. There are +some opposing views [ParzyszekAcronym2] [RicciAcronyms].

+

This work-in-progress proposal is to change the coding standard for variable +names to require that they start with a lower case letter.

+
+
*
+

In some cases +the type name is reused as a variable name, but this shadows the type name +and confuses many debuggers [DenisovCamelBack].

+
+
+
+
+

Variable Names Coding Standard Options

+

There are two main options for variable names that begin with a lower case +letter: camelBack and lower_case. (These are also known by other names +but here we use the terminology from clang-tidy).

+

camelBack is consistent with [WebKit], [Qt] and [Swift] while +lower_case is consistent with [LLDB], [Google], [Rust] and [Python].

+

camelBack is already used for function names, which may be considered an +advantage [LattnerFunction] or a disadvantage [CarruthFunction].

+

Approval for camelBack was expressed by [DenisovCamelBack] +[LattnerFunction] [IvanovicDistinguish]. +Opposition to camelBack was expressed by [CarruthCamelBack] +[TurnerCamelBack]. +Approval for lower_case was expressed by [CarruthLower] +[CarruthCamelBack] [TurnerLLDB]. +Opposition to lower_case was expressed by [LattnerLower].

+
+

Differentiating variable kinds

+

An additional requested change is to distinguish between different kinds of +variables [RobinsonDistinguish] [RobinsonDistinguish2] [JonesDistinguish] +[IvanovicDistinguish] [CarruthDistinguish] [MalyutinDistinguish].

+

Others oppose this idea [HähnleDistinguish] [GreeneDistinguish] +[HendersonPrefix].

+

A possibility is for member variables to be prefixed with m_ and for global +variables to be prefixed with g_ to distinguish them from local variables. +This is consistent with [LLDB]. The m_ prefix is consistent with [WebKit].

+

A variation is for member variables to be prefixed with m +[IvanovicDistinguish] [BeylsDistinguish]. This is consistent with [Mozilla].

+

Another option is for member variables to be suffixed with _ which is +consistent with [Google] and similar to [Python]. Opposed by +[ParzyszekDistinguish].

+
+
+
+

Reducing the number of acronyms

+

While switching coding standard will make it easier to use non-acronym names for +new code, it doesn’t improve the existing large body of code that uses acronyms +extensively to the detriment of its readability. Further, it is natural and +generally encouraged that new code be written in the style of the surrounding +code. Therefore it is likely that much newly written code will also use +acronyms despite what the coding standard says, much as it is today.

+

As well as changing the case of variable names, they could also be expanded to +their non-acronym form e.g. Triple T → Triple triple.

+

There is support for expanding many acronyms [CarruthAcronym] [PicusAcronym] +but there is a preference that expanding acronyms be deferred +[ParzyszekAcronym] [CarruthAcronym].

+

The consensus within the community seems to be that at least some acronyms are +valuable [ParzyszekAcronym] [LattnerAcronym]. The most commonly cited acronym +is TLI however that is used to refer to both TargetLowering and +TargetLibraryInfo [GreeneDistinguish].

+

The following is a list of acronyms considered sufficiently useful that the +benefit of using them outweighs the cost of learning them. Acronyms that are +either not on the list or are used to refer to a different type should be +expanded.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Class name

Variable name

DeterministicFiniteAutomaton

dfa

DominatorTree

dt

LoopInfo

li

MachineFunction

mf

MachineInstr

mi

MachineRegisterInfo

mri

ScalarEvolution

se

TargetInstrInfo

tii

TargetLibraryInfo

tli

TargetRegisterInfo

tri

+

In some cases renaming acronyms to the full type name will result in overly +verbose code. Unlike most classes, a variable’s scope is limited and therefore +some of its purpose can be implied from that scope, meaning that fewer words are +necessary to give it a clear name. For example, in an optimization pass the reader +can assume that a variable’s purpose relates to optimization and therefore an +OptimizationRemarkEmitter variable could be given the name remarkEmitter +or even remarker.

+

The following is a list of longer class names and the associated shorter +variable name.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Class name

Variable name

BasicBlock

block

ConstantExpr

expr

ExecutionEngine

engine

MachineOperand

operand

OptimizationRemarkEmitter

remarker

PreservedAnalyses

analyses

PreservedAnalysesChecker

checker

TargetLowering

lowering

TargetMachine

machine

+
+
+

Transition Options

+

There are three main options for transitioning:

+
    +
  1. Keep the current coding standard

  2. +
  3. Laissez faire

  4. +
  5. Big bang

  6. +
+
+

Keep the current coding standard

+

Proponents of keeping the current coding standard (i.e. not transitioning at +all) question whether the cost of transition outweighs the benefit +[EmersonConcern] [ReamesConcern] [BradburyConcern]. +The costs are that git blame will become less usable; and that merging the +changes will be costly for downstream maintainers. See Big bang for potential +mitigations.

+
+
+

Laissez faire

+

The coding standard could allow both CamelCase and camelBack styles for +variable names [LattnerTransition].

+

A code review to implement this is at https://reviews.llvm.org/D57896.

+
+

Advantages

+
+
    +
  • Very easy to implement initially.

  • +
+
+
+
+

Disadvantages

+
+
+
+
+
+
+

Big bang

+

With this approach, variables will be renamed by an automated script in a series +of large commits.

+

The principal advantage of this approach is that it minimises the cost of +inconsistency [BradburyTransition] [RobinsonTransition].

+

It goes against a policy of avoiding large-scale reformatting of existing code +[GreeneDistinguish].

+

It has been suggested that LLD would be a good starter project for the renaming +[Ueyama].

+
+

Keeping git blame usable

+

git blame (or git annotate) permits quickly identifying the commit that +changed a given line in a file. After renaming variables, many lines will show +as being changed by that one commit, requiring a further invocation of git +blame to identify prior, more interesting commits [GreeneGitBlame] +[RicciAcronyms].

+

Mitigation: git-hyper-blame +can ignore or “look through” a given set of commits. +A .git-blame-ignore-revs file identifying the variable renaming commits +could be added to the LLVM git repository root directory. +It is being investigated +whether similar functionality could be added to git blame itself.

+
+
+

Minimising cost of downstream merges

+

There are many forks of LLVM with downstream changes. Merging a large-scale +renaming change could be difficult for the fork maintainers.

+

Mitigation: A large-scale renaming would be automated. A fork maintainer can +merge from the commit immediately before the renaming, then apply the renaming +script to their own branch. They can then merge again from the renaming commit, +resolving all conflicts by choosing their own version. This could be tested on +the [SVE] fork.

+
+
+
+
+

Provisional Plan

+

This is a provisional plan for the Big bang approach. It has not been agreed.

+
    +
  1. Investigate improving git blame. The extent to which it can be made to +“look through” commits may impact how big a change can be made.

  2. +
  3. Write a script to expand acronyms.

  4. +
  5. Experiment and perform dry runs of the various refactoring options. +Results can be published in forks of the LLVM Git repository.

  6. +
  7. Consider the evidence and agree on the new policy.

  8. +
  9. Agree & announce a date for the renaming of the starter project (LLD).

  10. +
  11. Update the policy page. This will explain the +old and new rules and which projects each applies to.

  12. +
  13. Refactor the starter project in two commits:

    +
      +
    1. Add or change the project’s .clang-tidy to reflect the agreed rules. +(This is in a separate commit to enable the merging process described in +Minimising cost of downstream merges). +Also update the project list on the policy page.

    2. +
    3. Apply clang-tidy to the project’s files, with only the +readability-identifier-naming rules enabled. clang-tidy will also +reformat the affected lines according to the rules in .clang-format. +It is anticipated that this will be a good dog-fooding opportunity for +clang-tidy, and bugs should be fixed in the process, likely including:

      +
      +
      +
    4. +
    +
  14. +
  15. Gather feedback and refine the process as appropriate.

  16. +
  17. Apply the process to the following projects, with a suitable delay between +each (at least 4 weeks after the first change, at least 2 weeks subsequently) +to allow gathering further feedback. +This list should exclude projects that must adhere to an externally defined +standard e.g. libcxx. +The list is roughly in chronological order of renaming. +Some items may not make sense to rename individually - it is expected that +this list will change following experimentation:

    +
      +
    • TableGen

    • +
    • llvm/tools

    • +
    • clang-tools-extra

    • +
    • clang

    • +
    • ARM backend

    • +
    • AArch64 backend

    • +
    • AMDGPU backend

    • +
    • ARC backend

    • +
    • AVR backend

    • +
    • BPF backend

    • +
    • Hexagon backend

    • +
    • Lanai backend

    • +
    • MIPS backend

    • +
    • NVPTX backend

    • +
    • PowerPC backend

    • +
    • RISC-V backend

    • +
    • Sparc backend

    • +
    • SystemZ backend

    • +
    • WebAssembly backend

    • +
    • X86 backend

    • +
    • XCore backend

    • +
    • libLTO

    • +
    • Debug Information

    • +
    • Remainder of llvm

    • +
    • compiler-rt

    • +
    • libunwind

    • +
    • openmp

    • +
    • parallel-libs

    • +
    • polly

    • +
    • lldb

    • +
    +
  18. +
  19. Remove the old variable name rule from the policy page.

  20. +
  21. Repeat many of the steps in the sequence, using a script to expand acronyms.

  22. +
+
+
+

References

+
+
LLDB(1,2,3)
+

LLDB Coding Conventions https://llvm.org/svn/llvm-project/lldb/branches/release_39/www/lldb-coding-conventions.html

+
+
Google(1,2,3)
+

Google C++ Style Guide https://google.github.io/styleguide/cppguide.html#Variable_Names

+
+
WebKit(1,2,3)
+

WebKit Code Style Guidelines https://webkit.org/code-style-guidelines/#names

+
+
Qt(1,2)
+

Qt Coding Style https://wiki.qt.io/Qt_Coding_Style#Declaring_variables

+
+
Rust(1,2)
+

Rust naming conventions https://doc.rust-lang.org/1.0.0/style/style/naming/README.html

+
+
Swift(1,2)
+

Swift API Design Guidelines https://swift.org/documentation/api-design-guidelines/#general-conventions

+
+
Python(1,2,3)
+

Style Guide for Python Code https://www.python.org/dev/peps/pep-0008/#function-and-variable-names

+
+
Mozilla
+

Mozilla Coding style: Prefixes https://firefox-source-docs.mozilla.org/tools/lint/coding-style/coding_style_cpp.html#prefixes

+
+
SVE
+

LLVM with support for SVE https://github.com/ARM-software/LLVM-SVE

+
+
AminiInconsistent
+

Mehdi Amini, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130329.html

+
+
ArsenaultAgree
+

Matt Arsenault, http://lists.llvm.org/pipermail/llvm-dev/2019-February/129934.html

+
+
BeylsDistinguish
+

Kristof Beyls, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130292.html

+
+
BradburyConcern(1,2)
+

Alex Bradbury, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130266.html

+
+
BradburyTransition
+

Alex Bradbury, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130388.html

+
+
CarruthAcronym(1,2,3)
+

Chandler Carruth, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130313.html

+
+
CarruthCamelBack(1,2)
+

Chandler Carruth, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130214.html

+
+
CarruthDistinguish
+

Chandler Carruth, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130310.html

+
+
CarruthFunction
+

Chandler Carruth, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130309.html

+
+
CarruthInconsistent
+

Chandler Carruth, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130312.html

+
+
CarruthLower
+

Chandler Carruth, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130430.html

+
+
DasInconsistent
+

Sanjoy Das, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130304.html

+
+
DenisovCamelBack(1,2)
+

Alex Denisov, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130179.html

+
+
EmersonConcern
+

Amara Emerson, http://lists.llvm.org/pipermail/llvm-dev/2019-February/129894.html

+
+
GreeneDistinguish(1,2,3)
+

David Greene, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130425.html

+
+
GreeneGitBlame
+

David Greene, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130228.html

+
+
HendersonPrefix
+

James Henderson, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130465.html

+
+
HähnleDistinguish
+

Nicolai Hähnle, http://lists.llvm.org/pipermail/llvm-dev/2019-February/129923.html

+
+
IvanovicDistinguish(1,2,3)
+

Nemanja Ivanovic, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130249.html

+
+
JonesDistinguish
+

JD Jones, http://lists.llvm.org/pipermail/llvm-dev/2019-February/129926.html

+
+
LattnerAcronym
+

Chris Lattner, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130353.html

+
+
LattnerAgree
+

Chris Lattner, http://lists.llvm.org/pipermail/llvm-dev/2019-February/129907.html

+
+
LattnerFunction(1,2)
+

Chris Lattner, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130630.html

+
+
LattnerLower
+

Chris Lattner, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130629.html

+
+
LattnerTransition
+

Chris Lattner, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130355.html

+
+
MalyutinDistinguish(1,2)
+

Danila Malyutin, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130320.html

+
+
ParzyszekAcronym(1,2)
+

Krzysztof Parzyszek, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130306.html

+
+
ParzyszekAcronym2
+

Krzysztof Parzyszek, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130323.html

+
+
ParzyszekDistinguish
+

Krzysztof Parzyszek, http://lists.llvm.org/pipermail/llvm-dev/2019-February/129941.html

+
+
PicusAcronym(1,2)
+

Diana Picus, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130318.html

+
+
ReamesConcern
+

Philip Reames, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130181.html

+
+
RicciAcronyms(1,2)
+

Bruno Ricci, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130328.html

+
+
RobinsonAgree
+

Paul Robinson, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130111.html

+
+
RobinsonDistinguish
+

Paul Robinson, http://lists.llvm.org/pipermail/llvm-dev/2019-February/129920.html

+
+
RobinsonDistinguish2
+

Paul Robinson, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130229.html

+
+
RobinsonTransition
+

Paul Robinson, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130415.html

+
+
TurnerCamelBack
+

Zachary Turner, https://reviews.llvm.org/D57896#1402264

+
+
TurnerLLDB
+

Zachary Turner, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130213.html

+
+
Ueyama
+

Rui Ueyama, http://lists.llvm.org/pipermail/llvm-dev/2019-February/130435.html

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/VectorizationPlan.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/VectorizationPlan.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/VectorizationPlan.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/VectorizationPlan.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,415 @@ + + + + + + + + + Vectorization Plan — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Vectorization Plan

+ +
+

Abstract

+

The vectorization transformation can be rather complicated, involving several +potential alternatives, especially for outer-loops 1 but also possibly for +innermost loops. These alternatives may have significant performance impact, +both positive and negative. A cost model is therefore employed to identify the +best alternative, including the alternative of avoiding any transformation +altogether.

+

The Vectorization Plan is an explicit model for describing vectorization +candidates. It serves for both optimizing candidates including estimating their +cost reliably, and for performing their final translation into IR. This +facilitates dealing with multiple vectorization candidates.

+
+
+

High-level Design

+
+

Vectorization Workflow

+

VPlan-based vectorization involves three major steps, taking a “scenario-based +approach” to vectorization planning:

+
    +
  1. Legal Step: check if a loop can be legally vectorized; encode constraints and +artifacts if so.

  2. +
  3. Plan Step:

    +
      +
    1. Build initial VPlans following the constraints and decisions taken by +Legal Step 1, and compute their cost.

    2. +
    3. Apply optimizations to the VPlans, possibly forking additional VPlans. +Prune sub-optimal VPlans having relatively high cost.

    4. +
    +
  4. +
  5. Execute Step: materialize the best VPlan. Note that this is the only step +that modifies the IR.

  6. +
+
+
+

Design Guidelines

+

In what follows, the term “input IR” refers to code that is fed into the +vectorizer whereas the term “output IR” refers to code that is generated by the +vectorizer. The output IR contains code that has been vectorized or “widened” +according to a loop Vectorization Factor (VF), and/or loop unroll-and-jammed +according to an Unroll Factor (UF). +The design of VPlan follows several high-level guidelines:

+
    +
  1. Analysis-like: building and manipulating VPlans must not modify the input IR. +In particular, if the best option is not to vectorize at all, the +vectorization process terminates before reaching Step 3, and compilation +should proceed as if VPlans had not been built.

  2. +
  3. Align Cost & Execute: each VPlan must support both estimating the cost and +generating the output IR code, such that the cost estimation evaluates the +to-be-generated code reliably.

  4. +
  5. Support vectorizing additional constructs:

    +
      +
    1. Outer-loop vectorization. In particular, VPlan must be able to model the +control-flow of the output IR which may include multiple basic-blocks and +nested loops.

    2. +
    3. SLP vectorization.

    4. +
    5. Combinations of the above, including nested vectorization: vectorizing +both an inner loop and an outer-loop at the same time (each with its own +VF and UF), mixed vectorization: vectorizing a loop with SLP patterns +inside 4, (re)vectorizing input IR containing vector code.

    6. +
    7. Function vectorization 2.

    8. +
    +
  6. +
  7. Support multiple candidates efficiently. In particular, similar candidates +related to a range of possible VF’s and UF’s must be represented efficiently. +Potential versioning needs to be supported efficiently.

  8. +
  9. Support vectorizing idioms, such as interleaved groups of strided loads or +stores. This is achieved by modeling a sequence of output instructions using +a “Recipe”, which is responsible for computing its cost and generating its +code.

  10. +
  11. Encapsulate Single-Entry Single-Exit regions (SESE). During vectorization +such regions may need to be, for example, predicated and linearized, or +replicated VF*UF times to handle scalarized and predicated instructions. +Inner loops are also modelled as SESE regions.

  12. +
  13. Support instruction-level analysis and transformation, as part of Planning +Step 2.b: During vectorization instructions may need to be traversed, moved, +replaced by other instructions or be created. For example, vector idiom +detection and formation involves searching for and optimizing instruction +patterns.

  14. +
+
+
+
+

Definitions

+

The low-level design of VPlan comprises the following classes.

+
+
LoopVectorizationPlanner
+

A LoopVectorizationPlanner is designed to handle the vectorization of a loop +or a loop nest. It can construct, optimize and discard one or more VPlans, +each VPlan modelling a distinct way to vectorize the loop or the loop nest. +Once the best VPlan is determined, including the best VF and UF, this VPlan +drives the generation of output IR.

+
+
VPlan
+

A model of a vectorized candidate for a given input IR loop or loop nest. This +candidate is represented using a Hierarchical CFG. VPlan supports estimating +the cost and driving the generation of the output IR code it represents.

+
+
Hierarchical CFG
+

A control-flow graph whose nodes are basic-blocks or Hierarchical CFG’s. The +Hierarchical CFG data structure is similar to the Tile Tree 5, where +cross-Tile edges are lifted to connect Tiles instead of the original +basic-blocks as in Sharir 6, promoting the Tile encapsulation. The terms +Region and Block are used rather than Tile 5 to avoid confusion with loop +tiling.

+
+
VPBlockBase
+

The building block of the Hierarchical CFG. A pure-virtual base-class of +VPBasicBlock and VPRegionBlock, see below. VPBlockBase models the hierarchical +control-flow relations with other VPBlocks. Note that in contrast to the IR +BasicBlock, a VPBlockBase models its control-flow successors and predecessors +directly, rather than through a Terminator branch or through predecessor +branches that “use” the VPBlockBase.

+
+
VPBasicBlock
+

VPBasicBlock is a subclass of VPBlockBase, and serves as the leaves of the +Hierarchical CFG. It represents a sequence of output IR instructions that will +appear consecutively in an output IR basic-block. The instructions of this +basic-block originate from one or more VPBasicBlocks. VPBasicBlock holds a +sequence of zero or more VPRecipes that model the cost and generation of the +output IR instructions.

+
+
VPRegionBlock
+

VPRegionBlock is a subclass of VPBlockBase. It models a collection of +VPBasicBlocks and VPRegionBlocks which form a SESE subgraph of the output IR +CFG. A VPRegionBlock may indicate that its contents are to be replicated a +constant number of times when output IR is generated, effectively representing +a loop with constant trip-count that will be completely unrolled. This is used +to support scalarized and predicated instructions with a single model for +multiple candidate VF’s and UF’s.

+
+
VPRecipeBase
+

A pure-virtual base class modeling a sequence of one or more output IR +instructions, possibly based on one or more input IR instructions. These +input IR instructions are referred to as “Ingredients” of the Recipe. A Recipe +may specify how its ingredients are to be transformed to produce the output IR +instructions; e.g., cloned once, replicated multiple times or widened +according to selected VF.

+
+
VPValue
+

The base of VPlan’s def-use relations class hierarchy. When instantiated, it +models a constant or a live-in Value in VPlan. It has users, which are of type +VPUser, but no operands.

+
+
VPUser
+

A VPUser represents an entity that uses a number of VPValues as operands. +VPUser is similar in some aspects to LLVM’s User class.

+
+
VPDef
+

A VPDef represents an entity that defines zero, one or multiple VPValues. +It is used to model the fact that recipes in VPlan can define multiple +VPValues.

+
+
VPInstruction
+

A VPInstruction is both a VPRecipe and a VPUser. It models a single +VPlan-level instruction to be generated if the VPlan is executed, including +its opcode and possibly additional characteristics. It is the basis for +writing instruction-level analyses and optimizations in VPlan as creating, +replacing or moving VPInstructions record both def-use and scheduling +decisions. VPInstructions also extend LLVM IR’s opcodes with idiomatic +operations that enrich the Vectorizer’s semantics.

+
+
VPTransformState
+

Stores information used for generating output IR, passed from +LoopVectorizationPlanner to its selected VPlan for execution, and used to pass +additional information down to VPBlocks and VPRecipes.

+
+
+
+
+

The Planning Process and VPlan Roadmap

+

Transforming the Loop Vectorizer to use VPlan follows a staged approach. First, +VPlan is used to record the final vectorization decisions, and to execute them: +the Hierarchical CFG models the planned control-flow, and Recipes capture +decisions taken inside basic-blocks. Next, VPlan will be used also as the basis +for taking these decisions, effectively turning them into a series of +VPlan-to-VPlan algorithms. Finally, VPlan will support the planning process +itself including cost-based analyses for making these decisions, to fully +support compositional and iterative decision making.

+

Some decisions are local to an instruction in the loop, such as whether to widen +it into a vector instruction or replicate it, keeping the generated instructions +in place. Other decisions, however, involve moving instructions, replacing them +with other instructions, and/or introducing new instructions. For example, a +cast may sink past a later instruction and be widened to handle first-order +recurrence; an interleave group of strided gathers or scatters may effectively +move to one place where they are replaced with shuffles and a common wide vector +load or store; new instructions may be introduced to compute masks, shuffle the +elements of vectors, and pack scalar values into vectors or vice-versa.

+

In order for VPlan to support making instruction-level decisions and analyses, +it needs to model the relevant instructions along with their def/use relations. +This too follows a staged approach: first, the new instructions that compute +masks are modeled as VPInstructions, along with their induced def/use subgraph. +This effectively models masks in VPlan, facilitating VPlan-based predication. +Next, the logic embedded within each Recipe for generating its instructions at +VPlan execution time, will instead take part in the planning process by modeling +them as VPInstructions. Finally, only logic that applies to instructions as a +group will remain in Recipes, such as interleave groups and potentially other +idiom groups having synergistic cost.

+ +
+

References

+
+
1
+

“Outer-loop vectorization: revisited for short SIMD architectures”, Dorit +Nuzman and Ayal Zaks, PACT 2008.

+
+
2
+

“Proposal for function vectorization and loop vectorization with function +calls”, Xinmin Tian, [cfe-dev]., +March 2, 2016. +See also review.

+
+
3
+

“Throttling Automatic Vectorization: When Less is More”, Vasileios +Porpodas and Tim Jones, PACT 2015 and LLVM Developers’ Meeting 2015.

+
+
4
+

“Exploiting mixed SIMD parallelism by reducing data reorganization +overhead”, Hao Zhou and Jingling Xue, CGO 2016.

+
+
5(1,2)
+

“Register Allocation via Hierarchical Graph Coloring”, David Callahan and +Brian Koblenz, PLDI 1991

+
+
6
+

“Structural analysis: A new approach to flow analysis in optimizing +compilers”, M. Sharir, Journal of Computer Languages, Jan. 1980

+
+
7
+

“Enabling Polyhedral Optimizations in LLVM”, Tobias Grosser, Diploma +thesis, 2011.

+
+
8
+

“Introducing VPlan to the Loop Vectorizer”, Gil Rapaport and Ayal Zaks, +European LLVM Developers’ Meeting 2017.

+
+
9
+

“Extending LoopVectorizer: OpenMP4.5 SIMD and Outer Loop +Auto-Vectorization”, Intel Vectorizer Team, LLVM Developers’ Meeting 2016.

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/VectorPredication.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/VectorPredication.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Proposals/VectorPredication.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Proposals/VectorPredication.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,234 @@ + + + + + + + + + Vector Predication Roadmap — LLVM 13 documentation + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Vector Predication Roadmap

+ +
+

Motivation

+

This proposal defines a roadmap towards native vector predication in LLVM, +specifically for vector instructions with a mask and/or an explicit vector +length. LLVM currently has no target-independent means to model predicated +vector instructions for modern SIMD ISAs such as AVX512, ARM SVE, the RISC-V V +extension and NEC SX-Aurora. Only some predicated vector operations, such as +masked loads and stores, are available through intrinsics [MaskedIR].

+

The Vector Predication (VP) extension is a concrete RFC and prototype +implementation to achieve native vector predication in LLVM. The VP prototype +and all related discussions can be found in the VP patch on Phabricator +[VPRFC].

+
+
+

Roadmap

+
+

1. IR-level VP intrinsics

+
    +
  • There is a consensus on the semantics/instruction set of VP.

  • +
  • VP intrinsics and attributes are available on IR level.

  • +
  • TTI has capability flags for VP (supportsVP()?, +haveActiveVectorLength()?).

  • +
+

Result: VP usable for IR-level vectorizers (LV, VPlan, RegionVectorizer), +potential integration in Clang with builtins.

+
+
+

2. CodeGen support

+
    +
  • VP intrinsics translate to first-class SDNodes +(eg llvm.vp.fdiv.* -> vp_fdiv).

  • +
  • VP legalization (legalize explicit vector length to mask (AVX512), legalize VP +SDNodes to pre-existing ones (SSE, NEON)).

  • +
+

Result: Backend development based on VP SDNodes.

+
+
+

3. Lift InstSimplify/InstCombine/DAGCombiner to VP

+
    +
  • Introduce PredicatedInstruction, PredicatedBinaryOperator, .. helper classes +that match standard vector IR and VP intrinsics.

  • +
  • Add a matcher context to PatternMatch and context-aware IR Builder APIs.

  • +
  • Incrementally lift DAGCombiner to work on VP SDNodes as well as on regular +vector instructions.

  • +
  • Incrementally lift InstCombine/InstSimplify to operate on VP as well as +regular IR instructions.

  • +
+

Result: Optimization of VP intrinsics on par with standard vector instructions.

+
+
+

4. Deprecate llvm.masked.* / llvm.experimental.reduce.*

+
    +
  • Modernize llvm.masked.* / llvm.experimental.reduce* by translating to VP.

  • +
  • DCE transitional APIs.

  • +
+

Result: VP has superseded earlier vector intrinsics.

+
+
+

5. Predicated IR Instructions

+
    +
  • Vector instructions have an optional mask and vector length parameter. These +lower to VP SDNodes (from Stage 2).

  • +
  • Phase out VP intrinsics, only keeping those that are not equivalent to +vectorized scalar instructions (reduce, shuffles, ..)

  • +
  • InstCombine/InstSimplify expect predication in regular Instructions (Stage (3) +has laid the groundwork).

  • +
+

Result: Native vector predication in IR.

+
+
+
+

References

+
+
MaskedIR
+

llvm.masked.* intrinsics, +https://llvm.org/docs/LangRef.html#masked-vector-load-and-store-intrinsics

+
+
VPRFC
+

RFC: Prototype & Roadmap for vector predication in LLVM, +https://reviews.llvm.org/D57504

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Reference.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Reference.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Reference.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Reference.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,290 @@ + + + + + + + + + Reference — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Reference

+

LLVM and API reference documentation.

+ +
+
+
+

API Reference

+
+
Doxygen generated documentation

(classes)

+
+
How To Use Attributes

Answers some questions about the new Attributes infrastructure.

+
+
+

Documentation for Go bindings

+
+
+

LLVM Reference

+
+

Command Line Utilities

+
+
LLVM Command Guide

A reference manual for the LLVM command line utilities (“man” pages for LLVM +tools).

+
+
LLVM bugpoint tool: design and usage

Automatic bug finder and test-case reducer description and usage +information.

+
+
Using -opt-bisect-limit to debug optimization errors

A command line option for debugging optimization-induced failures.

+
+
The Microsoft PDB File Format

A detailed description of the Microsoft PDB (Program Database) file format.

+
+
+
+
+

Garbage Collection

+
+
Garbage Collection with LLVM

The interfaces source-language compilers should use for compiling GC’d +programs.

+
+
Garbage Collection Safepoints in LLVM

This describes a set of experimental extensions for garbage +collection support.

+
+
+
+
+

LibFuzzer

+
+
libFuzzer – a library for coverage-guided fuzz testing.

A library for writing in-process guided fuzzers.

+
+
Fuzzing LLVM libraries and tools

Information on writing and using Fuzzers to find bugs in LLVM.

+
+
+
+
+

LLVM IR

+
+
LLVM Language Reference Manual

Defines the LLVM intermediate representation and the assembly form of the +different nodes.

+
+
Design and Usage of the InAlloca Attribute

Description of the inalloca argument attribute.

+
+
LLVM Bitcode File Format

This describes the file format and encoding used for LLVM “bc” files.

+
+
Machine IR (MIR) Format Reference Manual

A reference manual for the MIR serialization format, which is used to test +LLVM’s code generation passes.

+
+
Global Instruction Selection

This describes the prototype instruction selection replacement, GlobalISel.

+
+
+
+
+

Testing and Debugging

+
+
LLVM Testing Infrastructure Guide

A reference manual for using the LLVM testing infrastructure.

+
+
test-suite Guide

Describes how to compile and run the test-suite benchmarks.

+
+
GWP-ASan

A sampled heap memory error detection toolkit designed for production use.

+
+
+
+
+

XRay

+
+
XRay Instrumentation

High-level documentation of how to use XRay in LLVM.

+
+
Debugging with XRay

An example of how to debug an application with XRay.

+
+
+
+
+

Additional Topics

+
+
FaultMaps and implicit checks

LLVM support for folding control flow into faulting machine instructions.

+
+
LLVM Atomic Instructions and Concurrency Guide

Information about LLVM’s concurrency model.

+
+
Exception Handling in LLVM

This document describes the design and implementation of exception handling +in LLVM.

+
+
LLVM Extensions

LLVM-specific extensions to tools and formats LLVM seeks compatibility with.

+
+
How to set up LLVM-style RTTI for your class hierarchy

How to make isa<>, dyn_cast<>, etc. available for clients of your +class hierarchy.

+
+
LLVM Block Frequency Terminology

Provides information about terminology used in the BlockFrequencyInfo +analysis pass.

+
+
LLVM Branch Weight Metadata

Provides information about Branch Prediction Information.

+
+
The Often Misunderstood GEP Instruction

Answers to some very frequent questions about LLVM’s most frequently +misunderstood instruction.

+
+
Scudo Hardened Allocator

A library that implements a security-hardened malloc().

+
+
MemTagSanitizer

Security hardening for production code aiming to mitigate memory +related vulnerabilities. Based on the Armv8.5-A Memory Tagging Extension.

+
+
Dependence Graphs

A description of the design of the various dependence graphs such as +the DDG (Data Dependence Graph).

+
+
Speculative Load Hardening

A description of the Speculative Load Hardening mitigation for Spectre v1.

+
+
Segmented Stacks in LLVM

This document describes segmented stacks and how they are used in LLVM.

+
+
LLVM’s Optional Rich Disassembly Output

This document describes the optional rich disassembly output syntax.

+
+
Stack maps and patch points in LLVM

LLVM support for mapping instruction addresses to the location of +values and allowing code to be patched.

+
+
Coroutines in LLVM

LLVM support for coroutines.

+
+
YAML I/O

A reference guide for using LLVM’s YAML I/O library.

+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ReleaseNotes.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ReleaseNotes.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ReleaseNotes.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ReleaseNotes.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,315 @@ + + + + + + + + + LLVM 14.0.0 Release Notes — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM 14.0.0 Release Notes

+ +
+

Warning

+

These are in-progress notes for the upcoming LLVM 14 release. +Release notes for previous releases can be found on +the Download Page.

+
+
+

Introduction

+

This document contains the release notes for the LLVM Compiler Infrastructure, +release 14.0.0. Here we describe the status of LLVM, including major improvements +from the previous release, improvements in various subprojects of LLVM, and +some of the current users of the code. All LLVM releases may be downloaded +from the LLVM releases web site.

+

For more information about LLVM, including information about the latest +release, please check out the main LLVM web site. If you +have questions or comments, the LLVM Developer’s Mailing List is a good place to send +them.

+

Note that if you are reading this file from a Git checkout or the main +LLVM web page, this document applies to the next release, not the current +one. To see the release notes for a specific release, please see the releases +page.

+
+
+

Non-comprehensive list of changes in this release

+
    +
  • +
+
+

Changes to the LLVM IR

+
    +
  • Using the legacy pass manager for the optimization pipeline is deprecated and +will be removed after LLVM 14. In the meantime, only minimal effort will be +made to maintain the legacy pass manager for the optimization pipeline.

  • +
  • Max allowed integer type was reduced from 2^24-1 bits to 2^23 bits.

  • +
+
+ + + +
+

Changes to the ARM Backend

+

During this release …

+
+
+

Changes to the MIPS Target

+

During this release …

+
+ +
+

Changes to the PowerPC Target

+

During this release …

+
+
+

Changes to the X86 Target

+

During this release …

+
    +
  • Support for AVX512-FP16 instructions has been added.

  • +
+
+
+

Changes to the AMDGPU Target

+

During this release …

+
+
+

Changes to the AVR Target

+

During this release …

+
+
+

Changes to the WebAssembly Target

+

During this release …

+
+ + + + + +
+

Changes to the Debug Info

+

During this release …

+
+ + + +
+ +
+

Additional Information

+

A wide variety of additional information is available on the LLVM web page, in particular in the documentation section. The web page also contains versions of the +API documentation which is up-to-date with the Git version of the source +code. You can access versions of these documents specific to this release by +going into the llvm/docs/ directory in the LLVM tree.

+

If you have any questions or comments about LLVM, please feel free to contact +us via the mailing lists.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ReleaseProcess.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ReleaseProcess.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ReleaseProcess.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ReleaseProcess.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,321 @@ + + + + + + + + + How To Validate a New Release — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

How To Validate a New Release

+ +
+

Introduction

+

This document contains information about testing the release candidates that +will ultimately be the next LLVM release. For more information on how to +manage the actual release, please refer to How To Release LLVM To The Public.

+
+

Overview of the Release Process

+

Once the release process starts, the Release Manager will ask for volunteers, +and it’ll be the role of each volunteer to:

+
    +
  • Test and benchmark the previous release

  • +
  • Test and benchmark each release candidate, comparing to the previous release +and candidates

  • +
  • Identify, reduce and report every regression found during tests and benchmarks

  • +
  • Make sure the critical bugs get fixed and merged to the next release candidate

  • +
+

Not all bugs or regressions are show-stoppers and it’s a bit of a grey area what +should be fixed before the next candidate and what can wait until the next +release.

+

It’ll depend on:

+
    +
  • The severity of the bug, how many people it affects and if it’s a regression +or a known bug. Known bugs are “unsupported features” and some bugs can be +disabled if they have been implemented recently.

  • +
  • The stage in the release. Less critical bugs should be considered to be +fixed between RC1 and RC2, but not so much at the end of it.

  • +
  • If it’s a correctness or a performance regression. Performance regression +tends to be taken more lightly than correctness.

  • +
+
+
+
+

Scripts

+

The scripts are in the utils/release directory.

+
+

test-release.sh

+

This script will check-out, configure and compile LLVM+Clang (+ most add-ons, +like compiler-rt, libcxx, libomp and clang-extra-tools) in +three stages, and will test the final stage. +It’ll have installed the final binaries on the Phase3/Release(+Asserts) +directory, and that’s the one you should use for the test-suite and other +external tests.

+

To run the script on a specific release candidate run:

+
./test-release.sh \
+     -release 3.3 \
+     -rc 1 \
+     -no-64bit \
+     -test-asserts \
+     -no-compare-files
+
+
+

Each system will require different options. For instance, x86_64 will +obviously not need -no-64bit while 32-bit systems will, or the script will +fail.

+

The important flags to get right are:

+
    +
  • On the pre-release, you should change -rc 1 to -final. On RC2, +change it to -rc 2 and so on.

  • +
  • On non-release testing, you can use -final in conjunction with +-no-checkout, but you’ll have to create the final directory by hand +and link the correct source dir to final/llvm.src.

  • +
  • For release candidates, you need -test-asserts, or it won’t create a +“Release+Asserts” directory, which is needed for release testing and +benchmarking. This will take twice as long.

  • +
  • On the final candidate you just need Release builds, and that’s the binary +directory you’ll have to pack.

  • +
  • On macOS, you must export MACOSX_DEPLOYMENT_TARGET=10.9 before running +the script.

  • +
+

This script builds three phases of Clang+LLVM twice each (Release and +Release+Asserts), so use screen or nohup to avoid headaches, since it’ll take +a long time.

+

Use the --help option to see all the options and choose them according to +your needs.

+
+
+

findRegressions-nightly.py

+

TODO

+
+
+
+

Test Suite

+

Follow the LNT Quick Start Guide link on how to set up the +test-suite

+

The binary location you’ll have to use for testing is inside the +rcN/Phase3/Release+Asserts/llvmCore-REL-RC.install. +Link that directory to an easier location and run the test-suite.

+

An example on the run command line, assuming you created a link from the correct +install directory to ~/devel/llvm/install:

+
./sandbox/bin/python sandbox/bin/lnt runtest \
+    nt \
+    -j4 \
+    --sandbox sandbox \
+    --test-suite ~/devel/llvm/test/test-suite \
+    --cc ~/devel/llvm/install/bin/clang \
+    --cxx ~/devel/llvm/install/bin/clang++
+
+
+

It should have no new regressions, compared to the previous release or release +candidate. You don’t need to fix all the bugs in the test-suite, since they’re +not necessarily meant to pass on all architectures all the time. This is +due to the nature of the result checking, which relies on direct comparison, +and most of the time, the failures are related to bad output checking, rather +than bad code generation.

+

If the errors are in LLVM itself, please report every single regression found +as a blocker, and all the other bugs as important, but not necessarily blocking +the release to proceed. They can be set as “known failures” to be +fixed at a future date.

+
+
+

Pre-Release Process

+

When the release process is announced on the mailing list, you should prepare +for the testing, by applying the same testing you’ll do on the release +candidates, on the previous release.

+

You should:

+
    +
  • Download the previous release sources from +https://llvm.org/releases/download.html.

  • +
  • Run the test-release.sh script on final mode (change -rc 1 to +-final).

  • +
  • Once all three stages are done, it’ll test the final stage.

  • +
  • Using the Phase3/Release+Asserts/llvmCore-MAJ.MIN-final.install base, +run the test-suite.

  • +
+

If the final phase’s make check-all failed, it’s a good idea to also test +the intermediate stages by going on the obj directory and running +make check-all to find if there’s at least one stage that passes (helps +when reducing the error for bug report purposes).

+
+
+

Release Process

+

When the Release Manager sends you the release candidate, download all sources, +unzip on the same directory (there will be sym-links from the appropriate places +to them), and run the release test as above.

+

You should:

+
    +
  • Download the current candidate sources from where the release manager points +you (ex. https://llvm.org/pre-releases/3.3/rc1/).

  • +
  • Repeat the steps above with -rc 1, -rc 2 etc modes and run the +test-suite the same way.

  • +
  • Compare the results, report all errors on Bugzilla and publish the binary blob +where the release manager can grab it.

  • +
+

Once the release manager announces that the latest candidate is the good one, +you have to pack the Release (no Asserts) install directory on Phase3 +and that will be the official binary.

+
    +
  • Rename (or link) clang+llvm-REL-ARCH-ENV to the .install directory

  • +
  • Tar that into the same name with .tar.gz extension from outside the +directory

  • +
  • Make it available for the release manager to download

  • +
+
+
+

Bug Reporting Process

+

If you found regressions or failures when comparing a release candidate with the +previous release, follow the rules below:

+
    +
  • Critical bugs on compilation should be fixed as soon as possible, possibly +before releasing the binary blobs.

  • +
  • Check-all tests should be fixed before the next release candidate, but can +wait until the test-suite run is finished.

  • +
  • Bugs in the test suite or unimportant check-all tests can be fixed in between +release candidates.

  • +
  • New features or recent big changes, when close to the release, should have +been done in a way that is easy to disable. If they misbehave, prefer disabling +them over releasing an unstable (but untested) binary package.

  • +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Remarks.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Remarks.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Remarks.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Remarks.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,835 @@ + + + + + + + + + Remarks — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Remarks

+ +
+

Introduction to the LLVM remark diagnostics

+

LLVM is able to emit diagnostics from passes describing whether an optimization +has been performed or missed for a particular reason, which should give more +insight to users about what the compiler did during the compilation pipeline.

+

There are three main remark types:

+

Passed

+
+

Remarks that describe a successful optimization performed by the compiler.

+
+
Example
+

+
+
foo inlined into bar with (cost=always): always inline attribute
+
+
+
+

Missed

+
+

Remarks that describe an attempt by the compiler to perform an optimization that +could not be performed.

+
+
Example
+

+
+
foo not inlined into bar because it should never be inlined
+(cost=never): noinline function attribute
+
+
+
+

Analysis

+
+

Remarks that describe the result of an analysis, that can bring more +information to the user regarding the generated code.

+
+
Example
+

+
+
16 stack bytes in function
+
+
+
10 instructions in function
+
+
+
+
+
+

Enabling optimization remarks

+

There are two modes that are supported for enabling optimization remarks in +LLVM: through remark diagnostics, or through serialized remarks.

+
+

Remark diagnostics

+

Optimization remarks can be emitted as diagnostics. These diagnostics will be +propagated to front-ends if desired, or emitted by tools like llc or opt.

+
+
+-pass-remarks=<regex>
+

Enables optimization remarks from passes whose names match the given (POSIX) +regular expression.

+
+ +
+
+-pass-remarks-missed=<regex>
+

Enables missed optimization remarks from passes whose names match the given +(POSIX) regular expression.

+
+ +
+
+-pass-remarks-analysis=<regex>
+

Enables optimization analysis remarks from passes whose names match the given +(POSIX) regular expression.

+
+ +
+
+

Serialized remarks

+

While diagnostics are useful during development, it is often more useful to +refer to optimization remarks post-compilation, typically during performance +analysis.

+

For that, LLVM can serialize the remarks produced for each compilation unit to +a file that can be consumed later.

+

By default, the format of the serialized remarks is YAML, and it can be accompanied by a section +in the object files to easily retrieve it.

+

llc and opt support the +following options:

+

Basic options

+
+
+
+-pass-remarks-output=<filename>
+

Enables the serialization of remarks to a file specified in <filename>.

+

By default, the output is serialized to YAML.

+
+ +
+
+-pass-remarks-format=<format>
+

Specifies the output format of the serialized remarks.

+

Supported formats:

+ +
+ +
+

Content configuration

+
+
+
+-pass-remarks-filter=<regex>
+

Only passes whose names match the given (POSIX) regular expression will be +serialized to the final output.

+
+ +
+
+-pass-remarks-with-hotness
+

With PGO, include profile count in optimization remarks.

+
+ +
+
+-pass-remarks-hotness-threshold
+

The minimum profile count required for an optimization remark to be +emitted.

+
+ +
+

Other tools that support remarks:

+

llvm-lto

+
+
+
+-lto-pass-remarks-output=<filename>
+
+ +
+
+-lto-pass-remarks-filter=<regex>
+
+ +
+
+-lto-pass-remarks-format=<format>
+
+ +
+
+-lto-pass-remarks-with-hotness
+
+ +
+
+-lto-pass-remarks-hotness-threshold
+
+ +
+

gold-plugin and lld

+
+
+
+-opt-remarks-filename=<filename>
+
+ +
+
+-opt-remarks-filter=<regex>
+
+ +
+
+-opt-remarks-format=<format>
+
+ +
+
+-opt-remarks-with-hotness
+
+ +
+
+
+
+

Serialization modes

+

There are two modes available for serializing remarks:

+

Separate

+
+

In this mode, the remarks and the metadata are serialized separately. The +client is responsible for parsing the metadata first, then using the metadata +to correctly parse the remarks.

+
+

Standalone

+
+

In this mode, the remarks and the metadata are serialized to the same +stream. The metadata will always come before the remarks.

+

The compiler does not support emitting standalone remarks. This mode is +more suited for post-processing tools like linkers, that can merge the +remarks for one whole project.

+
+
+
+

YAML remarks

+

A typical remark serialized to YAML looks like this:

+
--- !<TYPE>
+Pass: <pass>
+Name: <name>
+DebugLoc: { File: <file>, Line: <line>, Column: <column> }
+Function: <function>
+Hotness: <hotness>
+Args:
+  - <key>: <value>
+    DebugLoc: { File: <arg-file>, Line: <arg-line>, Column: <arg-column> }
+
+
+

The following entries are mandatory:

+
    +
  • <TYPE>: can be Passed, Missed, Analysis, +AnalysisFPCommute, AnalysisAliasing, Failure.

  • +
  • <pass>: the name of the pass that emitted this remark.

  • +
  • <name>: the name of the remark coming from <pass>.

  • +
  • <function>: the mangled name of the function.

  • +
+

If a DebugLoc entry is specified, the following fields are required:

+
    +
  • <file>

  • +
  • <line>

  • +
  • <column>

  • +
+

If an arg entry is specified, the following fields are required:

+
    +
  • <key>

  • +
  • <value>

  • +
+

If a DebugLoc entry is specified within an arg entry, the following +fields are required:

+
    +
  • <arg-file>

  • +
  • <arg-line>

  • +
  • <arg-column>

  • +
+
+

YAML with a string table

+

The YAML serialization supports the usage of a string table by using the +yaml-strtab format.

+

This format replaces strings in the YAML output with integers representing the +index in the string table that can be provided separately through metadata.

+

The following entries can take advantage of the string table while respecting +YAML rules:

+
    +
  • <pass>

  • +
  • <name>

  • +
  • <function>

  • +
  • <file>

  • +
  • <value>

  • +
  • <arg-file>

  • +
+

Currently, none of the tools in the opt-viewer directory +support this format.

+
+
+

YAML metadata

+

The metadata used together with the YAML format is:

+
    +
  • a magic number: “REMARKS\0”

  • +
  • the version number: a little-endian uint64_t

  • +
  • the total size of the string table (the size itself excluded): +little-endian uint64_t

  • +
  • a list of null-terminated strings

  • +
+

Optional:

+
    +
  • the absolute file path to the serialized remark diagnostics: a +null-terminated string.

  • +
+

When the metadata is serialized separately from the remarks, the file path +should be present and point to the file where the remarks are serialized to.

+

In case the metadata only acts as a header to the remarks, the file path can be +omitted.

+
+
+
+

LLVM bitstream remarks

+

This format uses LLVM bitstream to serialize remarks +and their associated metadata.

+

A bitstream remark stream can be identified by the magic number "RMRK" that +is placed at the very beginning.

+

The format for serializing remarks is composed of two different block types:

+
+

META_BLOCK

+

The block providing information about the rest of the content in the stream.

+

Exactly one block is expected. Having multiple metadata blocks is an error.

+

This block can contain the following records:

+

RECORD_META_CONTAINER_INFO

+
+

The container version and type.

+

Version: u32

+

Type: u2

+
+

RECORD_META_REMARK_VERSION

+
+

The version of the remark entries. This can change independently from the +container version.

+

Version: u32

+
+

RECORD_META_STRTAB

+
+

The string table used by the remark entries. The format of the string table +is a sequence of strings separated by \0.

+
+

RECORD_META_EXTERNAL_FILE

+
+

The external remark file path that contains the remark blocks associated +with this metadata. This is an absolute path.

+
+
+
+

REMARK_BLOCK

+

The block describing a remark entry.

+

0 or more blocks per file are allowed. Each block will depend on the +META_BLOCK in order to be parsed correctly.

+

This block can contain the following records:

+

RECORD_REMARK_HEADER

+
+

The header of the remark. This contains all the mandatory information about +a remark.

+ ++++ + + + + + + + + + + + + + + +

Type

u3

Remark name

VBR6 (string table index)

Pass name

VBR6 (string table index)

Function name

VBR6 (string table index)

+
+

RECORD_REMARK_DEBUG_LOC

+
+

The source location for the corresponding remark. This record is optional.

+ ++++ + + + + + + + + + + + +

File

VBR7 (string table index)

Line

u32

Column

u32

+
+

RECORD_REMARK_HOTNESS

+
+

The hotness of the remark. This record is optional.

+ ++++ + + + + +

Hotness | VBR8 (string table index)

+
+

RECORD_REMARK_ARG_WITH_DEBUGLOC

+
+

A remark argument with an associated debug location.

+ ++++ + + + + + + + + + + + + + + + + + +

Key

VBR7 (string table index)

Value

VBR7 (string table index)

File

VBR7 (string table index)

Line

u32

Column

u32

+
+

RECORD_REMARK_ARG_WITHOUT_DEBUGLOC

+
+

A remark argument without an associated debug location.

+ ++++ + + + + + + + + +

Key

VBR7 (string table index)

Value

VBR7 (string table index)

+
+
+
+

The remark container

+

Bitstream remarks are designed to be used in two different modes:

+

The separate mode

+
+

The separate mode is the mode that is typically used during compilation. It +provides a way to serialize the remark entries to a stream while some +metadata is kept in memory to be emitted in the product of the compilation +(typically, an object file).

+
+

The standalone mode

+
+

The standalone mode is typically stored and used after the distribution of +a program. It contains all the information that allows the parsing of all +the remarks without having any external dependencies.

+
+

In order to support multiple modes, the format introduces the concept of a +bitstream remark container type.

+

SeparateRemarksMeta: the metadata emitted separately

+
+

This container type expects only a META_BLOCK containing only:

+ +

Typically, this is emitted in a section in the object files, allowing +clients to retrieve remarks and their associated metadata directly from +intermediate products.

+
+

SeparateRemarksFile: the remark entries emitted separately

+
+

This container type expects only a META_BLOCK containing only:

+ +

This container type expects 0 or more REMARK_BLOCK.

+

Typically, this is emitted in a side-file alongside an object file, and is +made to be able to stream to without increasing the memory consumption of +the compiler. This is referenced by the RECORD_META_EXTERNAL_FILE entry in the +SeparateRemarksMeta container.

+
+

When the parser tries to parse a container that contains the metadata for the +separate remarks, it should parse the version and type, then keep the string +table in memory while opening the external file, validating its metadata and +parsing the remark entries.

+

The container versions from the separate container should match in order to +have a well-formed file.

+

Standalone: the metadata and the remark entries emitted together

+
+

This container type expects only a META_BLOCK containing only:

+ +

This container type expects 0 or more REMARK_BLOCK.

+
+

A complete output of llvm-bcanalyzer on the different container types:

+

SeparateRemarksMeta

+
<BLOCKINFO_BLOCK/>
+<Meta BlockID=8 NumWords=13 BlockCodeSize=3>
+  <Container info codeid=1 abbrevid=4 op0=5 op1=0/>
+  <String table codeid=3 abbrevid=5/> blob data = 'pass\\x00key\\x00value\\x00'
+  <External File codeid=4 abbrevid=6/> blob data = '/path/to/file/name'
+</Meta>
+
+
+

SeparateRemarksFile

+
<BLOCKINFO_BLOCK/>
+<Meta BlockID=8 NumWords=3 BlockCodeSize=3>
+  <Container info codeid=1 abbrevid=4 op0=0 op1=1/>
+  <Remark version codeid=2 abbrevid=5 op0=0/>
+</Meta>
+<Remark BlockID=9 NumWords=8 BlockCodeSize=4>
+  <Remark header codeid=5 abbrevid=4 op0=2 op1=0 op2=1 op3=2/>
+  <Remark debug location codeid=6 abbrevid=5 op0=3 op1=99 op2=55/>
+  <Remark hotness codeid=7 abbrevid=6 op0=999999999/>
+  <Argument with debug location codeid=8 abbrevid=7 op0=4 op1=5 op2=6 op3=11 op4=66/>
+</Remark>
+
+
+

Standalone

+
<BLOCKINFO_BLOCK/>
+<Meta BlockID=8 NumWords=15 BlockCodeSize=3>
+  <Container info codeid=1 abbrevid=4 op0=5 op1=2/>
+  <Remark version codeid=2 abbrevid=5 op0=30/>
+  <String table codeid=3 abbrevid=6/> blob data = 'pass\\x00remark\\x00function\\x00path\\x00key\\x00value\\x00argpath\\x00'
+</Meta>
+<Remark BlockID=9 NumWords=8 BlockCodeSize=4>
+  <Remark header codeid=5 abbrevid=4 op0=2 op1=1 op2=0 op3=2/>
+  <Remark debug location codeid=6 abbrevid=5 op0=3 op1=99 op2=55/>
+  <Remark hotness codeid=7 abbrevid=6 op0=999999999/>
+  <Argument with debug location codeid=8 abbrevid=7 op0=4 op1=5 op2=6 op3=11 op4=66/>
+</Remark>
+
+
+
+
+
+

opt-viewer

+

The opt-viewer directory contains a collection of tools that visualize and +summarize serialized remarks.

+

The tools only support the yaml format.

+
+

opt-viewer.py

+

Output an HTML page which gives visual feedback on compiler interactions with +your program.

+
+
+
Examples
+

+
+
$ opt-viewer.py my_yaml_file.opt.yaml
+
+
+
$ opt-viewer.py my_build_dir/
+
+
+
+
+
+

opt-stats.py

+

Output statistics about the optimization remarks in the input set.

+
+
+
Example
+

+
+
$ opt-stats.py my_yaml_file.opt.yaml
+
+Total number of remarks           3
+
+
+Top 10 remarks by pass:
+  inline                         33%
+  asm-printer                    33%
+  prologepilog                   33%
+
+Top 10 remarks:
+  asm-printer/InstructionCount   33%
+  inline/NoDefinition            33%
+  prologepilog/StackSize         33%
+
+
+
+
+
+

opt-diff.py

+

Produce a new YAML file which contains all of the changes in optimizations +between two YAML files.

+

Typically, this tool should be used to do diffs between:

+
    +
  • new compiler + fixed source vs old compiler + fixed source

  • +
  • fixed compiler + new source vs fixed compiler + old source

  • +
+

This diff file can be displayed using opt-viewer.py.

+
+
+
Example
+

+
+
$ opt-diff.py my_opt_yaml1.opt.yaml my_opt_yaml2.opt.yaml -o my_opt_diff.opt.yaml
+$ opt-viewer.py my_opt_diff.opt.yaml
+
+
+
+
+
+
+

Emitting remark diagnostics in the object file

+

A section containing metadata on remark diagnostics will be emitted for the +following formats:

+
    +
  • yaml-strtab

  • +
  • bitstream

  • +
+

This can be overridden by using the flag -remarks-section=<bool>.

+

The section is named:

+
    +
  • __LLVM,__remarks (MachO)

  • +
+
+
+

C API

+

LLVM provides a library that can be used to parse remarks through a shared +library named libRemarks.

+

The typical usage through the C API is like the following:

+
LLVMRemarkParserRef Parser = LLVMRemarkParserCreateYAML(Buf, Size);
+LLVMRemarkEntryRef Remark = NULL;
+while ((Remark = LLVMRemarkParserGetNext(Parser))) {
+   // use Remark
+   LLVMRemarkEntryDispose(Remark); // Release memory.
+}
+bool HasError = LLVMRemarkParserHasError(Parser);
+LLVMRemarkParserDispose(Parser);
+
+
+
+
+

Remark streamers

+

The RemarkStreamer interface is used to unify the serialization +capabilities of remarks across all the components that can generate remarks.

+

All remark serialization should go through the main remark streamer, the +llvm::remarks::RemarkStreamer set up in the LLVMContext. The interface +takes remark objects converted to llvm::remarks::Remark, and takes care of +serializing it to the requested format, using the requested type of metadata, +etc.

+

Typically, a specialized remark streamer will hold a reference to the one set +up in the LLVMContext, and will operate on its own type of diagnostics.

+

For example, LLVM IR passes will emit llvm::DiagnosticInfoOptimization* +that get converted to llvm::remarks::Remark objects. Then, clang could set +up its own specialized remark streamer that takes clang::Diagnostic +objects. This can allow various components of the frontend to emit remarks +using the same techniques as the LLVM remarks.

+

This gives us the following advantages:

+
    +
  • Composition: during the compilation pipeline, multiple components can set up +their specialized remark streamers that all emit remarks through the same +main streamer.

  • +
  • Re-using the remark infrastructure in lib/Remarks.

  • +
  • Using the same file and format for the remark emitters created throughout the +compilation.

  • +
+

at the cost of an extra layer of abstraction.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ReportingGuide.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ReportingGuide.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ReportingGuide.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ReportingGuide.html 2021-09-19 16:16:39.000000000 +0000 @@ -0,0 +1,263 @@ + + + + + + + + + Reporting Guide — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Reporting Guide

+
+

Note

+

This document is currently a DRAFT document while it is being discussed +by the community.

+
+

If you believe someone is violating the code of conduct +you can always report it to the LLVM Foundation Code of Conduct Advisory +Committee by emailing conduct@llvm.org. All reports will be kept +confidential. This isn’t a public list and only members of the advisory +committee will receive the report.

+

If you believe anyone is in physical danger, please notify appropriate law +enforcement first. If you are unsure what law enforcement agency is +appropriate, please include this in your report and we will attempt to notify +them.

+

If the violation occurs at an event such as a Developer Meeting and requires +immediate attention, you can also reach out to any of the event organizers or +staff. Event organizers and staff will be prepared to handle the incident and +able to help. If you cannot find one of the organizers, the venue staff can +locate one for you. We will also post detailed contact information for specific +events as part of each event’s information. In person reports will still be +kept confidential exactly as above, but also feel free to (anonymously if +needed) email conduct@llvm.org.

+
+

Note

+

The LLVM community has long handled inappropriate behavior on its own, using +both private communication and public responses. Nothing in this document is +intended to discourage this self enforcement of community norms. Instead, +the mechanisms described here are intended to supplement any self +enforcement within the community. They provide avenues for handling severe +cases or cases where the reporting party does not wish to respond directly +for any reason.

+
+
+

Filing a report

+

Reports can be as formal or informal as needed for the situation at hand. If +possible, please include as much information as you can. If you feel +comfortable, please consider including:

+
    +
  • Your contact info (so we can get in touch with you if we need to follow up).

  • +
  • Names (real, nicknames, or pseudonyms) of any individuals involved. If there +were other witnesses besides you, please try to include them as well.

  • +
  • When and where the incident occurred. Please be as specific as possible.

  • +
  • Your account of what occurred. If there is a publicly available record (e.g. +a mailing list archive or a public IRC logger) please include a link.

  • +
  • Any extra context you believe existed for the incident.

  • +
  • If you believe this incident is ongoing.

  • +
  • Any other information you believe we should have.

  • +
+
+
+

What happens after you file a report?

+

You will receive an email from the advisory committee acknowledging receipt +within 24 hours (and we will aim to respond much quicker than that).

+

The advisory committee will immediately meet to review the incident and try to +determine:

+
    +
  • What happened and who was involved.

  • +
  • Whether this event constitutes a code of conduct violation.

  • +
  • Whether this is an ongoing situation, or if there is a threat to anyone’s +physical safety.

  • +
+

If this is determined to be an ongoing incident or a threat to physical safety, +the working group’s immediate priority will be to protect everyone involved. +This means we may delay an “official” response until we believe that the +situation has ended and that everyone is physically safe.

+

The working group will try to contact other parties involved or witnessing the +event to gain clarity on what happened and understand any different +perspectives.

+

Once the advisory committee has a complete account of the events they will make +a decision as to how to respond. Responses may include:

+
    +
  • Nothing, if we determine no violation occurred or it has already been +appropriately resolved.

  • +
  • Providing either moderation or mediation to ongoing interactions (where +appropriate, safe, and desired by both parties).

  • +
  • A private reprimand from the working group to the individuals involved.

  • +
  • An imposed vacation (i.e. asking someone to “take a week off” from a mailing +list or IRC).

  • +
  • A public reprimand.

  • +
  • A permanent or temporary ban from some or all LLVM spaces (mailing lists, +IRC, etc.)

  • +
  • Involvement of relevant law enforcement if appropriate.

  • +
+

If the situation is not resolved within one week, we’ll respond within one week +to the original reporter with an update and explanation.

+

Once we’ve determined our response, we will separately contact the original +reporter and other individuals to let them know what actions (if any) we’ll be +taking. We will take into account feedback from the individuals involved on the +appropriateness of our response, but we don’t guarantee we’ll act on it.

+

After any incident, the advisory committee will make a report on the situation +to the LLVM Foundation board. The board may choose to make a public statement +about the incident. If that’s the case, the identities of anyone involved will +remain confidential unless instructed by those individuals otherwise.

+
+
+

Appealing

+

Only permanent resolutions (such as bans) or requests for public actions may be +appealed. To appeal a decision of the working group, contact the LLVM +Foundation board at board@llvm.org with your appeal and the board will review +the case.

+

In general, it is not appropriate to appeal a particular decision on +a public mailing list. Doing so would involve disclosure of information which +would be confidential. Disclosing this kind of information publicly may be +considered a separate and (potentially) more serious violation of the Code of +Conduct. This is not meant to limit discussion of the Code of Conduct, the +advisory board itself, or the appropriateness of responses in general, but +please refrain from mentioning specific facts about cases without the +explicit permission of all parties involved.

+
+
+

Members of the Code of Conduct Advisory Committee

+

The members serving on the advisory committee are listed here with contact +information in case you are more comfortable talking directly to a specific +member of the committee.

+
+

Note

+

FIXME: When we form the initial advisory committee, the members’ names and private contact info need to be added here.

+
+

(This text is based on the Django Project Code of Conduct, which is in turn +based on wording from the Speak Up! project.)

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ScudoHardenedAllocator.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ScudoHardenedAllocator.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/ScudoHardenedAllocator.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/ScudoHardenedAllocator.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,507 @@ + + + + + + + + + Scudo Hardened Allocator — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Scudo Hardened Allocator

+ +
+

Introduction

+

The Scudo Hardened Allocator is a user-mode allocator, originally based on LLVM +Sanitizers’ +CombinedAllocator. +It aims at providing additional mitigation against heap based vulnerabilities, +while maintaining good performance. Scudo is currently the default allocator in +Fuchsia, and in Android +since Android 11.

+

The name “Scudo” comes from the Italian word for +shield +(and Escudo in Spanish).

+
+
+

Design

+
+

Allocator

+

Scudo was designed with security in mind, but aims at striking a good balance +between security and performance. It was designed to be highly tunable and +configurable, and while we provide some default configurations, we encourage +consumers to come up with the parameters that will work best for their use +cases.

+

The allocator combines several components that serve distinct purposes:

+
    +
  • the Primary allocator: fast and efficient, it services smaller allocation +sizes by carving reserved memory regions into blocks of identical size. There +are currently two Primary allocators implemented, specific to 32 and 64 bit +architectures. It is configurable via compile time options.

  • +
  • the Secondary allocator: slower, it services larger allocation sizes via the +memory mapping primitives of the underlying operating system. Secondary backed +allocations are surrounded by Guard Pages. It is also configurable via compile +time options.

  • +
  • the thread specific data Registry: defines how local caches operate for each +thread. There are currently two models implemented: the exclusive model where +each thread holds its own caches (using the ELF TLS); or the shared model +where threads share a fixed size pool of caches.

  • +
  • the Quarantine: offers a way to delay the deallocation operations, preventing +blocks from being immediately available for reuse. Blocks held will be recycled +once certain size criteria are reached. This is essentially a delayed freelist +which can help mitigate some use-after-free situations. This feature is fairly +costly in terms of performance and memory footprint, is mostly controlled by +runtime options and is disabled by default.

  • +
+
+
+

Allocations Header

+

Every chunk of heap memory returned to an application by the allocator will be +preceded by a header. This has two purposes:

+
    +
  • being able to store various information about the chunk, that can be leveraged to +ensure consistency of the heap operations;

  • +
  • being able to detect potential corruption. For this purpose, the header is +checksummed and corruption of the header will be detected when said header is +accessed (note that if the corrupted header is not accessed, the corruption +will remain undetected).

  • +
+

The following information is stored in the header:

+
    +
  • the class ID for that chunk, which identifies the region where the chunk +resides for Primary backed allocations, or 0 for Secondary backed allocations;

  • +
  • the state of the chunk (available, allocated or quarantined);

  • +
  • the allocation type (malloc, new, new[] or memalign), to detect potential +mismatches in the allocation APIs used;

  • +
  • the size (Primary) or unused bytes amount (Secondary) for that chunk, which is +necessary for reallocation or sized-deallocation operations;

  • +
  • the offset of the chunk, which is the distance in bytes from the beginning of +the returned chunk to the beginning of the backend allocation (the “block”);

  • +
  • the 16-bit checksum;

  • +
+

This header fits within 8 bytes on all platforms supported, and contributes to a +small overhead for each allocation.

+

The checksum is computed using a CRC32 (made faster with hardware support) +of the global secret, the chunk pointer itself, and the 8 bytes of header with +the checksum field zeroed out. It is not intended to be cryptographically +strong.

+

The header is atomically loaded and stored to prevent races. This is important +as two consecutive chunks could belong to different threads. We work on local +copies and use compare-exchange primitives to update the headers in the heap +memory, and avoid any type of double-fetching.

+
+
+

Randomness

+

Randomness is a critical factor to the additional security provided by the +allocator. The allocator trusts the memory mapping primitives of the OS to +provide pages at (mostly) non-predictable locations in memory, as well as the +binaries to be compiled with ASLR. In the event one of those assumptions is +incorrect, the security will be greatly reduced. Scudo further randomizes how +blocks are allocated in the Primary, and can randomize how caches are assigned to +threads.

+
+
+

Memory reclaiming

+

Primary and Secondary allocators have different behaviors with regard to +reclaiming. While Secondary mapped allocations can be unmapped on deallocation, +it isn’t the case for the Primary, which could lead to a steady growth of the +RSS of a process. To counteract this, if the underlying OS allows it, pages +that are covered by contiguous free memory blocks in the Primary can be +released (this generally means they won’t count towards the RSS of a process and +be zero filled on subsequent accesses). This is done in the deallocation path, +and several options exist to tune this behavior.

+
+
+
+

Usage

+
+

Platform

+

If using Fuchsia or an Android version greater than 11, your memory allocations +are already serviced by Scudo (note that Android Svelte configurations still use +jemalloc).

+
+
+

Library

+

The allocator static library can be built from the LLVM tree thanks to the +scudo_standalone CMake rule. The associated tests can be exercised thanks to +the check-scudo_standalone CMake rule.

+

Linking the static library to your project can require the use of the +whole-archive linker flag (or equivalent), depending on your linker. +Additional flags might also be necessary.

+

Your linked binary should now make use of the Scudo allocation and deallocation +functions.

+

You may also build Scudo like this:

+
cd $LLVM/compiler-rt/lib
+clang++ -fPIC -std=c++17 -msse4.2 -O2 -pthread -shared \
+  -I scudo/standalone/include \
+  scudo/standalone/*.cpp \
+  -o $HOME/libscudo.so
+
+
+

and then use it with existing binaries as follows:

+
LD_PRELOAD=$HOME/libscudo.so ./a.out
+
+
+
+
+

Clang

+

With a recent version of Clang (post rL317337), the “old” version of the +allocator can be linked with a binary at compilation using the +-fsanitize=scudo command-line argument, if the target platform is supported. +Currently, the only other sanitizer Scudo is compatible with is UBSan +(eg: -fsanitize=scudo,undefined). Compiling with Scudo will also enforce +PIE for the output binary.

+

We will transition this to the standalone Scudo version in the future.

+
+
+

Options

+

Several aspects of the allocator can be configured on a per process basis +through the following ways:

+
    +
  • at compile time, by defining SCUDO_DEFAULT_OPTIONS to the options string +you want set by default;

  • +
  • by defining a __scudo_default_options function in one’s program that +returns the options string to be parsed. Said function must have the following +prototype: extern "C" const char* __scudo_default_options(void), with a +default visibility. This will override the compile time define;

  • +
  • through the environment variable SCUDO_OPTIONS, containing the options string +to be parsed. Options defined this way will override any definition made +through __scudo_default_options.

  • +
  • via the standard mallopt API, +using parameters that are Scudo specific.

  • +
+

When dealing with the options string, it follows a syntax similar to ASan, where +distinct options can be assigned in the same string, separated by colons.

+

For example, using the environment variable:

+
SCUDO_OPTIONS="delete_size_mismatch=false:release_to_os_interval_ms=-1" ./a.out
+
+
+

Or using the function:

+
extern "C" const char *__scudo_default_options() {
+  return "delete_size_mismatch=false:release_to_os_interval_ms=-1";
+}
+
+
+

The following “string” options are available:

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Option

64-bit default

32-bit default

Description

quarantine_size_kb

0

0

The size (in Kb) of quarantine used to delay +the actual deallocation of chunks. Lower value +may reduce memory usage but decrease the +effectiveness of the mitigation; a negative +value will fall back to the defaults. Setting +both this and thread_local_quarantine_size_kb +to zero will disable the quarantine entirely.

quarantine_max_chunk_size

0

0

Size (in bytes) up to which chunks can be +quarantined.

thread_local_quarantine_size_kb

0

0

The size (in Kb) of per-thread cache used to +offload the global quarantine. Lower value may +reduce memory usage but might increase +contention on the global quarantine. Setting +both this and quarantine_size_kb to zero will +disable the quarantine entirely.

dealloc_type_mismatch

false

false

Whether or not we report errors on +malloc/delete, new/free, new/delete[], etc.

delete_size_mismatch

true

true

Whether or not we report errors on mismatch +between sizes of new and delete.

zero_contents

false

false

Whether or not we zero chunk contents on +allocation.

pattern_fill_contents

false

false

Whether or not we fill chunk contents with a +byte pattern on allocation.

may_return_null

true

true

Whether or not a non-fatal failure can return a +NULL pointer (as opposed to terminating).

release_to_os_interval_ms

5000

5000

The minimum interval (in ms) at which a release +can be attempted (a negative value disables +reclaiming).

+

Additional flags can be specified, for example if Scudo is compiled with +GWP-ASan support.

+

The following “mallopt” options are available (options are defined in +include/scudo/interface.h):

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + +

Option

Description

M_DECAY_TIME

Sets the release interval option to the specified +value (Android only allows 0 or 1 to respectively set +the interval to the minimum and maximum value as +specified at compile time).

M_PURGE

Forces immediate memory reclaiming (value is unused).

M_MEMTAG_TUNING

Tunes the allocator’s choice of memory tags to make +it more likely that a certain class of memory errors +will be detected. The value argument should be one of +the enumerators of scudo_memtag_tuning.

M_THREAD_DISABLE_MEM_INIT

Tunes the per-thread memory initialization, 0 being +the normal behavior, 1 disabling the automatic heap +initialization.

M_CACHE_COUNT_MAX

Sets the maximum number of entries that can be cached +in the Secondary cache.

M_CACHE_SIZE_MAX

Sets the maximum size of entries that can be cached +in the Secondary cache.

M_TSDS_COUNT_MAX

Increases the maximum number of TSDs that can be used +up to the limit specified at compile time.

+
+
+
+

Error Types

+

The allocator will output an error message, and potentially terminate the +process, when an unexpected behavior is detected. The output usually starts with +"Scudo ERROR:" followed by a short summary of the problem that occurred as +well as the pointer(s) involved. Once again, Scudo is meant to be a mitigation, +and might not be the most useful of tools to help you root-cause the issue, +please consider ASan +for this purpose.

+

Here is a list of the current error messages and their potential cause:

+
    +
  • "corrupted chunk header": the checksum verification of the chunk header +has failed. This is likely due to one of two things: the header was +overwritten (partially or totally), or the pointer passed to the function is +not a chunk at all;

  • +
  • "race on chunk header": two different threads are attempting to manipulate +the same header at the same time. This is usually symptomatic of a +race-condition or general lack of locking when performing operations on that +chunk;

  • +
  • "invalid chunk state": the chunk is not in the expected state for a given +operation, e.g., it is not allocated when trying to free it, or it’s not +quarantined when trying to recycle it, etc. A double-free is the typical +reason this error would occur;

  • +
  • "misaligned pointer": we strongly enforce basic alignment requirements, 8 +bytes on 32-bit platforms, 16 bytes on 64-bit platforms. If a pointer passed +to our functions does not fit those, something is definitely wrong;

  • +
  • "allocation type mismatch": when the optional deallocation type mismatch +check is enabled, a deallocation function called on a chunk has to match the +type of function that was called to allocate it. Security implications of such +a mismatch are not necessarily obvious but situational at best;

  • +
  • "invalid sized delete": when the C++14 sized delete operator is used, and +the optional check enabled, this indicates that the size passed when +deallocating a chunk is not congruent with the one requested when allocating +it. This is likely to be a compiler issue, +as was the case with Intel C++ Compiler, or some type confusion on the object +being deallocated;

  • +
  • "RSS limit exhausted": the maximum RSS optionally specified has been +exceeded;

  • +
+

Several other error messages relate to parameter checking on the libc allocation +APIs and are fairly straightforward to understand.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/search.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/search.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/search.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/search.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,132 @@ + + + + + + + + + Search — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +

Search

+
+ +

+ Please activate JavaScript to enable the search + functionality. +

+
+

+ Searching for multiple words only shows matches that contain + all words. +

+
+ + + +
+ +
+ +
+ +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/searchindex.js llvm-toolchain-13-13.0.0/llvm/docs/_build/html/searchindex.js --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/searchindex.js 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/searchindex.js 2021-09-19 16:16:43.000000000 +0000 @@ -0,0 +1 @@ +Search.setIndex({docnames:["AMDGPU/AMDGPUAsmGFX10","AMDGPU/AMDGPUAsmGFX1011","AMDGPU/AMDGPUAsmGFX7","AMDGPU/AMDGPUAsmGFX8","AMDGPU/AMDGPUAsmGFX9","AMDGPU/AMDGPUAsmGFX900","AMDGPU/AMDGPUAsmGFX904","AMDGPU/AMDGPUAsmGFX906","AMDGPU/AMDGPUAsmGFX908","AMDGPU/AMDGPUAsmGFX90a","AMDGPU/gfx1011_src","AMDGPU/gfx1011_src_1","AMDGPU/gfx1011_src_2","AMDGPU/gfx1011_src_3","AMDGPU/gfx1011_type_deviation","AMDGPU/gfx1011_vdst","AMDGPU/gfx1011_vsrc","AMDGPU/gfx10_attr","AMDGPU/gfx10_dst","AMDGPU/gfx10_fx_operand","AMDGPU/gfx10_hwreg","AMDGPU/gfx10_imm16","AMDGPU/gfx10_imm16_1","AMDGPU/gfx10_imm16_2","AMDGPU/gfx10_label","AMDGPU/gfx10_m","AMDGPU/gfx10_m_1","AMDGPU/gfx10_msg","AMDGPU/gfx10_opt","AMDGPU/gfx10_param","AMDGPU/gfx10_probe","AMDGPU/gfx10_saddr","AMDGPU/gfx10_saddr_1","AMDGPU/gfx10_sbase","AMDGPU/gfx10_sbase_1","AMDGPU/gfx10_sbase_2","AMDGPU/gfx10_sdata","AMDGPU/gfx10_sdata_1","AMDGPU/gfx10_sdata_2","AMDGPU/gfx10_sdata_3","AMDGPU/gfx10_sdata_4","AMDGPU/gfx10_sdata_5","AMDGPU/gfx10_sdst","AMDGPU/gfx10_sdst_1","AMDGPU/gfx10_sdst_2","AMDGPU/gfx10_sdst_3","AMDGPU/gfx10_sdst_4","AMDGPU/gfx10_sdst_5","AMDGPU/gfx10_sdst_6","AMDGPU/gfx10_sdst_7","AMDGPU/gfx10_sdst_8","AMDGPU/gfx10_simm32","AMDGPU/gfx10_simm32_1","AMDGPU/gfx10_simm32_2","AMDGPU/gfx10_soffset","AMDGPU/gfx10_soffset_1","AMDGPU/gfx10_soffset_2","AMDGPU/gfx10_src","AMDGPU/gfx10_src_1","AMDGPU/gfx10_src_2","AMDGPU/gfx10_src_3","AMDGPU/gfx10_src_4","AMDGPU/gfx10_src_5","AMDGPU/gfx10_src_6","AMDGPU/gfx10_src_7","AMDGPU/gfx10_src_8","AMDGPU/gfx10_srsrc","AMDGPU/gfx10_srsrc_1","AMDGPU/gfx10_ssamp","AMDGPU/gfx10_ssrc","AMDGPU/gfx10_ss
rc_1","AMDGPU/gfx10_ssrc_2","AMDGPU/gfx10_ssrc_3","AMDGPU/gfx10_ssrc_4","AMDGPU/gfx10_ssrc_5","AMDGPU/gfx10_ssrc_6","AMDGPU/gfx10_ssrc_7","AMDGPU/gfx10_ssrc_8","AMDGPU/gfx10_tgt","AMDGPU/gfx10_type_deviation","AMDGPU/gfx10_vaddr","AMDGPU/gfx10_vaddr_1","AMDGPU/gfx10_vaddr_2","AMDGPU/gfx10_vaddr_3","AMDGPU/gfx10_vaddr_4","AMDGPU/gfx10_vaddr_5","AMDGPU/gfx10_vcc","AMDGPU/gfx10_vdata","AMDGPU/gfx10_vdata0","AMDGPU/gfx10_vdata0_1","AMDGPU/gfx10_vdata1","AMDGPU/gfx10_vdata1_1","AMDGPU/gfx10_vdata_1","AMDGPU/gfx10_vdata_10","AMDGPU/gfx10_vdata_2","AMDGPU/gfx10_vdata_3","AMDGPU/gfx10_vdata_4","AMDGPU/gfx10_vdata_5","AMDGPU/gfx10_vdata_6","AMDGPU/gfx10_vdata_7","AMDGPU/gfx10_vdata_8","AMDGPU/gfx10_vdata_9","AMDGPU/gfx10_vdst","AMDGPU/gfx10_vdst_1","AMDGPU/gfx10_vdst_10","AMDGPU/gfx10_vdst_11","AMDGPU/gfx10_vdst_12","AMDGPU/gfx10_vdst_13","AMDGPU/gfx10_vdst_2","AMDGPU/gfx10_vdst_3","AMDGPU/gfx10_vdst_4","AMDGPU/gfx10_vdst_5","AMDGPU/gfx10_vdst_6","AMDGPU/gfx10_vdst_7","AMDGPU/gfx10_vdst_8","AMDGPU/gfx10_vdst_9","AMDGPU/gfx10_vsrc","AMDGPU/gfx10_vsrc_1","AMDGPU/gfx10_vsrc_2","AMDGPU/gfx10_vsrc_3","AMDGPU/gfx10_waitcnt","AMDGPU/gfx7_attr","AMDGPU/gfx7_dst","AMDGPU/gfx7_hwreg","AMDGPU/gfx7_imm16","AMDGPU/gfx7_imm16_1","AMDGPU/gfx7_imm16_2","AMDGPU/gfx7_label","AMDGPU/gfx7_m","AMDGPU/gfx7_msg","AMDGPU/gfx7_opt","AMDGPU/gfx7_param","AMDGPU/gfx7_sbase","AMDGPU/gfx7_sbase_1","AMDGPU/gfx7_sdst","AMDGPU/gfx7_sdst_1","AMDGPU/gfx7_sdst_2","AMDGPU/gfx7_sdst_3","AMDGPU/gfx7_sdst_4","AMDGPU/gfx7_sdst_5","AMDGPU/gfx7_sdst_6","AMDGPU/gfx7_sdst_7","AMDGPU/gfx7_simm32","AMDGPU/gfx7_simm32_1","AMDGPU/gfx7_soffset","AMDGPU/gfx7_soffset_1","AMDGPU/gfx7_src","AMDGPU/gfx7_src_1","AMDGPU/gfx7_src_10","AMDGPU/gfx7_src_2","AMDGPU/gfx7_src_3","AMDGPU/gfx7_src_4","AMDGPU/gfx7_src_5","AMDGPU/gfx7_src_6","AMDGPU/gfx7_src_7","AMDGPU/gfx7_src_8","AMDGPU/gfx7_src_9","AMDGPU/gfx7_srsrc","AMDGPU/gfx7_srsrc_1","AMDGPU/gfx7_ssamp","AMDGPU/gfx7_ssrc","AMDGPU/gfx7_ssrc_1","AMDGPU/gfx7_ssrc_10","AMDGPU/gfx7_ssrc_2
","AMDGPU/gfx7_ssrc_3","AMDGPU/gfx7_ssrc_4","AMDGPU/gfx7_ssrc_5","AMDGPU/gfx7_ssrc_6","AMDGPU/gfx7_ssrc_7","AMDGPU/gfx7_ssrc_8","AMDGPU/gfx7_ssrc_9","AMDGPU/gfx7_tgt","AMDGPU/gfx7_type_deviation","AMDGPU/gfx7_vaddr","AMDGPU/gfx7_vaddr_1","AMDGPU/gfx7_vaddr_2","AMDGPU/gfx7_vaddr_3","AMDGPU/gfx7_vcc","AMDGPU/gfx7_vdata","AMDGPU/gfx7_vdata0","AMDGPU/gfx7_vdata0_1","AMDGPU/gfx7_vdata1","AMDGPU/gfx7_vdata1_1","AMDGPU/gfx7_vdata_1","AMDGPU/gfx7_vdata_2","AMDGPU/gfx7_vdata_3","AMDGPU/gfx7_vdata_4","AMDGPU/gfx7_vdata_5","AMDGPU/gfx7_vdata_6","AMDGPU/gfx7_vdata_7","AMDGPU/gfx7_vdata_8","AMDGPU/gfx7_vdata_9","AMDGPU/gfx7_vdst","AMDGPU/gfx7_vdst_1","AMDGPU/gfx7_vdst_10","AMDGPU/gfx7_vdst_11","AMDGPU/gfx7_vdst_12","AMDGPU/gfx7_vdst_2","AMDGPU/gfx7_vdst_3","AMDGPU/gfx7_vdst_4","AMDGPU/gfx7_vdst_5","AMDGPU/gfx7_vdst_6","AMDGPU/gfx7_vdst_7","AMDGPU/gfx7_vdst_8","AMDGPU/gfx7_vdst_9","AMDGPU/gfx7_vsrc","AMDGPU/gfx7_vsrc_1","AMDGPU/gfx7_vsrc_2","AMDGPU/gfx7_vsrc_3","AMDGPU/gfx7_waitcnt","AMDGPU/gfx8_attr","AMDGPU/gfx8_dst","AMDGPU/gfx8_hwreg","AMDGPU/gfx8_imask","AMDGPU/gfx8_imm16","AMDGPU/gfx8_imm16_1","AMDGPU/gfx8_imm16_2","AMDGPU/gfx8_label","AMDGPU/gfx8_m","AMDGPU/gfx8_m_1","AMDGPU/gfx8_msg","AMDGPU/gfx8_opt","AMDGPU/gfx8_param","AMDGPU/gfx8_probe","AMDGPU/gfx8_sbase","AMDGPU/gfx8_sbase_1","AMDGPU/gfx8_sdata","AMDGPU/gfx8_sdata_1","AMDGPU/gfx8_sdata_2","AMDGPU/gfx8_sdst","AMDGPU/gfx8_sdst_1","AMDGPU/gfx8_sdst_2","AMDGPU/gfx8_sdst_3","AMDGPU/gfx8_sdst_4","AMDGPU/gfx8_sdst_5","AMDGPU/gfx8_sdst_6","AMDGPU/gfx8_sdst_7","AMDGPU/gfx8_simm32","AMDGPU/gfx8_simm32_1","AMDGPU/gfx8_simm32_2","AMDGPU/gfx8_soffset","AMDGPU/gfx8_soffset_1","AMDGPU/gfx8_soffset_2","AMDGPU/gfx8_src","AMDGPU/gfx8_src_1","AMDGPU/gfx8_src_10","AMDGPU/gfx8_src_2","AMDGPU/gfx8_src_3","AMDGPU/gfx8_src_4","AMDGPU/gfx8_src_5","AMDGPU/gfx8_src_6","AMDGPU/gfx8_src_7","AMDGPU/gfx8_src_8","AMDGPU/gfx8_src_9","AMDGPU/gfx8_srsrc","AMDGPU/gfx8_srsrc_1","AMDGPU/gfx8_ssamp","AMDGPU/gfx8_ssrc","AMDGPU/gfx8_ssrc_1","AMDGPU/gfx8_ss
rc_2","AMDGPU/gfx8_ssrc_3","AMDGPU/gfx8_ssrc_4","AMDGPU/gfx8_ssrc_5","AMDGPU/gfx8_ssrc_6","AMDGPU/gfx8_ssrc_7","AMDGPU/gfx8_ssrc_8","AMDGPU/gfx8_tgt","AMDGPU/gfx8_type_deviation","AMDGPU/gfx8_vaddr","AMDGPU/gfx8_vaddr_1","AMDGPU/gfx8_vaddr_2","AMDGPU/gfx8_vaddr_3","AMDGPU/gfx8_vcc","AMDGPU/gfx8_vdata","AMDGPU/gfx8_vdata0","AMDGPU/gfx8_vdata0_1","AMDGPU/gfx8_vdata1","AMDGPU/gfx8_vdata1_1","AMDGPU/gfx8_vdata_1","AMDGPU/gfx8_vdata_10","AMDGPU/gfx8_vdata_11","AMDGPU/gfx8_vdata_12","AMDGPU/gfx8_vdata_13","AMDGPU/gfx8_vdata_14","AMDGPU/gfx8_vdata_2","AMDGPU/gfx8_vdata_3","AMDGPU/gfx8_vdata_4","AMDGPU/gfx8_vdata_5","AMDGPU/gfx8_vdata_6","AMDGPU/gfx8_vdata_7","AMDGPU/gfx8_vdata_8","AMDGPU/gfx8_vdata_9","AMDGPU/gfx8_vdst","AMDGPU/gfx8_vdst_1","AMDGPU/gfx8_vdst_10","AMDGPU/gfx8_vdst_11","AMDGPU/gfx8_vdst_12","AMDGPU/gfx8_vdst_13","AMDGPU/gfx8_vdst_14","AMDGPU/gfx8_vdst_15","AMDGPU/gfx8_vdst_16","AMDGPU/gfx8_vdst_17","AMDGPU/gfx8_vdst_2","AMDGPU/gfx8_vdst_3","AMDGPU/gfx8_vdst_4","AMDGPU/gfx8_vdst_5","AMDGPU/gfx8_vdst_6","AMDGPU/gfx8_vdst_7","AMDGPU/gfx8_vdst_8","AMDGPU/gfx8_vdst_9","AMDGPU/gfx8_vsrc","AMDGPU/gfx8_vsrc_1","AMDGPU/gfx8_vsrc_2","AMDGPU/gfx8_vsrc_3","AMDGPU/gfx8_waitcnt","AMDGPU/gfx900_fx_operand","AMDGPU/gfx900_m","AMDGPU/gfx900_src","AMDGPU/gfx900_src_1","AMDGPU/gfx900_vdst","AMDGPU/gfx904_fx_operand","AMDGPU/gfx904_m","AMDGPU/gfx904_src","AMDGPU/gfx904_src_1","AMDGPU/gfx904_vdst","AMDGPU/gfx906_fx_operand","AMDGPU/gfx906_m","AMDGPU/gfx906_m_1","AMDGPU/gfx906_src","AMDGPU/gfx906_src_1","AMDGPU/gfx906_src_2","AMDGPU/gfx906_src_3","AMDGPU/gfx906_src_4","AMDGPU/gfx906_type_deviation","AMDGPU/gfx906_vdst","AMDGPU/gfx906_vsrc","AMDGPU/gfx908_dst","AMDGPU/gfx908_fx_operand","AMDGPU/gfx908_m","AMDGPU/gfx908_m_1","AMDGPU/gfx908_opt","AMDGPU/gfx908_saddr","AMDGPU/gfx908_soffset","AMDGPU/gfx908_src","AMDGPU/gfx908_src_1","AMDGPU/gfx908_src_2","AMDGPU/gfx908_src_3","AMDGPU/gfx908_src_4","AMDGPU/gfx908_src_5","AMDGPU/gfx908_srsrc","AMDGPU/gfx908_type_deviation","AMDGPU/gfx9
08_vaddr","AMDGPU/gfx908_vaddr_1","AMDGPU/gfx908_vdata","AMDGPU/gfx908_vdata_1","AMDGPU/gfx908_vdst","AMDGPU/gfx908_vdst_1","AMDGPU/gfx908_vdst_2","AMDGPU/gfx908_vdst_3","AMDGPU/gfx908_vdst_4","AMDGPU/gfx908_vdst_5","AMDGPU/gfx908_vsrc","AMDGPU/gfx908_vsrc_1","AMDGPU/gfx908_vsrc_2","AMDGPU/gfx908_vsrc_3","AMDGPU/gfx908_vsrc_4","AMDGPU/gfx908_vsrc_5","AMDGPU/gfx908_vsrc_6","AMDGPU/gfx90a_dst","AMDGPU/gfx90a_fx_operand","AMDGPU/gfx90a_hwreg","AMDGPU/gfx90a_imask","AMDGPU/gfx90a_imm16","AMDGPU/gfx90a_imm16_1","AMDGPU/gfx90a_imm16_2","AMDGPU/gfx90a_label","AMDGPU/gfx90a_m","AMDGPU/gfx90a_m_1","AMDGPU/gfx90a_msg","AMDGPU/gfx90a_opt","AMDGPU/gfx90a_probe","AMDGPU/gfx90a_saddr","AMDGPU/gfx90a_saddr_1","AMDGPU/gfx90a_sbase","AMDGPU/gfx90a_sbase_1","AMDGPU/gfx90a_sbase_2","AMDGPU/gfx90a_sdata","AMDGPU/gfx90a_sdata_1","AMDGPU/gfx90a_sdata_2","AMDGPU/gfx90a_sdata_3","AMDGPU/gfx90a_sdata_4","AMDGPU/gfx90a_sdata_5","AMDGPU/gfx90a_sdst","AMDGPU/gfx90a_sdst_1","AMDGPU/gfx90a_sdst_2","AMDGPU/gfx90a_sdst_3","AMDGPU/gfx90a_sdst_4","AMDGPU/gfx90a_sdst_5","AMDGPU/gfx90a_sdst_6","AMDGPU/gfx90a_sdst_7","AMDGPU/gfx90a_simm32","AMDGPU/gfx90a_simm32_1","AMDGPU/gfx90a_simm32_2","AMDGPU/gfx90a_soffset","AMDGPU/gfx90a_soffset_1","AMDGPU/gfx90a_soffset_2","AMDGPU/gfx90a_src","AMDGPU/gfx90a_src_1","AMDGPU/gfx90a_src_10","AMDGPU/gfx90a_src_11","AMDGPU/gfx90a_src_2","AMDGPU/gfx90a_src_3","AMDGPU/gfx90a_src_4","AMDGPU/gfx90a_src_5","AMDGPU/gfx90a_src_6","AMDGPU/gfx90a_src_7","AMDGPU/gfx90a_src_8","AMDGPU/gfx90a_src_9","AMDGPU/gfx90a_srsrc","AMDGPU/gfx90a_srsrc_1","AMDGPU/gfx90a_ssamp","AMDGPU/gfx90a_ssrc","AMDGPU/gfx90a_ssrc_1","AMDGPU/gfx90a_ssrc_2","AMDGPU/gfx90a_ssrc_3","AMDGPU/gfx90a_ssrc_4","AMDGPU/gfx90a_ssrc_5","AMDGPU/gfx90a_ssrc_6","AMDGPU/gfx90a_ssrc_7","AMDGPU/gfx90a_ssrc_8","AMDGPU/gfx90a_type_deviation","AMDGPU/gfx90a_vaddr","AMDGPU/gfx90a_vaddr_1","AMDGPU/gfx90a_vaddr_2","AMDGPU/gfx90a_vaddr_3","AMDGPU/gfx90a_vaddr_4","AMDGPU/gfx90a_vaddr_5","AMDGPU/gfx90a_vcc","AMDGPU/gfx90a_vdata","
AMDGPU/gfx90a_vdata0","AMDGPU/gfx90a_vdata0_1","AMDGPU/gfx90a_vdata1","AMDGPU/gfx90a_vdata1_1","AMDGPU/gfx90a_vdata_1","AMDGPU/gfx90a_vdata_10","AMDGPU/gfx90a_vdata_2","AMDGPU/gfx90a_vdata_3","AMDGPU/gfx90a_vdata_4","AMDGPU/gfx90a_vdata_5","AMDGPU/gfx90a_vdata_6","AMDGPU/gfx90a_vdata_7","AMDGPU/gfx90a_vdata_8","AMDGPU/gfx90a_vdata_9","AMDGPU/gfx90a_vdst","AMDGPU/gfx90a_vdst_1","AMDGPU/gfx90a_vdst_10","AMDGPU/gfx90a_vdst_11","AMDGPU/gfx90a_vdst_12","AMDGPU/gfx90a_vdst_13","AMDGPU/gfx90a_vdst_14","AMDGPU/gfx90a_vdst_15","AMDGPU/gfx90a_vdst_16","AMDGPU/gfx90a_vdst_17","AMDGPU/gfx90a_vdst_18","AMDGPU/gfx90a_vdst_19","AMDGPU/gfx90a_vdst_2","AMDGPU/gfx90a_vdst_3","AMDGPU/gfx90a_vdst_4","AMDGPU/gfx90a_vdst_5","AMDGPU/gfx90a_vdst_6","AMDGPU/gfx90a_vdst_7","AMDGPU/gfx90a_vdst_8","AMDGPU/gfx90a_vdst_9","AMDGPU/gfx90a_vsrc","AMDGPU/gfx90a_vsrc_1","AMDGPU/gfx90a_vsrc_2","AMDGPU/gfx90a_vsrc_3","AMDGPU/gfx90a_vsrc_4","AMDGPU/gfx90a_vsrc_5","AMDGPU/gfx90a_waitcnt","AMDGPU/gfx9_attr","AMDGPU/gfx9_dst","AMDGPU/gfx9_hwreg","AMDGPU/gfx9_imask","AMDGPU/gfx9_imm16","AMDGPU/gfx9_imm16_1","AMDGPU/gfx9_imm16_2","AMDGPU/gfx9_label","AMDGPU/gfx9_m","AMDGPU/gfx9_m_1","AMDGPU/gfx9_msg","AMDGPU/gfx9_opt","AMDGPU/gfx9_param","AMDGPU/gfx9_probe","AMDGPU/gfx9_saddr","AMDGPU/gfx9_saddr_1","AMDGPU/gfx9_sbase","AMDGPU/gfx9_sbase_1","AMDGPU/gfx9_sbase_2","AMDGPU/gfx9_sdata","AMDGPU/gfx9_sdata_1","AMDGPU/gfx9_sdata_2","AMDGPU/gfx9_sdata_3","AMDGPU/gfx9_sdata_4","AMDGPU/gfx9_sdata_5","AMDGPU/gfx9_sdst","AMDGPU/gfx9_sdst_1","AMDGPU/gfx9_sdst_2","AMDGPU/gfx9_sdst_3","AMDGPU/gfx9_sdst_4","AMDGPU/gfx9_sdst_5","AMDGPU/gfx9_sdst_6","AMDGPU/gfx9_sdst_7","AMDGPU/gfx9_simm32","AMDGPU/gfx9_simm32_1","AMDGPU/gfx9_simm32_2","AMDGPU/gfx9_soffset","AMDGPU/gfx9_soffset_1","AMDGPU/gfx9_soffset_2","AMDGPU/gfx9_src","AMDGPU/gfx9_src_1","AMDGPU/gfx9_src_10","AMDGPU/gfx9_src_2","AMDGPU/gfx9_src_3","AMDGPU/gfx9_src_4","AMDGPU/gfx9_src_5","AMDGPU/gfx9_src_6","AMDGPU/gfx9_src_7","AMDGPU/gfx9_src_8","AMDGPU/gfx9_src_9","AMDGPU
/gfx9_srsrc","AMDGPU/gfx9_srsrc_1","AMDGPU/gfx9_ssamp","AMDGPU/gfx9_ssrc","AMDGPU/gfx9_ssrc_1","AMDGPU/gfx9_ssrc_2","AMDGPU/gfx9_ssrc_3","AMDGPU/gfx9_ssrc_4","AMDGPU/gfx9_ssrc_5","AMDGPU/gfx9_ssrc_6","AMDGPU/gfx9_ssrc_7","AMDGPU/gfx9_ssrc_8","AMDGPU/gfx9_tgt","AMDGPU/gfx9_type_deviation","AMDGPU/gfx9_vaddr","AMDGPU/gfx9_vaddr_1","AMDGPU/gfx9_vaddr_2","AMDGPU/gfx9_vaddr_3","AMDGPU/gfx9_vaddr_4","AMDGPU/gfx9_vaddr_5","AMDGPU/gfx9_vcc","AMDGPU/gfx9_vdata","AMDGPU/gfx9_vdata0","AMDGPU/gfx9_vdata0_1","AMDGPU/gfx9_vdata1","AMDGPU/gfx9_vdata1_1","AMDGPU/gfx9_vdata_1","AMDGPU/gfx9_vdata_10","AMDGPU/gfx9_vdata_2","AMDGPU/gfx9_vdata_3","AMDGPU/gfx9_vdata_4","AMDGPU/gfx9_vdata_5","AMDGPU/gfx9_vdata_6","AMDGPU/gfx9_vdata_7","AMDGPU/gfx9_vdata_8","AMDGPU/gfx9_vdata_9","AMDGPU/gfx9_vdst","AMDGPU/gfx9_vdst_1","AMDGPU/gfx9_vdst_10","AMDGPU/gfx9_vdst_11","AMDGPU/gfx9_vdst_12","AMDGPU/gfx9_vdst_13","AMDGPU/gfx9_vdst_2","AMDGPU/gfx9_vdst_3","AMDGPU/gfx9_vdst_4","AMDGPU/gfx9_vdst_5","AMDGPU/gfx9_vdst_6","AMDGPU/gfx9_vdst_7","AMDGPU/gfx9_vdst_8","AMDGPU/gfx9_vdst_9","AMDGPU/gfx9_vsrc","AMDGPU/gfx9_vsrc_1","AMDGPU/gfx9_vsrc_2","AMDGPU/gfx9_vsrc_3","AMDGPU/gfx9_waitcnt","AMDGPUDwarfExtensionsForHeterogeneousDebugging","AMDGPUInstructionNotation","AMDGPUInstructionSyntax","AMDGPUModifierSyntax","AMDGPUOperandSyntax","AMDGPUUsage","AddingConstrainedIntrinsics","AdvancedBuilds","AliasAnalysis","Atomics","Benchmarking","BigEndianNEON","BitCodeFormat","BlockFrequencyTerminology","BranchWeightMetadata","BugLifeCycle","Bugpoint","BugpointRedesign","BuildingADistribution","CFIVerify","CMake","CMakePrimer","CodeGenerator","CodeOfConduct","CodeReview","CodingStandards","CommandGuide/FileCheck","CommandGuide/bugpoint","CommandGuide/clang-tblgen","CommandGuide/dsymutil","CommandGuide/index","CommandGuide/lit","CommandGuide/llc","CommandGuide/lldb-tblgen","CommandGuide/lli","CommandGuide/llvm-addr2line","CommandGuide/llvm-ar","CommandGuide/llvm-as","CommandGuide/llvm-bcanalyzer","CommandGuide/llvm-con
fig","CommandGuide/llvm-cov","CommandGuide/llvm-cxxfilt","CommandGuide/llvm-cxxmap","CommandGuide/llvm-diff","CommandGuide/llvm-dis","CommandGuide/llvm-dwarfdump","CommandGuide/llvm-exegesis","CommandGuide/llvm-extract","CommandGuide/llvm-install-name-tool","CommandGuide/llvm-lib","CommandGuide/llvm-libtool-darwin","CommandGuide/llvm-link","CommandGuide/llvm-lipo","CommandGuide/llvm-locstats","CommandGuide/llvm-mca","CommandGuide/llvm-nm","CommandGuide/llvm-objcopy","CommandGuide/llvm-objdump","CommandGuide/llvm-otool","CommandGuide/llvm-pdbutil","CommandGuide/llvm-profdata","CommandGuide/llvm-profgen","CommandGuide/llvm-ranlib","CommandGuide/llvm-readelf","CommandGuide/llvm-readobj","CommandGuide/llvm-size","CommandGuide/llvm-stress","CommandGuide/llvm-strings","CommandGuide/llvm-strip","CommandGuide/llvm-symbolizer","CommandGuide/llvm-tblgen","CommandGuide/mlir-tblgen","CommandGuide/opt","CommandGuide/tblgen","CommandLine","CompileCudaWithLLVM","CompilerWriterInfo","Contributing","Coroutines","CoverageMappingFormat","DebuggingJITedCode","DependenceGraphs/index","DeveloperPolicy","Docker","ExceptionHandling","ExtendingLLVM","Extensions","FAQ","FaultMaps","Frontend/PerformanceTips","FuzzingLLVM","GarbageCollection","GetElementPtr","GettingInvolved","GettingStarted","GettingStartedTutorials","GettingStartedVS","GitBisecting","GlobalISel/GMIR","GlobalISel/GenericOpcode","GlobalISel/IRTranslator","GlobalISel/InstructionSelect","GlobalISel/KnownBits","GlobalISel/Legalizer","GlobalISel/Pipeline","GlobalISel/Porting","GlobalISel/RegBankSelect","GlobalISel/Resources","GlobalISel/index","GoldPlugin","GwpAsan","HowToAddABuilder","HowToBuildOnARM","HowToBuildWindowsItaniumPrograms","HowToBuildWithPGO","HowToCrossCompileBuiltinsOnArm","HowToCrossCompileLLVM","HowToReleaseLLVM","HowToSetUpLLVMStyleRTTI","HowToSubmitABug","HowToUpdateDebugInfo","HowToUseAttributes","HowToUseInstrMappings","InAlloca","JITLink","LangRef","Lexicon","LibFuzzer","LinkTimeOptimization","LoopTerminolog
y","MCJITDesignAndImplementation","MIRLangRef","MarkdownQuickstartTemplate","MarkedUpDisassembly","MeetupGuidelines","MemTagSanitizer","MemorySSA","MergeFunctions","MyFirstTypoFix","NVPTXUsage","NewPassManager","ORCv2","OpaquePointers","OptBisect","PDB/CodeViewSymbols","PDB/CodeViewTypes","PDB/DbiStream","PDB/GlobalStream","PDB/HashTable","PDB/ModiStream","PDB/MsfFile","PDB/PdbStream","PDB/PublicStream","PDB/TpiStream","PDB/index","Packaging","Passes","Phabricator","ProgrammersManual","Projects","Proposals/GitHubMove","Proposals/LLVMLibC","Proposals/TestSuite","Proposals/VariableNames","Proposals/VectorPredication","Proposals/VectorizationPlan","Reference","ReleaseNotes","ReleaseProcess","Remarks","ReportingGuide","ScudoHardenedAllocator","Security","SegmentedStacks","SourceLevelDebugging","SpeculativeLoadHardening","SphinxQuickstartTemplate","StackMaps","StackSafetyAnalysis","Statepoints","SupportLibrary","SupportPolicy","SystemLibrary","TableGen/BackEnds","TableGen/BackGuide","TableGen/ProgRef","TableGen/index","TableGenFundamentals","TestSuiteGuide","TestSuiteMakefileGuide","TestingGuide","TransformMetadata","TypeMetadata","UserGuides","Vectorizers","WritingAnLLVMBackend","WritingAnLLVMNewPMPass","WritingAnLLVMPass","XRay","XRayExample","XRayFDRFormat","YamlIO","index","tutorial/BuildingAJIT1","tutorial/BuildingAJIT2","tutorial/BuildingAJIT3","tutorial/BuildingAJIT4","tutorial/LangImpl01","tutorial/LangImpl02","tutorial/LangImpl03","tutorial/LangImpl04","tutorial/LangImpl05","tutorial/LangImpl06","tutorial/LangImpl07","tutorial/LangImpl08","tutorial/LangImpl09","tutorial/LangImpl10","tutorial/MyFirstLanguageFrontend/LangImpl01","tutorial/MyFirstLanguageFrontend/LangImpl02","tutorial/MyFirstLanguageFrontend/LangImpl03","tutorial/MyFirstLanguageFrontend/LangImpl04","tutorial/MyFirstLanguageFrontend/LangImpl05","tutorial/MyFirstLanguageFrontend/LangImpl06","tutorial/MyFirstLanguageFrontend/LangImpl07","tutorial/MyFirstLanguageFrontend/LangImpl08","tutorial/MyFirstLa
nguageFrontend/LangImpl09","tutorial/MyFirstLanguageFrontend/LangImpl10","tutorial/MyFirstLanguageFrontend/index","tutorial/index","yaml2obj"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,"sphinx.ext.todo":2,sphinx:56},filenames:["AMDGPU/AMDGPUAsmGFX10.rst","AMDGPU/AMDGPUAsmGFX1011.rst","AMDGPU/AMDGPUAsmGFX7.rst","AMDGPU/AMDGPUAsmGFX8.rst","AMDGPU/AMDGPUAsmGFX9.rst","AMDGPU/AMDGPUAsmGFX900.rst","AMDGPU/AMDGPUAsmGFX904.rst","AMDGPU/AMDGPUAsmGFX906.rst","AMDGPU/AMDGPUAsmGFX908.rst","AMDGPU/AMDGPUAsmGFX90a.rst","AMDGPU/gfx1011_src.rst","AMDGPU/gfx1011_src_1.rst","AMDGPU/gfx1011_src_2.rst","AMDGPU/gfx1011_src_3.rst","AMDGPU/gfx1011_type_deviation.rst","AMDGPU/gfx1011_vdst.rst","AMDGPU/gfx1011_vsrc.rst","AMDGPU/gfx10_attr.rst","AMDGPU/gfx10_dst.rst","AMDGPU/gfx10_fx_operand.rst","AMDGPU/gfx10_hwreg.rst","AMDGPU/gfx10_imm16.rst","AMDGPU/gfx10_imm16_1.rst","AMDGPU/gfx10_imm16_2.rst","AMDGPU/gfx10_label.rst","AMDGPU/gfx10_m.rst","AMDGPU/gfx10_m_1.rst","AMDGPU/gfx10_msg.rst","AMDGPU/gfx10_opt.rst","AMDGPU/gfx10_param.rst","AMDGPU/gfx10_probe.rst","AMDGPU/gfx10_saddr.rst","AMDGPU/gfx10_saddr_1.rst","AMDGPU/gfx10_sbase.rst","AMDGPU/gfx10_sbase_1.rst","AMDGPU/gfx10_sbase_2.rst","AMDGPU/gfx10_sdata.rst","AMDGPU/gfx10_sdata_1.rst","AMDGPU/gfx10_sdata_2.rst","AMDGPU/gfx10_sdata_3.rst","AMDGPU/gfx10_sdata_4.rst","AMDGPU/gfx10_sdata_5.rst","AMDGPU/gfx10_sdst.rst","AMDGPU/gfx10_sdst_1.rst","AMDGPU/gfx10_sdst_2.rst","AMDGPU/gfx10_sdst_3.rst","AMDGPU/gfx10_sdst_4.rst","AMDGPU/gfx10_sdst_5.rst","AMDGPU/gfx10_sdst_6.rst","AMDGPU/gfx10_sdst_7.rst","AMDGPU/gfx10_sdst_8.rst","AMDGPU/gfx10_simm32.rst","AMDGPU/gfx10_simm32_1.rst","AMDGPU/gfx10_simm32_2.rst","AMDGPU/gfx10_soffset.rst","AMDGPU/gfx10_soffset_1.rst","AMDGPU/gfx10_soffset_2.rst","AMDGPU/gf
x10_src.rst","AMDGPU/gfx10_src_1.rst","AMDGPU/gfx10_src_2.rst","AMDGPU/gfx10_src_3.rst","AMDGPU/gfx10_src_4.rst","AMDGPU/gfx10_src_5.rst","AMDGPU/gfx10_src_6.rst","AMDGPU/gfx10_src_7.rst","AMDGPU/gfx10_src_8.rst","AMDGPU/gfx10_srsrc.rst","AMDGPU/gfx10_srsrc_1.rst","AMDGPU/gfx10_ssamp.rst","AMDGPU/gfx10_ssrc.rst","AMDGPU/gfx10_ssrc_1.rst","AMDGPU/gfx10_ssrc_2.rst","AMDGPU/gfx10_ssrc_3.rst","AMDGPU/gfx10_ssrc_4.rst","AMDGPU/gfx10_ssrc_5.rst","AMDGPU/gfx10_ssrc_6.rst","AMDGPU/gfx10_ssrc_7.rst","AMDGPU/gfx10_ssrc_8.rst","AMDGPU/gfx10_tgt.rst","AMDGPU/gfx10_type_deviation.rst","AMDGPU/gfx10_vaddr.rst","AMDGPU/gfx10_vaddr_1.rst","AMDGPU/gfx10_vaddr_2.rst","AMDGPU/gfx10_vaddr_3.rst","AMDGPU/gfx10_vaddr_4.rst","AMDGPU/gfx10_vaddr_5.rst","AMDGPU/gfx10_vcc.rst","AMDGPU/gfx10_vdata.rst","AMDGPU/gfx10_vdata0.rst","AMDGPU/gfx10_vdata0_1.rst","AMDGPU/gfx10_vdata1.rst","AMDGPU/gfx10_vdata1_1.rst","AMDGPU/gfx10_vdata_1.rst","AMDGPU/gfx10_vdata_10.rst","AMDGPU/gfx10_vdata_2.rst","AMDGPU/gfx10_vdata_3.rst","AMDGPU/gfx10_vdata_4.rst","AMDGPU/gfx10_vdata_5.rst","AMDGPU/gfx10_vdata_6.rst","AMDGPU/gfx10_vdata_7.rst","AMDGPU/gfx10_vdata_8.rst","AMDGPU/gfx10_vdata_9.rst","AMDGPU/gfx10_vdst.rst","AMDGPU/gfx10_vdst_1.rst","AMDGPU/gfx10_vdst_10.rst","AMDGPU/gfx10_vdst_11.rst","AMDGPU/gfx10_vdst_12.rst","AMDGPU/gfx10_vdst_13.rst","AMDGPU/gfx10_vdst_2.rst","AMDGPU/gfx10_vdst_3.rst","AMDGPU/gfx10_vdst_4.rst","AMDGPU/gfx10_vdst_5.rst","AMDGPU/gfx10_vdst_6.rst","AMDGPU/gfx10_vdst_7.rst","AMDGPU/gfx10_vdst_8.rst","AMDGPU/gfx10_vdst_9.rst","AMDGPU/gfx10_vsrc.rst","AMDGPU/gfx10_vsrc_1.rst","AMDGPU/gfx10_vsrc_2.rst","AMDGPU/gfx10_vsrc_3.rst","AMDGPU/gfx10_waitcnt.rst","AMDGPU/gfx7_attr.rst","AMDGPU/gfx7_dst.rst","AMDGPU/gfx7_hwreg.rst","AMDGPU/gfx7_imm16.rst","AMDGPU/gfx7_imm16_1.rst","AMDGPU/gfx7_imm16_2.rst","AMDGPU/gfx7_label.rst","AMDGPU/gfx7_m.rst","AMDGPU/gfx7_msg.rst","AMDGPU/gfx7_opt.rst","AMDGPU/gfx7_param.rst","AMDGPU/gfx7_sbase.rst","AMDGPU/gfx7_sbase_1.rst","AMDGPU/gfx7_sdst.rst","AMDGPU/g
fx7_sdst_1.rst","AMDGPU/gfx7_sdst_2.rst","AMDGPU/gfx7_sdst_3.rst","AMDGPU/gfx7_sdst_4.rst","AMDGPU/gfx7_sdst_5.rst","AMDGPU/gfx7_sdst_6.rst","AMDGPU/gfx7_sdst_7.rst","AMDGPU/gfx7_simm32.rst","AMDGPU/gfx7_simm32_1.rst","AMDGPU/gfx7_soffset.rst","AMDGPU/gfx7_soffset_1.rst","AMDGPU/gfx7_src.rst","AMDGPU/gfx7_src_1.rst","AMDGPU/gfx7_src_10.rst","AMDGPU/gfx7_src_2.rst","AMDGPU/gfx7_src_3.rst","AMDGPU/gfx7_src_4.rst","AMDGPU/gfx7_src_5.rst","AMDGPU/gfx7_src_6.rst","AMDGPU/gfx7_src_7.rst","AMDGPU/gfx7_src_8.rst","AMDGPU/gfx7_src_9.rst","AMDGPU/gfx7_srsrc.rst","AMDGPU/gfx7_srsrc_1.rst","AMDGPU/gfx7_ssamp.rst","AMDGPU/gfx7_ssrc.rst","AMDGPU/gfx7_ssrc_1.rst","AMDGPU/gfx7_ssrc_10.rst","AMDGPU/gfx7_ssrc_2.rst","AMDGPU/gfx7_ssrc_3.rst","AMDGPU/gfx7_ssrc_4.rst","AMDGPU/gfx7_ssrc_5.rst","AMDGPU/gfx7_ssrc_6.rst","AMDGPU/gfx7_ssrc_7.rst","AMDGPU/gfx7_ssrc_8.rst","AMDGPU/gfx7_ssrc_9.rst","AMDGPU/gfx7_tgt.rst","AMDGPU/gfx7_type_deviation.rst","AMDGPU/gfx7_vaddr.rst","AMDGPU/gfx7_vaddr_1.rst","AMDGPU/gfx7_vaddr_2.rst","AMDGPU/gfx7_vaddr_3.rst","AMDGPU/gfx7_vcc.rst","AMDGPU/gfx7_vdata.rst","AMDGPU/gfx7_vdata0.rst","AMDGPU/gfx7_vdata0_1.rst","AMDGPU/gfx7_vdata1.rst","AMDGPU/gfx7_vdata1_1.rst","AMDGPU/gfx7_vdata_1.rst","AMDGPU/gfx7_vdata_2.rst","AMDGPU/gfx7_vdata_3.rst","AMDGPU/gfx7_vdata_4.rst","AMDGPU/gfx7_vdata_5.rst","AMDGPU/gfx7_vdata_6.rst","AMDGPU/gfx7_vdata_7.rst","AMDGPU/gfx7_vdata_8.rst","AMDGPU/gfx7_vdata_9.rst","AMDGPU/gfx7_vdst.rst","AMDGPU/gfx7_vdst_1.rst","AMDGPU/gfx7_vdst_10.rst","AMDGPU/gfx7_vdst_11.rst","AMDGPU/gfx7_vdst_12.rst","AMDGPU/gfx7_vdst_2.rst","AMDGPU/gfx7_vdst_3.rst","AMDGPU/gfx7_vdst_4.rst","AMDGPU/gfx7_vdst_5.rst","AMDGPU/gfx7_vdst_6.rst","AMDGPU/gfx7_vdst_7.rst","AMDGPU/gfx7_vdst_8.rst","AMDGPU/gfx7_vdst_9.rst","AMDGPU/gfx7_vsrc.rst","AMDGPU/gfx7_vsrc_1.rst","AMDGPU/gfx7_vsrc_2.rst","AMDGPU/gfx7_vsrc_3.rst","AMDGPU/gfx7_waitcnt.rst","AMDGPU/gfx8_attr.rst","AMDGPU/gfx8_dst.rst","AMDGPU/gfx8_hwreg.rst","AMDGPU/gfx8_imask.rst","AMDGPU/gfx8_imm16.rst","AMDGPU/g
fx8_imm16_1.rst","AMDGPU/gfx8_imm16_2.rst","AMDGPU/gfx8_label.rst","AMDGPU/gfx8_m.rst","AMDGPU/gfx8_m_1.rst","AMDGPU/gfx8_msg.rst","AMDGPU/gfx8_opt.rst","AMDGPU/gfx8_param.rst","AMDGPU/gfx8_probe.rst","AMDGPU/gfx8_sbase.rst","AMDGPU/gfx8_sbase_1.rst","AMDGPU/gfx8_sdata.rst","AMDGPU/gfx8_sdata_1.rst","AMDGPU/gfx8_sdata_2.rst","AMDGPU/gfx8_sdst.rst","AMDGPU/gfx8_sdst_1.rst","AMDGPU/gfx8_sdst_2.rst","AMDGPU/gfx8_sdst_3.rst","AMDGPU/gfx8_sdst_4.rst","AMDGPU/gfx8_sdst_5.rst","AMDGPU/gfx8_sdst_6.rst","AMDGPU/gfx8_sdst_7.rst","AMDGPU/gfx8_simm32.rst","AMDGPU/gfx8_simm32_1.rst","AMDGPU/gfx8_simm32_2.rst","AMDGPU/gfx8_soffset.rst","AMDGPU/gfx8_soffset_1.rst","AMDGPU/gfx8_soffset_2.rst","AMDGPU/gfx8_src.rst","AMDGPU/gfx8_src_1.rst","AMDGPU/gfx8_src_10.rst","AMDGPU/gfx8_src_2.rst","AMDGPU/gfx8_src_3.rst","AMDGPU/gfx8_src_4.rst","AMDGPU/gfx8_src_5.rst","AMDGPU/gfx8_src_6.rst","AMDGPU/gfx8_src_7.rst","AMDGPU/gfx8_src_8.rst","AMDGPU/gfx8_src_9.rst","AMDGPU/gfx8_srsrc.rst","AMDGPU/gfx8_srsrc_1.rst","AMDGPU/gfx8_ssamp.rst","AMDGPU/gfx8_ssrc.rst","AMDGPU/gfx8_ssrc_1.rst","AMDGPU/gfx8_ssrc_2.rst","AMDGPU/gfx8_ssrc_3.rst","AMDGPU/gfx8_ssrc_4.rst","AMDGPU/gfx8_ssrc_5.rst","AMDGPU/gfx8_ssrc_6.rst","AMDGPU/gfx8_ssrc_7.rst","AMDGPU/gfx8_ssrc_8.rst","AMDGPU/gfx8_tgt.rst","AMDGPU/gfx8_type_deviation.rst","AMDGPU/gfx8_vaddr.rst","AMDGPU/gfx8_vaddr_1.rst","AMDGPU/gfx8_vaddr_2.rst","AMDGPU/gfx8_vaddr_3.rst","AMDGPU/gfx8_vcc.rst","AMDGPU/gfx8_vdata.rst","AMDGPU/gfx8_vdata0.rst","AMDGPU/gfx8_vdata0_1.rst","AMDGPU/gfx8_vdata1.rst","AMDGPU/gfx8_vdata1_1.rst","AMDGPU/gfx8_vdata_1.rst","AMDGPU/gfx8_vdata_10.rst","AMDGPU/gfx8_vdata_11.rst","AMDGPU/gfx8_vdata_12.rst","AMDGPU/gfx8_vdata_13.rst","AMDGPU/gfx8_vdata_14.rst","AMDGPU/gfx8_vdata_2.rst","AMDGPU/gfx8_vdata_3.rst","AMDGPU/gfx8_vdata_4.rst","AMDGPU/gfx8_vdata_5.rst","AMDGPU/gfx8_vdata_6.rst","AMDGPU/gfx8_vdata_7.rst","AMDGPU/gfx8_vdata_8.rst","AMDGPU/gfx8_vdata_9.rst","AMDGPU/gfx8_vdst.rst","AMDGPU/gfx8_vdst_1.rst","AMDGPU/gfx8_vdst_10.rst","AMD
GPU/gfx8_vdst_11.rst","AMDGPU/gfx8_vdst_12.rst","AMDGPU/gfx8_vdst_13.rst","AMDGPU/gfx8_vdst_14.rst","AMDGPU/gfx8_vdst_15.rst","AMDGPU/gfx8_vdst_16.rst","AMDGPU/gfx8_vdst_17.rst","AMDGPU/gfx8_vdst_2.rst","AMDGPU/gfx8_vdst_3.rst","AMDGPU/gfx8_vdst_4.rst","AMDGPU/gfx8_vdst_5.rst","AMDGPU/gfx8_vdst_6.rst","AMDGPU/gfx8_vdst_7.rst","AMDGPU/gfx8_vdst_8.rst","AMDGPU/gfx8_vdst_9.rst","AMDGPU/gfx8_vsrc.rst","AMDGPU/gfx8_vsrc_1.rst","AMDGPU/gfx8_vsrc_2.rst","AMDGPU/gfx8_vsrc_3.rst","AMDGPU/gfx8_waitcnt.rst","AMDGPU/gfx900_fx_operand.rst","AMDGPU/gfx900_m.rst","AMDGPU/gfx900_src.rst","AMDGPU/gfx900_src_1.rst","AMDGPU/gfx900_vdst.rst","AMDGPU/gfx904_fx_operand.rst","AMDGPU/gfx904_m.rst","AMDGPU/gfx904_src.rst","AMDGPU/gfx904_src_1.rst","AMDGPU/gfx904_vdst.rst","AMDGPU/gfx906_fx_operand.rst","AMDGPU/gfx906_m.rst","AMDGPU/gfx906_m_1.rst","AMDGPU/gfx906_src.rst","AMDGPU/gfx906_src_1.rst","AMDGPU/gfx906_src_2.rst","AMDGPU/gfx906_src_3.rst","AMDGPU/gfx906_src_4.rst","AMDGPU/gfx906_type_deviation.rst","AMDGPU/gfx906_vdst.rst","AMDGPU/gfx906_vsrc.rst","AMDGPU/gfx908_dst.rst","AMDGPU/gfx908_fx_operand.rst","AMDGPU/gfx908_m.rst","AMDGPU/gfx908_m_1.rst","AMDGPU/gfx908_opt.rst","AMDGPU/gfx908_saddr.rst","AMDGPU/gfx908_soffset.rst","AMDGPU/gfx908_src.rst","AMDGPU/gfx908_src_1.rst","AMDGPU/gfx908_src_2.rst","AMDGPU/gfx908_src_3.rst","AMDGPU/gfx908_src_4.rst","AMDGPU/gfx908_src_5.rst","AMDGPU/gfx908_srsrc.rst","AMDGPU/gfx908_type_deviation.rst","AMDGPU/gfx908_vaddr.rst","AMDGPU/gfx908_vaddr_1.rst","AMDGPU/gfx908_vdata.rst","AMDGPU/gfx908_vdata_1.rst","AMDGPU/gfx908_vdst.rst","AMDGPU/gfx908_vdst_1.rst","AMDGPU/gfx908_vdst_2.rst","AMDGPU/gfx908_vdst_3.rst","AMDGPU/gfx908_vdst_4.rst","AMDGPU/gfx908_vdst_5.rst","AMDGPU/gfx908_vsrc.rst","AMDGPU/gfx908_vsrc_1.rst","AMDGPU/gfx908_vsrc_2.rst","AMDGPU/gfx908_vsrc_3.rst","AMDGPU/gfx908_vsrc_4.rst","AMDGPU/gfx908_vsrc_5.rst","AMDGPU/gfx908_vsrc_6.rst","AMDGPU/gfx90a_dst.rst","AMDGPU/gfx90a_fx_operand.rst","AMDGPU/gfx90a_hwreg.rst","AMDGPU/gfx90a_imask.r
st","AMDGPU/gfx90a_imm16.rst","AMDGPU/gfx90a_imm16_1.rst","AMDGPU/gfx90a_imm16_2.rst","AMDGPU/gfx90a_label.rst","AMDGPU/gfx90a_m.rst","AMDGPU/gfx90a_m_1.rst","AMDGPU/gfx90a_msg.rst","AMDGPU/gfx90a_opt.rst","AMDGPU/gfx90a_probe.rst","AMDGPU/gfx90a_saddr.rst","AMDGPU/gfx90a_saddr_1.rst","AMDGPU/gfx90a_sbase.rst","AMDGPU/gfx90a_sbase_1.rst","AMDGPU/gfx90a_sbase_2.rst","AMDGPU/gfx90a_sdata.rst","AMDGPU/gfx90a_sdata_1.rst","AMDGPU/gfx90a_sdata_2.rst","AMDGPU/gfx90a_sdata_3.rst","AMDGPU/gfx90a_sdata_4.rst","AMDGPU/gfx90a_sdata_5.rst","AMDGPU/gfx90a_sdst.rst","AMDGPU/gfx90a_sdst_1.rst","AMDGPU/gfx90a_sdst_2.rst","AMDGPU/gfx90a_sdst_3.rst","AMDGPU/gfx90a_sdst_4.rst","AMDGPU/gfx90a_sdst_5.rst","AMDGPU/gfx90a_sdst_6.rst","AMDGPU/gfx90a_sdst_7.rst","AMDGPU/gfx90a_simm32.rst","AMDGPU/gfx90a_simm32_1.rst","AMDGPU/gfx90a_simm32_2.rst","AMDGPU/gfx90a_soffset.rst","AMDGPU/gfx90a_soffset_1.rst","AMDGPU/gfx90a_soffset_2.rst","AMDGPU/gfx90a_src.rst","AMDGPU/gfx90a_src_1.rst","AMDGPU/gfx90a_src_10.rst","AMDGPU/gfx90a_src_11.rst","AMDGPU/gfx90a_src_2.rst","AMDGPU/gfx90a_src_3.rst","AMDGPU/gfx90a_src_4.rst","AMDGPU/gfx90a_src_5.rst","AMDGPU/gfx90a_src_6.rst","AMDGPU/gfx90a_src_7.rst","AMDGPU/gfx90a_src_8.rst","AMDGPU/gfx90a_src_9.rst","AMDGPU/gfx90a_srsrc.rst","AMDGPU/gfx90a_srsrc_1.rst","AMDGPU/gfx90a_ssamp.rst","AMDGPU/gfx90a_ssrc.rst","AMDGPU/gfx90a_ssrc_1.rst","AMDGPU/gfx90a_ssrc_2.rst","AMDGPU/gfx90a_ssrc_3.rst","AMDGPU/gfx90a_ssrc_4.rst","AMDGPU/gfx90a_ssrc_5.rst","AMDGPU/gfx90a_ssrc_6.rst","AMDGPU/gfx90a_ssrc_7.rst","AMDGPU/gfx90a_ssrc_8.rst","AMDGPU/gfx90a_type_deviation.rst","AMDGPU/gfx90a_vaddr.rst","AMDGPU/gfx90a_vaddr_1.rst","AMDGPU/gfx90a_vaddr_2.rst","AMDGPU/gfx90a_vaddr_3.rst","AMDGPU/gfx90a_vaddr_4.rst","AMDGPU/gfx90a_vaddr_5.rst","AMDGPU/gfx90a_vcc.rst","AMDGPU/gfx90a_vdata.rst","AMDGPU/gfx90a_vdata0.rst","AMDGPU/gfx90a_vdata0_1.rst","AMDGPU/gfx90a_vdata1.rst","AMDGPU/gfx90a_vdata1_1.rst","AMDGPU/gfx90a_vdata_1.rst","AMDGPU/gfx90a_vdata_10.rst","AMDGPU/gfx90a_vdata_2.rst
","AMDGPU/gfx90a_vdata_3.rst","AMDGPU/gfx90a_vdata_4.rst","AMDGPU/gfx90a_vdata_5.rst","AMDGPU/gfx90a_vdata_6.rst","AMDGPU/gfx90a_vdata_7.rst","AMDGPU/gfx90a_vdata_8.rst","AMDGPU/gfx90a_vdata_9.rst","AMDGPU/gfx90a_vdst.rst","AMDGPU/gfx90a_vdst_1.rst","AMDGPU/gfx90a_vdst_10.rst","AMDGPU/gfx90a_vdst_11.rst","AMDGPU/gfx90a_vdst_12.rst","AMDGPU/gfx90a_vdst_13.rst","AMDGPU/gfx90a_vdst_14.rst","AMDGPU/gfx90a_vdst_15.rst","AMDGPU/gfx90a_vdst_16.rst","AMDGPU/gfx90a_vdst_17.rst","AMDGPU/gfx90a_vdst_18.rst","AMDGPU/gfx90a_vdst_19.rst","AMDGPU/gfx90a_vdst_2.rst","AMDGPU/gfx90a_vdst_3.rst","AMDGPU/gfx90a_vdst_4.rst","AMDGPU/gfx90a_vdst_5.rst","AMDGPU/gfx90a_vdst_6.rst","AMDGPU/gfx90a_vdst_7.rst","AMDGPU/gfx90a_vdst_8.rst","AMDGPU/gfx90a_vdst_9.rst","AMDGPU/gfx90a_vsrc.rst","AMDGPU/gfx90a_vsrc_1.rst","AMDGPU/gfx90a_vsrc_2.rst","AMDGPU/gfx90a_vsrc_3.rst","AMDGPU/gfx90a_vsrc_4.rst","AMDGPU/gfx90a_vsrc_5.rst","AMDGPU/gfx90a_waitcnt.rst","AMDGPU/gfx9_attr.rst","AMDGPU/gfx9_dst.rst","AMDGPU/gfx9_hwreg.rst","AMDGPU/gfx9_imask.rst","AMDGPU/gfx9_imm16.rst","AMDGPU/gfx9_imm16_1.rst","AMDGPU/gfx9_imm16_2.rst","AMDGPU/gfx9_label.rst","AMDGPU/gfx9_m.rst","AMDGPU/gfx9_m_1.rst","AMDGPU/gfx9_msg.rst","AMDGPU/gfx9_opt.rst","AMDGPU/gfx9_param.rst","AMDGPU/gfx9_probe.rst","AMDGPU/gfx9_saddr.rst","AMDGPU/gfx9_saddr_1.rst","AMDGPU/gfx9_sbase.rst","AMDGPU/gfx9_sbase_1.rst","AMDGPU/gfx9_sbase_2.rst","AMDGPU/gfx9_sdata.rst","AMDGPU/gfx9_sdata_1.rst","AMDGPU/gfx9_sdata_2.rst","AMDGPU/gfx9_sdata_3.rst","AMDGPU/gfx9_sdata_4.rst","AMDGPU/gfx9_sdata_5.rst","AMDGPU/gfx9_sdst.rst","AMDGPU/gfx9_sdst_1.rst","AMDGPU/gfx9_sdst_2.rst","AMDGPU/gfx9_sdst_3.rst","AMDGPU/gfx9_sdst_4.rst","AMDGPU/gfx9_sdst_5.rst","AMDGPU/gfx9_sdst_6.rst","AMDGPU/gfx9_sdst_7.rst","AMDGPU/gfx9_simm32.rst","AMDGPU/gfx9_simm32_1.rst","AMDGPU/gfx9_simm32_2.rst","AMDGPU/gfx9_soffset.rst","AMDGPU/gfx9_soffset_1.rst","AMDGPU/gfx9_soffset_2.rst","AMDGPU/gfx9_src.rst","AMDGPU/gfx9_src_1.rst","AMDGPU/gfx9_src_10.rst","AMDGPU/gfx9_src_2.rst","AMDG
PU/gfx9_src_3.rst","AMDGPU/gfx9_src_4.rst","AMDGPU/gfx9_src_5.rst","AMDGPU/gfx9_src_6.rst","AMDGPU/gfx9_src_7.rst","AMDGPU/gfx9_src_8.rst","AMDGPU/gfx9_src_9.rst","AMDGPU/gfx9_srsrc.rst","AMDGPU/gfx9_srsrc_1.rst","AMDGPU/gfx9_ssamp.rst","AMDGPU/gfx9_ssrc.rst","AMDGPU/gfx9_ssrc_1.rst","AMDGPU/gfx9_ssrc_2.rst","AMDGPU/gfx9_ssrc_3.rst","AMDGPU/gfx9_ssrc_4.rst","AMDGPU/gfx9_ssrc_5.rst","AMDGPU/gfx9_ssrc_6.rst","AMDGPU/gfx9_ssrc_7.rst","AMDGPU/gfx9_ssrc_8.rst","AMDGPU/gfx9_tgt.rst","AMDGPU/gfx9_type_deviation.rst","AMDGPU/gfx9_vaddr.rst","AMDGPU/gfx9_vaddr_1.rst","AMDGPU/gfx9_vaddr_2.rst","AMDGPU/gfx9_vaddr_3.rst","AMDGPU/gfx9_vaddr_4.rst","AMDGPU/gfx9_vaddr_5.rst","AMDGPU/gfx9_vcc.rst","AMDGPU/gfx9_vdata.rst","AMDGPU/gfx9_vdata0.rst","AMDGPU/gfx9_vdata0_1.rst","AMDGPU/gfx9_vdata1.rst","AMDGPU/gfx9_vdata1_1.rst","AMDGPU/gfx9_vdata_1.rst","AMDGPU/gfx9_vdata_10.rst","AMDGPU/gfx9_vdata_2.rst","AMDGPU/gfx9_vdata_3.rst","AMDGPU/gfx9_vdata_4.rst","AMDGPU/gfx9_vdata_5.rst","AMDGPU/gfx9_vdata_6.rst","AMDGPU/gfx9_vdata_7.rst","AMDGPU/gfx9_vdata_8.rst","AMDGPU/gfx9_vdata_9.rst","AMDGPU/gfx9_vdst.rst","AMDGPU/gfx9_vdst_1.rst","AMDGPU/gfx9_vdst_10.rst","AMDGPU/gfx9_vdst_11.rst","AMDGPU/gfx9_vdst_12.rst","AMDGPU/gfx9_vdst_13.rst","AMDGPU/gfx9_vdst_2.rst","AMDGPU/gfx9_vdst_3.rst","AMDGPU/gfx9_vdst_4.rst","AMDGPU/gfx9_vdst_5.rst","AMDGPU/gfx9_vdst_6.rst","AMDGPU/gfx9_vdst_7.rst","AMDGPU/gfx9_vdst_8.rst","AMDGPU/gfx9_vdst_9.rst","AMDGPU/gfx9_vsrc.rst","AMDGPU/gfx9_vsrc_1.rst","AMDGPU/gfx9_vsrc_2.rst","AMDGPU/gfx9_vsrc_3.rst","AMDGPU/gfx9_waitcnt.rst","AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst","AMDGPUInstructionNotation.rst","AMDGPUInstructionSyntax.rst","AMDGPUModifierSyntax.rst","AMDGPUOperandSyntax.rst","AMDGPUUsage.rst","AddingConstrainedIntrinsics.rst","AdvancedBuilds.rst","AliasAnalysis.rst","Atomics.rst","Benchmarking.rst","BigEndianNEON.rst","BitCodeFormat.rst","BlockFrequencyTerminology.rst","BranchWeightMetadata.rst","BugLifeCycle.rst","Bugpoint.rst","BugpointRedesign
.md","BuildingADistribution.rst","CFIVerify.rst","CMake.rst","CMakePrimer.rst","CodeGenerator.rst","CodeOfConduct.rst","CodeReview.rst","CodingStandards.rst","CommandGuide/FileCheck.rst","CommandGuide/bugpoint.rst","CommandGuide/clang-tblgen.rst","CommandGuide/dsymutil.rst","CommandGuide/index.rst","CommandGuide/lit.rst","CommandGuide/llc.rst","CommandGuide/lldb-tblgen.rst","CommandGuide/lli.rst","CommandGuide/llvm-addr2line.rst","CommandGuide/llvm-ar.rst","CommandGuide/llvm-as.rst","CommandGuide/llvm-bcanalyzer.rst","CommandGuide/llvm-config.rst","CommandGuide/llvm-cov.rst","CommandGuide/llvm-cxxfilt.rst","CommandGuide/llvm-cxxmap.rst","CommandGuide/llvm-diff.rst","CommandGuide/llvm-dis.rst","CommandGuide/llvm-dwarfdump.rst","CommandGuide/llvm-exegesis.rst","CommandGuide/llvm-extract.rst","CommandGuide/llvm-install-name-tool.rst","CommandGuide/llvm-lib.rst","CommandGuide/llvm-libtool-darwin.rst","CommandGuide/llvm-link.rst","CommandGuide/llvm-lipo.rst","CommandGuide/llvm-locstats.rst","CommandGuide/llvm-mca.rst","CommandGuide/llvm-nm.rst","CommandGuide/llvm-objcopy.rst","CommandGuide/llvm-objdump.rst","CommandGuide/llvm-otool.rst","CommandGuide/llvm-pdbutil.rst","CommandGuide/llvm-profdata.rst","CommandGuide/llvm-profgen.rst","CommandGuide/llvm-ranlib.rst","CommandGuide/llvm-readelf.rst","CommandGuide/llvm-readobj.rst","CommandGuide/llvm-size.rst","CommandGuide/llvm-stress.rst","CommandGuide/llvm-strings.rst","CommandGuide/llvm-strip.rst","CommandGuide/llvm-symbolizer.rst","CommandGuide/llvm-tblgen.rst","CommandGuide/mlir-tblgen.rst","CommandGuide/opt.rst","CommandGuide/tblgen.rst","CommandLine.rst","CompileCudaWithLLVM.rst","CompilerWriterInfo.rst","Contributing.rst","Coroutines.rst","CoverageMappingFormat.rst","DebuggingJITedCode.rst","DependenceGraphs/index.rst","DeveloperPolicy.rst","Docker.rst","ExceptionHandling.rst","ExtendingLLVM.rst","Extensions.rst","FAQ.rst","FaultMaps.rst","Frontend/PerformanceTips.rst","FuzzingLLVM.rst","GarbageCollection.rst","GetElem
entPtr.rst","GettingInvolved.rst","GettingStarted.rst","GettingStartedTutorials.rst","GettingStartedVS.rst","GitBisecting.rst","GlobalISel/GMIR.rst","GlobalISel/GenericOpcode.rst","GlobalISel/IRTranslator.rst","GlobalISel/InstructionSelect.rst","GlobalISel/KnownBits.rst","GlobalISel/Legalizer.rst","GlobalISel/Pipeline.rst","GlobalISel/Porting.rst","GlobalISel/RegBankSelect.rst","GlobalISel/Resources.rst","GlobalISel/index.rst","GoldPlugin.rst","GwpAsan.rst","HowToAddABuilder.rst","HowToBuildOnARM.rst","HowToBuildWindowsItaniumPrograms.rst","HowToBuildWithPGO.rst","HowToCrossCompileBuiltinsOnArm.rst","HowToCrossCompileLLVM.rst","HowToReleaseLLVM.rst","HowToSetUpLLVMStyleRTTI.rst","HowToSubmitABug.rst","HowToUpdateDebugInfo.rst","HowToUseAttributes.rst","HowToUseInstrMappings.rst","InAlloca.rst","JITLink.rst","LangRef.rst","Lexicon.rst","LibFuzzer.rst","LinkTimeOptimization.rst","LoopTerminology.rst","MCJITDesignAndImplementation.rst","MIRLangRef.rst","MarkdownQuickstartTemplate.md","MarkedUpDisassembly.rst","MeetupGuidelines.rst","MemTagSanitizer.rst","MemorySSA.rst","MergeFunctions.rst","MyFirstTypoFix.rst","NVPTXUsage.rst","NewPassManager.rst","ORCv2.rst","OpaquePointers.rst","OptBisect.rst","PDB/CodeViewSymbols.rst","PDB/CodeViewTypes.rst","PDB/DbiStream.rst","PDB/GlobalStream.rst","PDB/HashTable.rst","PDB/ModiStream.rst","PDB/MsfFile.rst","PDB/PdbStream.rst","PDB/PublicStream.rst","PDB/TpiStream.rst","PDB/index.rst","Packaging.rst","Passes.rst","Phabricator.rst","ProgrammersManual.rst","Projects.rst","Proposals/GitHubMove.rst","Proposals/LLVMLibC.rst","Proposals/TestSuite.rst","Proposals/VariableNames.rst","Proposals/VectorPredication.rst","Proposals/VectorizationPlan.rst","Reference.rst","ReleaseNotes.rst","ReleaseProcess.rst","Remarks.rst","ReportingGuide.rst","ScudoHardenedAllocator.rst","Security.rst","SegmentedStacks.rst","SourceLevelDebugging.rst","SpeculativeLoadHardening.md","SphinxQuickstartTemplate.rst","StackMaps.rst","StackSafetyAnalysis.rst","Statepo
ints.rst","SupportLibrary.rst","SupportPolicy.rst","SystemLibrary.rst","TableGen/BackEnds.rst","TableGen/BackGuide.rst","TableGen/ProgRef.rst","TableGen/index.rst","TableGenFundamentals.rst","TestSuiteGuide.md","TestSuiteMakefileGuide.rst","TestingGuide.rst","TransformMetadata.rst","TypeMetadata.rst","UserGuides.rst","Vectorizers.rst","WritingAnLLVMBackend.rst","WritingAnLLVMNewPMPass.rst","WritingAnLLVMPass.rst","XRay.rst","XRayExample.rst","XRayFDRFormat.rst","YamlIO.rst","index.rst","tutorial/BuildingAJIT1.rst","tutorial/BuildingAJIT2.rst","tutorial/BuildingAJIT3.rst","tutorial/BuildingAJIT4.rst","tutorial/LangImpl01.rst","tutorial/LangImpl02.rst","tutorial/LangImpl03.rst","tutorial/LangImpl04.rst","tutorial/LangImpl05.rst","tutorial/LangImpl06.rst","tutorial/LangImpl07.rst","tutorial/LangImpl08.rst","tutorial/LangImpl09.rst","tutorial/LangImpl10.rst","tutorial/MyFirstLanguageFrontend/LangImpl01.rst","tutorial/MyFirstLanguageFrontend/LangImpl02.rst","tutorial/MyFirstLanguageFrontend/LangImpl03.rst","tutorial/MyFirstLanguageFrontend/LangImpl04.rst","tutorial/MyFirstLanguageFrontend/LangImpl05.rst","tutorial/MyFirstLanguageFrontend/LangImpl06.rst","tutorial/MyFirstLanguageFrontend/LangImpl07.rst","tutorial/MyFirstLanguageFrontend/LangImpl08.rst","tutorial/MyFirstLanguageFrontend/LangImpl09.rst","tutorial/MyFirstLanguageFrontend/LangImpl10.rst","tutorial/MyFirstLanguageFrontend/index.rst","tutorial/index.rst","yaml2obj.rst"],objects:{"":{"-lto-pass-remarks-filter":[754,0,1,"cmdoption-lto-pass-remarks-filter"],"-lto-pass-remarks-format":[754,0,1,"cmdoption-lto-pass-remarks-format"],"-lto-pass-remarks-hotness-threshold":[754,0,1,"cmdoption-lto-pass-remarks-hotness-threshold"],"-lto-pass-remarks-output":[754,0,1,"cmdoption-lto-pass-remarks-output"],"-lto-pass-remarks-with-hotness":[754,0,1,"cmdoption-lto-pass-remarks-with-hotness"],"-opt-remarks-filename":[754,0,1,"cmdoption-opt-remarks-filename"],"-opt-remarks-filter":[754,0,1,"cmdoption-opt-remarks-filter"],"-opt-rem
arks-format":[754,0,1,"cmdoption-opt-remarks-format"],"-opt-remarks-with-hotness":[754,0,1,"cmdoption-opt-remarks-with-hotness"],"-pass-remarks":[754,0,1,"cmdoption-pass-remarks"],"-pass-remarks-analysis":[754,0,1,"cmdoption-pass-remarks-analysis"],"-pass-remarks-filter":[754,0,1,"cmdoption-pass-remarks-filter"],"-pass-remarks-format":[754,0,1,"cmdoption-pass-remarks-format"],"-pass-remarks-hotness-threshold":[754,0,1,"cmdoption-pass-remarks-hotness-threshold"],"-pass-remarks-missed":[754,0,1,"cmdoption-pass-remarks-missed"],"-pass-remarks-output":[754,0,1,"cmdoption-pass-remarks-output"],"-pass-remarks-with-hotness":[754,0,1,"cmdoption-pass-remarks-with-hotness"]},"llvm-ar":{"--format":[621,0,1,"cmdoption-llvm-ar-format"],"--help":[621,0,1,"cmdoption-llvm-ar-h"],"--rsp-quoting":[621,0,1,"cmdoption-llvm-ar-rsp-quoting"],"--version":[621,0,1,"cmdoption-llvm-ar-version"],"-M":[621,0,1,"cmdoption-llvm-ar-M"],"-h":[621,0,1,"cmdoption-llvm-ar-h"],"@<FILE>":[621,0,1,"cmdoption-llvm-ar-arg-FILE"],"``posix``":[621,0,1,"cmdoption-llvm-ar-arg-posix"],"default":[621,0,1,"cmdoption-llvm-ar-arg-default"],ADDLIB:[621,0,1,"cmdoption-llvm-ar-arg-ADDLIB"],ADDMOD:[621,0,1,"cmdoption-llvm-ar-arg-ADDMOD"],CREATE:[621,0,1,"cmdoption-llvm-ar-arg-CREATE"],CREATETHIN:[621,0,1,"cmdoption-llvm-ar-arg-CREATETHIN"],D:[621,0,1,"cmdoption-llvm-ar-arg-D"],DELETE:[621,0,1,"cmdoption-llvm-ar-arg-DELETE"],END:[621,0,1,"cmdoption-llvm-ar-arg-END"],L:[621,0,1,"cmdoption-llvm-ar-arg-L"],N:[621,0,1,"cmdoption-llvm-ar-arg-N"],O:[621,0,1,"cmdoption-llvm-ar-arg-O"],P:[621,0,1,"cmdoption-llvm-ar-arg-P"],S:[621,0,1,"cmdoption-llvm-ar-arg-S"],SAVE:[621,0,1,"cmdoption-llvm-ar-arg-SAVE"],T:[621,0,1,"cmdoption-llvm-ar-arg-T"],This:[621,0,1,"cmdoption-llvm-ar-arg-This"],U:[621,0,1,"cmdoption-llvm-ar-arg-U"],V:[621,0,1,"cmdoption-llvm-ar-arg-V"],a:[621,0,1,"cmdoption-llvm-ar-arg-a"],b:[621,0,1,"cmdoption-llvm-ar-arg-b"],c:[621,0,1,"cmdoption-llvm-ar-arg-c"],d:[621,0,1,"cmdoption-llvm-ar-arg-d"],either:[621,0,1,"cm
doption-llvm-ar-arg-This"],i:[621,0,1,"cmdoption-llvm-ar-arg-i"],m:[621,0,1,"cmdoption-llvm-ar-arg-m"],o:[621,0,1,"cmdoption-llvm-ar-arg-o"],otherwise:[621,0,1,"cmdoption-llvm-ar-arg-posix"],p:[621,0,1,"cmdoption-llvm-ar-arg-p"],q:[621,0,1,"cmdoption-llvm-ar-arg-q"],r:[621,0,1,"cmdoption-llvm-ar-arg-r"],s:[621,0,1,"cmdoption-llvm-ar-arg-s"],u:[621,0,1,"cmdoption-llvm-ar-arg-u"],v:[621,0,1,"cmdoption-llvm-ar-arg-0"],x:[621,0,1,"cmdoption-llvm-ar-arg-x"]},"llvm-bcanalyzer":{"-dump":[623,0,1,"cmdoption-llvm-bcanalyzer-dump"],"-help":[623,0,1,"cmdoption-llvm-bcanalyzer-help"],"-nodetails":[623,0,1,"cmdoption-llvm-bcanalyzer-nodetails"],"-verify":[623,0,1,"cmdoption-llvm-bcanalyzer-verify"]},"llvm-cov-export":{"-arch":[625,0,1,"cmdoption-llvm-cov-export-arch"],"-compilation-dir":[625,0,1,"cmdoption-llvm-cov-export-compilation-dir"],"-format":[625,0,1,"cmdoption-llvm-cov-export-format"],"-ignore-filename-regex":[625,0,1,"cmdoption-llvm-cov-export-ignore-filename-regex"],"-j":[625,0,1,"cmdoption-llvm-cov-export-num-threads"],"-num-threads":[625,0,1,"cmdoption-llvm-cov-export-num-threads"],"-skip-expansions":[625,0,1,"cmdoption-llvm-cov-export-skip-expansions"],"-skip-functions":[625,0,1,"cmdoption-llvm-cov-export-skip-functions"],"-summary-only":[625,0,1,"cmdoption-llvm-cov-export-summary-only"]},"llvm-cov-gcov":{"--all-blocks":[625,0,1,"cmdoption-llvm-cov-gcov-a"],"--branch-counts":[625,0,1,"cmdoption-llvm-cov-gcov-c"],"--branch-probabilities":[625,0,1,"cmdoption-llvm-cov-gcov-b"],"--demangled-names":[625,0,1,"cmdoption-llvm-cov-gcov-m"],"--function-summaries":[625,0,1,"cmdoption-llvm-cov-gcov-f"],"--hash-filenames":[625,0,1,"cmdoption-llvm-cov-gcov-x"],"--help":[625,0,1,"cmdoption-llvm-cov-gcov-help"],"--long-file-names":[625,0,1,"cmdoption-llvm-cov-gcov-l"],"--no-output":[625,0,1,"cmdoption-llvm-cov-gcov-n"],"--object-directory":[625,0,1,"cmdoption-llvm-cov-gcov-o"],"--object-file":[625,0,1,"cmdoption-llvm-cov-gcov-o"],"--preserve-paths":[625,0,1,"cmdoption-llvm-cov-gco
v-p"],"--stdout":[625,0,1,"cmdoption-llvm-cov-gcov-t"],"--unconditional-branches":[625,0,1,"cmdoption-llvm-cov-gcov-u"],"-a":[625,0,1,"cmdoption-llvm-cov-gcov-a"],"-b":[625,0,1,"cmdoption-llvm-cov-gcov-b"],"-c":[625,0,1,"cmdoption-llvm-cov-gcov-c"],"-f":[625,0,1,"cmdoption-llvm-cov-gcov-f"],"-l":[625,0,1,"cmdoption-llvm-cov-gcov-l"],"-m":[625,0,1,"cmdoption-llvm-cov-gcov-m"],"-n":[625,0,1,"cmdoption-llvm-cov-gcov-n"],"-o":[625,0,1,"cmdoption-llvm-cov-gcov-o"],"-p":[625,0,1,"cmdoption-llvm-cov-gcov-p"],"-r":[625,0,1,"cmdoption-llvm-cov-gcov-r"],"-s":[625,0,1,"cmdoption-llvm-cov-gcov-s"],"-t":[625,0,1,"cmdoption-llvm-cov-gcov-t"],"-u":[625,0,1,"cmdoption-llvm-cov-gcov-u"],"-version":[625,0,1,"cmdoption-llvm-cov-gcov-version"],"-x":[625,0,1,"cmdoption-llvm-cov-gcov-x"]},"llvm-cov-report":{"-arch":[625,0,1,"cmdoption-llvm-cov-report-arch"],"-compilation-dir":[625,0,1,"cmdoption-llvm-cov-report-compilation-dir"],"-ignore-filename-regex":[625,0,1,"cmdoption-llvm-cov-report-ignore-filename-regex"],"-show-branch-summary":[625,0,1,"cmdoption-llvm-cov-report-show-branch-summary"],"-show-functions":[625,0,1,"cmdoption-llvm-cov-report-show-functions"],"-show-instantiation-summary":[625,0,1,"cmdoption-llvm-cov-report-show-instantiation-summary"],"-show-region-summary":[625,0,1,"cmdoption-llvm-cov-report-show-region-summary"],"-use-color":[625,0,1,"cmdoption-llvm-cov-report-use-color"]},"llvm-cov-show":{"-Xdemangler":[625,0,1,"cmdoption-llvm-cov-show-Xdemangler"],"-arch":[625,0,1,"cmdoption-llvm-cov-show-arch"],"-compilation-dir":[625,0,1,"cmdoption-llvm-cov-show-compilation-dir"],"-format":[625,0,1,"cmdoption-llvm-cov-show-format"],"-ignore-filename-regex":[625,0,1,"cmdoption-llvm-cov-show-ignore-filename-regex"],"-j":[625,0,1,"cmdoption-llvm-cov-show-num-threads"],"-line-coverage-gt":[625,0,1,"cmdoption-llvm-cov-show-line-coverage-gt"],"-line-coverage-lt":[625,0,1,"cmdoption-llvm-cov-show-line-coverage-lt"],"-name":[625,0,1,"cmdoption-llvm-cov-show-name"],"-name-regex":[625,0,1
,"cmdoption-llvm-cov-show-name-regex"],"-name-whitelist":[625,0,1,"cmdoption-llvm-cov-show-name-whitelist"],"-num-threads":[625,0,1,"cmdoption-llvm-cov-show-num-threads"],"-output-dir":[625,0,1,"cmdoption-llvm-cov-show-output-dir"],"-path-equivalence":[625,0,1,"cmdoption-llvm-cov-show-path-equivalence"],"-region-coverage-gt":[625,0,1,"cmdoption-llvm-cov-show-region-coverage-gt"],"-region-coverage-lt":[625,0,1,"cmdoption-llvm-cov-show-region-coverage-lt"],"-show-branches":[625,0,1,"cmdoption-llvm-cov-show-show-branches"],"-show-expansions":[625,0,1,"cmdoption-llvm-cov-show-show-expansions"],"-show-instantiations":[625,0,1,"cmdoption-llvm-cov-show-show-instantiations"],"-show-line-counts":[625,0,1,"cmdoption-llvm-cov-show-show-line-counts"],"-show-line-counts-or-regions":[625,0,1,"cmdoption-llvm-cov-show-show-line-counts-or-regions"],"-show-regions":[625,0,1,"cmdoption-llvm-cov-show-show-regions"],"-tab-size":[625,0,1,"cmdoption-llvm-cov-show-tab-size"],"-use-color":[625,0,1,"cmdoption-llvm-cov-show-use-color"]},"llvm-cxxfilt":{"--format":[626,0,1,"cmdoption-llvm-cxxfilt-format"],"--help":[626,0,1,"cmdoption-llvm-cxxfilt-help"],"--no-strip-underscore":[626,0,1,"cmdoption-llvm-cxxfilt-no-strip-underscore"],"--strip-underscore":[626,0,1,"cmdoption-llvm-cxxfilt-strip-underscore"],"--types":[626,0,1,"cmdoption-llvm-cxxfilt-types"],"--version":[626,0,1,"cmdoption-llvm-cxxfilt-version"],"-_":[626,0,1,"cmdoption-llvm-cxxfilt-strip-underscore"],"-h":[626,0,1,"cmdoption-llvm-cxxfilt-help"],"-n":[626,0,1,"cmdoption-llvm-cxxfilt-no-strip-underscore"],"-s":[626,0,1,"cmdoption-llvm-cxxfilt-format"],"-t":[626,0,1,"cmdoption-llvm-cxxfilt-types"],"@<FILE>":[626,0,1,"cmdoption-llvm-cxxfilt-arg-FILE"]},"llvm-cxxmap":{"-Wambiguous":[627,0,1,"cmdoption-llvm-cxxmap-Wambiguous"],"-Wincomplete":[627,0,1,"cmdoption-llvm-cxxmap-Wincomplete"],"-o":[627,0,1,"cmdoption-llvm-cxxmap-output"],"-output":[627,0,1,"cmdoption-llvm-cxxmap-output"],"-r":[627,0,1,"cmdoption-llvm-cxxmap-remapping-file"],"-
remapping-file":[627,0,1,"cmdoption-llvm-cxxmap-remapping-file"]},"llvm-dwarfdump":{"--all":[630,0,1,"cmdoption-llvm-dwarfdump-a"],"--apple-names":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--apple-namespaces":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--apple-objc":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--apple-types":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--arch":[630,0,1,"cmdoption-llvm-dwarfdump-arch"],"--color":[630,0,1,"cmdoption-llvm-dwarfdump-color"],"--debug-abbrev":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-addr":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-aranges":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-cu-index":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-frame":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-gnu-pubnames":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-gnu-pubtypes":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-info":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-line":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-line-str":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-loc":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-loclists":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-macro":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-names":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-pubnames":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-pubtypes":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-ranges":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-rnglists":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-str":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-str-offsets":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-tu-index":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--debug-types":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--diff
":[630,0,1,"cmdoption-llvm-dwarfdump-diff"],"--eh-frame":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--find":[630,0,1,"cmdoption-llvm-dwarfdump-f"],"--gdb-index":[630,0,1,"cmdoption-llvm-dwarfdump-debug-abbrev"],"--help":[630,0,1,"cmdoption-llvm-dwarfdump-h"],"--help-list":[630,0,1,"cmdoption-llvm-dwarfdump-help-list"],"--ignore-case":[630,0,1,"cmdoption-llvm-dwarfdump-i"],"--lookup":[630,0,1,"cmdoption-llvm-dwarfdump-lookup"],"--name":[630,0,1,"cmdoption-llvm-dwarfdump-n"],"--parent-recurse-depth":[630,0,1,"cmdoption-llvm-dwarfdump-parent-recurse-depth"],"--quiet":[630,0,1,"cmdoption-llvm-dwarfdump-quiet"],"--recurse-depth":[630,0,1,"cmdoption-llvm-dwarfdump-r"],"--regex":[630,0,1,"cmdoption-llvm-dwarfdump-x"],"--show-children":[630,0,1,"cmdoption-llvm-dwarfdump-c"],"--show-form":[630,0,1,"cmdoption-llvm-dwarfdump-F"],"--show-parents":[630,0,1,"cmdoption-llvm-dwarfdump-p"],"--show-section-sizes":[630,0,1,"cmdoption-llvm-dwarfdump-show-section-sizes"],"--statistics":[630,0,1,"cmdoption-llvm-dwarfdump-statistics"],"--summarize-types":[630,0,1,"cmdoption-llvm-dwarfdump-summarize-types"],"--uuid":[630,0,1,"cmdoption-llvm-dwarfdump-u"],"--verbose":[630,0,1,"cmdoption-llvm-dwarfdump-v"],"--verify":[630,0,1,"cmdoption-llvm-dwarfdump-verify"],"--version":[630,0,1,"cmdoption-llvm-dwarfdump-version"],"-F":[630,0,1,"cmdoption-llvm-dwarfdump-F"],"-a":[630,0,1,"cmdoption-llvm-dwarfdump-a"],"-c":[630,0,1,"cmdoption-llvm-dwarfdump-c"],"-f":[630,0,1,"cmdoption-llvm-dwarfdump-f"],"-h":[630,0,1,"cmdoption-llvm-dwarfdump-h"],"-i":[630,0,1,"cmdoption-llvm-dwarfdump-i"],"-n":[630,0,1,"cmdoption-llvm-dwarfdump-n"],"-o":[630,0,1,"cmdoption-llvm-dwarfdump-o"],"-p":[630,0,1,"cmdoption-llvm-dwarfdump-p"],"-r":[630,0,1,"cmdoption-llvm-dwarfdump-r"],"-u":[630,0,1,"cmdoption-llvm-dwarfdump-u"],"-v":[630,0,1,"cmdoption-llvm-dwarfdump-v"],"-x":[630,0,1,"cmdoption-llvm-dwarfdump-x"],"@<FILE>":[630,0,1,"cmdoption-llvm-dwarfdump-arg-FILE"]},"llvm-exegesis":{"--dump-object-to-disk":[631,0,1,"
cmdoption-llvm-exegesis-dump-object-to-disk"],"-analysis-clustering":[631,0,1,"cmdoption-llvm-exegesis-analysis-clustering"],"-analysis-clustering-epsilon":[631,0,1,"cmdoption-llvm-exegesis-analysis-clustering-epsilon"],"-analysis-clusters-output-file":[631,0,1,"cmdoption-llvm-exegesis-analysis-clusters-output-file"],"-analysis-display-unstable-clusters":[631,0,1,"cmdoption-llvm-exegesis-analysis-display-unstable-clusters"],"-analysis-inconsistencies-output-file":[631,0,1,"cmdoption-llvm-exegesis-analysis-inconsistencies-output-file"],"-analysis-inconsistency-epsilon":[631,0,1,"cmdoption-llvm-exegesis-analysis-inconsistency-epsilon"],"-analysis-numpoints":[631,0,1,"cmdoption-llvm-exegesis-analysis-numpoints"],"-benchmarks-file":[631,0,1,"cmdoption-llvm-exegesis-benchmarks-file"],"-help":[631,0,1,"cmdoption-llvm-exegesis-help"],"-ignore-invalid-sched-class":[631,0,1,"cmdoption-llvm-exegesis-ignore-invalid-sched-class"],"-loop-body-size":[631,0,1,"cmdoption-llvm-exegesis-loop-body-size"],"-max-configs-per-opcode":[631,0,1,"cmdoption-llvm-exegesis-max-configs-per-opcode"],"-mcpu":[631,0,1,"cmdoption-llvm-exegesis-mcpu"],"-mode":[631,0,1,"cmdoption-llvm-exegesis-mode"],"-num-repetitions":[631,0,1,"cmdoption-llvm-exegesis-num-repetitions"],"-opcode-index":[631,0,1,"cmdoption-llvm-exegesis-opcode-index"],"-opcode-name":[631,0,1,"cmdoption-llvm-exegesis-opcode-name"],"-repetition-mode":[631,0,1,"cmdoption-llvm-exegesis-repetition-mode"],"-snippets-file":[631,0,1,"cmdoption-llvm-exegesis-snippets-file"],"-x86-lbr-sample-period":[631,0,1,"cmdoption-llvm-exegesis-x86-lbr-sample-period"]},"llvm-install-name-tool":{"--help":[633,0,1,"cmdoption-llvm-install-name-tool-help"],"--version":[633,0,1,"cmdoption-llvm-install-name-tool-version"],"-V":[633,0,1,"cmdoption-llvm-install-name-tool-version"],"-add_rpath":[633,0,1,"cmdoption-llvm-install-name-tool-add_rpath"],"-change":[633,0,1,"cmdoption-llvm-install-name-tool-change"],"-delete_all_rpaths":[633,0,1,"cmdoption-llvm-install-nam
e-tool-delete_all_rpaths"],"-delete_rpath":[633,0,1,"cmdoption-llvm-install-name-tool-delete_rpath"],"-h":[633,0,1,"cmdoption-llvm-install-name-tool-help"],"-id":[633,0,1,"cmdoption-llvm-install-name-tool-id"],"-rpath":[633,0,1,"cmdoption-llvm-install-name-tool-rpath"]},"llvm-libtool-darwin":{"-D":[635,0,1,"cmdoption-llvm-libtool-darwin-D"],"-L":[635,0,1,"cmdoption-llvm-libtool-darwin-L"],"-U":[635,0,1,"cmdoption-llvm-libtool-darwin-U"],"-V":[635,0,1,"cmdoption-llvm-libtool-darwin-V"],"-arch_only":[635,0,1,"cmdoption-llvm-libtool-darwin-arch_only"],"-color":[635,0,1,"cmdoption-llvm-libtool-darwin-color"],"-filelist":[635,0,1,"cmdoption-llvm-libtool-darwin-filelist"],"-h":[635,0,1,"cmdoption-llvm-libtool-darwin-h"],"-help":[635,0,1,"cmdoption-llvm-libtool-darwin-h"],"-help-list":[635,0,1,"cmdoption-llvm-libtool-darwin-help-list"],"-l":[635,0,1,"cmdoption-llvm-libtool-darwin-l"],"-no_warning_for_no_symbols":[635,0,1,"cmdoption-llvm-libtool-darwin-no_warning_for_no_symbols"],"-o":[635,0,1,"cmdoption-llvm-libtool-darwin-o"],"-static":[635,0,1,"cmdoption-llvm-libtool-darwin-static"],"-version":[635,0,1,"cmdoption-llvm-libtool-darwin-version"]},"llvm-link":{"-S":[636,0,1,"cmdoption-llvm-link-S"],"-d":[636,0,1,"cmdoption-llvm-link-d"],"-f":[636,0,1,"cmdoption-llvm-link-f"],"-help":[636,0,1,"cmdoption-llvm-link-help"],"-o":[636,0,1,"cmdoption-llvm-link-o"],"-v":[636,0,1,"cmdoption-llvm-link-v"]},"llvm-lipo":{"-archs":[637,0,1,"cmdoption-llvm-lipo-archs"],"-create":[637,0,1,"cmdoption-llvm-lipo-create"],"-h":[637,0,1,"cmdoption-llvm-lipo-help"],"-help":[637,0,1,"cmdoption-llvm-lipo-help"],"-info":[637,0,1,"cmdoption-llvm-lipo-info"],"-replace":[637,0,1,"cmdoption-llvm-lipo-replace"],"-segalign":[637,0,1,"cmdoption-llvm-lipo-segalign"],"-thin":[637,0,1,"cmdoption-llvm-lipo-thin"],"-verify_arch":[637,0,1,"cmdoption-llvm-lipo-verify_arch"],"-version":[637,0,1,"cmdoption-llvm-lipo-version"]},"llvm-locstats":{"--compare":[638,0,1,"cmdoption-llvm-locstats-compare"],"--draw-plot":[
638,0,1,"cmdoption-llvm-locstats-draw-plot"],"--ignore-debug-entry-values":[638,0,1,"cmdoption-llvm-locstats-ignore-debug-entry-values"],"--only-formal-parameters":[638,0,1,"cmdoption-llvm-locstats-only-formal-parameters"],"--only-variables":[638,0,1,"cmdoption-llvm-locstats-only-variables"]},"llvm-mca":{"-all-stats":[639,0,1,"cmdoption-llvm-mca-all-stats"],"-all-views":[639,0,1,"cmdoption-llvm-mca-all-views"],"-bottleneck-analysis":[639,0,1,"cmdoption-llvm-mca-bottleneck-analysis"],"-disable-cb":[639,0,1,"cmdoption-llvm-mca-disable-cb"],"-dispatch":[639,0,1,"cmdoption-llvm-mca-dispatch"],"-dispatch-stats":[639,0,1,"cmdoption-llvm-mca-dispatch-stats"],"-help":[639,0,1,"cmdoption-llvm-mca-help"],"-instruction-info":[639,0,1,"cmdoption-llvm-mca-instruction-info"],"-instruction-tables":[639,0,1,"cmdoption-llvm-mca-instruction-tables"],"-iterations":[639,0,1,"cmdoption-llvm-mca-iterations"],"-json":[639,0,1,"cmdoption-llvm-mca-json"],"-lqueue":[639,0,1,"cmdoption-llvm-mca-lqueue"],"-march":[639,0,1,"cmdoption-llvm-mca-march"],"-mcpu":[639,0,1,"cmdoption-llvm-mca-mcpu"],"-mtriple":[639,0,1,"cmdoption-llvm-mca-mtriple"],"-noalias":[639,0,1,"cmdoption-llvm-mca-noalias"],"-o":[639,0,1,"cmdoption-llvm-mca-o"],"-output-asm-variant":[639,0,1,"cmdoption-llvm-mca-output-asm-variant"],"-print-imm-hex":[639,0,1,"cmdoption-llvm-mca-print-imm-hex"],"-register-file-size":[639,0,1,"cmdoption-llvm-mca-register-file-size"],"-register-file-stats":[639,0,1,"cmdoption-llvm-mca-register-file-stats"],"-resource-pressure":[639,0,1,"cmdoption-llvm-mca-resource-pressure"],"-retire-stats":[639,0,1,"cmdoption-llvm-mca-retire-stats"],"-scheduler-stats":[639,0,1,"cmdoption-llvm-mca-scheduler-stats"],"-show-encoding":[639,0,1,"cmdoption-llvm-mca-show-encoding"],"-squeue":[639,0,1,"cmdoption-llvm-mca-squeue"],"-timeline":[639,0,1,"cmdoption-llvm-mca-timeline"],"-timeline-max-cycles":[639,0,1,"cmdoption-llvm-mca-timeline-max-cycles"],"-timeline-max-iterations":[639,0,1,"cmdoption-llvm-mca-timeline-max
-iterations"]},"llvm-nm":{"--add-dyldinfo":[640,0,1,"cmdoption-llvm-nm-add-dyldinfo"],"--add-inlinedinfo":[640,0,1,"cmdoption-llvm-nm-add-inlinedinfo"],"--arch":[640,0,1,"cmdoption-llvm-nm-arch"],"--debug-syms":[640,0,1,"cmdoption-llvm-nm-debug-syms"],"--defined-only":[640,0,1,"cmdoption-llvm-nm-defined-only"],"--demangle":[640,0,1,"cmdoption-llvm-nm-demangle"],"--dyldinfo-only":[640,0,1,"cmdoption-llvm-nm-dyldinfo-only"],"--dynamic":[640,0,1,"cmdoption-llvm-nm-dynamic"],"--extern-only":[640,0,1,"cmdoption-llvm-nm-extern-only"],"--format":[640,0,1,"cmdoption-llvm-nm-format"],"--help":[640,0,1,"cmdoption-llvm-nm-help"],"--no-demangle":[640,0,1,"cmdoption-llvm-nm-no-demangle"],"--no-dyldinfo":[640,0,1,"cmdoption-llvm-nm-no-dyldinfo"],"--no-llvm-bc":[640,0,1,"cmdoption-llvm-nm-no-llvm-bc"],"--no-sort":[640,0,1,"cmdoption-llvm-nm-no-sort"],"--no-weak":[640,0,1,"cmdoption-llvm-nm-no-weak"],"--numeric-sort":[640,0,1,"cmdoption-llvm-nm-numeric-sort"],"--portability":[640,0,1,"cmdoption-llvm-nm-portability"],"--print-armap":[640,0,1,"cmdoption-llvm-nm-print-armap"],"--print-file-name":[640,0,1,"cmdoption-llvm-nm-print-file-name"],"--print-size":[640,0,1,"cmdoption-llvm-nm-print-size"],"--quiet":[640,0,1,"cmdoption-llvm-nm-quiet"],"--radix":[640,0,1,"cmdoption-llvm-nm-radix"],"--reverse-sort":[640,0,1,"cmdoption-llvm-nm-reverse-sort"],"--size-sort":[640,0,1,"cmdoption-llvm-nm-size-sort"],"--special-syms":[640,0,1,"cmdoption-llvm-nm-special-syms"],"--undefined-only":[640,0,1,"cmdoption-llvm-nm-undefined-only"],"--version":[640,0,1,"cmdoption-llvm-nm-version"],"-A":[640,0,1,"cmdoption-llvm-nm-print-file-name"],"-B":[640,0,1,"cmdoption-llvm-nm-B"],"-C":[640,0,1,"cmdoption-llvm-nm-demangle"],"-D":[640,0,1,"cmdoption-llvm-nm-dynamic"],"-P":[640,0,1,"cmdoption-llvm-nm-portability"],"-S":[640,0,1,"cmdoption-llvm-nm-print-size"],"-V":[640,0,1,"cmdoption-llvm-nm-version"],"-a":[640,0,1,"cmdoption-llvm-nm-debug-syms"],"-f":[640,0,1,"cmdoption-llvm-nm-format"],"-g":[640,0,1,"cmdoption-
llvm-nm-extern-only"],"-h":[640,0,1,"cmdoption-llvm-nm-help"],"-j":[640,0,1,"cmdoption-llvm-nm-j"],"-m":[640,0,1,"cmdoption-llvm-nm-m"],"-n":[640,0,1,"cmdoption-llvm-nm-numeric-sort"],"-o":[640,0,1,"cmdoption-llvm-nm-print-file-name"],"-p":[640,0,1,"cmdoption-llvm-nm-no-sort"],"-r":[640,0,1,"cmdoption-llvm-nm-reverse-sort"],"-s":[640,0,1,"cmdoption-llvm-nm-0"],"-t":[640,0,1,"cmdoption-llvm-nm-radix"],"-u":[640,0,1,"cmdoption-llvm-nm-undefined-only"],"-v":[640,0,1,"cmdoption-llvm-nm-numeric-sort"],"-x":[640,0,1,"cmdoption-llvm-nm-x"],"@<FILE>":[640,0,1,"cmdoption-llvm-nm-arg-FILE"]},"llvm-objcopy":{"--add-gnu-debuglink":[641,0,1,"cmdoption-llvm-objcopy-add-gnu-debuglink"],"--add-section":[641,0,1,"cmdoption-llvm-objcopy-add-section"],"--add-symbol":[641,0,1,"cmdoption-llvm-objcopy-add-symbol"],"--adjust-start":[641,0,1,"cmdoption-llvm-objcopy-change-start"],"--allow-broken-links":[641,0,1,"cmdoption-llvm-objcopy-allow-broken-links"],"--binary-architecture":[641,0,1,"cmdoption-llvm-objcopy-binary-architecture"],"--change-start":[641,0,1,"cmdoption-llvm-objcopy-change-start"],"--compress-debug-sections":[641,0,1,"cmdoption-llvm-objcopy-compress-debug-sections"],"--decompress-debug-sections":[641,0,1,"cmdoption-llvm-objcopy-decompress-debug-sections"],"--disable-deterministic-archives":[641,0,1,"cmdoption-llvm-objcopy-disable-deterministic-archives"],"--discard-all":[641,0,1,"cmdoption-llvm-objcopy-discard-all"],"--discard-locals":[641,0,1,"cmdoption-llvm-objcopy-discard-locals"],"--dump-section":[641,0,1,"cmdoption-llvm-objcopy-dump-section"],"--enable-deterministic-archives":[641,0,1,"cmdoption-llvm-objcopy-enable-deterministic-archives"],"--extract-dwo":[641,0,1,"cmdoption-llvm-objcopy-extract-dwo"],"--extract-main-partition":[641,0,1,"cmdoption-llvm-objcopy-extract-main-partition"],"--extract-partition":[641,0,1,"cmdoption-llvm-objcopy-extract-partition"],"--globalize-symbol":[641,0,1,"cmdoption-llvm-objcopy-globalize-symbol"],"--globalize-symbols":[641,0,1,"cmdopti
on-llvm-objcopy-globalize-symbols"],"--help":[641,0,1,"cmdoption-llvm-objcopy-help"],"--input-target":[641,0,1,"cmdoption-llvm-objcopy-input-target"],"--keep-file-symbols":[641,0,1,"cmdoption-llvm-objcopy-keep-file-symbols"],"--keep-global-symbol":[641,0,1,"cmdoption-llvm-objcopy-keep-global-symbol"],"--keep-global-symbols":[641,0,1,"cmdoption-llvm-objcopy-keep-global-symbols"],"--keep-section":[641,0,1,"cmdoption-llvm-objcopy-keep-section"],"--keep-symbol":[641,0,1,"cmdoption-llvm-objcopy-keep-symbol"],"--keep-symbols":[641,0,1,"cmdoption-llvm-objcopy-keep-symbols"],"--keep-undefined":[641,0,1,"cmdoption-llvm-objcopy-keep-undefined"],"--localize-hidden":[641,0,1,"cmdoption-llvm-objcopy-localize-hidden"],"--localize-symbol":[641,0,1,"cmdoption-llvm-objcopy-localize-symbol"],"--localize-symbols":[641,0,1,"cmdoption-llvm-objcopy-localize-symbols"],"--new-symbol-visibility":[641,0,1,"cmdoption-llvm-objcopy-new-symbol-visibility"],"--only-keep-debug":[641,0,1,"cmdoption-llvm-objcopy-only-keep-debug"],"--only-section":[641,0,1,"cmdoption-llvm-objcopy-only-section"],"--output-target":[641,0,1,"cmdoption-llvm-objcopy-output-target"],"--prefix-alloc-sections":[641,0,1,"cmdoption-llvm-objcopy-prefix-alloc-sections"],"--prefix-symbols":[641,0,1,"cmdoption-llvm-objcopy-prefix-symbols"],"--preserve-dates":[641,0,1,"cmdoption-llvm-objcopy-preserve-dates"],"--redefine-sym":[641,0,1,"cmdoption-llvm-objcopy-redefine-sym"],"--redefine-syms":[641,0,1,"cmdoption-llvm-objcopy-redefine-syms"],"--regex":[641,0,1,"cmdoption-llvm-objcopy-regex"],"--remove-section":[641,0,1,"cmdoption-llvm-objcopy-remove-section"],"--rename-section":[641,0,1,"cmdoption-llvm-objcopy-rename-section"],"--set-section-alignment":[641,0,1,"cmdoption-llvm-objcopy-set-section-alignment"],"--set-section-flags":[641,0,1,"cmdoption-llvm-objcopy-set-section-flags"],"--set-start-addr":[641,0,1,"cmdoption-llvm-objcopy-set-start-addr"],"--split-dwo":[641,0,1,"cmdoption-llvm-objcopy-split-dwo"],"--strip-all":[641,0,1,"cmdo
ption-llvm-objcopy-strip-all"],"--strip-all-gnu":[641,0,1,"cmdoption-llvm-objcopy-strip-all-gnu"],"--strip-debug":[641,0,1,"cmdoption-llvm-objcopy-strip-debug"],"--strip-dwo":[641,0,1,"cmdoption-llvm-objcopy-strip-dwo"],"--strip-non-alloc":[641,0,1,"cmdoption-llvm-objcopy-strip-non-alloc"],"--strip-sections":[641,0,1,"cmdoption-llvm-objcopy-strip-sections"],"--strip-symbol":[641,0,1,"cmdoption-llvm-objcopy-strip-symbol"],"--strip-symbols":[641,0,1,"cmdoption-llvm-objcopy-strip-symbols"],"--strip-unneeded":[641,0,1,"cmdoption-llvm-objcopy-strip-unneeded"],"--strip-unneeded-symbol":[641,0,1,"cmdoption-llvm-objcopy-strip-unneeded-symbol"],"--strip-unneeded-symbols":[641,0,1,"cmdoption-llvm-objcopy-strip-unneeded-symbols"],"--target":[641,0,1,"cmdoption-llvm-objcopy-target"],"--version":[641,0,1,"cmdoption-llvm-objcopy-version"],"--weaken":[641,0,1,"cmdoption-llvm-objcopy-weaken"],"--weaken-symbol":[641,0,1,"cmdoption-llvm-objcopy-weaken-symbol"],"--weaken-symbols":[641,0,1,"cmdoption-llvm-objcopy-weaken-symbols"],"--wildcard":[641,0,1,"cmdoption-llvm-objcopy-wildcard"],"-B":[641,0,1,"cmdoption-llvm-objcopy-binary-architecture"],"-D":[641,0,1,"cmdoption-llvm-objcopy-enable-deterministic-archives"],"-F":[641,0,1,"cmdoption-llvm-objcopy-target"],"-I":[641,0,1,"cmdoption-llvm-objcopy-input-target"],"-K":[641,0,1,"cmdoption-llvm-objcopy-keep-symbol"],"-L":[641,0,1,"cmdoption-llvm-objcopy-localize-symbol"],"-N":[641,0,1,"cmdoption-llvm-objcopy-strip-symbol"],"-O":[641,0,1,"cmdoption-llvm-objcopy-output-target"],"-R":[641,0,1,"cmdoption-llvm-objcopy-remove-section"],"-S":[641,0,1,"cmdoption-llvm-objcopy-strip-all"],"-U":[641,0,1,"cmdoption-llvm-objcopy-disable-deterministic-archives"],"-V":[641,0,1,"cmdoption-llvm-objcopy-version"],"-W":[641,0,1,"cmdoption-llvm-objcopy-weaken-symbol"],"-X":[641,0,1,"cmdoption-llvm-objcopy-discard-locals"],"-g":[641,0,1,"cmdoption-llvm-objcopy-strip-debug"],"-h":[641,0,1,"cmdoption-llvm-objcopy-help"],"-j":[641,0,1,"cmdoption-llvm-objcopy-only
-section"],"-p":[641,0,1,"cmdoption-llvm-objcopy-preserve-dates"],"-w":[641,0,1,"cmdoption-llvm-objcopy-wildcard"],"-x":[641,0,1,"cmdoption-llvm-objcopy-discard-all"],"@<FILE>":[641,0,1,"cmdoption-llvm-objcopy-arg-FILE"]},"llvm-objdump":{"--adjust-vma":[642,0,1,"cmdoption-llvm-objdump-adjust-vma"],"--all-headers":[642,0,1,"cmdoption-llvm-objdump-x"],"--arch":[642,0,1,"cmdoption-llvm-objdump-arch"],"--arch-name":[642,0,1,"cmdoption-llvm-objdump-arch-name"],"--archive-headers":[642,0,1,"cmdoption-llvm-objdump-a"],"--archive-member-offsets":[642,0,1,"cmdoption-llvm-objdump-archive-member-offsets"],"--bind":[642,0,1,"cmdoption-llvm-objdump-bind"],"--data-in-code":[642,0,1,"cmdoption-llvm-objdump-data-in-code"],"--debug-vars":[642,0,1,"cmdoption-llvm-objdump-debug-vars"],"--debug-vars-indent":[642,0,1,"cmdoption-llvm-objdump-debug-vars-indent"],"--demangle":[642,0,1,"cmdoption-llvm-objdump-C"],"--dis-symname":[642,0,1,"cmdoption-llvm-objdump-dis-symname"],"--disassemble":[642,0,1,"cmdoption-llvm-objdump-d"],"--disassemble-all":[642,0,1,"cmdoption-llvm-objdump-D"],"--disassemble-symbols":[642,0,1,"cmdoption-llvm-objdump-disassemble-symbols"],"--disassemble-zeroes":[642,0,1,"cmdoption-llvm-objdump-z"],"--disassembler-options":[642,0,1,"cmdoption-llvm-objdump-M"],"--dsym":[642,0,1,"cmdoption-llvm-objdump-dsym"],"--dwarf":[642,0,1,"cmdoption-llvm-objdump-dwarf"],"--dylib-id":[642,0,1,"cmdoption-llvm-objdump-dylib-id"],"--dylibs-used":[642,0,1,"cmdoption-llvm-objdump-dylibs-used"],"--dynamic-reloc":[642,0,1,"cmdoption-llvm-objdump-R"],"--dynamic-syms":[642,0,1,"cmdoption-llvm-objdump-T"],"--exports-trie":[642,0,1,"cmdoption-llvm-objdump-exports-trie"],"--fault-map-section":[642,0,1,"cmdoption-llvm-objdump-fault-map-section"],"--file-headers":[642,0,1,"cmdoption-llvm-objdump-f"],"--full-contents":[642,0,1,"cmdoption-llvm-objdump-s"],"--full-leading-addr":[642,0,1,"cmdoption-llvm-objdump-full-leading-addr"],"--function-starts":[642,0,1,"cmdoption-llvm-objdump-function-starts"],
"--headers":[642,0,1,"cmdoption-llvm-objdump-h"],"--help":[642,0,1,"cmdoption-llvm-objdump-help"],"--indirect-symbols":[642,0,1,"cmdoption-llvm-objdump-indirect-symbols"],"--info-plist":[642,0,1,"cmdoption-llvm-objdump-info-plist"],"--lazy-bind":[642,0,1,"cmdoption-llvm-objdump-lazy-bind"],"--line-numbers":[642,0,1,"cmdoption-llvm-objdump-l"],"--link-opt-hints":[642,0,1,"cmdoption-llvm-objdump-link-opt-hints"],"--macho":[642,0,1,"cmdoption-llvm-objdump-0"],"--mattr":[642,0,1,"cmdoption-llvm-objdump-mattr"],"--mcpu":[642,0,1,"cmdoption-llvm-objdump-mcpu"],"--no-leading-addr":[642,0,1,"cmdoption-llvm-objdump-no-leading-addr"],"--no-leading-headers":[642,0,1,"cmdoption-llvm-objdump-no-leading-headers"],"--no-print-imm-hex":[642,0,1,"cmdoption-llvm-objdump-no-print-imm-hex"],"--no-show-raw-insn":[642,0,1,"cmdoption-llvm-objdump-no-show-raw-insn"],"--no-symbolic-operands":[642,0,1,"cmdoption-llvm-objdump-no-symbolic-operands"],"--non-verbose":[642,0,1,"cmdoption-llvm-objdump-non-verbose"],"--objc-meta-data":[642,0,1,"cmdoption-llvm-objdump-objc-meta-data"],"--prefix":[642,0,1,"cmdoption-llvm-objdump-prefix"],"--prefix-strip":[642,0,1,"cmdoption-llvm-objdump-prefix-strip"],"--print-imm-hex":[642,0,1,"cmdoption-llvm-objdump-print-imm-hex"],"--private-header":[642,0,1,"cmdoption-llvm-objdump-private-header"],"--private-headers":[642,0,1,"cmdoption-llvm-objdump-p"],"--raw-clang-ast":[642,0,1,"cmdoption-llvm-objdump-raw-clang-ast"],"--rebase":[642,0,1,"cmdoption-llvm-objdump-rebase"],"--reloc":[642,0,1,"cmdoption-llvm-objdump-r"],"--rpaths":[642,0,1,"cmdoption-llvm-objdump-rpaths"],"--section":[642,0,1,"cmdoption-llvm-objdump-j"],"--section-headers":[642,0,1,"cmdoption-llvm-objdump-h"],"--show-lma":[642,0,1,"cmdoption-llvm-objdump-show-lma"],"--source":[642,0,1,"cmdoption-llvm-objdump-S"],"--start-address":[642,0,1,"cmdoption-llvm-objdump-start-address"],"--stop-address":[642,0,1,"cmdoption-llvm-objdump-stop-address"],"--symbol-description":[642,0,1,"cmdoption-llvm-objdump-sy
mbol-description"],"--symbolize-operands":[642,0,1,"cmdoption-llvm-objdump-symbolize-operands"],"--syms":[642,0,1,"cmdoption-llvm-objdump-t"],"--triple":[642,0,1,"cmdoption-llvm-objdump-triple"],"--universal-headers":[642,0,1,"cmdoption-llvm-objdump-universal-headers"],"--unwind-info":[642,0,1,"cmdoption-llvm-objdump-u"],"--version":[642,0,1,"cmdoption-llvm-objdump-v"],"--weak-bind":[642,0,1,"cmdoption-llvm-objdump-weak-bind"],"--wide":[642,0,1,"cmdoption-llvm-objdump-w"],"--x86-asm-syntax":[642,0,1,"cmdoption-llvm-objdump-x86-asm-syntax"],"-C":[642,0,1,"cmdoption-llvm-objdump-C"],"-D":[642,0,1,"cmdoption-llvm-objdump-D"],"-M":[642,0,1,"cmdoption-llvm-objdump-M"],"-R":[642,0,1,"cmdoption-llvm-objdump-R"],"-S":[642,0,1,"cmdoption-llvm-objdump-S"],"-T":[642,0,1,"cmdoption-llvm-objdump-T"],"-a":[642,0,1,"cmdoption-llvm-objdump-a"],"-d":[642,0,1,"cmdoption-llvm-objdump-d"],"-f":[642,0,1,"cmdoption-llvm-objdump-f"],"-g":[642,0,1,"cmdoption-llvm-objdump-g"],"-h":[642,0,1,"cmdoption-llvm-objdump-h"],"-j":[642,0,1,"cmdoption-llvm-objdump-j"],"-l":[642,0,1,"cmdoption-llvm-objdump-l"],"-m":[642,0,1,"cmdoption-llvm-objdump-0"],"-p":[642,0,1,"cmdoption-llvm-objdump-p"],"-r":[642,0,1,"cmdoption-llvm-objdump-r"],"-s":[642,0,1,"cmdoption-llvm-objdump-s"],"-t":[642,0,1,"cmdoption-llvm-objdump-t"],"-u":[642,0,1,"cmdoption-llvm-objdump-u"],"-v":[642,0,1,"cmdoption-llvm-objdump-v"],"-w":[642,0,1,"cmdoption-llvm-objdump-w"],"-x":[642,0,1,"cmdoption-llvm-objdump-x"],"-z":[642,0,1,"cmdoption-llvm-objdump-z"],"@<FILE>":[642,0,1,"cmdoption-llvm-objdump-arg-FILE"],att:[642,0,1,"cmdoption-llvm-objdump-arg-att"],intel:[642,0,1,"cmdoption-llvm-objdump-arg-intel"]},"llvm-otool":{"--help":[643,0,1,"cmdoption-llvm-otool-help"],"--help-hidden":[643,0,1,"cmdoption-llvm-otool-help-hidden"],"--version":[643,0,1,"cmdoption-llvm-otool-version"],"-C":[643,0,1,"cmdoption-llvm-otool-C"],"-D":[643,0,1,"cmdoption-llvm-otool-D"],"-G":[643,0,1,"cmdoption-llvm-otool-G"],"-I":[643,0,1,"cmdoption-llvm-otool-I"],
"-L":[643,0,1,"cmdoption-llvm-otool-L"],"-P":[643,0,1,"cmdoption-llvm-otool-P"],"-V":[643,0,1,"cmdoption-llvm-otool-V"],"-X":[643,0,1,"cmdoption-llvm-otool-X"],"-arch":[643,0,1,"cmdoption-llvm-otool-arch"],"-d":[643,0,1,"cmdoption-llvm-otool-0"],"-f":[643,0,1,"cmdoption-llvm-otool-f"],"-h":[643,0,1,"cmdoption-llvm-otool-h"],"-j":[643,0,1,"cmdoption-llvm-otool-j"],"-l":[643,0,1,"cmdoption-llvm-otool-1"],"-mcpu":[643,0,1,"cmdoption-llvm-otool-mcpu"],"-o":[643,0,1,"cmdoption-llvm-otool-o"],"-p":[643,0,1,"cmdoption-llvm-otool-2"],"-r":[643,0,1,"cmdoption-llvm-otool-r"],"-s":[643,0,1,"cmdoption-llvm-otool-s"],"-t":[643,0,1,"cmdoption-llvm-otool-t"],"-v":[643,0,1,"cmdoption-llvm-otool-3"],"-x":[643,0,1,"cmdoption-llvm-otool-4"],"@<FILE>":[643,0,1,"cmdoption-llvm-otool-arg-FILE"]},"llvm-pdbutil-bytes":{"-block-range":[644,0,1,"cmdoption-llvm-pdbutil-bytes-block-range"],"-byte-range":[644,0,1,"cmdoption-llvm-pdbutil-bytes-byte-range"],"-chunks":[644,0,1,"cmdoption-llvm-pdbutil-bytes-chunks"],"-ec":[644,0,1,"cmdoption-llvm-pdbutil-bytes-ec"],"-files":[644,0,1,"cmdoption-llvm-pdbutil-bytes-files"],"-fpm":[644,0,1,"cmdoption-llvm-pdbutil-bytes-fpm"],"-id":[644,0,1,"cmdoption-llvm-pdbutil-bytes-id"],"-mod":[644,0,1,"cmdoption-llvm-pdbutil-bytes-mod"],"-modi":[644,0,1,"cmdoption-llvm-pdbutil-bytes-modi"],"-name-map":[644,0,1,"cmdoption-llvm-pdbutil-bytes-name-map"],"-sc":[644,0,1,"cmdoption-llvm-pdbutil-bytes-sc"],"-sm":[644,0,1,"cmdoption-llvm-pdbutil-bytes-sm"],"-split-chunks":[644,0,1,"cmdoption-llvm-pdbutil-bytes-split-chunks"],"-stream-data":[644,0,1,"cmdoption-llvm-pdbutil-bytes-stream-data"],"-syms":[644,0,1,"cmdoption-llvm-pdbutil-bytes-syms"],"-type":[644,0,1,"cmdoption-llvm-pdbutil-bytes-type"],"-type-server":[644,0,1,"cmdoption-llvm-pdbutil-bytes-type-server"]},"llvm-pdbutil-dump":{"-all":[644,0,1,"cmdoption-llvm-pdbutil-dump-all"],"-dependents":[644,0,1,"cmdoption-llvm-pdbutil-dump-dependents"],"-files":[644,0,1,"cmdoption-llvm-pdbutil-dump-files"],"-global-extras":[
644,0,1,"cmdoption-llvm-pdbutil-dump-global-extras"],"-globals":[644,0,1,"cmdoption-llvm-pdbutil-dump-globals"],"-id-data":[644,0,1,"cmdoption-llvm-pdbutil-dump-id-data"],"-id-extras":[644,0,1,"cmdoption-llvm-pdbutil-dump-id-extras"],"-id-index":[644,0,1,"cmdoption-llvm-pdbutil-dump-id-index"],"-ids":[644,0,1,"cmdoption-llvm-pdbutil-dump-ids"],"-il":[644,0,1,"cmdoption-llvm-pdbutil-dump-il"],"-l":[644,0,1,"cmdoption-llvm-pdbutil-dump-l"],"-modi":[644,0,1,"cmdoption-llvm-pdbutil-dump-modi"],"-modules":[644,0,1,"cmdoption-llvm-pdbutil-dump-modules"],"-public-extras":[644,0,1,"cmdoption-llvm-pdbutil-dump-public-extras"],"-publics":[644,0,1,"cmdoption-llvm-pdbutil-dump-publics"],"-section-contribs":[644,0,1,"cmdoption-llvm-pdbutil-dump-section-contribs"],"-section-headers":[644,0,1,"cmdoption-llvm-pdbutil-dump-section-headers"],"-section-map":[644,0,1,"cmdoption-llvm-pdbutil-dump-section-map"],"-stream-blocks":[644,0,1,"cmdoption-llvm-pdbutil-dump-stream-blocks"],"-streams":[644,0,1,"cmdoption-llvm-pdbutil-dump-streams"],"-string-table":[644,0,1,"cmdoption-llvm-pdbutil-dump-string-table"],"-summary":[644,0,1,"cmdoption-llvm-pdbutil-dump-summary"],"-sym-data":[644,0,1,"cmdoption-llvm-pdbutil-dump-sym-data"],"-symbols":[644,0,1,"cmdoption-llvm-pdbutil-dump-symbols"],"-type-data":[644,0,1,"cmdoption-llvm-pdbutil-dump-type-data"],"-type-extras":[644,0,1,"cmdoption-llvm-pdbutil-dump-type-extras"],"-type-index":[644,0,1,"cmdoption-llvm-pdbutil-dump-type-index"],"-types":[644,0,1,"cmdoption-llvm-pdbutil-dump-types"],"-xme":[644,0,1,"cmdoption-llvm-pdbutil-dump-xme"],"-xmi":[644,0,1,"cmdoption-llvm-pdbutil-dump-xmi"]},"llvm-pdbutil-merge":{"-pdb":[644,0,1,"cmdoption-llvm-pdbutil-merge-pdb"]},"llvm-pdbutil-pretty":{"-all":[644,0,1,"cmdoption-llvm-pdbutil-pretty-all"],"-class-definitions":[644,0,1,"cmdoption-llvm-pdbutil-pretty-class-definitions"],"-class-order":[644,0,1,"cmdoption-llvm-pdbutil-pretty-class-order"],"-class-recurse-depth":[644,0,1,"cmdoption-llvm-pdbutil-pretty-cl
ass-recurse-depth"],"-classes":[644,0,1,"cmdoption-llvm-pdbutil-pretty-classes"],"-color-output":[644,0,1,"cmdoption-llvm-pdbutil-pretty-color-output"],"-compilands":[644,0,1,"cmdoption-llvm-pdbutil-pretty-compilands"],"-enums":[644,0,1,"cmdoption-llvm-pdbutil-pretty-enums"],"-exclude-compilands":[644,0,1,"cmdoption-llvm-pdbutil-pretty-exclude-compilands"],"-exclude-symbols":[644,0,1,"cmdoption-llvm-pdbutil-pretty-exclude-symbols"],"-exclude-types":[644,0,1,"cmdoption-llvm-pdbutil-pretty-exclude-types"],"-externals":[644,0,1,"cmdoption-llvm-pdbutil-pretty-externals"],"-globals":[644,0,1,"cmdoption-llvm-pdbutil-pretty-globals"],"-include-compilands":[644,0,1,"cmdoption-llvm-pdbutil-pretty-include-compilands"],"-include-symbols":[644,0,1,"cmdoption-llvm-pdbutil-pretty-include-symbols"],"-include-types":[644,0,1,"cmdoption-llvm-pdbutil-pretty-include-types"],"-lines":[644,0,1,"cmdoption-llvm-pdbutil-pretty-lines"],"-load-address":[644,0,1,"cmdoption-llvm-pdbutil-pretty-load-address"],"-min-class-padding":[644,0,1,"cmdoption-llvm-pdbutil-pretty-min-class-padding"],"-min-class-padding-imm":[644,0,1,"cmdoption-llvm-pdbutil-pretty-min-class-padding-imm"],"-min-type-size":[644,0,1,"cmdoption-llvm-pdbutil-pretty-min-type-size"],"-module-syms":[644,0,1,"cmdoption-llvm-pdbutil-pretty-module-syms"],"-no-compiler-generated":[644,0,1,"cmdoption-llvm-pdbutil-pretty-no-compiler-generated"],"-no-enum-definitions":[644,0,1,"cmdoption-llvm-pdbutil-pretty-no-enum-definitions"],"-no-system-libs":[644,0,1,"cmdoption-llvm-pdbutil-pretty-no-system-libs"],"-sym-types":[644,0,1,"cmdoption-llvm-pdbutil-pretty-sym-types"],"-symbol-order":[644,0,1,"cmdoption-llvm-pdbutil-pretty-symbol-order"],"-typedefs":[644,0,1,"cmdoption-llvm-pdbutil-pretty-typedefs"],"-types":[644,0,1,"cmdoption-llvm-pdbutil-pretty-types"]},"llvm-pdbutil-yaml2pdb":{"-pdb":[644,0,1,"cmdoption-llvm-pdbutil-yaml2pdb-pdb"]},"llvm-profdata-merge":{"-binary":[645,0,1,"cmdoption-llvm-profdata-merge-binary"],"-compress-all-sections
":[645,0,1,"cmdoption-llvm-profdata-merge-compress-all-sections"],"-extbinary":[645,0,1,"cmdoption-llvm-profdata-merge-extbinary"],"-f":[645,0,1,"cmdoption-llvm-profdata-merge-input-files"],"-failure-mode":[645,0,1,"cmdoption-llvm-profdata-merge-failure-mode"],"-gcc":[645,0,1,"cmdoption-llvm-profdata-merge-gcc"],"-gen-partial-profile":[645,0,1,"cmdoption-llvm-profdata-merge-gen-partial-profile"],"-help":[645,0,1,"cmdoption-llvm-profdata-merge-help"],"-input-files":[645,0,1,"cmdoption-llvm-profdata-merge-input-files"],"-instr":[645,0,1,"cmdoption-llvm-profdata-merge-instr"],"-instr-prof-cold-threshold":[645,0,1,"cmdoption-llvm-profdata-merge-instr-prof-cold-threshold"],"-j":[645,0,1,"cmdoption-llvm-profdata-merge-num-threads"],"-num-threads":[645,0,1,"cmdoption-llvm-profdata-merge-num-threads"],"-o":[645,0,1,"cmdoption-llvm-profdata-merge-output"],"-output":[645,0,1,"cmdoption-llvm-profdata-merge-output"],"-prof-sym-list":[645,0,1,"cmdoption-llvm-profdata-merge-prof-sym-list"],"-r":[645,0,1,"cmdoption-llvm-profdata-merge-remapping-file"],"-remapping-file":[645,0,1,"cmdoption-llvm-profdata-merge-remapping-file"],"-sample":[645,0,1,"cmdoption-llvm-profdata-merge-sample"],"-sparse":[645,0,1,"cmdoption-llvm-profdata-merge-sparse"],"-suppl-min-size-threshold":[645,0,1,"cmdoption-llvm-profdata-merge-suppl-min-size-threshold"],"-supplement-instr-with-sample":[645,0,1,"cmdoption-llvm-profdata-merge-supplement-instr-with-sample"],"-text":[645,0,1,"cmdoption-llvm-profdata-merge-text"],"-use-md5":[645,0,1,"cmdoption-llvm-profdata-merge-use-md5"],"-weighted-input":[645,0,1,"cmdoption-llvm-profdata-merge-weighted-input"],"-zero-counter-threshold":[645,0,1,"cmdoption-llvm-profdata-merge-zero-counter-threshold"]},"llvm-profdata-overlap":{"-cs":[645,0,1,"cmdoption-llvm-profdata-overlap-cs"],"-function":[645,0,1,"cmdoption-llvm-profdata-overlap-function"],"-help":[645,0,1,"cmdoption-llvm-profdata-overlap-help"],"-o":[645,0,1,"cmdoption-llvm-profdata-overlap-o"],"-value-cutoff":[645,0
,1,"cmdoption-llvm-profdata-overlap-value-cutoff"]},"llvm-profdata-show":{"-all-functions":[645,0,1,"cmdoption-llvm-profdata-show-all-functions"],"-counts":[645,0,1,"cmdoption-llvm-profdata-show-counts"],"-function":[645,0,1,"cmdoption-llvm-profdata-show-function"],"-help":[645,0,1,"cmdoption-llvm-profdata-show-help"],"-instr":[645,0,1,"cmdoption-llvm-profdata-show-instr"],"-list-below-cutoff":[645,0,1,"cmdoption-llvm-profdata-show-list-below-cutoff"],"-memop-sizes":[645,0,1,"cmdoption-llvm-profdata-show-memop-sizes"],"-o":[645,0,1,"cmdoption-llvm-profdata-show-output"],"-output":[645,0,1,"cmdoption-llvm-profdata-show-output"],"-sample":[645,0,1,"cmdoption-llvm-profdata-show-sample"],"-show-prof-sym-list":[645,0,1,"cmdoption-llvm-profdata-show-show-prof-sym-list"],"-show-sec-info-only":[645,0,1,"cmdoption-llvm-profdata-show-show-sec-info-only"],"-showcs":[645,0,1,"cmdoption-llvm-profdata-show-showcs"],"-text":[645,0,1,"cmdoption-llvm-profdata-show-text"],"-topn":[645,0,1,"cmdoption-llvm-profdata-show-topn"],"-value-cutoff":[645,0,1,"cmdoption-llvm-profdata-show-value-cutoff"]},"llvm-profgen":{"--binary":[646,0,1,"cmdoption-llvm-profgen-binary"],"--format":[646,0,1,"cmdoption-llvm-profgen-format"],"--output":[646,0,1,"cmdoption-llvm-profgen-output"],"--perfscript":[646,0,1,"cmdoption-llvm-profgen-perfscript"],"--show-disassembly":[646,0,1,"cmdoption-llvm-profgen-show-disassembly"],"--show-mmap-events":[646,0,1,"cmdoption-llvm-profgen-show-mmap-events"],"--x86-asm-syntax":[646,0,1,"cmdoption-llvm-profgen-x86-asm-syntax"]},"llvm-readelf":{"--addrsig":[648,0,1,"cmdoption-llvm-readelf-addrsig"],"--all":[648,0,1,"cmdoption-llvm-readelf-all"],"--arch-specific":[648,0,1,"cmdoption-llvm-readelf-arch-specific"],"--bb-addr-map":[648,0,1,"cmdoption-llvm-readelf-bb-addr-map"],"--cg-profile":[648,0,1,"cmdoption-llvm-readelf-cg-profile"],"--demangle":[648,0,1,"cmdoption-llvm-readelf-demangle"],"--dyn-relocations":[648,0,1,"cmdoption-llvm-readelf-dyn-relocations"],"--dyn-symbols":[
648,0,1,"cmdoption-llvm-readelf-dyn-symbols"],"--dyn-syms":[648,0,1,"cmdoption-llvm-readelf-dyn-symbols"],"--dynamic":[648,0,1,"cmdoption-llvm-readelf-dynamic-table"],"--dynamic-table":[648,0,1,"cmdoption-llvm-readelf-dynamic-table"],"--elf-linker-options":[648,0,1,"cmdoption-llvm-readelf-elf-linker-options"],"--elf-output-style":[648,0,1,"cmdoption-llvm-readelf-elf-output-style"],"--expand-relocs":[648,0,1,"cmdoption-llvm-readelf-expand-relocs"],"--file-header":[648,0,1,"cmdoption-llvm-readelf-file-header"],"--gnu-hash-table":[648,0,1,"cmdoption-llvm-readelf-gnu-hash-table"],"--hash-symbols":[648,0,1,"cmdoption-llvm-readelf-hash-symbols"],"--hash-table":[648,0,1,"cmdoption-llvm-readelf-hash-table"],"--headers":[648,0,1,"cmdoption-llvm-readelf-headers"],"--help":[648,0,1,"cmdoption-llvm-readelf-help"],"--hex-dump":[648,0,1,"cmdoption-llvm-readelf-hex-dump"],"--histogram":[648,0,1,"cmdoption-llvm-readelf-histogram"],"--needed-libs":[648,0,1,"cmdoption-llvm-readelf-needed-libs"],"--notes":[648,0,1,"cmdoption-llvm-readelf-notes"],"--program-headers":[648,0,1,"cmdoption-llvm-readelf-program-headers"],"--raw-relr":[648,0,1,"cmdoption-llvm-readelf-raw-relr"],"--relocations":[648,0,1,"cmdoption-llvm-readelf-relocations"],"--relocs":[648,0,1,"cmdoption-llvm-readelf-relocations"],"--section-data":[648,0,1,"cmdoption-llvm-readelf-section-data"],"--section-details":[648,0,1,"cmdoption-llvm-readelf-section-details"],"--section-groups":[648,0,1,"cmdoption-llvm-readelf-section-groups"],"--section-headers":[648,0,1,"cmdoption-llvm-readelf-sections"],"--section-mapping":[648,0,1,"cmdoption-llvm-readelf-section-mapping"],"--section-relocations":[648,0,1,"cmdoption-llvm-readelf-section-relocations"],"--section-symbols":[648,0,1,"cmdoption-llvm-readelf-section-symbols"],"--sections":[648,0,1,"cmdoption-llvm-readelf-sections"],"--segments":[648,0,1,"cmdoption-llvm-readelf-program-headers"],"--stack-sizes":[648,0,1,"cmdoption-llvm-readelf-stack-sizes"],"--stackmap":[648,0,1,"cmdoption-l
lvm-readelf-stackmap"],"--string-dump":[648,0,1,"cmdoption-llvm-readelf-string-dump"],"--symbols":[648,0,1,"cmdoption-llvm-readelf-symbols"],"--syms":[648,0,1,"cmdoption-llvm-readelf-symbols"],"--unwind":[648,0,1,"cmdoption-llvm-readelf-unwind"],"--version":[648,0,1,"cmdoption-llvm-readelf-version"],"--version-info":[648,0,1,"cmdoption-llvm-readelf-version-info"],"-A":[648,0,1,"cmdoption-llvm-readelf-arch-specific"],"-C":[648,0,1,"cmdoption-llvm-readelf-demangle"],"-I":[648,0,1,"cmdoption-llvm-readelf-histogram"],"-S":[648,0,1,"cmdoption-llvm-readelf-sections"],"-V":[648,0,1,"cmdoption-llvm-readelf-version-info"],"-d":[648,0,1,"cmdoption-llvm-readelf-dynamic-table"],"-e":[648,0,1,"cmdoption-llvm-readelf-headers"],"-g":[648,0,1,"cmdoption-llvm-readelf-section-groups"],"-h":[648,0,1,"cmdoption-llvm-readelf-file-header"],"-l":[648,0,1,"cmdoption-llvm-readelf-program-headers"],"-n":[648,0,1,"cmdoption-llvm-readelf-notes"],"-p":[648,0,1,"cmdoption-llvm-readelf-string-dump"],"-r":[648,0,1,"cmdoption-llvm-readelf-relocations"],"-s":[648,0,1,"cmdoption-llvm-readelf-symbols"],"-t":[648,0,1,"cmdoption-llvm-readelf-section-details"],"-u":[648,0,1,"cmdoption-llvm-readelf-unwind"],"-x":[648,0,1,"cmdoption-llvm-readelf-hex-dump"],"@<FILE>":[648,0,1,"cmdoption-llvm-readelf-arg-FILE"]},"llvm-readobj":{"--addrsig":[649,0,1,"cmdoption-llvm-readobj-addrsig"],"--all":[649,0,1,"cmdoption-llvm-readobj-all"],"--arch-specific":[649,0,1,"cmdoption-llvm-readobj-arch-specific"],"--bb-addr-map":[649,0,1,"cmdoption-llvm-readobj-bb-addr-map"],"--cg-profile":[649,0,1,"cmdoption-llvm-readobj-cg-profile"],"--codeview":[649,0,1,"cmdoption-llvm-readobj-codeview"],"--codeview-ghash":[649,0,1,"cmdoption-llvm-readobj-codeview-ghash"],"--codeview-merged-types":[649,0,1,"cmdoption-llvm-readobj-codeview-merged-types"],"--codeview-subsection-bytes":[649,0,1,"cmdoption-llvm-readobj-codeview-subsection-bytes"],"--coff-basereloc":[649,0,1,"cmdoption-llvm-readobj-coff-basereloc"],"--coff-debug-directory":[649,0
,1,"cmdoption-llvm-readobj-coff-debug-directory"],"--coff-directives":[649,0,1,"cmdoption-llvm-readobj-coff-directives"],"--coff-exports":[649,0,1,"cmdoption-llvm-readobj-coff-exports"],"--coff-imports":[649,0,1,"cmdoption-llvm-readobj-coff-imports"],"--coff-load-config":[649,0,1,"cmdoption-llvm-readobj-coff-load-config"],"--coff-resources":[649,0,1,"cmdoption-llvm-readobj-coff-resources"],"--coff-tls-directory":[649,0,1,"cmdoption-llvm-readobj-coff-tls-directory"],"--demangle":[649,0,1,"cmdoption-llvm-readobj-demangle"],"--dependent-libraries":[649,0,1,"cmdoption-llvm-readobj-dependent-libraries"],"--dt":[649,0,1,"cmdoption-llvm-readobj-dyn-symbols"],"--dyn-relocations":[649,0,1,"cmdoption-llvm-readobj-dyn-relocations"],"--dyn-symbols":[649,0,1,"cmdoption-llvm-readobj-dyn-symbols"],"--dyn-syms":[649,0,1,"cmdoption-llvm-readobj-dyn-symbols"],"--dynamic":[649,0,1,"cmdoption-llvm-readobj-dynamic-table"],"--dynamic-table":[649,0,1,"cmdoption-llvm-readobj-dynamic-table"],"--elf-linker-options":[649,0,1,"cmdoption-llvm-readobj-elf-linker-options"],"--elf-output-style":[649,0,1,"cmdoption-llvm-readobj-elf-output-style"],"--expand-relocs":[649,0,1,"cmdoption-llvm-readobj-expand-relocs"],"--file-header":[649,0,1,"cmdoption-llvm-readobj-file-header"],"--gnu-hash-table":[649,0,1,"cmdoption-llvm-readobj-gnu-hash-table"],"--hash-symbols":[649,0,1,"cmdoption-llvm-readobj-hash-symbols"],"--hash-table":[649,0,1,"cmdoption-llvm-readobj-hash-table"],"--headers":[649,0,1,"cmdoption-llvm-readobj-headers"],"--help":[649,0,1,"cmdoption-llvm-readobj-help"],"--hex-dump":[649,0,1,"cmdoption-llvm-readobj-hex-dump"],"--histogram":[649,0,1,"cmdoption-llvm-readobj-histogram"],"--macho-data-in-code":[649,0,1,"cmdoption-llvm-readobj-macho-data-in-code"],"--macho-dsymtab":[649,0,1,"cmdoption-llvm-readobj-macho-dsymtab"],"--macho-indirect-symbols":[649,0,1,"cmdoption-llvm-readobj-macho-indirect-symbols"],"--macho-linker-options":[649,0,1,"cmdoption-llvm-readobj-macho-linker-options"],"--macho-segm
ent":[649,0,1,"cmdoption-llvm-readobj-macho-segment"],"--macho-version-min":[649,0,1,"cmdoption-llvm-readobj-macho-version-min"],"--needed-libs":[649,0,1,"cmdoption-llvm-readobj-needed-libs"],"--notes":[649,0,1,"cmdoption-llvm-readobj-notes"],"--program-headers":[649,0,1,"cmdoption-llvm-readobj-program-headers"],"--raw-relr":[649,0,1,"cmdoption-llvm-readobj-raw-relr"],"--relocations":[649,0,1,"cmdoption-llvm-readobj-relocations"],"--relocs":[649,0,1,"cmdoption-llvm-readobj-relocations"],"--sd":[649,0,1,"cmdoption-llvm-readobj-section-data"],"--section-data":[649,0,1,"cmdoption-llvm-readobj-section-data"],"--section-groups":[649,0,1,"cmdoption-llvm-readobj-section-groups"],"--section-headers":[649,0,1,"cmdoption-llvm-readobj-sections"],"--section-mapping":[649,0,1,"cmdoption-llvm-readobj-section-mapping"],"--section-relocations":[649,0,1,"cmdoption-llvm-readobj-section-relocations"],"--section-symbols":[649,0,1,"cmdoption-llvm-readobj-section-symbols"],"--sections":[649,0,1,"cmdoption-llvm-readobj-sections"],"--segments":[649,0,1,"cmdoption-llvm-readobj-program-headers"],"--sr":[649,0,1,"cmdoption-llvm-readobj-section-relocations"],"--st":[649,0,1,"cmdoption-llvm-readobj-section-symbols"],"--stack-sizes":[649,0,1,"cmdoption-llvm-readobj-stack-sizes"],"--stackmap":[649,0,1,"cmdoption-llvm-readobj-stackmap"],"--string-dump":[649,0,1,"cmdoption-llvm-readobj-string-dump"],"--string-table":[649,0,1,"cmdoption-llvm-readobj-string-table"],"--symbols":[649,0,1,"cmdoption-llvm-readobj-symbols"],"--syms":[649,0,1,"cmdoption-llvm-readobj-symbols"],"--unwind":[649,0,1,"cmdoption-llvm-readobj-unwind"],"--version":[649,0,1,"cmdoption-llvm-readobj-version"],"--version-info":[649,0,1,"cmdoption-llvm-readobj-version-info"],"-A":[649,0,1,"cmdoption-llvm-readobj-arch-specific"],"-C":[649,0,1,"cmdoption-llvm-readobj-demangle"],"-I":[649,0,1,"cmdoption-llvm-readobj-histogram"],"-S":[649,0,1,"cmdoption-llvm-readobj-sections"],"-V":[649,0,1,"cmdoption-llvm-readobj-version-info"],"-d":[649,
0,1,"cmdoption-llvm-readobj-dynamic-table"],"-e":[649,0,1,"cmdoption-llvm-readobj-headers"],"-g":[649,0,1,"cmdoption-llvm-readobj-section-groups"],"-h":[649,0,1,"cmdoption-llvm-readobj-file-header"],"-l":[649,0,1,"cmdoption-llvm-readobj-program-headers"],"-n":[649,0,1,"cmdoption-llvm-readobj-notes"],"-p":[649,0,1,"cmdoption-llvm-readobj-string-dump"],"-r":[649,0,1,"cmdoption-llvm-readobj-relocations"],"-s":[649,0,1,"cmdoption-llvm-readobj-symbols"],"-u":[649,0,1,"cmdoption-llvm-readobj-unwind"],"-x":[649,0,1,"cmdoption-llvm-readobj-hex-dump"],"@<FILE>":[649,0,1,"cmdoption-llvm-readobj-arg-FILE"]},"llvm-size":{"--arch":[650,0,1,"cmdoption-llvm-size-arch"],"--common":[650,0,1,"cmdoption-llvm-size-common"],"--format":[650,0,1,"cmdoption-llvm-size-format"],"--help":[650,0,1,"cmdoption-llvm-size-help"],"--radix":[650,0,1,"cmdoption-llvm-size-radix"],"--totals":[650,0,1,"cmdoption-llvm-size-totals"],"--version":[650,0,1,"cmdoption-llvm-size-version"],"-A":[650,0,1,"cmdoption-llvm-size-A"],"-B":[650,0,1,"cmdoption-llvm-size-B"],"-d":[650,0,1,"cmdoption-llvm-size-d"],"-h":[650,0,1,"cmdoption-llvm-size-help"],"-l":[650,0,1,"cmdoption-llvm-size-l"],"-m":[650,0,1,"cmdoption-llvm-size-m"],"-o":[650,0,1,"cmdoption-llvm-size-o"],"-t":[650,0,1,"cmdoption-llvm-size-totals"],"-x":[650,0,1,"cmdoption-llvm-size-x"],"@<FILE>":[650,0,1,"cmdoption-llvm-size-arg-FILE"]},"llvm-stress":{"-o":[651,0,1,"cmdoption-llvm-stress-o"],"-seed":[651,0,1,"cmdoption-llvm-stress-seed"],"-size":[651,0,1,"cmdoption-llvm-stress-size"]},"llvm-strings":{"--all":[652,0,1,"cmdoption-llvm-strings-all"],"--bytes":[652,0,1,"cmdoption-llvm-strings-bytes"],"--help":[652,0,1,"cmdoption-llvm-strings-help"],"--print-file-name":[652,0,1,"cmdoption-llvm-strings-print-file-name"],"--radix":[652,0,1,"cmdoption-llvm-strings-radix"],"--version":[652,0,1,"cmdoption-llvm-strings-version"],"-a":[652,0,1,"cmdoption-llvm-strings-all"],"-f":[652,0,1,"cmdoption-llvm-strings-print-file-name"],"-h":[652,0,1,"cmdoption-llvm-strings-h
elp"],"-n":[652,0,1,"cmdoption-llvm-strings-bytes"],"-t":[652,0,1,"cmdoption-llvm-strings-radix"],"@<FILE>":[652,0,1,"cmdoption-llvm-strings-arg-FILE"]},"llvm-strip":{"--allow-broken-links":[653,0,1,"cmdoption-llvm-strip-allow-broken-links"],"--disable-deterministic-archives":[653,0,1,"cmdoption-llvm-strip-disable-deterministic-archives"],"--discard-all":[653,0,1,"cmdoption-llvm-strip-discard-all"],"--discard-locals":[653,0,1,"cmdoption-llvm-strip-discard-locals"],"--enable-deterministic-archives":[653,0,1,"cmdoption-llvm-strip-enable-deterministic-archives"],"--help":[653,0,1,"cmdoption-llvm-strip-help"],"--keep-file-symbols":[653,0,1,"cmdoption-llvm-strip-keep-file-symbols"],"--keep-section":[653,0,1,"cmdoption-llvm-strip-keep-section"],"--keep-symbol":[653,0,1,"cmdoption-llvm-strip-keep-symbol"],"--no-strip-all":[653,0,1,"cmdoption-llvm-strip-no-strip-all"],"--only-keep-debug":[653,0,1,"cmdoption-llvm-strip-only-keep-debug"],"--preserve-dates":[653,0,1,"cmdoption-llvm-strip-preserve-dates"],"--regex":[653,0,1,"cmdoption-llvm-strip-regex"],"--remove-section":[653,0,1,"cmdoption-llvm-strip-remove-section"],"--strip-all":[653,0,1,"cmdoption-llvm-strip-strip-all"],"--strip-all-gnu":[653,0,1,"cmdoption-llvm-strip-strip-all-gnu"],"--strip-debug":[653,0,1,"cmdoption-llvm-strip-strip-debug"],"--strip-sections":[653,0,1,"cmdoption-llvm-strip-strip-sections"],"--strip-symbol":[653,0,1,"cmdoption-llvm-strip-strip-symbol"],"--strip-unneeded":[653,0,1,"cmdoption-llvm-strip-strip-unneeded"],"--version":[653,0,1,"cmdoption-llvm-strip-version"],"--wildcard":[653,0,1,"cmdoption-llvm-strip-wildcard"],"-D":[653,0,1,"cmdoption-llvm-strip-enable-deterministic-archives"],"-K":[653,0,1,"cmdoption-llvm-strip-keep-symbol"],"-N":[653,0,1,"cmdoption-llvm-strip-strip-symbol"],"-R":[653,0,1,"cmdoption-llvm-strip-remove-section"],"-S":[653,0,1,"cmdoption-llvm-strip-strip-debug"],"-T":[653,0,1,"cmdoption-llvm-strip-T"],"-U":[653,0,1,"cmdoption-llvm-strip-disable-deterministic-archives"],"-V":[
653,0,1,"cmdoption-llvm-strip-version"],"-X":[653,0,1,"cmdoption-llvm-strip-discard-locals"],"-d":[653,0,1,"cmdoption-llvm-strip-strip-debug"],"-g":[653,0,1,"cmdoption-llvm-strip-strip-debug"],"-h":[653,0,1,"cmdoption-llvm-strip-help"],"-o":[653,0,1,"cmdoption-llvm-strip-o"],"-p":[653,0,1,"cmdoption-llvm-strip-preserve-dates"],"-s":[653,0,1,"cmdoption-llvm-strip-strip-all"],"-w":[653,0,1,"cmdoption-llvm-strip-wildcard"],"-x":[653,0,1,"cmdoption-llvm-strip-discard-all"],"@<FILE>":[653,0,1,"cmdoption-llvm-strip-arg-FILE"]},"llvm-symbolizer":{"--addresses":[654,0,1,"cmdoption-llvm-symbolizer-print-address"],"--adjust-vma":[654,0,1,"cmdoption-llvm-symbolizer-adjust-vma"],"--basenames":[654,0,1,"cmdoption-llvm-symbolizer-basenames"],"--default-arch":[654,0,1,"cmdoption-llvm-symbolizer-default-arch"],"--demangle":[654,0,1,"cmdoption-llvm-symbolizer-demangle"],"--dia":[654,0,1,"cmdoption-llvm-symbolizer-dia"],"--dsym-hint":[654,0,1,"cmdoption-llvm-symbolizer-dsym-hint"],"--dwp":[654,0,1,"cmdoption-llvm-symbolizer-dwp"],"--exe":[654,0,1,"cmdoption-llvm-symbolizer-obj"],"--fallback-debug-path":[654,0,1,"cmdoption-llvm-symbolizer-fallback-debug-path"],"--functions":[654,0,1,"cmdoption-llvm-symbolizer-functions"],"--help":[654,0,1,"cmdoption-llvm-symbolizer-help"],"--inlines":[654,0,1,"cmdoption-llvm-symbolizer-inlining"],"--inlining":[654,0,1,"cmdoption-llvm-symbolizer-inlining"],"--no-demangle":[654,0,1,"cmdoption-llvm-symbolizer-no-demangle"],"--no-inlines":[654,0,1,"cmdoption-llvm-symbolizer-no-inlines"],"--obj":[654,0,1,"cmdoption-llvm-symbolizer-obj"],"--output-style":[654,0,1,"cmdoption-llvm-symbolizer-output-style"],"--pretty-print":[654,0,1,"cmdoption-llvm-symbolizer-pretty-print"],"--print-address":[654,0,1,"cmdoption-llvm-symbolizer-print-address"],"--print-source-context-lines":[654,0,1,"cmdoption-llvm-symbolizer-print-source-context-lines"],"--relativenames":[654,0,1,"cmdoption-llvm-symbolizer-relativenames"],"--verbose":[654,0,1,"cmdoption-llvm-symbolizer-verbose
"],"--version":[654,0,1,"cmdoption-llvm-symbolizer-version"],"-C":[654,0,1,"cmdoption-llvm-symbolizer-demangle"],"-a":[654,0,1,"cmdoption-llvm-symbolizer-print-address"],"-e":[654,0,1,"cmdoption-llvm-symbolizer-obj"],"-f":[654,0,1,"cmdoption-llvm-symbolizer-functions"],"-h":[654,0,1,"cmdoption-llvm-symbolizer-help"],"-i":[654,0,1,"cmdoption-llvm-symbolizer-inlining"],"-p":[654,0,1,"cmdoption-llvm-symbolizer-pretty-print"],"-s":[654,0,1,"cmdoption-llvm-symbolizer-basenames"],"-v":[654,0,1,"cmdoption-llvm-symbolizer-version"],"@<FILE>":[654,0,1,"cmdoption-llvm-symbolizer-arg-FILE"]},FileCheck:{"--allow-deprecated-dag-overlap":[611,0,1,"cmdoption-FileCheck-allow-deprecated-dag-overlap"],"--allow-empty":[611,0,1,"cmdoption-FileCheck-allow-empty"],"--allow-unused-prefixes":[611,0,1,"cmdoption-FileCheck-allow-unused-prefixes"],"--check-prefix":[611,0,1,"cmdoption-FileCheck-check-prefix"],"--check-prefixes":[611,0,1,"cmdoption-FileCheck-check-prefixes"],"--color":[611,0,1,"cmdoption-FileCheck-color"],"--comment-prefixes":[611,0,1,"cmdoption-FileCheck-comment-prefixes"],"--dump-input":[611,0,1,"cmdoption-FileCheck-dump-input"],"--dump-input-context":[611,0,1,"cmdoption-FileCheck-dump-input-context"],"--dump-input-filter":[611,0,1,"cmdoption-FileCheck-dump-input-filter"],"--enable-var-scope":[611,0,1,"cmdoption-FileCheck-enable-var-scope"],"--ignore-case":[611,0,1,"cmdoption-FileCheck-ignore-case"],"--implicit-check-not":[611,0,1,"cmdoption-FileCheck-implicit-check-not"],"--input-file":[611,0,1,"cmdoption-FileCheck-input-file"],"--match-full-lines":[611,0,1,"cmdoption-FileCheck-match-full-lines"],"--strict-whitespace":[611,0,1,"cmdoption-FileCheck-strict-whitespace"],"-D#<FMT>,<NUMVAR>":[611,0,1,"cmdoption-FileCheck-D-FMT-NUMVAR"],"-D<VAR":[611,0,1,"cmdoption-FileCheck-D-VAR"],"-help":[611,0,1,"cmdoption-FileCheck-help"],"-v":[611,0,1,"cmdoption-FileCheck-v"],"-version":[611,0,1,"cmdoption-FileCheck-version"],"-vv":[611,0,1,"cmdoption-FileCheck-vv"]},dsymutil:{"--accelerator
":[614,0,1,"cmdoption-dsymutil-accelerator"],"--arch":[614,0,1,"cmdoption-dsymutil-arch"],"--dump-debug-map":[614,0,1,"cmdoption-dsymutil-dump-debug-map"],"--flat":[614,0,1,"cmdoption-dsymutil-flat"],"--gen-reproducer":[614,0,1,"cmdoption-dsymutil-gen-reproducer"],"--help":[614,0,1,"cmdoption-dsymutil-help"],"--keep-function-for-static":[614,0,1,"cmdoption-dsymutil-keep-function-for-static"],"--minimize":[614,0,1,"cmdoption-dsymutil-minimize"],"--no-odr":[614,0,1,"cmdoption-dsymutil-no-odr"],"--no-output":[614,0,1,"cmdoption-dsymutil-no-output"],"--no-swiftmodule-timestamp":[614,0,1,"cmdoption-dsymutil-no-swiftmodule-timestamp"],"--num-threads":[614,0,1,"cmdoption-dsymutil-num-threads"],"--object-prefix-map":[614,0,1,"cmdoption-dsymutil-object-prefix-map"],"--oso-prepend-path":[614,0,1,"cmdoption-dsymutil-oso-prepend-path"],"--out":[614,0,1,"cmdoption-dsymutil-out"],"--papertrail":[614,0,1,"cmdoption-dsymutil-papertrail"],"--remarks-output-format":[614,0,1,"cmdoption-dsymutil-remarks-output-format"],"--remarks-prepend-path":[614,0,1,"cmdoption-dsymutil-remarks-prepend-path"],"--statistics":[614,0,1,"cmdoption-dsymutil-statistics"],"--symbol-map":[614,0,1,"cmdoption-dsymutil-symbol-map"],"--symtab":[614,0,1,"cmdoption-dsymutil-s"],"--toolchain":[614,0,1,"cmdoption-dsymutil-toolchain"],"--update":[614,0,1,"cmdoption-dsymutil-u"],"--use-reproducer":[614,0,1,"cmdoption-dsymutil-use-reproducer"],"--verbose":[614,0,1,"cmdoption-dsymutil-verbose"],"--verify":[614,0,1,"cmdoption-dsymutil-verify"],"--version":[614,0,1,"cmdoption-dsymutil-v"],"-S":[614,0,1,"cmdoption-dsymutil-S"],"-f":[614,0,1,"cmdoption-dsymutil-flat"],"-h":[614,0,1,"cmdoption-dsymutil-help"],"-j":[614,0,1,"cmdoption-dsymutil-num-threads"],"-o":[614,0,1,"cmdoption-dsymutil-out"],"-s":[614,0,1,"cmdoption-dsymutil-s"],"-u":[614,0,1,"cmdoption-dsymutil-u"],"-v":[614,0,1,"cmdoption-dsymutil-v"],"-y":[614,0,1,"cmdoption-dsymutil-y"],"-z":[614,0,1,"cmdoption-dsymutil-minimize"]},lit:{"--config-prefix":[616,0,1,"cm
doption-lit-config-prefix"],"--debug":[616,0,1,"cmdoption-lit-debug"],"--echo-all-commands":[616,0,1,"cmdoption-lit-vv"],"--filter":[616,0,1,"cmdoption-lit-filter"],"--filter-out":[616,0,1,"cmdoption-lit-filter-out"],"--help":[616,0,1,"cmdoption-lit-h"],"--ignore-fail":[616,0,1,"cmdoption-lit-ignore-fail"],"--max-failures":[616,0,1,"cmdoption-lit-max-failures"],"--max-tests":[616,0,1,"cmdoption-lit-max-tests"],"--max-time":[616,0,1,"cmdoption-lit-max-time"],"--no-indirectly-run-check":[616,0,1,"cmdoption-lit-no-indirectly-run-check"],"--no-progress-bar":[616,0,1,"cmdoption-lit-no-progress-bar"],"--num-shards":[616,0,1,"cmdoption-lit-num-shards"],"--order":[616,0,1,"cmdoption-lit-order"],"--param":[616,0,1,"cmdoption-lit-D"],"--path":[616,0,1,"cmdoption-lit-path"],"--quiet":[616,0,1,"cmdoption-lit-q"],"--run-shard":[616,0,1,"cmdoption-lit-run-shard"],"--show-all":[616,0,1,"cmdoption-lit-a"],"--show-suites":[616,0,1,"cmdoption-lit-show-suites"],"--show-tests":[616,0,1,"cmdoption-lit-show-tests"],"--show-unsupported":[616,0,1,"cmdoption-lit-show-unsupported"],"--show-xfail":[616,0,1,"cmdoption-lit-show-xfail"],"--shuffle":[616,0,1,"cmdoption-lit-shuffle"],"--succinct":[616,0,1,"cmdoption-lit-s"],"--time-tests":[616,0,1,"cmdoption-lit-time-tests"],"--timeout":[616,0,1,"cmdoption-lit-timeout"],"--verbose":[616,0,1,"cmdoption-lit-v"],"--vg":[616,0,1,"cmdoption-lit-vg"],"--vg-arg":[616,0,1,"cmdoption-lit-vg-arg"],"--vg-leak":[616,0,1,"cmdoption-lit-vg-leak"],"--workers":[616,0,1,"cmdoption-lit-j"],"--xfail":[616,0,1,"cmdoption-lit-xfail"],"--xfail-not":[616,0,1,"cmdoption-lit-xfail-not"],"-D":[616,0,1,"cmdoption-lit-D"],"-a":[616,0,1,"cmdoption-lit-a"],"-h":[616,0,1,"cmdoption-lit-h"],"-j":[616,0,1,"cmdoption-lit-j"],"-q":[616,0,1,"cmdoption-lit-q"],"-s":[616,0,1,"cmdoption-lit-s"],"-v":[616,0,1,"cmdoption-lit-v"],"-vv":[616,0,1,"cmdoption-lit-vv"]},llc:{"--disable-excess-fp-precision":[617,0,1,"cmdoption-llc-disable-excess-fp-precision"],"--enable-no-infs-fp-math":[617,0,
1,"cmdoption-llc-enable-no-infs-fp-math"],"--enable-no-nans-fp-math":[617,0,1,"cmdoption-llc-enable-no-nans-fp-math"],"--enable-no-signed-zeros-fp-math":[617,0,1,"cmdoption-llc-enable-no-signed-zeros-fp-math"],"--enable-no-trapping-fp-math":[617,0,1,"cmdoption-llc-enable-no-trapping-fp-math"],"--enable-unsafe-fp-math":[617,0,1,"cmdoption-llc-enable-unsafe-fp-math"],"--frame-pointer":[617,0,1,"cmdoption-llc-frame-pointer"],"--load":[617,0,1,"cmdoption-llc-load"],"--print-after-isel":[617,0,1,"cmdoption-llc-print-after-isel"],"--regalloc":[617,0,1,"cmdoption-llc-regalloc"],"--spiller":[617,0,1,"cmdoption-llc-spiller"],"--stats":[617,0,1,"cmdoption-llc-stats"],"--time-passes":[617,0,1,"cmdoption-llc-time-passes"],"--x86-asm-syntax":[617,0,1,"cmdoption-llc-x86-asm-syntax"],"-O":[617,0,1,"cmdoption-llc-O"],"-filetype":[617,0,1,"cmdoption-llc-filetype"],"-help":[617,0,1,"cmdoption-llc-help"],"-march":[617,0,1,"cmdoption-llc-march"],"-mattr":[617,0,1,"cmdoption-llc-mattr"],"-mcpu":[617,0,1,"cmdoption-llc-mcpu"],"-meabi":[617,0,1,"cmdoption-llc-meabi"],"-mtriple":[617,0,1,"cmdoption-llc-mtriple"],"-o":[617,0,1,"cmdoption-llc-o"],"-remarks-section":[617,0,1,"cmdoption-llc-remarks-section"],"-stack-size-section":[617,0,1,"cmdoption-llc-stack-size-section"]},lli:{"-code-model":[619,0,1,"cmdoption-lli-code-model"],"-disable-excess-fp-precision":[619,0,1,"cmdoption-lli-disable-excess-fp-precision"],"-disable-post-RA-scheduler":[619,0,1,"cmdoption-lli-disable-post-RA-scheduler"],"-disable-spill-fusing":[619,0,1,"cmdoption-lli-disable-spill-fusing"],"-enable-no-infs-fp-math":[619,0,1,"cmdoption-lli-enable-no-infs-fp-math"],"-enable-no-nans-fp-math":[619,0,1,"cmdoption-lli-enable-no-nans-fp-math"],"-enable-unsafe-fp-math":[619,0,1,"cmdoption-lli-enable-unsafe-fp-math"],"-fake-argv0":[619,0,1,"cmdoption-lli-fake-argv0"],"-force-interpreter":[619,0,1,"cmdoption-lli-force-interpreter"],"-help":[619,0,1,"cmdoption-lli-help"],"-jit-enable-eh":[619,0,1,"cmdoption-lli-jit-enable-eh"],"-jo
in-liveintervals":[619,0,1,"cmdoption-lli-join-liveintervals"],"-load":[619,0,1,"cmdoption-lli-load"],"-march":[619,0,1,"cmdoption-lli-march"],"-mattr":[619,0,1,"cmdoption-lli-mattr"],"-mcpu":[619,0,1,"cmdoption-lli-mcpu"],"-mtriple":[619,0,1,"cmdoption-lli-mtriple"],"-nozero-initialized-in-bss":[619,0,1,"cmdoption-lli-nozero-initialized-in-bss"],"-pre-RA-sched":[619,0,1,"cmdoption-lli-pre-RA-sched"],"-regalloc":[619,0,1,"cmdoption-lli-regalloc"],"-relocation-model":[619,0,1,"cmdoption-lli-relocation-model"],"-soft-float":[619,0,1,"cmdoption-lli-soft-float"],"-spiller":[619,0,1,"cmdoption-lli-spiller"],"-stats":[619,0,1,"cmdoption-lli-stats"],"-time-passes":[619,0,1,"cmdoption-lli-time-passes"],"-version":[619,0,1,"cmdoption-lli-version"],"-x86-asm-syntax":[619,0,1,"cmdoption-lli-x86-asm-syntax"]},opt:{"-S":[657,0,1,"cmdoption-opt-S"],"-debug":[657,0,1,"cmdoption-opt-debug"],"-f":[657,0,1,"cmdoption-opt-f"],"-help":[657,0,1,"cmdoption-opt-help"],"-load":[657,0,1,"cmdoption-opt-load"],"-o":[657,0,1,"cmdoption-opt-o"],"-stats":[657,0,1,"cmdoption-opt-stats"],"-strip-debug":[657,0,1,"cmdoption-opt-strip-debug"],"-time-passes":[657,0,1,"cmdoption-opt-time-passes"],"-verify-each":[657,0,1,"cmdoption-opt-verify-each"],"-{passname}":[657,0,1,"cmdoption-opt-passname"]},tblgen:{"-D":[658,0,1,"cmdoption-tblgen-D"],"-I":[658,0,1,"cmdoption-tblgen-I"],"-asmformat-error-is-fatal":[658,0,1,"cmdoption-tblgen-asmformat-error-is-fatal"],"-asmparsernum":[658,0,1,"cmdoption-tblgen-asmparsernum"],"-asmwriternum":[658,0,1,"cmdoption-tblgen-asmwriternum"],"-clang-component":[658,0,1,"cmdoption-tblgen-clang-component"],"-class":[658,0,1,"cmdoption-tblgen-class"],"-combiners":[658,0,1,"cmdoption-tblgen-combiners"],"-d":[658,0,1,"cmdoption-tblgen-0"],"-debug":[658,0,1,"cmdoption-tblgen-debug"],"-dialect":[658,0,1,"cmdoption-tblgen-dialect"],"-dialect-opclass-base":[658,0,1,"cmdoption-tblgen-dialect-opclass-base"],"-dump-json":[658,0,1,"cmdoption-tblgen-dump-json"],"-gen-arm-bf16":[658,0,1,"
cmdoption-tblgen-gen-arm-bf16"],"-gen-arm-cde-builtin-aliases":[658,0,1,"cmdoption-tblgen-gen-arm-cde-builtin-aliases"],"-gen-arm-cde-builtin-codegen":[658,0,1,"cmdoption-tblgen-gen-arm-cde-builtin-codegen"],"-gen-arm-cde-builtin-def":[658,0,1,"cmdoption-tblgen-gen-arm-cde-builtin-def"],"-gen-arm-cde-builtin-sema":[658,0,1,"cmdoption-tblgen-gen-arm-cde-builtin-sema"],"-gen-arm-cde-header":[658,0,1,"cmdoption-tblgen-gen-arm-cde-header"],"-gen-arm-fp16":[658,0,1,"cmdoption-tblgen-gen-arm-fp16"],"-gen-arm-mve-builtin-aliases":[658,0,1,"cmdoption-tblgen-gen-arm-mve-builtin-aliases"],"-gen-arm-mve-builtin-codegen":[658,0,1,"cmdoption-tblgen-gen-arm-mve-builtin-codegen"],"-gen-arm-mve-builtin-def":[658,0,1,"cmdoption-tblgen-gen-arm-mve-builtin-def"],"-gen-arm-mve-builtin-sema":[658,0,1,"cmdoption-tblgen-gen-arm-mve-builtin-sema"],"-gen-arm-mve-header":[658,0,1,"cmdoption-tblgen-gen-arm-mve-header"],"-gen-arm-neon":[658,0,1,"cmdoption-tblgen-gen-arm-neon"],"-gen-arm-neon-sema":[658,0,1,"cmdoption-tblgen-gen-arm-neon-sema"],"-gen-arm-neon-test":[658,0,1,"cmdoption-tblgen-gen-arm-neon-test"],"-gen-arm-sve-builtin-codegen":[658,0,1,"cmdoption-tblgen-gen-arm-sve-builtin-codegen"],"-gen-arm-sve-builtins":[658,0,1,"cmdoption-tblgen-gen-arm-sve-builtins"],"-gen-arm-sve-header":[658,0,1,"cmdoption-tblgen-gen-arm-sve-header"],"-gen-arm-sve-sema-rangechecks":[658,0,1,"cmdoption-tblgen-gen-arm-sve-sema-rangechecks"],"-gen-arm-sve-typeflags":[658,0,1,"cmdoption-tblgen-gen-arm-sve-typeflags"],"-gen-asm-matcher":[658,0,1,"cmdoption-tblgen-gen-asm-matcher"],"-gen-asm-parser":[658,0,1,"cmdoption-tblgen-gen-asm-parser"],"-gen-asm-writer":[658,0,1,"cmdoption-tblgen-gen-asm-writer"],"-gen-attr-docs":[658,0,1,"cmdoption-tblgen-gen-attr-docs"],"-gen-attrs":[658,0,1,"cmdoption-tblgen-gen-attrs"],"-gen-automata":[658,0,1,"cmdoption-tblgen-gen-automata"],"-gen-avail-interface-decls":[658,0,1,"cmdoption-tblgen-gen-avail-interface-decls"],"-gen-avail-interface-defs":[658,0,1,"cmdoption-tblgen-gen-a
vail-interface-defs"],"-gen-callingconv":[658,0,1,"cmdoption-tblgen-gen-callingconv"],"-gen-clang-attr-ast-visitor":[658,0,1,"cmdoption-tblgen-gen-clang-attr-ast-visitor"],"-gen-clang-attr-classes":[658,0,1,"cmdoption-tblgen-gen-clang-attr-classes"],"-gen-clang-attr-has-attribute-impl":[658,0,1,"cmdoption-tblgen-gen-clang-attr-has-attribute-impl"],"-gen-clang-attr-impl":[658,0,1,"cmdoption-tblgen-gen-clang-attr-impl"],"-gen-clang-attr-list"":[658,0,1,"cmdoption-tblgen-gen-clang-attr-list"],"-gen-clang-attr-node-traverse":[658,0,1,"cmdoption-tblgen-gen-clang-attr-node-traverse"],"-gen-clang-attr-parsed-attr-impl":[658,0,1,"cmdoption-tblgen-gen-clang-attr-parsed-attr-impl"],"-gen-clang-attr-parsed-attr-kinds":[658,0,1,"cmdoption-tblgen-gen-clang-attr-parsed-attr-kinds"],"-gen-clang-attr-parsed-attr-list":[658,0,1,"cmdoption-tblgen-gen-clang-attr-parsed-attr-list"],"-gen-clang-attr-parser-string-switches":[658,0,1,"cmdoption-tblgen-gen-clang-attr-parser-string-switches"],"-gen-clang-attr-pch-read":[658,0,1,"cmdoption-tblgen-gen-clang-attr-pch-read"],"-gen-clang-attr-pch-write":[658,0,1,"cmdoption-tblgen-gen-clang-attr-pch-write"],"-gen-clang-attr-spelling-index":[658,0,1,"cmdoption-tblgen-gen-clang-attr-spelling-index"],"-gen-clang-attr-subject-match-rule-list":[658,0,1,"cmdoption-tblgen-gen-clang-attr-subject-match-rule-list"],"-gen-clang-attr-subject-match-rules-parser-string-switches":[658,0,1,"cmdoption-tblgen-gen-clang-attr-subject-match-rules-parser-string-switches"],"-gen-clang-attr-template-instantiate":[658,0,1,"cmdoption-tblgen-gen-clang-attr-template-instantiate"],"-gen-clang-attr-text-node-dump":[658,0,1,"cmdoption-tblgen-gen-clang-attr-text-node-dump"],"-gen-clang-basic-reader":[658,0,1,"cmdoption-tblgen-gen-clang-basic-reader"],"-gen-clang-basic-writer":[658,0,1,"cmdoption-tblgen-gen-clang-basic-writer"],"-gen-clang-comment-command-info":[658,0,1,"cmdoption-tblgen-gen-clang-comment-command-info"],"-gen-clang-comment-command-list":[658,0,1,"cmdoption-tblge
n-gen-clang-comment-command-list"],"-gen-clang-comment-html-named-character-references":[658,0,1,"cmdoption-tblgen-gen-clang-comment-html-named-character-references"],"-gen-clang-comment-html-tags":[658,0,1,"cmdoption-tblgen-gen-clang-comment-html-tags"],"-gen-clang-comment-html-tags-properties":[658,0,1,"cmdoption-tblgen-gen-clang-comment-html-tags-properties"],"-gen-clang-comment-nodes":[658,0,1,"cmdoption-tblgen-gen-clang-comment-nodes"],"-gen-clang-data-collectors":[658,0,1,"cmdoption-tblgen-gen-clang-data-collectors"],"-gen-clang-decl-nodes":[658,0,1,"cmdoption-tblgen-gen-clang-decl-nodes"],"-gen-clang-diag-groups":[658,0,1,"cmdoption-tblgen-gen-clang-diag-groups"],"-gen-clang-diags-defs":[658,0,1,"cmdoption-tblgen-gen-clang-diags-defs"],"-gen-clang-diags-index-name":[658,0,1,"cmdoption-tblgen-gen-clang-diags-index-name"],"-gen-clang-opcodes":[658,0,1,"cmdoption-tblgen-gen-clang-opcodes"],"-gen-clang-opencl-builtins":[658,0,1,"cmdoption-tblgen-gen-clang-opencl-builtins"],"-gen-clang-sa-checkers":[658,0,1,"cmdoption-tblgen-gen-clang-sa-checkers"],"-gen-clang-stmt-nodes":[658,0,1,"cmdoption-tblgen-gen-clang-stmt-nodes"],"-gen-clang-test-pragma-attribute-supported-attributes":[658,0,1,"cmdoption-tblgen-gen-clang-test-pragma-attribute-supported-attributes"],"-gen-clang-type-nodes":[658,0,1,"cmdoption-tblgen-gen-clang-type-nodes"],"-gen-clang-type-reader":[658,0,1,"cmdoption-tblgen-gen-clang-type-reader"],"-gen-clang-type-writer":[658,0,1,"cmdoption-tblgen-gen-clang-type-writer"],"-gen-compress-inst-emitter":[658,0,1,"cmdoption-tblgen-gen-compress-inst-emitter"],"-gen-ctags":[658,0,1,"cmdoption-tblgen-gen-ctags"],"-gen-dag-isel":[658,0,1,"cmdoption-tblgen-gen-dag-isel"],"-gen-dfa-packetizer":[658,0,1,"cmdoption-tblgen-gen-dfa-packetizer"],"-gen-diag-docs":[658,0,1,"cmdoption-tblgen-gen-diag-docs"],"-gen-dialect-doc":[658,0,1,"cmdoption-tblgen-gen-dialect-doc"],"-gen-directive-decl":[658,0,1,"cmdoption-tblgen-1"],"-gen-directive-gen":[658,0,1,"cmdoption-tblgen-gen-di
rective-gen"],"-gen-directive-impl":[658,0,1,"cmdoption-tblgen-gen-directive-impl"],"-gen-disassembler":[658,0,1,"cmdoption-tblgen-gen-disassembler"],"-gen-emitter":[658,0,1,"cmdoption-tblgen-gen-emitter"],"-gen-enum-decls":[658,0,1,"cmdoption-tblgen-gen-enum-decls"],"-gen-enum-defs":[658,0,1,"cmdoption-tblgen-gen-enum-defs"],"-gen-enum-from-llvmir-conversions":[658,0,1,"cmdoption-tblgen-gen-enum-from-llvmir-conversions"],"-gen-enum-to-llvmir-conversions":[658,0,1,"cmdoption-tblgen-gen-enum-to-llvmir-conversions"],"-gen-exegesis":[658,0,1,"cmdoption-tblgen-gen-exegesis"],"-gen-fast-isel":[658,0,1,"cmdoption-tblgen-gen-fast-isel"],"-gen-global-isel":[658,0,1,"cmdoption-tblgen-gen-global-isel"],"-gen-global-isel-combiner":[658,0,1,"cmdoption-tblgen-gen-global-isel-combiner"],"-gen-instr-docs":[658,0,1,"cmdoption-tblgen-gen-instr-docs"],"-gen-instr-info":[658,0,1,"cmdoption-tblgen-gen-instr-info"],"-gen-intrinsic-enums":[658,0,1,"cmdoption-tblgen-gen-intrinsic-enums"],"-gen-intrinsic-impl":[658,0,1,"cmdoption-tblgen-gen-intrinsic-impl"],"-gen-llvmir-conversions":[658,0,1,"cmdoption-tblgen-gen-llvmir-conversions"],"-gen-llvmir-intrinsics":[658,0,1,"cmdoption-tblgen-gen-llvmir-intrinsics"],"-gen-op-decls":[658,0,1,"cmdoption-tblgen-gen-op-decls"],"-gen-op-defs":[658,0,1,"cmdoption-tblgen-gen-op-defs"],"-gen-op-doc":[658,0,1,"cmdoption-tblgen-gen-op-doc"],"-gen-opt-docs":[658,0,1,"cmdoption-tblgen-gen-opt-docs"],"-gen-opt-parser-defs":[658,0,1,"cmdoption-tblgen-gen-opt-parser-defs"],"-gen-opt-rst":[658,0,1,"cmdoption-tblgen-gen-opt-rst"],"-gen-pass-decls":[658,0,1,"cmdoption-tblgen-gen-pass-decls"],"-gen-pass-doc":[658,0,1,"cmdoption-tblgen-gen-pass-doc"],"-gen-pseudo-lowering":[658,0,1,"cmdoption-tblgen-gen-pseudo-lowering"],"-gen-register-bank":[658,0,1,"cmdoption-tblgen-gen-register-bank"],"-gen-register-info":[658,0,1,"cmdoption-tblgen-gen-register-info"],"-gen-rewriters":[658,0,1,"cmdoption-tblgen-gen-rewriters"],"-gen-riscv-vector-builtin-codegen":[658,0,1,"cmdoptio
n-tblgen-gen-riscv-vector-builtin-codegen"],"-gen-riscv-vector-builtins":[658,0,1,"cmdoption-tblgen-gen-riscv-vector-builtins"],"-gen-riscv-vector-header":[658,0,1,"cmdoption-tblgen-gen-riscv-vector-header"],"-gen-searchable-tables":[658,0,1,"cmdoption-tblgen-gen-searchable-tables"],"-gen-spirv-avail-impls":[658,0,1,"cmdoption-tblgen-gen-spirv-avail-impls"],"-gen-spirv-capability-implication":[658,0,1,"cmdoption-tblgen-gen-spirv-capability-implication"],"-gen-spirv-enum-avail-decls":[658,0,1,"cmdoption-tblgen-gen-spirv-enum-avail-decls"],"-gen-spirv-enum-avail-defs":[658,0,1,"cmdoption-tblgen-gen-spirv-enum-avail-defs"],"-gen-spirv-op-utils":[658,0,1,"cmdoption-tblgen-gen-spirv-op-utils"],"-gen-spirv-serialization":[658,0,1,"cmdoption-tblgen-gen-spirv-serialization"],"-gen-struct-attr-decls":[658,0,1,"cmdoption-tblgen-gen-struct-attr-decls"],"-gen-struct-attr-defs":[658,0,1,"cmdoption-tblgen-gen-struct-attr-defs"],"-gen-subtarget":[658,0,1,"cmdoption-tblgen-gen-subtarget"],"-gen-typedef-decls":[658,0,1,"cmdoption-tblgen-gen-typedef-decls"],"-gen-typedef-defs":[658,0,1,"cmdoption-tblgen-gen-typedef-defs"],"-gen-x86-EVEX2VEX-tables":[658,0,1,"cmdoption-tblgen-gen-x86-EVEX2VEX-tables"],"-gen-x86-fold-tables":[658,0,1,"cmdoption-tblgen-gen-x86-fold-tables"],"-gicombiner-show-expansions":[658,0,1,"cmdoption-tblgen-gicombiner-show-expansions"],"-gicombiner-stop-after-build":[658,0,1,"cmdoption-tblgen-gicombiner-stop-after-build"],"-gicombiner-stop-after-parse":[658,0,1,"cmdoption-tblgen-gicombiner-stop-after-parse"],"-gisel-coverage-file":[658,0,1,"cmdoption-tblgen-gisel-coverage-file"],"-help":[658,0,1,"cmdoption-tblgen-help"],"-help-list":[658,0,1,"cmdoption-tblgen-help-list"],"-instrument-coverage":[658,0,1,"cmdoption-tblgen-instrument-coverage"],"-instrument-gisel-coverage":[658,0,1,"cmdoption-tblgen-instrument-gisel-coverage"],"-intrinsic-prefix":[658,0,1,"cmdoption-tblgen-intrinsic-prefix"],"-llvmir-intrinsics-filter":[658,0,1,"cmdoption-tblgen-llvmir-intrinsics-fil
ter"],"-long-string-literals":[658,0,1,"cmdoption-tblgen-long-string-literals"],"-match-prefix":[658,0,1,"cmdoption-tblgen-match-prefix"],"-name":[658,0,1,"cmdoption-tblgen-name"],"-null-backend":[658,0,1,"cmdoption-tblgen-null-backend"],"-o":[658,0,1,"cmdoption-tblgen-o"],"-omit-comments":[658,0,1,"cmdoption-tblgen-omit-comments"],"-op-exclude-regex":[658,0,1,"cmdoption-tblgen-op-exclude-regex"],"-op-include-regex":[658,0,1,"cmdoption-tblgen-op-include-regex"],"-optimize-match-table":[658,0,1,"cmdoption-tblgen-optimize-match-table"],"-print-detailed-records":[658,0,1,"cmdoption-tblgen-print-detailed-records"],"-print-enums":[658,0,1,"cmdoption-tblgen-print-enums"],"-print-records":[658,0,1,"cmdoption-tblgen-print-records"],"-print-sets":[658,0,1,"cmdoption-tblgen-print-sets"],"-register-info-debug":[658,0,1,"cmdoption-tblgen-register-info-debug"],"-stats":[658,0,1,"cmdoption-tblgen-stats"],"-time-phases":[658,0,1,"cmdoption-tblgen-time-phases"],"-typedefs-dialect":[658,0,1,"cmdoption-tblgen-typedefs-dialect"],"-version":[658,0,1,"cmdoption-tblgen-version"],"-warn-on-skipped-patterns":[658,0,1,"cmdoption-tblgen-warn-on-skipped-patterns"],"-write-if-changed":[658,0,1,"cmdoption-tblgen-write-if-changed"],"gen-lldb-option-defs":[658,0,1,"cmdoption-tblgen-arg-gen-lldb-option-defs"],"gen-lldb-property-defs":[658,0,1,"cmdoption-tblgen-arg-gen-lldb-property-defs"],"gen-lldb-property-enum-defs":[658,0,1,"cmdoption-tblgen-arg-gen-lldb-property-enum-defs"]}},objnames:{"0":["std","cmdoption","program 
option"]},objtypes:{"0":"std:cmdoption"},terms:{"000":759,"0000":[684,769,782],"00000":684,"000000":[773,780,784,805,807],"00000000041516c6":784,"00000006":759,"000000e":[804,805,806,808],"000001":784,"000002":784,"000003":784,"000006":784,"000007":784,"000008":784,"000010":784,"000014":784,"000016":784,"000017":784,"000018":784,"000019":784,"000023":784,"00002fc0":784,"000032":784,"000035":784,"000037":784,"000041":784,"000046":784,"000049":784,"000050":784,"000063":784,"000075":784,"0001":782,"00010010":710,"000102030405060708090a0b0c0d0e0f":710,"000106":784,"000120":784,"000123":784,"000138":784,"000155":784,"000156":784,"000214":784,"000215":784,"0003":782,"000302":784,"000310":784,"000342":784,"0004":782,"000471":784,"0005":782,"000562":784,"0007":782,"000737":784,"000774":784,"000799":784,"0008":748,"001":597,"00100001":710,"00110101":710,"001375":784,"001596":784,"003400":773,"004523":784,"0050":769,"0058":631,"00n":664,"010":589,"01010011":710,"0106":769,"011":597,"011200":773,"012345":639,"0123456789":639,"0152":650,"0156":769,"01pi0":588,"03d516c6":784,"041688":745,"041721":745,"041739":745,"0462":773,"049886":745,"04e":710,"0625":710,"062624":784,"067200":773,"07b":661,"0b000000":780,"0b00000000":710,"0b000011":780,"0b000100":780,"0b000111":780,"0b0011":[588,597],"0b01101101":710,"0b01111000":710,"0b100":770,"0b10000000":710,"0b101":770,"0b1010":[588,589],"0b10110110":710,"0b111":770,"0b11100001":710,"0b1111":588,"0b11111110":710,"0b11111111":710,"0baz":743,"0cleanup":669,"0dev":773,"0f00000000":724,"0f0d0ed1c78f":682,"0f3f800000":724,"0f3fb8aa3b":724,"0f42d20000":724,"0f7f800000":724,"0fb5bfbe8e":724,"0fbf317200":724,"0fc2d20000":724,"0ffh":589,"0git":702,"0th":677,"0x0":[590,597,607,730,743],"0x00":[590,664,730,762,770],"0x000":590,"0x0000":[585,590,738],"0x00000":687,"0x000000":687,"0x00000000":[590,759],"0x0000000000000000":590,"0x0000000000000010":759,"0x0000000001f2dc7a":665,"0x0000000001f2fc2f":665,"0x00000000029bdb45":665,"0x0000000002a8306":665,"
0x0000000004589ff8":665,"0x000000000458ed10":665,"0x00000000046122f0":665,"0x00000000ffefffff":589,"0x00000001":759,"0x00000002":759,"0x00000003":759,"0x00000004":759,"0x00000009":759,"0x0000002a":665,"0x00000064":759,"0x00000067":759,"0x0000006e":759,"0x00000083":759,"0x000000c9":611,"0x000000ff":687,"0x00000100":759,"0x00000110":759,"0x00000120":759,"0x00000130":759,"0x00000140":759,"0x00000147":759,"0x00000150":759,"0x00000233":611,"0x000003bd":759,"0x000003cd":759,"0x000003f3":759,"0x000003ff":607,"0x00000ff0":687,"0x00001000":759,"0x00001023":759,"0x00001203":759,"0x00001c00":607,"0x00002000":759,"0x00002023":759,"0x00002200":759,"0x000034f0":759,"0x00003500":759,"0x00003550":759,"0x00007fff":607,"0x00007ffff788c09b":665,"0x00007ffff7fd0007":665,"0x00007ffff7fd003c":665,"0x00007ffff7fd0095":665,"0x00007ffff7fd0098":665,"0x00007fffffffc798":665,"0x00007fffffffe108":665,"0x00007fffffffe118":665,"0x00007fffffffe140":665,"0x0000fefe":611,"0x0001":[585,590],"0x0001023":759,"0x0002":[585,590],"0x0002023":759,"0x0003":[585,590,738],"0x0004":[585,590],"0x0007":738,"0x0008":738,"0x001":590,"0x0010":738,"0x0011":738,"0x0012":738,"0x0013":738,"0x0014":738,"0x002":590,"0x0020":738,"0x0021":738,"0x0022":738,"0x0023":738,"0x0024":738,"0x003":590,"0x0030":738,"0x0031":738,"0x0032":738,"0x0033":738,"0x0034":738,"0x004":590,"0x0040":738,"0x0041":738,"0x0042":738,"0x0043":738,"0x0044":738,"0x0045":738,"0x0046":738,"0x005":590,"0x0050":738,"0x0051":738,"0x0052":738,"0x0053":738,"0x0054":738,"0x0055":738,"0x0056":738,"0x006":590,"0x0068":738,"0x0069":738,"0x007":590,"0x0070":738,"0x0071":738,"0x0072":738,"0x0073":738,"0x0074":738,"0x0075":738,"0x0076":738,"0x0077":738,"0x0078":738,"0x0079":738,"0x007a":738,"0x007b":738,"0x008":590,"0x009":590,"0x00a":590,"0x00b":590,"0x00c":590,"0x00d":590,"0x00e":590,"0x00f":590,"0x00ff0000":607,"0x01":[590,664,730,759,771,785],"0x010":590,"0x011":590,"0x01f":590,"0x02":[590,664,730,759,770],"0x020":590,"0x021":590,"0x022":590,"0x023":590,"0x024"
:590,"0x025":590,"0x026":590,"0x027":590,"0x028":590,"0x029":590,"0x02a":590,"0x02b":590,"0x02c":590,"0x02d":590,"0x02e":590,"0x02f":590,"0x03":[590,730,762],"0x030":590,"0x031":590,"0x032":590,"0x033":590,"0x034":590,"0x035":590,"0x036":590,"0x037":590,"0x038":590,"0x039":590,"0x03a":590,"0x03b":590,"0x03c":590,"0x03d":590,"0x03e":590,"0x03f":590,"0x04":[590,730,759,770,785],"0x040":590,"0x04000000":759,"0x041":590,"0x042":590,"0x043":590,"0x044":590,"0x045":590,"0x05":[590,730,762],"0x06":[590,730,762],"0x07":[590,730,762],"0x08":[590,730,759],"0x09":730,"0x0a":[730,762],"0x0abcd":710,"0x0b":[730,762],"0x0b17c0de":597,"0x0c":[664,730],"0x0d":762,"0x0e":762,"0x0f":762,"0x0f0":687,"0x0ff":[590,687],"0x0fffff":589,"0x0fffffff":785,"0x1":[588,590,607,743,759,762,768],"0x10":[27,129,220,378,490,588,631,730,759,762],"0x100":[590,730,759,760],"0x1000":[738,759],"0x100000":589,"0x10000000":590,"0x10000001":590,"0x10000002":590,"0x10000003":590,"0x10000004":590,"0x10000005":590,"0x10000006":590,"0x1000000a":590,"0x1000000b":590,"0x1000000c":590,"0x1000000d":590,"0x1000000f":590,"0x10000010":590,"0x10000011":590,"0x10000015":590,"0x100000f24":654,"0x12":[27,129,220,378,490,710],"0x1234":[726,759],"0x12345678":759,"0x1235":710,"0x13":730,"0x14":768,"0x14c":[731,814],"0x16":730,"0x1881":[20,123,212,370,482],"0x1afp":589,"0x1c2":671,"0x1e":597,"0x1f84":654,"0x1ff00":589,"0x2":[588,607,743,762,768],"0x20":759,"0x200":[590,759,760],"0x2000":[590,759],"0x20000":590,"0x2001":585,"0x2002":585,"0x20117e":642,"0x21":710,"0x2413bc":782,"0x25":642,"0x29273623":759,"0x2a":[596,695],"0x3":[588,607,743,762,768],"0x30":585,"0x300":590,"0x3000":644,"0x3039":743,"0x31":585,"0x35":710,"0x3c00":589,"0x3e08":585,"0x3e09":585,"0x3e0a":585,"0x3e0b":585,"0x3e0c":585,"0x3f":671,"0x3f800000":589,"0x3fe9":759,"0x3fea":759,"0x3feaed548f090ce":805,"0x3feb":759,"0x3fed":759,"0x4":[588,607,650,762,768],"0x40":759,"0x400":[590,759,760],"0x4000":[644,759],"0x40000000":716,"0x400480":654,"0x400486":654,"0x4
00490":654,"0x4004a0":654,"0x4004b0":654,"0x4004b6":654,"0x4004be":654,"0x4004d0":654,"0x401167":654,"0x4200":759,"0x432ff973cafa8000":710,"0x45":695,"0x45c2cb0":665,"0x494e494d":736,"0x4c":768,"0x4d544f4e":736,"0x5":[607,730,762,768],"0x53":710,"0x5321":710,"0x55585c0867ba":695,"0x55585c0af787":695,"0x55585c0af7b3":695,"0x55585c0af7cf":695,"0x55585c0afa55":695,"0x57":695,"0x5cf8c24cdb18bdac":664,"0x6":[607,768],"0x601028":654,"0x60500020":814,"0x6a":650,"0x6fff4c02":671,"0x7":607,"0x70b298":782,"0x744e60":712,"0x744ea0":712,"0x7c000000":759,"0x7f":710,"0x7fecc966952b":695,"0x7feccab26000":695,"0x7fefffff00000000":589,"0x7fefffffffffffff":589,"0x7fff":590,"0x7fffffff":710,"0x8":[588,607,730,743],"0x80":[684,759,768],"0x800":[590,759],"0x8000":[585,590],"0x80000000":[590,759],"0x8100":585,"0x82638293":759,"0x83":695,"0x8664":731,"0x9":[607,743],"0x90":764,"0x9f":695,"0xa":[590,607,743,768],"0xa0463440":611,"0xa0463443":611,"0xa0463447":611,"0xabcdef00":764,"0xac":768,"0xb":607,"0xc":[597,607,611],"0xc00":590,"0xc3":770,"0xd":[597,607,730,768],"0xe":597,"0xe1":585,"0xe2":585,"0xe3":585,"0xe4":585,"0xe5":585,"0xe6":585,"0xe7":585,"0xe8":[585,770],"0xe9":585,"0xea":585,"0xeb":[585,695],"0xec":585,"0xeffe0000":731,"0xf":[588,768,770],"0xfc":590,"0xfe":590,"0xfefe":611,"0xff":[588,589,590,710,770],"0xff0":687,"0xff00":589,"0xffbef174":782,"0xffefffff":589,"0xffefffff00000000":589,"0xfff":588,"0xfff8000000000000":710,"0xffff":[20,27,120,123,129,209,212,220,314,370,378,479,482,490,584,585,588,589,590,710,731,762],"0xffff000000000002":762,"0xfffff":589,"0xfffffffc":589,"0xffffffff":[585,589,590,597,710],"0xffffffffffefffff":589,"0xffffffffffff00ff":589,"0xffffffffffffff00":589,"0xffffffffffffffff":585,"0xh":710,"0xk":710,"0xl":710,"0xl00000000000000004000900000000000":710,"0xm":710,"0xr":710,"0xxxxxxxxx":759,"100":[120,209,314,479,584,590,594,610,638,639,645,676,684,685,695,710,712,714,759,769,779,782,806,807,808,809,810],"1000":[712,735,741,811,812],"10000":[631,722],"10010
001":710,"101":769,"10110":684,"102":[589,639,769,784],"1020":[607,710],"1023":[590,607],"1024":[590,607,659,710,735,779],"1025":607,"102593":784,"1026":607,"102kb":659,"103":[590,784],"1030":710,"104":589,"1054":652,"106":[589,650],"1066":652,"108055":784,"1083":784,"1087":590,"1088":590,"109":[639,770],"10m":712,"10x":[660,723],"110":[597,784],"1101":652,"1108":810,"111":[590,724,773],"1110":597,"11111":684,"111111":684,"1112":652,"112":[590,758],"1123":652,"1124":681,"1129":[590,810],"113":710,"1130":[590,652],"114":[650,681],"1141":652,"1183":810,"1184":810,"120":[590,665,710,770,805],"1200":712,"1210":810,"1218":810,"122":710,"1222":773,"123":[710,716,722,759,784,807,808],"12303":769,"1234":[589,590,654],"12345":[702,743,745],"1234567":710,"123908342":743,"123kkk":659,"123mb":659,"124":[723,784],"125":[682,710,745,784],"125000e":710,"126":650,"126744":784,"127":[588,590,710],"127715":784,"128":[34,132,225,384,497,588,590,596,597,607,664,710,714,724,736,738,743,768,780],"128974848":659,"128mib":671,"129":590,"1298":743,"129894":748,"129907":748,"129920":748,"129923":748,"129926":748,"129934":748,"129941":748,"129996":743,"12x10":710,"130":[710,784],"130111":748,"130179":748,"130181":748,"130213":748,"130214":748,"130228":748,"130229":748,"130249":748,"130266":748,"130292":748,"130304":748,"130306":748,"130309":748,"130310":748,"130312":748,"130313":748,"130318":748,"130320":748,"130323":748,"130328":748,"130329":748,"130353":748,"130355":748,"130388":748,"130415":748,"130425":748,"130430":748,"130435":748,"130465":748,"130629":748,"130630":748,"1329373163":710,"133700e":804,"134":743,"138":[743,784],"1388":769,"139":784,"139563":784,"139605":784,"139607":784,"139610":784,"139612":784,"139617":784,"1401":769,"1402264":748,"1404":769,"14159":590,"14159f":590,"142":784,"143":590,"1439":810,"144":710,"1444":743,"1447":769,"1459":810,"147345":784,"14740650423002898831":710,"1491":784,"15155600":784,"1523017872":712,"1530":784,"1535":590,"1536":590,"1560":769,"1564":7
84,"1565878005":710,"159":590,"15915494":589,"15915494309189532":589,"1592":589,"159279":784,"15gb":605,"15mb":745,"15th":588,"160":[590,710],"1600":710,"16000":735,"162":650,"164":784,"16628590":784,"16777216":710,"16777217":710,"1681":784,"16b":596,"16bit":809,"16gb":[681,696],"16mib":671,"172":768,"1723":784,"175":590,"1789":769,"1791":[590,769],"1792":590,"182":650,"1829":769,"1840":769,"187":784,"188":784,"18th":677,"191":590,"1967":784,"1980":750,"1981":666,"1984":784,"1987":666,"1989":676,"199":650,"1991":750,"1994":676,"19941610":736,"19950410":738,"19950623":736,"19950814":736,"19951122":738,"19960307":[731,736],"19961031":738,"19970604":736,"19970605":731,"19970606":731,"19990604":736,"19990903":[731,738],"1afp10":589,"1cleanup":669,"1cy":639,"1d_arrai":588,"1dx":664,"1gb":745,"1st":667,"200":[590,695,783],"2000":[588,590,782],"20000":764,"200000e":806,"20000404":736,"2002":607,"20030901":736,"2004":607,"20040203":738,"2008":[590,684,710,750],"20091201":[731,736],"2010":605,"2011":[745,750],"2012":[671,743,786],"2013":[715,731,743],"20140508":736,"20140516":731,"2015":[692,712,750],"2016":[660,692,721,745,750,783],"2017":[605,671,675,679,681,690,692,712,750,785],"2018":[684,710,720,760,783],"2019":[602,609,681,692,720,745,748],"2020":[682,721],"203":650,"2047":[588,590],"2048":[588,590,607,712,735],"207":590,"209":650,"2097496":650,"2100":590,"2101248":650,"2105344":650,"2105360":650,"212":601,"213":601,"213336":784,"21340":665,"214":601,"2147483648":[710,724],"215":601,"216":601,"21649":681,"217":601,"2192":773,"2200":590,"2200g":590,"223":590,"224":590,"225":710,"225708":784,"22c":652,"2303":590,"2304":590,"2308":769,"231150":759,"231154":759,"2317":769,"23333":674,"234":[589,804],"234000e":804,"23421e":710,"234e2":589,"235":710,"236":652,"239":590,"240":803,"2400g":590,"241":652,"242":[671,784],"2468601609":710,"247":710,"248":743,"24a":652,"24e8":743,"24mb":712,"250":745,"2500":590,"250000e":716,"253":652,"254":710,"255":[588,589,590,607,665,710,759,80
2,803,804,805,806,807,808,809,810],"2559":590,"256":[588,589,590,607,671,710,728,759],"2560":590,"257":[607,710],"258":[607,652],"2590":599,"25mb":712,"260":590,"2601000000":784,"26096":784,"260x":590,"261":652,"264":650,"2692":784,"2700":769,"271":784,"272":[590,639],"281474976710654":762,"2815":590,"2816":590,"2817":743,"285":590,"2882400000":764,"290":590,"2900":710,"290x":590,"2919":743,"294":650,"2963":784,"297":639,"2ap3":671,"2d_arrai":588,"2d_msaa":588,"2d_msaa_arrai":588,"2gb":760,"2nd":[667,710,712,743],"2x3x4":710,"300":[616,639,710],"3000":[590,743],"302":774,"305064":668,"306":639,"3069":784,"3071":590,"3072":590,"308":665,"30pm":786,"31027":695,"3120":769,"31337":804,"314":639,"3148790418":710,"31511":712,"315994":784,"32088":784,"321":681,"3213":743,"3221225472":659,"32768":[21,23,24,124,126,127,214,216,217,372,374,375,484,486,487,735],"32bit":809,"32gb":605,"3327":590,"3328":590,"3340b":590,"335":731,"3363":784,"339450":773,"343":769,"345":743,"348":769,"350":747,"351":[590,747],"352":[590,747],"357":747,"358":747,"3583":590,"359":747,"360":[747,784],"362":747,"363":747,"36652":784,"367":747,"370":747,"371":747,"3714":784,"372":747,"376":747,"377":710,"380":590,"3800":590,"381":784,"3811":712,"3827":712,"383":590,"384":590,"385":590,"3868":769,"387":[665,710],"392":743,"39331465":784,"39337525":784,"3963":712,"399":[639,784],"3cy":639,"3dnow":809,"3dnowa":809,"3gb":681,"3pm":678,"3rd":[667,678,710,712,759,788],"3x4":710,"400":[645,803],"4000":[588,590],"40000":645,"4000g":590,"40219":696,"403":773,"40535375":784,"408":723,"4095":[588,710,712,735],"4096":[588,710,735,738],"4097":735,"4098":735,"4099":735,"40kib":695,"40th":802,"410":710,"4112":642,"412":650,"415":590,"416":590,"4167":712,"41m":659,"421":710,"423325":773,"429":710,"4300g":590,"4300ge":590,"434":743,"4350g":590,"4350ge":590,"4377547752858689819":710,"446":784,"447":590,"448":590,"449":590,"450":590,"451":590,"452":590,"453":590,"454":590,"455":590,"456":[716,773,807],"457":590,"458":590
,"4584":784,"459":590,"460":590,"4600g":590,"4600ge":590,"463":590,"463331":784,"463340":784,"464":[590,773],"465":590,"4650g":590,"4650ge":590,"467":[590,665],"468":590,"469":590,"470":590,"4700g":590,"4700ge":590,"470948":784,"471":590,"472":590,"472618":784,"474":773,"4750ge":590,"480":590,"4819":769,"4gb":709,"4gib":671,"4kib":735,"4th":[607,710],"500":639,"5000":[590,695,756],"503":747,"5046":743,"506":[773,784],"507":747,"508":[710,773],"5100":590,"511":590,"512":[590,710,735],"513":590,"51440360":784,"51615":681,"5197":769,"5200":590,"521":747,"527":747,"5312":712,"532":743,"5321":745,"542":769,"543":769,"5434426023268520":784,"5434426023523052":784,"5434426029925386":784,"5434426030031128":784,"5434426046951388":784,"5434426047282020":784,"5434426047857332":784,"5434426047984152":784,"5434426048036584":784,"5434426048042292":784,"5434426048055056":784,"5434426048067316":784,"547":695,"548":747,"549":747,"5500":590,"554":[665,747],"556":652,"560":769,"5600":590,"5600m":590,"563098":773,"566":[652,769],"5700":590,"577":652,"584":769,"58421550":784,"586":652,"590":599,"595":652,"59620e187c6ac38b36382685ccd2b63b":773,"5981":769,"5gb":681,"5rqxku":784,"5th":710,"600":[645,652],"6000":590,"60000":645,"603":747,"609":652,"610":639,"621":747,"626":745,"62635":784,"626455":784,"627":747,"628":747,"6295592":654,"638838075":710,"643":665,"648":747,"649":747,"6497":769,"64bit":[710,716,753],"64k":731,"654":747,"65500":589,"65535":[21,22,23,24,124,125,126,127,214,215,216,217,372,373,374,375,484,485,486,487,588,710],"65600":589,"657":769,"658":769,"66ghz":696,"66s":681,"6700":590,"672368":784,"6757":769,"680":724,"6800":590,"6900":590,"69819":784,"6986":769,"6th":710,"7000":590,"7004155349499253778":710,"7009":769,"703":676,"705":676,"7050b":590,"7100":590,"7150b":590,"7200p":590,"725":743,"7300":590,"7350b":590,"73670648":665,"7400p":590,"746":769,"747":769,"7500":[590,712],"754":[590,684,710],"756":747,"758":769,"7600p":590,"7646":743,"7651369219802541373":710,"767":590
,"768":590,"769":590,"7790":590,"77x":760,"789":807,"7938":769,"7942":712,"7976922776554302e308":589,"7976931348623157e308":589,"7e15":659,"7foo_bar":627,"7nm":590,"7th":710,"7ykb2k5f":634,"8000":735,"8011":[678,696,723],"8014":696,"80386":[594,809],"80f351b51825":668,"80x86":696,"80x87":607,"8191":735,"83348":773,"841471":805,"84s":723,"8500b":590,"8500p":590,"8600b":590,"8600p":590,"8700b":590,"8700p":590,"8770":590,"8780p":590,"879":769,"8800b":590,"8800p":590,"8825":769,"896800":773,"8981":769,"8gib":671,"900":639,"9000":735,"9009":769,"9010":590,"9015":712,"90p":784,"9210":590,"928":773,"930803":731,"9342":769,"9410":590,"9600p":590,"9630p":590,"9633790":784,"9700p":590,"9730p":590,"97724f18c79c":682,"9800p":590,"9830p":590,"989":716,"999":710,"9990":696,"9994":696,"999999999":754,"99p":784,"\u03c6":[684,710],"abstract":[585,590,607,610,666,667,669,672,703,711,713,728,741,743,754,759,769,771,780,782,786,802,804,805,806,807,808,809,810],"boolean":[590,597,605,607,610,617,663,664,709,710,738,741,743,768,769,770,775,780,806],"break":[585,596,599,605,607,610,659,667,669,670,671,676,696,698,702,705,706,709,710,714,718,723,727,740,746,759,764,766,770,779,780,782,802,803,804,805,806,807,808,809,810],"byte":[54,55,144,145,240,241,242,342,403,404,516,517,585,588,589,590,593,594,596,597,607,614,623,630,638,639,642,643,648,649,652,663,664,670,671,677,684,688,695,709,710,712,716,720,727,729,730,731,733,734,735,736,738,739,743,754,756,758,759,762,764,770,777,780,785],"caf\u00e9":719,"case":[585,586,588,589,590,591,593,594,596,597,599,601,603,604,605,606,607,608,609,610,611,615,616,621,624,626,630,632,639,640,641,654,657,659,660,662,663,664,668,669,670,672,673,674,676,677,679,684,693,697,698,699,700,701,702,703,704,705,707,708,709,710,712,713,714,715,716,717,718,720,721,722,723,725,727,728,730,731,736,738,741,742,743,744,745,747,748,751,754,755,756,757,758,759,760,761,762,764,765,766,768,769,770,774,775,776,777,779,780,782,786,788,789,802,803,804,805,806,807,808,809,810,811]
,"catch":[659,665,667,677,695,700,710,711,743,760,804,805],"char":[590,593,610,658,659,663,664,665,677,695,703,709,710,712,713,724,726,731,735,738,743,756,759,760,768,779,780,782,783,784,788,803,804,805,806,807,808,809,810],"class":[596,597,608,611,631,639,641,644,653,658,662,666,667,669,670,675,676,679,680,683,684,686,688,689,690,691,698,706,708,709,711,715,718,720,722,726,728,730,731,736,738,740,741,748,749,750,751,756,759,760,765,768,771,777,781,783,786,788,789,790,791,803,804,805,806,807,808,809,810,811],"const":[590,593,607,610,632,659,664,676,687,695,703,708,709,712,713,716,721,725,726,728,730,743,756,759,768,769,780,782,783,784,786,788,789,790,791,803,804,805,806,807,808,809,810],"default":[66,93,100,101,104,105,106,107,112,115,157,189,190,191,194,195,196,201,203,204,254,281,282,283,297,298,299,300,301,306,309,354,455,456,457,471,472,557,564,565,568,569,570,571,576,579,585,588,590,591,592,593,594,597,599,603,605,607,611,612,614,616,617,619,620,621,624,625,626,629,630,631,632,635,639,640,641,642,644,645,646,648,649,650,652,653,654,658,659,660,663,667,670,671,674,675,676,679,681,691,694,695,696,698,699,700,701,703,705,708,709,710,712,715,716,718,723,724,726,740,742,743,744,745,754,756,757,759,760,762,764,766,768,769,770,771,773,774,775,776,777,779,780,782,783,784,788,789,803,804,805,806,807,808,809,810],"enum":[591,593,607,610,644,658,659,669,670,688,703,705,706,707,709,710,730,731,736,738,743,759,768,771,780,786,802,803,804,805,806,807,808,809,810,814],"export":[78,117,120,171,205,209,266,310,314,479,542,580,584,588,590,593,603,605,607,642,644,649,659,665,679,694,702,705,710,713,726,739,745,753,791,805,807],"final":[585,590,594,597,600,601,603,604,607,609,610,616,639,642,645,659,664,667,668,669,671,676,681,698,699,701,703,708,709,710,713,721,722,723,729,730,731,734,736,743,744,745,750,753,754,757,759,764,768,769,770,771,775,780,782,783,785,788,790,802,803,804,805,806,807,808,809,810,811],"float":[25,52,53,128,143,219,238,239,316,321,326,338,377,401,402,489,514
,515,587,588,597,598,599,607,617,639,659,660,670,674,677,683,691,697,701,722,724,743,759,771,778,779,780,786,802,803,804,805,806],"function":[585,591,593,594,596,597,598,601,602,603,604,605,611,614,615,617,620,621,623,625,626,627,628,630,639,641,642,643,644,645,648,649,654,657,658,660,661,665,667,668,669,671,674,675,676,677,679,684,686,688,689,693,694,695,698,703,704,705,706,707,708,709,711,712,713,714,715,718,720,721,723,725,726,727,728,729,730,731,733,734,736,740,746,748,750,754,756,758,762,763,764,766,768,769,770,773,777,778,780,781,782,784,786,788,789,791,802,803,805,806,807,808,809,811,812,813],"goto":[610,710,722,779,806,807,808,809,810],"h\u00e4hnle":748,"h\u00e4hnledistinguish":748,"import":[593,594,596,597,599,601,603,604,605,606,607,608,609,610,611,616,628,639,644,645,649,659,662,667,674,676,678,679,680,681,697,698,699,701,702,704,707,709,710,712,722,723,725,726,729,731,734,735,736,741,753,756,759,760,762,779,780,781,782,783,786,789,803,804,805,806,807,810,811],"instanceof":[743,768],"int":[593,594,596,601,607,610,611,612,626,639,654,659,660,663,664,665,666,669,672,676,677,679,694,695,707,708,709,710,712,713,714,716,717,722,724,726,738,743,759,760,761,764,768,769,770,771,776,779,780,783,784,786,788,802,803,804,805,806,807,808,809,810,811,814],"long":[585,592,593,594,599,601,602,606,607,609,610,612,616,623,625,639,645,658,661,667,669,671,672,674,675,677,679,682,685,689,695,702,703,710,720,723,735,739,742,743,753,755,759,760,761,764,765,766,770,775,780,782,786,789,804,805,811],"na\u00efv":748,"new":[585,591,592,594,597,600,601,603,605,606,607,609,610,611,612,616,621,631,639,641,654,657,658,659,662,665,668,674,675,676,678,679,680,684,689,693,694,701,702,703,704,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,726,727,728,731,735,738,742,745,748,750,751,754,756,758,762,764,765,766,768,770,771,776,778,780,781,786,787,788,789,790,803,804,805,806,807,809,810,811],"null":[36,37,39,40,43,45,48,49,54,55,56,69,70,71,72,73,78,171,266,542,585
,590,593,597,607,610,617,619,658,663,669,671,673,674,695,705,709,710,716,722,724,726,728,731,736,741,743,754,756,759,762,764,768,769,770,780,782,786,789,803,804,805,806,807,808,809,810],"public":[603,606,610,644,659,667,675,676,678,679,696,703,709,724,731,739,747,753,755,757,759,760,766,777,780,781,782,786,788,789,790,791,803,804,805,806,807,808,809,810],"return":[110,111,199,200,304,305,355,467,468,574,575,585,588,590,591,593,594,596,597,599,601,602,607,609,611,614,619,625,626,630,631,638,639,644,645,648,649,651,654,658,659,660,664,665,667,669,670,672,674,676,679,684,685,686,688,690,695,703,706,707,708,709,710,712,713,714,715,717,718,720,721,722,723,724,725,726,728,741,756,758,759,761,762,763,764,765,768,769,770,777,779,780,781,782,783,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811],"short":[593,596,601,607,610,616,654,658,667,672,681,710,721,724,731,743,750,756,762,764,773,780,783,786,789,806,807,808,810],"static":[585,590,593,594,595,601,603,604,605,607,612,614,615,619,635,639,658,659,663,670,672,674,676,677,679,691,695,698,701,703,704,708,709,710,711,712,713,714,715,720,722,726,743,744,756,759,760,764,768,769,770,777,780,781,782,783,786,788,789,790,791,802,803,804,805,806,807,808,809,810,812],"super":[607,713,770,780],"switch":[591,594,604,605,611,620,641,653,658,659,669,670,694,697,698,703,707,714,741,743,745,748,760,764,768,769,773,779,780,785,786,788,803,804,805,806,807,808,809,810],"throw":[585,607,633,660,663,674,708,709,710,726,741,743,782,790],"transient":[585,667,695],"trevi\u00f1o":602,"true":[590,593,594,599,606,607,610,611,612,616,619,625,631,639,645,654,659,663,664,667,676,677,683,684,688,694,695,697,701,703,705,707,709,710,712,714,716,722,724,725,731,739,741,743,745,756,757,759,768,769,771,775,776,777,780,781,782,783,784,786,790,803,804,805,806,807,808,809,810],"try":[593,594,595,596,600,601,602,605,607,608,610,612,630,659,660,662,667,668,674,676,678,679,682,695,704,712,716,717,719,721,722,723,725,726,738,741,743,745,746,755,756,757
,760,765,782,783,786,789,790,791,805,807,808,809,811],"var":[606,611,630,642,722,759,768,770,808,809,810],"void":[590,593,594,596,597,598,607,610,611,627,659,660,663,664,669,670,672,673,676,677,687,694,695,705,708,709,712,713,714,716,721,723,724,726,727,730,738,741,743,756,759,760,762,764,777,779,780,781,782,783,784,786,788,789,790,791,803,804,805,806,807,808,809,810],"while":[213,371,483,585,587,588,590,592,593,597,599,600,601,605,606,607,608,610,611,616,639,657,659,662,663,665,666,667,668,669,673,675,677,678,679,681,682,684,685,688,689,693,695,699,700,701,705,707,708,710,711,712,713,714,716,719,721,722,723,728,729,730,731,735,736,738,739,740,741,742,745,748,753,754,755,756,757,759,762,764,765,768,769,770,771,775,779,782,784,802,803,804,805,806,807,808,809,810,811],ACE:765,AND:[590,594,607,639,666,710,770,779],Adding:[605,710,726,768,778,804,807,808,809,811,812,813],Age:[731,736,770],And:[602,605,610,659,676,677,705,706,710,713,721,722,723,726,727,735,743,759,760,765,768,770,775,779,780,782,786,789,806,810],Are:600,BBs:[610,722],Being:[585,608,667,710,742,808],But:[585,590,596,607,610,660,677,701,710,717,719,722,723,725,731,743,747,759,760,761,766,770,780,786,805],CIEs:585,CUs:[590,654],CVS:745,DIEs:[638,759],Das:748,Doing:[607,610,674,743,755,759,764,805],ERE:611,EXE:739,EXEs:739,For:[1,5,6,7,8,288,306,308,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,603,605,606,607,608,609,610,611,612,616,617,619,621,625,627,630,631,633,635,639,641,642,644,645,650,653,654,657,659,660,662,663,664,666,667,669,670,671,672,673,675,676,677,680,681,683,684,685,689,691,693,695,697,698,701,702,703,704,705,707,708,709,710,711,712,713,714,715,716,717,718,720,721,722,723,724,725,726,727,728,729,730,731,735,736,739,740,741,742,743,744,745,746,748,750,752,753,754,756,757,759,760,761,762,764,765,766,768,769,770,771,773,774,775,776,778,779,780,781,782,783,784,785,786,787,788,789,790,802,803,804,805,806,807,808,809,810,811,812],GAS:780,GDS:[80,120,173,209,268,314,431,479,544,584,5
88,590],Going:[610,721],HDs:660,Has:[588,757,786],ICE:[679,711],IDE:[605,681],IDEs:[605,606],IDs:[590,710,722,745],IFs:779,Ids:590,Into:742,Its:[585,590,597,605,659,663,666,676,710,713,716,721,747,757,769,770,777,785,802,805],L0s:590,L1s:590,LDS:[20,80,107,120,123,173,196,209,212,268,301,314,370,431,457,479,482,544,571,584,588,589,590],LHS:[610,710,722,743,768,803,804,805,806,807,808,809,810],LIS:607,LTS:[667,679],MFS:659,MIs:607,NOT:[697,710,728,770,775],Near:738,Not:[585,588,590,597,606,607,608,611,614,641,645,659,667,669,671,676,710,713,738,753,760,773,780,782,785,806,810],OLE:738,ORE:748,One:[585,588,590,593,594,596,600,601,603,605,606,607,610,611,614,640,654,659,663,668,674,676,677,679,681,682,683,689,698,707,709,710,712,714,720,721,722,725,726,730,733,738,739,743,745,746,759,760,764,768,769,770,782,784,786,803,804,805,806,807,810,811,812],Ops:[607,807,808,809,810],PCs:712,PRs:667,QPS:760,RHS:[610,710,722,743,768,803,804,805,806,807,808,809,810],RLS:585,Res:[610,722],SAs:590,Such:[585,588,590,594,607,610,659,663,667,674,709,710,724,743,745,757,759,760,764,765,768,770],THE:704,THEN:590,TLS:[597,607,649,669,710,726,756,764],That:[585,594,595,596,603,606,607,609,610,639,663,667,676,677,679,682,683,694,702,703,708,709,710,714,718,721,722,726,743,745,757,760,764,765,769,770,780,781,786,805,811],The:[20,21,22,23,24,27,30,51,52,53,120,123,124,125,126,127,129,142,143,209,212,213,214,215,216,217,220,223,237,238,239,314,370,371,372,373,374,375,378,380,400,401,402,479,482,483,484,485,486,487,490,493,513,514,515,584,586,587,588,589,590,591,592,594,595,596,597,598,599,600,601,602,603,604,605,606,608,609,612,614,615,616,617,619,620,621,623,624,625,627,629,630,631,632,634,635,637,638,639,640,641,642,644,645,646,648,649,650,651,652,653,654,657,658,660,661,662,663,664,665,666,667,668,669,670,671,672,675,678,681,682,683,684,685,686,687,688,689,691,693,695,696,697,698,699,701,704,706,707,708,712,713,714,715,716,717,718,720,722,725,726,728,729,730,740,741,742,744,746,747,748,749,7
51,752,753,755,756,757,758,759,761,762,763,764,765,766,767,768,772,773,774,775,776,777,778,781,783,785,786,787,788,789,790,792,793,794,795,796,797,798,799,800,801,804,805,806,807,808,809,810,811,812],Their:[585,685,710,743,760,770],Then:[585,589,590,601,607,659,662,669,687,695,696,703,704,708,710,712,713,714,718,722,741,742,743,754,759,760,761,763,770,780,782,805,807,808,809,810],There:[288,306,308,585,587,588,589,590,593,594,596,597,599,600,603,605,606,607,609,610,611,639,641,645,659,660,662,663,664,666,667,669,670,671,672,675,676,677,679,681,683,684,685,686,687,688,689,690,695,696,697,700,701,702,705,708,709,710,712,714,716,717,720,721,722,723,724,725,726,727,729,734,736,741,743,744,745,746,748,749,754,756,759,760,764,765,766,768,769,770,771,773,774,775,776,780,782,783,785,786,803,804,807,808,810,811],These:[120,209,314,479,584,585,587,588,589,590,593,594,596,597,598,601,603,604,605,607,610,611,612,615,616,617,625,630,634,639,649,654,659,660,663,666,667,670,671,674,675,676,677,679,681,683,684,685,688,695,699,702,706,707,709,710,712,716,723,724,725,726,727,728,740,741,743,744,745,747,748,749,750,752,754,757,759,760,762,764,765,766,768,769,770,771,773,775,777,779,780,782,783,785,788,789,802,803,804,805,806,807,808,809,810,811],Ths:623,Tied:710,Ties:590,Use:[588,589,590,595,596,600,605,607,611,612,614,617,620,621,625,630,635,639,640,641,642,645,653,654,657,662,666,667,676,679,696,697,698,699,702,709,712,713,716,717,724,742,746,751,753,759,761,770,773,775,780,782],Used:[588,590,616,639,648,707,710,711,768,780],Useful:[590,624,743,768,778],Uses:[605,669,679,698,710,711,770,771,780],Using:[585,588,590,605,611,621,659,663,695,710,724,742,744,745,751,753,754,760,769,778,780,781,786,790,804,808,810,813],VMs:712,WITH:[610,788,789,790,791],Will:[712,746,764],With:[585,595,597,606,607,611,621,622,623,629,632,636,657,659,663,677,703,708,709,710,711,712,726,745,746,748,754,756,759,771,777,783,786,790,802,803,804,805,806,807,808,810,812],XPS:765,Yes:[587,667,672,677,679,712,722,
741],__1:627,__aarch64_casm_ord:594,__aarch64_ldaddn_ord:594,__aarch64_ldclrn_ord:594,__aarch64_ldeorn_ord:594,__aarch64_ldsetn_ord:594,__aarch64_swpn_ord:594,__active_lane_pc:590,__aeabi_:661,__annot:710,__anon_expr:[803,804,805,806,807,808,809,810],__apple_nam:759,__apple_namespac:759,__apple_objc:759,__apple_typ:759,__asm:639,__atomic_compare_exchang:594,__atomic_compare_exchange_n:594,__atomic_exchang:594,__atomic_exchange_n:594,__atomic_fetch_add_n:594,__atomic_fetch_and_n:594,__atomic_fetch_nand_n:594,__atomic_fetch_or_n:594,__atomic_fetch_sub_n:594,__atomic_fetch_xor_n:594,__atomic_load:594,__atomic_load_n:594,__atomic_stor:594,__atomic_store_n:594,__attribute__:[660,759,777,783],__bitcod:597,__bss:640,__builtin_eh_return:669,__builtin_expect:599,__builtin_expect_with_prob:599,__builtin_longjmp:669,__builtin_setjmp:669,__builtin_trap:712,__builtin_unreach:714,__c_specific_handl:669,__chkstk:671,__clang__:660,__class_type_info:698,__clear_cach:710,__cuda__:660,__cuda_arch__:660,__cuda_ftz:724,__cudacc__:660,__cxa_allocate_except:669,__cxa_begin_catch:669,__cxa_call_unexpect:669,__cxa_demangl:675,__cxa_end_catch:669,__cxa_rethrow:669,__cxa_throw:669,__cxx11:627,__cxx_:784,__cxx_global_var_init:784,__cxxabiv1:698,__cxxframehandler3:669,__cxxthrowexcept:669,__data:[640,650,710],__declspec:[805,806,807,808,809,810],__divergent_lane_pc:590,__divergent_lane_pc_1_1_els:590,__divergent_lane_pc_1_1_then:590,__divergent_lane_pc_1_els:590,__divergent_lane_pc_1_then:590,__dso_handl:709,__dwarf:759,__eh_fram:[607,709],__except:669,__fastcal:710,__gcmap_:676,__global__:[590,660],__gwp_asan_default_opt:695,__gxx_personality_v0:669,__has_attribut:768,__i386__:811,__image_info:710,__imp_:710,__info_plist:643,__internal_accurate_powf:724,__jit_debug_register_cod:665,__kmpc_fork_cal:710,__lex_1_1_save_exec:590,__lex_1_save_exec:590,__libc_start_main:[665,695],__libunwind_config:669,__llvm:[597,754],__llvm__:710,__llvm_coverage_map:664,__llvm_covmap:664,__llvm_deoptim:710,__llvm_
faultmap:673,__llvm_memcpy_element_unordered_atomic_:[710,764],__llvm_memcpy_element_unordered_atomic_safepoint_1:764,__llvm_memcpy_element_unordered_atomic_safepoint_:764,__llvm_memmove_element_unordered_atomic_:[710,764],__llvm_memmove_element_unordered_atomic_safepoint_:764,__llvm_memset_element_unordered_atomic_:710,__llvm_stackmap:762,__main:782,__mod_init:709,__morestack:758,__next__:663,__nv_isinff:724,__nv_isnanf:724,__nv_powf:724,__nv_truncf:724,__nvcc__:660,__objc:[650,710],__objc_imageinfo:710,__profn_foo:664,__remark:[617,754],__scudo_default_opt:756,__stack_chk_fail:[710,716],__stack_chk_guard:710,__start_:710,__stdcall:710,__stop_:710,__sync_fetch_and_add_n:594,__sync_fetch_and_and_n:594,__sync_fetch_and_max_n:594,__sync_fetch_and_min_n:594,__sync_fetch_and_nand_n:594,__sync_fetch_and_or_n:594,__sync_fetch_and_sub_n:594,__sync_fetch_and_umax_n:594,__sync_fetch_and_umin_n:594,__sync_fetch_and_xor_n:594,__sync_lock_test_and_set_n:594,__sync_synchron:594,__sync_val_compare_and_swap_n:594,__syncthread:724,__text:[607,640,643,650,773],__text_exec:640,__try:669,__uint64:590,__uint_64:590,__unwind_info:607,__vectorcal:710,__xray_log_fin:783,__xray_log_flushlog:783,__xray_log_init_mod:783,__xray_log_process_buff:783,__xray_log_register_mod:783,__xray_log_select_mod:783,__xray_log_set_buffer_iter:783,__xray_patch:783,__xray_patch_funct:783,_aligna:759,_amdgpu_:590,_amdgpu_cs_shdr_intrl_data:590,_args_:606,_b128:587,_b16:587,_b256:587,_b32:587,_b512:587,_b64:587,_b8:587,_binary_:641,_bool:808,_bootstrap_default_passthrough:592,_buffer:590,_compil:605,_crit_edg:710,_cuda_ftz:724,_cxxthrowexcept:669,_dcleanup:669,_distribution_compon:603,_division_by_zero:590,_dpp:[587,590],_e32:[587,590],_e64:[587,590],_end:641,_except_handler3:669,_except_handler4:669,_f16:587,_f32:587,_f64:587,_flag:[605,606],_flags_:606,_flags_relwithdebinfo:603,_function_nam:671,_how:609,_i16:587,_i32:587,_i64:587,_i8:587,_i_bitcast:688,_index:768,_inexact:590,_info:590,_init:590,_inv:590,_in
valid_oper:590,_invl:590,_is_:710,_job:605,_link_into_tool:782,_lz_o:769,_m4enum:710,_main:[654,726,814],_name_:606,_overflow:590,_p1:759,_p3:759,_padding_record:730,_pei386_runtime_reloc:698,_pk:587,_r0h:669,_regoffset:771,_ri:770,_rr:770,_runtim:762,_s64:684,_sdwa:[587,590],_segment_buff:590,_segment_ptr:590,_segment_s:590,_segment_wavefront_offset:590,_shdr_intrl_data:590,_size:[590,641],_sourc:590,_source_dir:605,_start:[641,642,665,695],_suffstr:770,_suffstringsuffix:770,_trunc:684,_try:710,_ty:670,_u16:587,_u32:587,_u64:587,_u8:587,_underflow:590,_unwind_resum:669,_v4:590,_var:606,_void:684,_vol:590,_w_side_effect:684,_win32:[765,805,806,807,808,809,810],_z3bari:626,_z3bazv:654,_z3foo3barv:627,_z3foov:[626,710],_z4leaki:760,_z5hellov:652,_zero:590,_zfoov:710,_zgv:710,_zn1a1nei:710,_zst1a:777,_zstlsicst11char_traitsiceerst13basic_ostreamit_t0_es6_st17basic_string_viewis3_s4_:695,_ztid:710,_ztii:710,_zts1a:[710,777],_zts1b:777,_zts1c:777,_zts1d:777,_ztv1a:777,_ztv1b:777,_ztv1c:777,_ztv1d:777,_ztv3bar:611,_ztv3foo:611,a0463440:611,a0463443:611,a10:590,a12:590,a15:697,a16:[0,4,9,84,435,548],a252:589,a253:589,a254:589,a255:589,a32:596,a57:661,a64:596,a_ctor_bas:611,aad8i8:607,aad:607,aapc:710,aarch32:661,aarch64:[594,596,604,607,641,642,675,679,683,691,693,697,705,710,716,720,748,762,768,780,783],aarch64registerinfo:716,abandon:[693,743,804],abbrev:[597,630],abbrevi:[590,610,623,630,711,748,769],abbrevid:[597,754],abbrevop0:597,abbrevop1:597,abbrevwidth:597,abc:[585,710],abcd:597,abcdef0:702,abcdillnoopsstuuvv:621,abi:[585,596,604,605,621,627,642,663,667,671,672,679,683,685,690,700,701,702,708,710,724,727,740,741,759,762,775,777,780,805,809,810,811],abid:[8,9,757],abil:[585,607,608,657,659,665,670,674,676,689,705,709,710,714,718,743,744,745,746,759,760,786,802,806,807,808],abl:[585,590,593,595,601,602,607,609,610,611,616,639,641,653,659,660,663,664,667,672,674,676,677,679,680,682,684,685,688,689,701,704,708,709,710,711,713,717,725,726,739,742,743,745,746,750,754,75
5,756,757,759,760,761,762,764,776,779,780,782,783,786,789,805,806,807,810,811],abnorm:[663,674,765],abort:[601,607,610,669,682,710,714,743],about:[585,587,589,590,593,594,597,598,602,603,605,606,607,609,610,611,614,616,617,623,635,636,637,639,640,644,645,648,649,650,659,660,663,665,667,668,669,670,673,674,675,676,677,678,679,680,681,682,683,685,687,689,695,696,697,699,700,701,702,703,704,705,707,709,710,711,712,713,714,715,716,717,718,720,721,722,723,724,725,726,727,731,733,734,735,738,739,740,741,742,743,751,752,753,754,755,756,757,759,760,761,762,764,765,766,768,769,773,774,776,777,778,779,780,782,783,784,789,790,803,804,805,806,807,808,809,810,811,812],abov:[20,27,120,123,129,209,212,213,220,314,370,371,378,479,482,483,490,584,585,587,588,589,590,593,594,596,597,598,603,604,605,606,607,608,609,610,611,612,617,639,645,654,659,660,663,667,670,672,673,674,675,676,677,679,681,684,688,694,699,700,701,702,703,704,705,707,709,710,713,714,715,716,717,718,721,722,725,726,728,731,741,743,744,745,746,750,753,755,757,758,759,760,761,762,764,766,768,769,770,774,775,777,780,781,782,784,786,788,802,803,804,805,806,807,808,810,811],abs8:671,abs:[25,128,219,316,321,326,338,377,489,709,724],abs_f:771,abs_fp32:771,abs_fp64:771,abs_fp80:771,absenc:[601,607,639,671,716,743,760],absent:[585,611,710,743],absl:627,absolut:[20,27,120,123,129,209,212,220,314,370,378,479,482,490,584,588,590,598,605,610,611,625,640,641,642,644,654,664,670,679,683,684,701,709,710,731,743,754,775,780,782,785],absolute_express:[20,21,22,23,24,27,30,51,52,53,120,123,124,125,126,127,129,142,143,209,212,213,214,215,216,217,220,223,237,238,239,314,370,371,372,373,374,375,378,380,400,401,402,479,482,483,484,485,486,487,490,493,513,514,515,584],absolute_symbol:709,absolute_tsc:785,absolutesymbol:726,abstracttyperead:658,abstracttypewrit:658,absv16i8:780,absv1i64:780,abtu:621,abus:743,academia:667,acc0:589,acc250:589,acc2:589,acc3:589,acc:[589,770],acc_vgpr:590,acceler:[614,616,630,733,811],accept:[585,586,588,590,60
2,606,608,610,611,625,627,640,641,659,662,663,667,668,671,672,675,679,681,683,688,694,703,710,712,718,722,724,742,743,760,763,766,769,770,775],access:[20,123,212,370,482,588,589,593,594,596,605,609,625,639,640,641,642,653,659,662,663,666,668,669,670,674,676,677,679,681,683,688,689,695,696,705,709,712,715,716,720,721,724,725,726,728,729,733,735,738,741,743,746,752,756,759,760,762,763,764,768,770,776,777,779,780,782,783,786,789,804,808,811],accessor:[607,759,782,803],accessti:710,accessty1:710,accessty2:710,accident:[610,659,726,743,765,766,782],accommod:[597,607,671,683,710,724],accompani:[610,754],accomplish:[609,667,676,703,709,736,741,743,760,770,802],accord:[590,597,598,616,639,659,676,684,685,690,703,710,716,743,748,750,753,757,762,769,770,780,786,804],accordingli:[669,676,707,710,714,735,757],account:[590,604,608,644,662,667,676,684,688,696,703,710,719,722,723,742,745,755,760,783,784,788,789],accqual:590,accum:590,accum_offset:590,accumul:[589,590,684,712,741,759,770,779],accumulateconstantoffset:722,accur:[590,593,599,631,639,676,679,685,699,705,713,738,741,745,759,769,810,811],accuraci:[639,705,710,741],accvgpr:590,achiev:[594,602,605,609,666,667,674,675,683,685,707,710,713,714,720,723,749,750,759,760,765],acknowledg:[755,757],acm:[607,676,747],aco:593,acq:594,acq_rel:[590,594,710],acquaint:788,acquir:[590,667,710,712,721,743,770],acquisit:743,acronym:695,across:[594,596,603,604,606,607,609,623,659,660,663,667,669,674,679,684,686,691,709,710,711,712,714,724,726,728,736,741,743,745,754,759,760,762,764,770,779,782,805,811],act:[585,598,607,610,611,621,684,710,712,721,722,741,743,754,755,757,759,769,770,775],action:[585,590,591,599,600,607,610,663,669,670,675,679,709,710,711,726,728,742,743,745,755,757,764,766,780,782,785],actionscript:811,actiontyp:769,activ:[585,590,593,605,607,609,639,641,653,663,667,668,669,676,678,702,708,709,712,719,723,726,735,743,745,757,760,762,766,773,780,782,789,790,791,813],actor:663,actual:[14,79,84,172,175,267,270,333,350,430,435,5
43,548,585,586,590,593,594,596,597,600,601,606,607,610,611,616,621,631,635,639,659,660,663,665,666,667,669,672,673,674,676,677,681,696,698,699,703,710,711,712,714,715,721,722,723,725,726,727,731,741,743,744,745,753,756,759,760,763,764,765,773,775,780,781,782,786,788,789,802,803,804,805,806,807,808,810,811],actual_access:590,actualaccqu:590,acycl:[607,658,711,738,759,769,780],ada:669,adapt:[660,667,672,695,700,719,741,743,765],adaptor:725,adc32mi8:771,adc32mi:771,adc32mr:771,adc32ri8:771,adc32ri:771,adc32rm:771,adc32rr:771,adc64mi32:771,adc64mi8:771,adc64mr:771,adc64ri32:771,adc64ri8:771,adc64rm:771,adc64rr:771,adc:[590,711,743],add16mi8:771,add16mi:771,add16mr:771,add16ri8:771,add16ri:771,add16rm:771,add16rr:771,add32mi8:771,add32mi:771,add32mr:771,add32ri8:771,add32ri8_db:631,add32ri:771,add32ri_db:631,add32rm:[759,771],add32rr:[631,759,770,771],add32rr_db:631,add32rr_rev:631,add64i32:631,add64mi32:771,add64mi8:771,add64mr:771,add64ri32:[631,771],add64ri8:631,add64rr:631,add8rr:607,add:[93,96,97,100,101,104,105,106,107,112,113,114,115,186,187,189,190,191,194,195,196,201,202,203,204,281,282,283,286,287,294,295,296,297,298,299,300,301,306,307,308,309,354,447,448,455,456,457,469,470,471,472,557,560,561,564,565,568,569,570,571,576,577,578,579,585,587,590,593,594,597,600,601,605,606,607,609,610,611,612,616,621,625,633,635,639,640,641,642,644,654,657,659,660,662,663,665,667,668,669,670,671,672,673,674,676,678,679,681,683,684,685,689,694,695,699,700,701,703,704,705,706,707,709,712,714,716,719,720,722,723,724,725,727,728,740,741,742,743,744,745,746,748,749,750,753,759,760,762,764,765,766,769,770,771,773,774,775,776,778,780,781,782,783,784,786,788,789,790,791,803,804,805,806,807,808,809,810,811,812],add_:606,add_custom_command:606,add_custom_target:606,add_definit:605,add_dep:606,add_depend:606,add_execut:[605,606],add_librari:605,add_llvm_execut:606,add_llvm_fuzz:675,add_llvm_librari:[605,782],add_llvm_pass_plugin:782,add_llvm_tool:675,add_llvm_unittest:605,add_pf:707,add_
pt:707,add_ri:770,add_rpath:633,add_rr:770,add_subdirectori:[605,782],addabsolutesymbol:709,addanonymoussymbol:709,addast:791,addcodegenprepar:725,addcom:676,addcommonsymbol:709,adddefinedsymbol:709,added:[55,56,145,241,242,404,405,517,518,585,590,591,597,601,602,603,604,605,607,610,611,621,625,629,635,641,659,665,667,670,671,676,682,688,696,700,707,709,710,712,713,714,716,717,718,722,725,726,727,742,743,745,746,747,748,752,755,759,761,762,764,765,766,768,770,774,775,776,779,780,782,786,788,789,790,791,803,805,806,807,808,809,810],addedcomplex:[770,771],addend:[590,709,710],addenda:661,addendum:[680,778],addexternalsymbol:709,addfunctionast:791,addgener:[726,788,789,790,791],addi:[710,770],addimm:607,addincom:[806,807,808,809,810],adding:[585,590,591,598,601,602,605,607,609,610,611,616,617,621,630,663,664,667,670,671,674,675,678,696,698,700,703,707,709,710,717,725,726,728,743,745,757,759,761,766,770,775,776,778,780,782,783,788,790,804,805,806,807,808,809,810,811,812],addinstselector:780,addintervalsforspil:607,addirmodul:726,addit:[585,588,589,592,593,594,597,601,603,605,606,607,608,610,611,612,617,621,623,625,631,632,637,639,640,644,650,657,658,659,660,661,663,664,667,669,671,674,676,677,678,679,681,683,684,685,687,688,689,690,691,698,699,700,701,702,704,705,708,710,711,712,714,716,718,719,720,721,722,723,724,728,731,739,741,743,744,745,747,748,750,756,757,759,760,761,762,763,764,765,766,768,769,771,774,775,776,777,779,780,782,783,803,804,805,807,808,809,811],addition:[593,594,603,607,641,653,659,667,676,679,683,688,690,694,710,721,729,739,741,743,757,760,763,764],addl:760,addlazyirmodul:726,addlib:621,addllvm:[605,606],addmbb:607,addmod:621,addmodul:[726,788,789,790,791,805,806,807,808],addmoduleflag:810,addobject:709,addop:759,addpass:725,addpassestoemitfil:[782,809],addpassestoemitmc:715,addpdrm:770,addpdrr:770,addplugin:709,addpreemitpass:780,addpreserv:[593,687],addpsrm:770,addpsrr:770,addq:[631,762],addr1:759,addr2:[654,759],addr2lin:[615,654,695],addr3:654,a
ddr64:[2,176],addr:[611,630,641,642,648,649,650,654,663,705,716,760,780],addrawvalu:706,addreg:607,addregbankcoverag:691,addregfrm:780,addregisterclass:[607,780],addrequir:687,address:[24,31,33,34,55,56,67,81,82,84,127,132,133,145,158,174,175,176,217,224,225,241,242,255,269,270,341,349,351,375,381,383,384,404,405,419,432,433,435,487,494,496,497,517,518,531,545,546,548,587,588,589,593,594,595,596,597,605,609,610,615,620,630,639,640,641,642,643,644,648,649,650,659,660,662,663,665,667,669,674,675,676,683,684,696,698,703,704,705,708,709,711,712,716,720,722,723,725,726,727,729,730,731,740,741,742,743,745,751,757,758,759,762,764,766,770,771,779,780,782,784,785,788,790,805,806,807,808,811],address_rang:585,address_s:[585,590,724],address_spac:590,addressis32bit:731,addressmodesemitt:769,addressof:723,addresss:770,addresssanit:[695,710,712,720,763],addressspac:683,addri:780,addrr:780,addrri:780,addrrr:780,addrsig:[592,648,649,671],addrsig_sym:671,addrspac:[669,683,710,724,764],addrspacecast:[590,684],addrspacequ:590,addsdrm:770,addsdrr:770,addssrm:770,addssrr:770,addtmp1:805,addtmp4:804,addtmp:[804,805,806,807,808,809,810],addtolinkord:726,addtypenam:743,addx:770,addxri:716,adequ:[605,676,681,760,780],adher:[608,610,667,726,743,748,765,771],aditya:692,adjac:[590,710,743,770],adjust:[585,590,605,616,641,642,645,654,669,674,679,681,708,722,739,760,764,775,780],adjustpassmanag:[724,725],admin:[600,667,681,696,757],administr:[681,696,709],adopt:[607,610,667,678,709,726,757,760,765],adorn:[710,761],adrian:662,adrp:671,adsiz:770,adsizebit:770,adsizex:770,adt:[610,679,788,789,790,791,804,805,806,807,808,809,810],adttest:605,advanc:[585,603,605,606,659,667,676,703,709,710,717,719,726,759,761,778,780,782,803,810],advancedbuild:699,advantag:[594,596,607,610,659,663,667,676,677,679,683,706,710,712,713,714,743,745,746,754,760,807,808,811],advent:710,adventur:743,advertis:669,advic:[612,667,674,676,678,697,742],advis:[603,606,621,688,706,766,806],advisori:[608,757],advoc:[608,610],aed0d
21a62db:682,aed0d21a6:682,aentri:[768,769],affect:[112,288,306,308,576,588,590,594,596,604,605,608,609,611,639,641,650,659,660,667,693,701,702,704,710,711,722,723,724,725,728,743,745,748,753,757,759,765,766,773,775,782,805,806],affili:[606,667,757],affin:616,affix:664,afn:710,aforement:[667,735,738,776],aform_1:607,aform_2:607,afre:708,aft:776,after:[68,159,256,420,532,585,588,589,590,592,593,594,595,596,597,598,601,604,605,606,607,608,611,612,614,616,617,619,621,625,629,630,639,644,654,657,658,659,663,664,667,668,669,671,672,675,676,677,679,681,682,684,685,686,688,689,693,696,697,698,701,702,703,705,708,709,710,712,714,715,716,717,718,719,721,722,724,725,726,727,731,735,741,743,744,748,752,754,756,757,759,761,762,763,764,765,766,768,769,770,773,774,775,776,777,780,781,782,785,786,788,789,803,804,805,806,807,808,809,810,811],afterbb:[806,807,808,809,810],afterloop:[806,807,808,809,810],afterward:[667,674,710,714,741,760,764],again:[592,596,606,609,611,616,631,659,663,665,669,674,681,682,688,710,717,721,722,729,743,748,756,758,761,766,774,776,782,804,805,806,808,811],against:[592,599,600,603,605,608,611,624,639,644,667,669,671,672,674,675,693,695,698,703,708,709,710,712,723,724,726,740,741,743,745,746,748,756,757,760,774,775,777,783,786,804],age:[608,770],agenc:755,agent:[590,710],agg1:710,agg2:710,agg3:710,agg:710,aggreg:[610,630,644,669,708,711,743,780,784],aggress:[593,607,610,660,673,676,679,710,711,743,759,803,806],agnost:[596,607,641,653,710,726,759,765],ago:667,agpr0:590,agpr255:590,agpr:[590,710],agre:[608,609,667,685,723,733,741,748,757,759,786],agreement:[585,748],agrep:774,ahead:[610,667,669,679,681,703,704,709,726,745,764,782,788,803,811,812],ahm:[692,757],aid:[676,709,710,760,769],aim:[596,600,601,606,610,612,664,667,675,709,710,716,726,727,743,750,751,755,756,757,759,765,788],ain:590,air:710,aix:[607,661],aka:[592,593,607,667,710,711,712,725,738,739,743,802,803,804,805,806,807,808,809,810],al_aliasset:780,al_superregsset:780,alac:773,alacconvert:773,alb
eit:[684,808],albini:757,alex:[664,748],algebra:741,algn:784,algo:782,algorithm:[593,610,628,631,639,659,666,669,676,689,710,714,721,722,731,741,743,750,760,768,770,780,803,804,805,806,807,808,809,811],alia:[585,594,597,611,616,620,632,639,640,647,649,678,708,711,716,721,722,725,726,727,776,778,780,782],alias:[585,588,593,597,631,639,642,649,658,677,702,721,741,762,780,811],aliasanalysi:[710,782],aliasanalysisdebugg:593,aliase:[597,710,722],aliaseeti:710,aliasesset:593,aliasopt:659,aliasresult:593,aliasset:[593,780],alic:768,align32bit:597,align:[585,589,590,594,607,611,637,641,644,661,663,664,667,669,676,677,684,695,705,706,709,710,713,716,720,722,724,741,743,750,756,759,762,764,780,785,786],align_nod:710,aligna:759,alignlog2:710,alignmentoffset:709,alignstack:[597,710],alignstyl:743,aliv:[607,663,710,722,741,743,760,782],all:[20,123,176,212,213,370,371,482,483,585,588,589,590,591,592,593,594,595,596,597,599,600,601,602,603,604,605,606,607,608,610,611,612,613,614,615,616,617,618,620,621,623,624,625,626,627,628,630,631,632,633,635,638,639,640,641,642,643,644,645,648,649,650,652,653,654,655,656,657,658,659,660,661,662,663,666,667,668,669,670,671,674,675,676,677,678,679,681,682,683,684,688,689,691,693,694,695,696,697,699,700,701,702,703,704,705,707,708,709,710,711,712,713,714,715,716,719,721,722,723,724,725,726,727,728,729,730,731,734,735,736,738,739,742,743,744,746,747,748,749,750,752,753,754,755,756,757,759,760,762,763,764,765,766,768,769,770,771,773,774,775,776,777,779,780,781,782,783,784,785,786,787,788,789,790,791,802,803,804,805,806,807,808,810,811,812],all_build:681,all_ones_mask:760,all_zeros_mask:760,allanalyseson:725,allevi:[607,659,710,743],alli:710,alloc:[20,123,212,370,482,585,590,593,594,605,610,617,619,639,640,641,653,669,671,674,676,677,679,683,686,708,709,710,712,715,716,720,726,728,741,750,751,760,762,763,764,780,782,784,786,788,790,805,806,807,808,811],alloca:[590,607,610,663,669,676,677,705,708,721,741,743,759,762,763,764,808,809,810],allocainst:[6
10,710,743,808,809,810],allocat:[607,641,710,780],allocatestr:709,allocationinst:743,allocs:[597,710],allon:710,allow:[585,588,589,590,593,594,596,597,599,603,605,606,607,608,609,610,611,612,616,617,621,625,631,639,641,649,653,660,663,664,665,666,667,668,669,670,671,674,676,677,678,679,681,684,685,688,695,696,698,699,701,703,705,706,708,709,710,712,713,714,715,716,718,720,721,722,723,724,725,726,727,728,729,731,738,740,741,742,743,745,748,751,752,754,756,757,758,759,760,761,762,764,765,768,770,771,773,775,776,777,779,780,782,783,784,786,788,789,790,802,803,804,805,806,807,808,809,810,811,812,814],allow_retri:616,allowlist:726,allowsanycontiguousstorag:743,allowsanysmalls:743,allroot:774,alltargetsasmpars:603,alltargetsdesc:603,alltargetsdisassembl:603,alltargetsinfo:603,almost:[585,594,596,607,609,610,639,667,670,674,675,676,682,684,711,743,745,764,765],alon:[597,602,607,659,667,698,719,721,760,775,803],along:[594,597,605,607,610,640,641,645,648,649,660,663,664,666,667,671,674,676,679,699,703,707,709,710,715,716,717,721,723,724,736,741,743,750,758,759,760,761,769,770,780,781,782,789,804,805,811,812],alongsid:[597,642,662,679,725,745,754,781,782],alpha:[710,780],alphabet:[590,768,770],alphacompilationcallback:780,alphajitinfo:780,alphanumer:[626,641],alreadi:[585,590,593,599,600,602,603,604,605,606,607,610,633,639,640,659,660,662,663,667,670,676,679,683,688,691,694,695,703,704,705,708,709,710,717,722,723,725,726,727,728,743,744,745,747,748,755,756,757,758,759,760,761,762,764,771,775,776,780,781,782,786,788,790,804,805,806,807,808,809,810],also:[585,586,587,589,590,591,593,594,596,597,599,600,601,602,603,604,605,606,607,609,611,621,625,631,639,644,645,659,660,662,663,664,666,667,668,669,670,671,672,674,675,676,677,678,679,681,683,684,685,686,688,689,691,694,695,697,698,699,700,701,702,705,706,707,708,709,710,711,712,713,714,716,717,719,721,722,723,724,725,726,728,729,731,735,740,741,742,743,744,745,746,748,750,752,753,755,756,757,759,760,761,762,764,765,768,769,770,77
1,773,774,775,776,777,779,780,781,782,783,784,785,786,788,789,790,802,803,804,805,806,807,808,809,810,811,812],alt_always_instru:783,alt_never_instru:783,alter:[659,710,743,759,776],altern:[585,588,589,590,601,603,605,607,609,614,616,625,648,660,667,669,674,677,679,682,683,688,689,691,694,704,709,710,712,714,726,728,743,750,766,773,780,782,783,786,803,808,814],although:[585,594,597,606,607,609,610,616,659,660,667,670,676,679,681,689,698,703,710,712,714,716,717,731,736,738,743,761,764,769,770,775,782,807],altivec:[607,710,711],altogeth:[699,750],alu32_rr:707,alu:[590,639],alwai:[585,590,593,594,596,597,599,600,602,605,606,607,608,609,610,611,616,621,623,632,639,659,660,663,664,667,669,671,672,674,679,682,684,688,699,703,710,711,713,714,716,721,723,731,733,738,739,742,743,744,745,754,755,757,759,760,763,764,765,766,768,770,774,775,781,783,784,785,786,803,804,805,806,807,808,809,810,811],always_inlin:660,always_instru:783,alwaysinlin:[597,710,781],alwaysinlinerpass:781,amara:748,amaz:807,amazingli:806,ambigu:[588,659,703,770,775,803,807],amd64:[679,681],amd:[585,588,590,607,639,661,701,760,768],amd_code_version_major:590,amd_kernel_code_version_minor:590,amd_machine_kind:590,amd_machine_version_major:590,amd_machine_version_step:590,amd_queue_t:590,amdfam10:809,amdgpu:[0,1,2,3,4,5,6,7,8,9,585,639,679,710,716,748,768,769,778],amdgpu_flat_work_group_s:590,amdgpu_hsa_note_code_object_version_:590,amdgpu_hsa_note_hsail_:590,amdgpu_hsa_note_isa_:590,amdgpu_num_sgpr:590,amdgpu_num_vgpr:590,amdgpu_waves_per_eu:590,amdgpubufferintrins:769,amdgpubufferload:769,amdgpuimagedimatomicintrins:769,amdgpursrcintrins:769,amdgpusample_lz_o:769,amdgpusamplevari:769,amdgputargetmachin:725,amdhsa_accum_offset:590,amdhsa_dx10_clamp:590,amdhsa_exception_fp_denorm_src:590,amdhsa_exception_fp_ieee_div_zero:590,amdhsa_exception_fp_ieee_inexact:590,amdhsa_exception_fp_ieee_invalid_op:590,amdhsa_exception_fp_ieee_overflow:590,amdhsa_exception_fp_ieee_underflow:590,amdhsa_exception_int_div_zero:59
0,amdhsa_float_denorm_mode_16_64:590,amdhsa_float_denorm_mode_32:590,amdhsa_float_round_mode_16_64:590,amdhsa_float_round_mode_32:590,amdhsa_forward_progress:590,amdhsa_fp16_overflow:590,amdhsa_group_segment_fixed_s:590,amdhsa_ieee_mod:590,amdhsa_kernarg_s:590,amdhsa_memory_ord:590,amdhsa_next_free_sgpr:590,amdhsa_next_free_spgr:590,amdhsa_next_free_vgpr:590,amdhsa_private_segment_fixed_s:590,amdhsa_reserve_:590,amdhsa_reserve_flat_scratch:590,amdhsa_reserve_vcc:590,amdhsa_reserve_xnack_mask:590,amdhsa_system_sgpr_private_segment_wavefront_offset:590,amdhsa_system_sgpr_workgroup_id_i:590,amdhsa_system_sgpr_workgroup_id_x:590,amdhsa_system_sgpr_workgroup_id_z:590,amdhsa_system_sgpr_workgroup_info:590,amdhsa_system_vgpr_workitem_id:590,amdhsa_tg_split:590,amdhsa_user_sgpr_dispatch_id:590,amdhsa_user_sgpr_dispatch_ptr:590,amdhsa_user_sgpr_flat_scratch_init:590,amdhsa_user_sgpr_kernarg_segment_ptr:590,amdhsa_user_sgpr_private_segment_buff:590,amdhsa_user_sgpr_private_segment_s:590,amdhsa_user_sgpr_queue_ptr:590,amdhsa_wavefront_size32:590,amdhsa_workgroup_processor_mod:590,amdkernelcodet:590,amen:[607,746],amend:[662,667,679,742],amini:748,aminiinconsist:748,among:[585,607,610,611,660,671,674,679,685,710,712,713,722,724,741,743,745,768],amongst:[585,594],amort:693,amount:[590,601,603,607,608,610,617,619,639,644,657,659,667,669,670,672,676,679,681,684,704,710,712,722,741,743,745,746,756,757,758,759,762,764,768,771,782,783,790,805,812],amper:697,ampersand:[120,209,314,479,584],ampl:766,amx:710,anachronist:597,analog:[585,669,676,689,710,714,727,743,760,764,782],analys:[593,598,604,607,657,670,679,710,714,741,743,748,750,759,778,781,782,783,784],analysi:[594,598,599,604,610,623,657,664,670,672,674,676,678,679,683,693,707,710,711,713,714,716,721,727,728,750,751,754,759,760,768,774,778,781,784,787,805,808,810],analysisalias:754,analysisfpcommut:754,analysisresultmodel:725,analysisusag:[593,687],analyt:747,analyz:[605,607,610,615,631,657,658,663,666,667,674,679,681,691,711,71
4,722,741,743,759,760,768,774,782,783,784,806],analyzebranch:780,ancestor:[590,703,770],anchor:759,ancient:592,andrew:[676,745],andric:757,android:[679,756,775],ands:711,andw:611,anew:759,angl:[660,770,786],ani:[120,209,314,479,584,585,588,589,590,592,593,594,595,596,597,598,600,601,602,603,604,605,606,607,608,609,610,611,612,614,616,617,621,623,624,625,628,629,630,631,635,639,640,641,642,644,645,648,649,652,653,654,657,658,659,662,663,664,665,666,667,668,669,670,671,672,674,675,676,677,678,679,681,683,684,686,688,695,697,698,701,702,703,705,706,707,709,710,711,713,714,715,716,717,718,719,720,721,722,723,724,725,726,728,729,730,731,735,738,739,741,743,744,745,747,748,750,752,754,755,756,757,759,760,761,762,764,765,766,768,769,770,771,774,775,776,777,779,780,781,782,783,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811],annot:[590,599,611,616,639,645,660,669,674,709,711,716,724,745,748,760],announc:[678,719,745,748,753,766,783],anon:710,anonym:[639,709,723,755,759,768,769,770,780,782,784,803,804,805,806,807,808,809,810],anonymous_0:769,anonymous_1:769,anoth:[585,588,589,590,593,594,595,596,597,603,605,606,607,608,610,611,616,621,625,639,641,644,653,654,657,659,663,664,666,667,669,670,671,672,676,679,681,684,686,687,688,689,691,697,698,699,701,707,708,709,710,711,712,713,714,717,721,722,723,724,725,726,727,730,738,739,741,745,748,759,760,761,762,763,764,765,768,769,770,771,775,776,777,780,782,783,784,785,786,789,803,804,805,806,807,808,809,810,811],another_op:716,anotherbranch:745,anothercategori:659,answer:[593,600,610,659,667,672,674,677,686,703,710,717,721,722,741,746,751,805,806,808],anti:[639,666],anticip:[585,745,748,785],antisymmetr:722,antisymmetri:722,anxiou:605,any_other_cond:710,anyextload:684,anyhow:710,anymor:[600,776,782],anyon:[594,606,609,667,668,721,723,755,757,759,780],anyregcc:[597,710,762],anyth:[585,590,594,603,607,610,617,619,639,641,660,663,667,672,679,681,695,698,703,710,714,725,726,727,729,735,741,743,745,759,760,762,764,770,771,776,
805,806],anywai:[610,666,701,710,721,741,748,759],anywher:[608,611,669,674,695,710,713,717,722,761,770,775,803,808],ap2:710,apach:[610,667,672,702,765,788,789,790,791],apart:[593,596,605,683,710,735,743],apertur:[20,370,482,590],apfloat:[679,804,805,806,807,808,809,810],api:[593,605,610,644,659,661,662,676,679,680,683,687,694,702,705,709,712,724,736,739,740,746,748,749,756,762,766,768,778,783,787,789,790,791,804,805,810],api_create_info:590,api_shader_hash:590,apilipenko:757,apint:[716,743,784],app:[740,743,747],appar:[590,808],appeal:760,appear:[585,589,590,593,594,601,610,611,616,621,628,639,641,659,664,667,671,677,705,710,716,723,726,729,730,731,736,738,739,741,743,745,750,759,760,764,768,770,775,780],appel89:676,appel:676,append:[585,597,605,611,612,614,621,622,625,635,688,705,710,743,744,769,773,775,783],appendinglinkag:743,appendix:585,appenduniqu:710,appertain:710,appl:[606,607,611,614,630,633,665,667,669,678,679,710,757,759,768],apple_nam:[614,759],apple_namespac:759,apple_objc:759,apple_typ:[614,759],appli:[68,159,256,420,532,585,587,588,589,590,593,594,596,600,601,605,606,607,608,609,610,616,621,625,630,641,650,653,657,659,663,666,667,671,674,677,679,683,691,695,696,699,700,702,705,709,710,712,713,714,715,725,726,728,731,738,741,742,743,745,748,750,752,753,757,759,760,765,766,770,773,776,780,782,785,786,789,805,807,808,811],applic:[585,590,596,597,605,607,609,610,621,624,625,640,649,659,660,661,667,669,673,674,676,683,709,710,712,715,722,741,743,751,756,760,762,764,766,770,771,773,775,778,780,782,784,785,787,788,802,805,807,811],applu331:747,applymergedloc:705,appreci:[585,662,667,699],approach:[585,590,596,601,602,605,607,639,659,660,663,667,669,677,679,689,693,703,707,709,710,720,722,741,743,748,750,757,758,759,764,766,780],appropri:[585,590,593,594,596,607,609,610,611,616,625,639,645,659,663,667,669,670,674,675,676,679,684,685,686,696,701,702,703,706,707,710,713,724,726,733,738,741,742,743,745,746,748,753,755,759,764,765,766,769,770,771,780,782,784,786,
788,790,803,807,808],approv:[609,667,702,742,748,757,766],approx:[660,724],approxim:[599,616,660,681,695,710,711,712,757,776],apr:765,apt:[723,761],aptr:710,apu:590,aq2:710,aqlwrap:590,arang:630,arbitrari:[585,590,593,597,607,610,616,631,659,663,669,675,676,677,684,693,705,710,716,721,726,739,743,757,759,760,762,764,768,769,770,780,782,785,803,804,805,808,811,812],arbitrarili:[585,703,710,733,759,783,806],arc:[625,679,723,742,748,769],arcanist:[678,679,723,742],arch1:640,arch2:640,arch:[614,617,619,625,630,637,639,640,641,642,643,648,649,650,654,660,679,697,699,726,753,783,809],arch_nam:654,arch_onli:635,arch_vgpr:590,architect:590,architectur:[585,594,596,604,605,607,611,614,616,617,619,625,630,631,635,637,639,640,641,642,648,649,650,654,660,662,669,674,675,679,680,681,695,697,699,701,702,709,710,712,716,723,724,726,727,730,750,753,756,760,766,768,770,771,775,777,778,780,783,787,809,813],architecture_name_s:590,archiv:[603,615,625,630,634,640,641,642,650,653,661,667,678,694,713,731,743,744,745,755,756,783],archiveiter:743,arcp:710,arcpatch:742,arctan:786,area:[585,590,596,600,602,607,609,662,664,667,669,676,702,709,711,712,719,723,742,743,753,757,760,765,766,775,808],aren:[593,607,608,610,660,667,676,677,679,681,688,699,710,716,741,745,760,766,805,806,811],areprivatesymbolsstrip:731,arg1:[663,710,712,783,802],arg2:[663,710,712,802],arg3:663,arg:[590,610,612,616,619,625,645,659,663,675,699,704,705,709,716,722,754,759,768,769,770,785,802,803,804,805,806,807,808,809,810],arg_begin:743,arg_empti:743,arg_end:743,arg_index:710,arg_iter:743,arg_siz:[676,743,804,805,806,807,808,809,810],argc:[659,664,665,709,710,712,724,743,759,784],argidx:810,arglist:710,argmemonli:[597,674,710],argn:606,argnam:[659,803,804,805,806,807,808,809,810],argstart:659,argsv:[804,805,806,807,808,809,810],argti:710,arguabl:683,argument1:770,argument2:770,argument:[27,129,220,378,490,585,593,596,597,599,602,604,605,607,610,611,612,613,615,616,618,619,621,623,624,625,630,639,640,645,654,655,656,658,
660,664,668,669,670,672,674,675,676,677,679,681,685,690,694,699,703,704,707,708,709,712,715,716,722,727,728,745,751,754,756,758,759,760,762,763,764,768,769,770,771,774,775,780,782,783,785,788,789,790,803,804,805,806,807,808,809,810,811,812],argumentlisttyp:743,argv0:619,argv:[601,606,619,659,664,665,709,712,724,743,759,784],argvalu:[659,665],aris:[667,677,710,743,770],arithbinop_rf:770,arithmet:[664,674,684,741,743,760,770,779,804,808],ariti:676,arm32:661,arm64:[661,679,709,773],arm7:783,arm:[594,607,641,642,648,649,653,658,669,674,678,679,688,699,710,711,716,718,748,749,757,768,770,775,778,780],arm_aapcs_vfpcc:597,arm_aapcscc:597,arm_apcscc:597,arm_bf16:658,arm_cd:658,arm_fp16:658,arm_mv:658,arm_neon:[596,658,768],arm_sv:658,arm_sve_builtin:658,arm_sve_builtin_cg_map:658,arm_sve_sema_rangecheck:658,arm_sve_typeflag:658,armap:640,armgenasmmatch:768,armgenregisterinfo:768,armhf:[700,701],arminstrinfo:780,armv5:594,armv6:[661,697],armv7:[596,661,679,697,701,710],armv7a:700,armv8:[661,710,720,751,771],arnaud:719,around:[585,593,594,604,606,609,611,638,639,659,667,669,676,677,679,689,698,706,708,710,712,719,721,722,725,726,727,728,741,743,745,748,759,760,761,764,765,770,780,781,782,783,803,806,810,811],arr1:760,arr2:760,arr:[677,710,760],arrai:[585,588,590,593,596,597,607,617,644,654,663,669,674,676,712,722,724,730,731,734,735,738,741,759,760,768,769,775,776,779,780,803,808,811],arrang:[607,609,659,676,703,710,741,760,766,775,782,789,790],arrayidx1:710,arrayidx2:710,arrayidx3:710,arrayidx4:710,arrayidx:710,arrayref:[665,769],arraytyp:743,arriv:677,arsenault:748,arsenaultagre:748,art:676,artem:660,articl:[722,741,745,787,806,808],articul:609,artifact:[677,702,712,722,750],artifact_prefix:712,artifici:590,artur:757,asan:[712,751,756],asanstackvariabledescript:763,asc:747,ascend:[585,664,710],ascii:[597,642,652,710,712,759,770,802,803,804,805,806,807,808,809,810],asciz:671,asf:667,ashr:684,asi:780,asid:[689,710,721,743],ask:[585,593,600,608,609,660,662,667,670,674,676,679,
681,702,703,709,719,722,723,725,743,745,746,753,755,757,759,764,766,782,790,803,806,808,810,811],aslr:756,asm:[597,603,617,619,639,642,646,658,724,754],asmformat:658,asmmatchconvert:770,asmmatcheremitt:768,asmnam:780,asmpars:[670,679,768],asmparsernum:658,asmprint:[607,676,710,768,780],asmstr:[770,771,780],asmvariantnam:770,asmwrit:[670,676],asmwriternum:658,aspect:[585,605,607,609,663,666,676,689,691,695,710,735,736,743,745,750,756,759,803,804,806,810,811],aspir:807,aspx:[610,747],assembl:[175,270,435,548,585,586,587,588,589,597,614,615,617,619,623,628,629,639,642,646,657,658,660,661,667,669,670,672,677,679,681,700,716,718,724,726,745,746,751,758,764,768,771,774,775,778,805,809,813],assert:[605,607,659,662,679,697,702,704,709,710,711,712,723,724,740,741,743,753,759,775,780,782,786,789,804,805,807,808,809,810],assign:[585,589,590,594,597,598,599,600,607,610,611,631,645,659,662,666,667,669,671,672,677,683,689,691,695,705,707,709,710,711,720,722,723,724,728,743,745,756,759,768,770,776,780,786,804,809,810,812],assigne:600,assignvirt2phi:607,assignvirt2stackslot:607,assist:[610,667,674,695,704,705,728,807],associ:[585,590,596,597,598,599,606,607,609,610,617,659,663,664,665,666,669,671,676,704,706,709,711,715,716,721,726,728,741,743,748,754,756,759,762,764,765,766,769,770,777,778,780,783,785,803,805],assort:810,assum:[585,587,589,590,592,593,597,598,599,601,605,607,609,610,616,617,619,626,631,635,639,641,644,645,654,660,663,667,669,670,672,674,676,677,679,681,682,683,698,709,714,722,723,724,726,731,733,734,735,736,738,739,741,743,745,746,748,753,757,758,759,760,762,764,768,769,770,774,776,777,780,781,782,784,786,788,803,804,807,808,809,810,811,812],assumpt:[607,610,611,617,639,664,667,669,710,714,756,759,760,766,788,810,811],assur:[667,786],ast:[642,658,660,679,711,768,770,788,790,802,804,805,807,808,810,811,812,813],astcontext:610,astdump:768,astlay:791,astnod:768,astread:768,astwrit:768,asymmetr:677,asymmetri:[673,722],asymptomat:612,async:726,async_context:663,async_c
oroutin:663,async_function_point:663,async_op1:663,async_op2:663,async_op:663,asynchr:710,asynchron:[590,594,663,709,710,726],at_apple_properti:759,at_apple_property_attribut:759,at_apple_property_sett:759,at_apple_runtime_class:759,at_artifici:759,at_byte_s:759,at_decl_fil:759,at_decl_lin:759,at_encod:759,at_nam:759,at_typ:759,atabl:[768,769],atan2:802,atc:590,athlon:809,atkinson:747,atoi:710,atom:[36,37,38,93,96,97,100,101,110,111,186,187,189,190,191,199,200,281,282,283,286,287,304,305,354,355,386,387,388,444,447,448,451,452,467,468,499,500,501,557,560,561,564,565,574,575,588,590,611,666,667,674,684,688,721,735,743,751,756,759,764,766],atom_count0:759,atom_count:759,atomic_:594,atomic_cmpxchg:594,atomic_f:594,atomic_load_:594,atomic_swap:594,atomicexpand:594,atomicexpandpass:594,atomicrmw:[590,594,684],atomtyp:759,atop:676,att:[617,619,642,646,710],attach:[663,665,667,679,696,704,705,716,723,724,726,742,743,759,776,777,785,788,804,805,806,807,808],attachedcal:710,attack:[608,710],attacker_data:760,attacker_offset:760,attempt:[585,589,593,597,601,607,612,620,626,639,641,642,643,657,660,663,667,669,674,679,688,695,700,705,709,710,712,715,726,741,743,754,755,756,760,762,764,765,766,768,780,782,784,785,790],attend:719,attende:719,attent:[608,610,667,685,702,710,755,775,780,788],attornei:667,attr0:[17,121,210,480,597],attr1:597,attr32:[17,121,210,480],attr:[0,2,3,4,590,597,610,658,710,759,768,783,784],attract:[585,667,676,719],attrdump:768,attrgrp0:597,attrgrp1:597,attrgrp:597,attribut:[17,121,210,480,597,607,617,619,628,630,641,642,648,649,653,658,663,669,671,676,683,686,689,704,707,713,722,724,727,728,730,742,743,749,751,754,758,764,768,769,770,771,776,780,781,782],attributelist:[597,768],attributerefer:768,attrimpl:768,attrinfomap:768,attrkind:[706,768],attrlist:768,attrparsedattrimpl:768,attrparsedattrkind:768,attrparsedattrlist:768,attrparserstringswitch:768,attrpchread:768,attrpchwrit:768,attrrec:769,attrrecord:769,attrspel:768,attrspellinglistindex:768,attrtempl
ateinstanti:768,attrvisitor:768,atyp:677,audienc:[674,787],audit:760,augment:[585,590,599,685,691,743,807],augmentation_str:[585,590],augmentation_string_s:590,august:715,aurora:749,authent:[678,696],author:[585,602,607,609,610,611,663,667,672,680,696,703,709,710,711,716,722,723,726,742,748,759,760],authorit:644,auto:[588,607,625,626,645,698,709,710,714,725,726,727,743,750,759,768,769,776,778,783,788,789,790,791,803,804,805,806,807,808,809,810],autoconf:[606,667,697],autocrlf:679,autodetect:[605,611,617,619,625,639],autogener:667,autoimport:698,autoinsert:743,autom:[592,607,658,667,679,704,709,723,742,744,748,768,771],automat:[585,587,590,593,600,603,605,607,610,615,616,617,631,641,659,662,667,672,676,678,679,682,694,696,701,702,703,708,709,714,716,717,722,726,742,743,744,745,750,751,756,758,759,761,768,770,773,776,780,782,783,786,804,806,808,809,810],automata:658,automaton:[607,768],autotool:679,autovector:779,aux:743,auxiliari:[590,768,780],auxiliarydata:814,avail:[31,341,381,494,585,589,590,594,603,605,606,607,610,611,615,616,617,619,624,625,631,638,639,642,650,657,658,659,660,663,665,667,669,672,674,678,679,681,683,687,688,689,691,694,695,696,698,699,700,702,703,705,710,712,725,726,740,741,742,743,745,747,749,751,752,753,754,755,756,757,759,760,762,764,765,766,767,769,770,771,772,773,774,775,777,779,780,782,783,784,788,802,804,805,806,807,809],available_extern:[597,710],available_featur:[616,775],avalanch:743,avenu:755,averag:[599,623,639,695,712,806,809],avl:743,avoid:[585,588,590,593,595,596,597,601,605,606,607,611,631,639,649,667,671,675,676,677,685,686,688,693,699,703,705,708,710,712,713,714,722,741,743,745,748,750,753,756,757,759,760,762,763,764,765,766,775,776,779,780,782,808,810],avr:[679,748],avx1:775,avx2:[710,775],avx512:[683,710,749,752],avx:[710,775,779],awai:[598,603,610,611,660,663,667,676,684,703,710,713,719,723,727,741,743,745,759,760,766,782],awaken:669,awar:[593,594,605,606,607,610,611,659,660,667,677,679,710,712,723,726,743,745,749,757,759,764
],awkward:[659,677],axi:[760,779],axpi:660,ayal:750,azul:757,b0000:590,b128:587,b13e8756b13a00cf168300179061fb4b91fefb:712,b16:[0,4,9,587,589],b16x2:587,b16x3:587,b16x4:587,b256:587,b32:[0,2,3,4,9,587,589,724],b32x2:[0,2,3,4,9],b512:587,b64:[0,2,3,4,9,587,589],b64x2:[0,2,3,4,9],b96:587,b_ctor_bas:611,bach:765,back:[585,590,594,601,605,607,610,621,625,650,654,663,664,667,669,674,676,677,680,683,688,694,695,697,701,702,709,710,712,714,723,726,727,740,741,743,745,756,757,758,759,760,762,764,766,768,770,775,778,786,789,804,805,806,807,808,809,810],backedg:[598,714,741,764,806,807],backend:[585,594,599,601,612,619,624,639,642,658,660,661,669,670,674,675,679,688,689,690,691,696,699,702,705,708,710,716,725,727,743,748,749,756,759,762,764,770,778,813],backendutil:725,background:[606,608,717,743,745,768,784],backport:[701,757],backslash:712,backtrac:[607,810],backtrace_symbol:695,backup:[694,745],backward:[585,590,597,659,664,706,735,743,760,764,777,779],bad:[601,610,611,612,682,699,705,710,722,743,753,759,760,765,786,807,810],badarchivememb:743,badfileformat:743,badli:[674,743],bag:675,bail:[710,783,789],bailouton:743,bake:[664,686,726],balanc:[592,594,667,756],ball:719,bam:610,ban:[705,755],bandwidth:710,bangoper:770,bank:[588,658,684,689,690,691],bank_mask:[0,1,3,4,7,8,9,590],banner:743,bar:[605,606,607,609,610,611,616,626,627,639,645,652,654,659,660,663,664,667,671,672,708,710,719,724,726,743,745,754,759,764,766,770,775,777,779,781,786,804,806],bar_bodi:726,bar_in_b_cc:599,bar_map:610,baranalysi:725,bare:[624,700,710,762,780,788],baremet:700,baremetalarm:700,barlist:610,barrier0:710,barrier:[594,639,700,710,760,764],bart:590,base0:773,base1:773,base2:[710,773],base:[33,34,55,56,132,133,145,224,225,241,242,383,384,404,405,496,497,517,518,585,587,589,590,592,593,597,599,601,603,604,605,606,608,609,611,616,617,620,623,625,626,627,639,641,644,645,646,658,659,662,663,664,667,668,669,671,675,676,679,682,683,684,686,688,693,695,697,700,701,702,704,707,708,709,711,712,714,715,71
6,720,721,722,723,724,726,727,728,730,742,744,745,746,747,749,750,751,753,755,756,757,759,760,761,765,766,768,769,771,774,775,777,778,779,780,781,782,783,786,788,789,790,791,803,804,805,806,807,808,809,810],base_offset:710,baseclass:770,basedonaddress:730,basedonseg:730,basedonsegmentaddress:730,basedonsegmentvalu:730,basedonself:730,basedontyp:730,basedonvalu:730,basefieldnam:769,baseinst:590,baseinstrinfo:768,baselay:[789,791],baselin:[607,760,773],basenam:[654,729,759],basename_t:616,baseobject:760,baseopcod:[707,780],baserec:769,basereg:607,baseregisterinfo:768,basereloc:649,baseti:710,basety1:710,basety2:710,basetyp:710,basevertex:590,bash:[616,668,681,761,765],basi:[585,590,623,654,659,667,668,676,709,710,721,728,743,745,750,756,757,760,764,807],basic:[590,594,598,599,600,606,607,610,617,621,623,625,628,631,632,641,648,649,658,659,660,663,664,666,667,669,672,675,676,677,679,681,684,686,689,693,702,704,705,706,711,714,717,721,723,724,726,730,739,742,744,750,754,756,759,760,761,762,764,765,770,773,774,779,784,789,802,804,805,806,807,808,809,810,811,812],basic_:770,basic_p:770,basic_r:770,basic_ss:770,basicaa:782,basicaliasanalysi:[741,782,810],basicblock:[610,679,684,721,741,748,750,782,804,805,806,807,808,809,810],basicblocklisttyp:743,basicblockutil:743,basicirlayermaterializationunit:789,basicread:658,basictyp:710,basicwrit:658,batch:681,battl:[644,746],bayer:747,baz:[606,610,611,645,654,659,743,759,770,777,779,806],baz_in_c_cc:599,bazanalysi:725,bazel:766,bazioti:721,bb0_1:724,bb0_26:724,bb0_28:724,bb0_29:724,bb0_2:724,bb0_30:724,bb0_4:724,bb0_5:724,bb10:760,bb1:[597,689,705,710,721,759,760],bb2:[597,689,705,710,721,759],bb3:710,bb4:689,bb_0:671,bb_1:671,bb_continu:710,bb_exit:710,bbar:768,bbrefer:716,bcanalyz:[597,615,754],bcc:[716,780],bcpl:770,bcplcomment:770,bcreader:624,bcsymbolmap:614,bdce:711,bear:611,beat:693,beauti:[802,804,807],becam:585,becaus:[585,588,590,592,593,594,596,597,599,601,603,606,607,610,611,616,621,639,640,644,659,660,663,664,666,667,
669,672,673,674,675,676,677,679,682,686,688,691,695,699,701,703,704,705,706,707,708,710,712,713,714,715,716,720,721,722,728,730,733,735,741,742,743,745,747,748,754,757,759,760,762,765,769,770,771,774,775,776,777,779,780,781,782,786,789,803,804,805,806,807,808,809,810,811],becom:[585,590,596,606,607,610,611,630,639,642,654,659,660,663,667,674,677,683,685,687,689,697,703,705,709,710,714,723,725,727,741,743,745,748,757,759,760,762,764,766,770,771,775,779,780,782,783,789,803,806,808],been:[585,590,591,594,596,601,602,604,605,606,607,609,610,611,612,614,616,623,625,639,657,659,660,662,663,664,665,667,669,670,671,672,674,676,677,684,685,686,688,689,698,702,705,709,710,712,715,716,717,722,723,724,725,726,728,730,731,734,736,738,739,741,742,743,744,745,747,748,750,752,753,754,755,756,757,759,760,762,764,765,766,767,769,770,771,776,777,780,781,782,783,784,786,788,789,790,803,804,805,806,807,808,809,810,811],befor:[36,37,38,93,96,97,100,101,186,187,189,190,191,281,282,283,286,287,354,386,387,388,444,447,448,451,452,499,500,501,557,560,561,564,565,585,588,589,590,593,594,596,601,604,605,606,607,609,611,614,616,619,621,626,631,635,639,641,644,652,654,657,659,660,662,663,667,668,669,670,672,673,674,676,677,679,681,682,686,687,688,689,698,700,701,702,703,704,705,708,709,710,714,715,716,719,721,722,723,724,725,726,728,731,741,742,743,748,750,753,754,757,759,760,761,762,763,764,768,770,774,775,776,779,780,782,783,784,785,786,788,789,802,803,804,805,806,807,808,809,810,811],beforehand:714,began:[610,726,745],begin:[585,590,593,597,605,607,610,611,621,628,639,659,667,669,674,676,679,681,689,702,703,709,711,715,721,722,724,725,729,730,731,735,743,748,754,756,759,760,762,765,768,769,770,775,780,783,785,786,788,789,806,808,809,810],beginassembl:676,beginn:[600,662,679],behalf:[662,667,710,742],behav:[585,590,593,594,602,606,610,625,639,642,660,663,667,669,673,678,704,709,710,714,790],behavior:[585,590,594,600,602,606,607,608,610,611,612,616,621,628,639,659,660,663,667,669,670,672,674,67
6,704,705,709,710,712,714,726,728,741,743,755,756,757,760,770,777,779,780,782,803,806,808],behaviour:[596,602,659,684,689,710,716,775,779],behind:[607,610,611,666,667,705,706,707,710,724,743,764,778,782,788],being:[20,123,175,212,270,370,435,482,548,585,588,589,590,593,594,596,597,600,601,602,603,604,605,606,607,608,609,610,611,612,614,616,621,623,628,629,631,639,645,650,659,660,662,663,664,667,669,670,673,675,677,679,682,687,688,697,698,700,702,703,704,706,708,709,710,712,714,715,721,722,723,724,725,726,728,731,733,735,739,741,743,745,748,755,756,757,759,760,762,764,765,766,768,770,771,775,776,779,780,781,782,783,784,785,786,788,789,790,791,803,804,806,807,808,810,811],belevich:660,belief:608,believ:[585,608,667,704,710,726,728,743,755,760,805,806,807],belong:[590,659,663,666,669,671,684,710,722,743,756,777,785,808],below:[20,27,120,123,129,209,212,213,220,314,370,371,378,479,482,483,490,584,585,586,588,589,590,592,593,594,597,599,605,606,607,609,610,611,617,621,630,632,639,645,650,654,659,660,662,663,664,666,667,674,679,681,687,689,695,697,699,701,702,703,708,709,710,714,716,721,722,724,726,727,728,731,736,741,742,743,744,745,747,750,753,757,758,759,760,762,764,766,768,770,775,777,779,780,781,782,783,784,785,786,789,803,805,806,807,808,809,810],benchmark:[605,615,660,667,678,679,702,743,744,751,753,759,760,774,775,779],benchspec:774,benderski:660,beneath:761,benefici:[603,663,685,691,710,766,776,779],benefit:[585,590,593,603,610,674,676,677,683,688,699,709,710,712,714,741,744,748,759,766,789,790,791,805,809],benign:743,benjamin:676,bentri:768,berkelei:650,bernstein:759,besid:[659,678,710,745,755,760,775,780,807],bespok:676,best:[593,596,603,606,607,609,610,619,631,639,667,669,672,676,682,691,699,705,710,714,719,723,741,743,750,756,757,759,760,761,762,764,766,778,802,803,812],better:[585,594,600,607,609,610,611,614,631,639,667,669,678,683,684,689,693,695,697,699,704,705,710,711,712,714,719,720,721,722,723,725,726,727,741,743,745,759,760,769,774,779,782,783,784,788,
803,806,808,811],bettor:726,between:[585,588,590,592,593,594,596,597,606,607,610,611,626,628,630,638,639,644,645,649,659,663,664,666,667,668,669,670,672,676,679,682,683,684,689,698,700,702,705,707,708,709,710,711,712,714,715,716,718,721,722,724,726,727,729,731,734,738,741,742,745,748,753,754,756,759,760,762,764,765,766,768,770,773,775,776,778,780,783,785,786,802,805,806],bewar:[676,719],beyl:[748,757],beylsdistinguish:748,beyond:[590,606,607,658,669,674,677,687,689,710,717,718,721,726,743,757,760,761,771],bf16:[658,710],bf16x2:[8,9],bf16x4:9,bfd:[675,694],bfdname:641,bff:743,bfi:710,bfloat:[597,710],bfoo:768,bfs:747,bia:595,bicub:747,bidirect:743,big:[595,603,607,610,619,667,674,688,697,710,743,747,753,775,778,780,785,804,807,808],bigblock:619,bigger:[585,610,663,714,786],biggest:[607,708],bigmip:641,bigmsf:735,bikesh:703,bilinear:747,bin:[592,612,625,659,665,668,675,679,681,682,689,694,697,699,700,701,723,744,745,753,771,773,775,780,781,784],binari:[590,594,597,599,601,603,604,605,607,614,615,621,622,624,625,629,632,633,636,642,644,645,646,650,654,657,659,660,661,662,664,667,668,671,675,676,679,681,694,695,699,700,701,709,712,715,720,722,723,724,731,734,738,740,741,743,753,756,759,760,768,780,784,785,804,805,806,808,809,810,811,812,814],binary128:710,binary16:710,binary32:710,binary64:710,binary_nam:654,binaryexprast:[803,804,805,806,807,808,809,810],binaryoper:[610,743],binarypreced:[807,808,809,810],bind:[605,610,641,642,672,689,710,726,751,766,770,780,786,803,804,805,806,807,808,809,810],bindex:780,bindir:624,binfmt:679,binfmt_misc:679,bininteg:770,binloc:810,binop:[589,710,803,804,805,806,807,808,809,810],binoppreced:[803,804,805,806,807,808,809,810],binoprh:[803,804,805,806,807,808,809,810],binoprr:770,binoprr_rf:770,binpath:665,binutil:[605,661,668,679,694,696,701],bipartit:709,bisect:[678,743,751],bisect_script:745,bison:774,bit:[19,20,23,24,27,30,31,32,33,34,51,55,56,81,82,83,96,97,98,110,111,112,114,117,120,123,126,127,129,132,133,142,145,174,176,186,187,1
99,200,205,209,212,213,216,217,220,223,224,225,237,269,279,280,286,287,288,290,291,294,295,296,304,305,306,308,309,310,314,315,320,325,337,341,351,355,369,370,371,374,375,378,380,381,382,383,384,400,404,405,432,433,434,447,448,449,467,468,470,479,482,483,486,487,490,493,494,495,496,497,513,517,518,545,546,547,560,561,562,574,575,576,578,580,584,587,588,589,592,594,596,599,605,606,607,611,623,631,660,661,663,664,667,669,670,671,675,676,677,679,681,683,684,685,688,693,696,703,706,708,711,712,714,716,720,722,723,724,726,730,731,735,736,738,752,753,756,759,762,764,766,768,769,770,771,777,780,782,785,786,802,803,804,805,806,807,808,809,810,811,812],bitbucket:[745,747],bitcast:[593,596,611,663,669,676,677,683,684,722,727,741,777,780],bitcod:[601,603,607,612,615,617,621,622,624,628,629,632,634,657,667,670,679,681,694,704,710,722,724,727,741,743,751,773,782],bitcoderead:670,bitcodewrit:[670,743],bite:672,bitfield:[594,730,731,735,785],bitless:738,bitmap:597,bitmask:[588,710,730,738],bitmask_perm:588,bitpack:775,bitpattern:710,bitrecti:769,bits_0_7:684,bits_16_23:684,bits_24_31:684,bits_8_15:684,bitset:[712,777,780,786],bitsetcas:786,bitwidth:[597,684,710,722,764],bitwis:[589,607,741,770],bjark:660,bjarn:743,black:[689,783],blah:[610,659,712],blame:[608,723,742,745],blank:[610,611,621,627,645,667,717,742,761,770,803,811,812],blanket:726,blarg:743,blatant:808,blatantli:757,bleed:[606,678],blend:607,blgp:[8,9],blind:607,blissfulli:[659,660],bloat:[610,664,695],blob:[590,597,652,710,745,753,754],block:[585,590,593,599,600,602,606,607,619,623,625,628,630,631,632,642,644,645,648,649,663,666,667,669,674,676,684,685,686,689,693,702,705,708,709,711,712,714,717,721,722,724,733,734,739,748,750,751,753,754,756,757,758,759,762,768,777,779,780,803,804,805,806,807,808,809,810],blockaddress:[684,710],blockcodes:754,blockdim:724,blocker:[702,753],blockextractor:689,blockfrequencyinfo:[598,751],blockid:[597,754],blockidx:724,blockinfo_block:754,blocklen:597,blocklen_32:597,blockmapaddr:735,b
locknam:597,blockscalartrait:786,blocksiz:735,blocksizei:724,blocksizex:724,blocksizez:724,blog:[725,745,760],blogspot:760,bloom:607,blow:712,blr:671,blunt:784,blx:671,bmi2:760,bnextind:710,bnf:590,board:[697,701,755,757],boat:[610,748],bob:[768,769,786],bodi:[585,590,597,601,611,631,659,663,666,667,669,705,710,712,714,716,722,726,743,748,758,760,764,771,780,782,784,790,791,803,804,805,806,807,808,809,810],bodyexpr:[806,807,808,809,810],bodyitem:770,bodysequ:785,bodyv:[808,809,810],boehm:676,bogner:692,bogu:[782,809],boil:[596,667,703,745,806,807],boiler:[659,743],boilerpl:[659,676,703,781],bold:[717,761],bonair:590,bondhugula:747,bone:[762,780,788],book:[610,661,662,682,711,743,782,787],bookkeep:[741,743],bool:[593,594,603,605,607,610,639,659,663,686,687,703,710,712,722,725,728,743,754,759,769,780,781,782,783,784,786,804,805,806,807,808,809,810],boolean128:738,boolean16:738,boolean32:738,boolean64:738,boolean8:738,boolordefault:659,booltmp:[804,805,806,807,808,809,810],boost:595,bootstrap:[603,679],bootstrap_:[592,603,668],bore:[611,782],boringssl:712,bork:659,borrow:[710,743],bot:[667,678,679,723,745],both:[85,176,271,352,436,549,585,586,588,589,590,591,592,593,594,597,599,601,603,605,606,607,608,609,610,611,616,621,625,634,637,639,640,644,645,659,660,661,664,666,667,669,672,674,675,676,677,681,683,684,688,691,693,695,700,701,702,703,705,709,710,712,714,715,720,721,722,725,726,731,735,736,740,741,743,748,750,755,756,757,758,759,760,762,764,766,768,769,770,771,775,776,779,780,782,783,784,786,788,789,803,804,805,806,808,810,811,812],bother:[725,789,808],botsalgn:747,botsspar:747,bottom:[598,619,659,686,688,710,711,714,723,741,742,765,770,779,780,782,789],bou_fals:659,bou_tru:659,bou_unset:659,bougacha:[692,757],bound:[585,589,639,667,688,708,710,716,724,743,763,764,770,780,786],bound_ctrl:[0,1,3,4,7,8,9,590],boundari:[594,596,597,604,607,663,695,710,714,726,741,743,760,805],bourn:[659,679],box:[607,667,689,742,759,760,783],bpf:[607,679,712,748],bpf_add:607,bpf_alu64
:607,bpf_alu:607,bpf_and:607,bpf_arsh:607,bpf_b:607,bpf_call:607,bpf_class:607,bpf_div:607,bpf_dw:607,bpf_end:607,bpf_exit:607,bpf_h:607,bpf_imm:607,bpf_ja:607,bpf_jeq:607,bpf_jge:607,bpf_jgt:607,bpf_jmp:607,bpf_jne:607,bpf_jset:607,bpf_jsge:607,bpf_jsgt:607,bpf_k:607,bpf_ld:607,bpf_ldx:607,bpf_lsh:607,bpf_mem:607,bpf_mod:607,bpf_mov:607,bpf_mul:607,bpf_neg:607,bpf_op:607,bpf_or:607,bpf_rsh:607,bpf_st:607,bpf_stx:607,bpf_sub:607,bpf_w:607,bpf_x:607,bpf_xadd:607,bpf_xor:607,bpl:771,bptr:710,bra:724,brace:[611,669,710,743,770,775],bracket:[611,660,667,669,710,716,757,759,770,786],bradburi:748,bradburyconcern:748,bradburytransit:748,brain:[597,610,710],branch:[24,127,217,375,487,585,589,604,605,607,610,625,631,639,642,661,663,664,667,668,669,672,673,674,679,682,684,705,710,714,716,723,728,742,743,748,750,751,764,766,777,806,807,808,809,810],branch_weight:[598,599],branchfold:[705,780],branchfunnel:710,branchless:760,branchprob:599,branchprobabilityinfo:775,brand:[781,782],brave:803,breadth:[619,747,812],breagen:747,breakag:[667,745,766],breakcriticaledg:782,breakdown:600,breakpoint:[590,601,665,705,710,810,812],breg:585,brendan:784,breviti:[596,724,726],brew:743,brian:750,brick:722,brief:[593,605,606,610,644,659,679,722],briefli:[664,699,722,782],brig:590,bring:[674,676,690,709,712,754,764,788,811],broad:[659,667,676,743,746,805],broadcast:[588,607,710,711],broader:760,broadli:688,broke:[667,723],broken:[590,596,609,616,641,653,667,672,682,696,710,723,743,745,748,760,764,766,770,775,780,782,787],broker:710,brought:[722,745,757],brows:[605,675,742,745,804],browsabl:605,browser:[699,742,784],brtarget8:780,brtarget:780,bruce:743,bruno:748,bsd:[621,640,667,679],bsnork:768,bss:[590,619,640,650,652,671],bswap32r_bswap64r_movsx64rr32:631,bswap:670,bt331:747,btver2:639,bubbl:723,bucket:[638,644,648,649,733,736,738,743,759],bucket_count:759,budget:590,buf:754,buf_data_format_10_10_10_2:588,buf_data_format_10_11_11:588,buf_data_format_11_11_10:588,buf_data_format_16:588,buf_data
_format_16_16:588,buf_data_format_16_16_16_16:588,buf_data_format_2_10_10_10:588,buf_data_format_32:588,buf_data_format_32_32:588,buf_data_format_32_32_32:588,buf_data_format_32_32_32_32:588,buf_data_format_8:588,buf_data_format_8_8:588,buf_data_format_8_8_8_8:588,buf_data_format_invalid:588,buf_data_format_reserved_15:588,buf_fmt_10_10_10_2_sint:588,buf_fmt_10_10_10_2_snorm:588,buf_fmt_10_10_10_2_sscal:588,buf_fmt_10_10_10_2_uint:588,buf_fmt_10_10_10_2_unorm:588,buf_fmt_10_10_10_2_usc:588,buf_fmt_10_11_11_float:588,buf_fmt_10_11_11_sint:588,buf_fmt_10_11_11_snorm:588,buf_fmt_10_11_11_sscal:588,buf_fmt_10_11_11_uint:588,buf_fmt_10_11_11_unorm:588,buf_fmt_10_11_11_usc:588,buf_fmt_11_11_10_float:588,buf_fmt_11_11_10_sint:588,buf_fmt_11_11_10_snorm:588,buf_fmt_11_11_10_sscal:588,buf_fmt_11_11_10_uint:588,buf_fmt_11_11_10_unorm:588,buf_fmt_11_11_10_usc:588,buf_fmt_16_16_16_16_float:588,buf_fmt_16_16_16_16_sint:588,buf_fmt_16_16_16_16_snorm:588,buf_fmt_16_16_16_16_sscal:588,buf_fmt_16_16_16_16_uint:588,buf_fmt_16_16_16_16_unorm:588,buf_fmt_16_16_16_16_usc:588,buf_fmt_16_16_float:588,buf_fmt_16_16_sint:588,buf_fmt_16_16_snorm:588,buf_fmt_16_16_sscal:588,buf_fmt_16_16_uint:588,buf_fmt_16_16_unorm:588,buf_fmt_16_16_usc:588,buf_fmt_16_float:588,buf_fmt_16_sint:588,buf_fmt_16_snorm:588,buf_fmt_16_sscal:588,buf_fmt_16_uint:588,buf_fmt_16_unorm:588,buf_fmt_16_usc:588,buf_fmt_2_10_10_10_sint:588,buf_fmt_2_10_10_10_snorm:588,buf_fmt_2_10_10_10_sscal:588,buf_fmt_2_10_10_10_uint:588,buf_fmt_2_10_10_10_unorm:588,buf_fmt_2_10_10_10_usc:588,buf_fmt_32_32_32_32_float:588,buf_fmt_32_32_32_32_sint:588,buf_fmt_32_32_32_32_uint:588,buf_fmt_32_32_32_float:588,buf_fmt_32_32_32_sint:588,buf_fmt_32_32_32_uint:588,buf_fmt_32_32_float:588,buf_fmt_32_32_sint:588,buf_fmt_32_32_uint:588,buf_fmt_32_float:588,buf_fmt_32_sint:588,buf_fmt_32_uint:588,buf_fmt_8_8_8_8_sint:588,buf_fmt_8_8_8_8_snorm:588,buf_fmt_8_8_8_8_sscal:588,buf_fmt_8_8_8_8_uint:588,buf_fmt_8_8_8_8_unorm:588,buf_fmt_8_8_8_8_usc:588,bu
f_fmt_8_8_sint:588,buf_fmt_8_8_snorm:588,buf_fmt_8_8_sscal:588,buf_fmt_8_8_uint:588,buf_fmt_8_8_unorm:588,buf_fmt_8_8_usc:588,buf_fmt_8_sint:588,buf_fmt_8_snorm:588,buf_fmt_8_sscal:588,buf_fmt_8_uint:588,buf_fmt_8_unorm:588,buf_fmt_8_usc:588,buf_fmt_invalid:588,buf_num_format_float:588,buf_num_format_reserved_6:588,buf_num_format_sint:588,buf_num_format_snorm:588,buf_num_format_snorm_ogl:588,buf_num_format_ssc:588,buf_num_format_uint:588,buf_num_format_unorm:588,buf_num_format_usc:588,buffer:[34,66,67,104,105,106,107,115,117,132,157,158,194,195,196,203,204,205,225,254,255,279,280,290,291,294,295,296,297,298,299,300,301,309,310,349,384,418,419,455,456,457,471,472,497,530,531,568,569,570,571,579,580,604,631,639,663,669,709,710,713,731,736,738,743,759,768,783,785,803,804,805,806,807,808,809,810,811],buffer_atomic_add:[0,2,3,4,9],buffer_atomic_add_f32:[8,9],buffer_atomic_add_f64:9,buffer_atomic_add_x2:[0,2,3,4,9],buffer_atomic_and:[0,2,3,4,9],buffer_atomic_and_x2:[0,2,3,4,9],buffer_atomic_cmpswap:[0,2,3,4,9],buffer_atomic_cmpswap_x2:[0,2,3,4,9],buffer_atomic_dec:[0,2,3,4,9],buffer_atomic_dec_x2:[0,2,3,4,9],buffer_atomic_fcmpswap:[0,2],buffer_atomic_fcmpswap_x2:[0,2],buffer_atomic_fmax:[0,2],buffer_atomic_fmax_x2:[0,2],buffer_atomic_fmin:[0,2],buffer_atomic_fmin_x2:[0,2],buffer_atomic_inc:[0,2,3,4,9,590],buffer_atomic_inc_x2:[0,2,3,4,9],buffer_atomic_max_f64:9,buffer_atomic_min_f64:9,buffer_atomic_or:[0,2,3,4,9],buffer_atomic_or_x2:[0,2,3,4,9],buffer_atomic_pk_add_f16:[8,9],buffer_atomic_smax:[0,2,3,4,9],buffer_atomic_smax_x2:[0,2,3,4,9],buffer_atomic_smin:[0,2,3,4,9],buffer_atomic_smin_x2:[0,2,3,4,9],buffer_atomic_sub:[0,2,3,4,9],buffer_atomic_sub_x2:[0,2,3,4,9],buffer_atomic_swap:[0,2,3,4,9],buffer_atomic_swap_x2:[0,2,3,4,9],buffer_atomic_umax:[0,2,3,4,9],buffer_atomic_umax_x2:[0,2,3,4,9],buffer_atomic_umin:[0,2,3,4,9],buffer_atomic_umin_x2:[0,2,3,4,9],buffer_atomic_xor:[0,2,3,4,9],buffer_atomic_xor_x2:[0,2,3,4,9],buffer_gl0_inv:[0,590],buffer_gl1_inv:[0,590],buffer_gl
:590,buffer_invl2:[9,590],buffer_load:590,buffer_load_dword:[0,2,3,4,9,590],buffer_load_dwordx2:[0,2,3,4,9],buffer_load_dwordx3:[0,2,3,4,9],buffer_load_dwordx4:[0,2,3,4,9],buffer_load_format_d16_hi_x:[4,9],buffer_load_format_d16_x:[0,3,4,9],buffer_load_format_d16_xi:[0,3,4,9],buffer_load_format_d16_xyz:[0,3,4,9],buffer_load_format_d16_xyzw:[0,3,4,9],buffer_load_format_x:[0,2,3,4,9],buffer_load_format_xi:[0,2,3,4,9],buffer_load_format_xyz:[0,2,3,4,9],buffer_load_format_xyzw:[0,2,3,4,9],buffer_load_sbyt:[0,2,3,4,9],buffer_load_sbyte_d16:[0,4,9],buffer_load_sbyte_d16_hi:[0,4,9],buffer_load_short_d16:[0,4,9],buffer_load_short_d16_hi:[0,4,9],buffer_load_sshort:[0,2,3,4,9],buffer_load_ubyt:[0,2,3,4,9],buffer_load_ubyte_d16:[0,4,9],buffer_load_ubyte_d16_hi:[0,4,9],buffer_load_ushort:[0,2,3,4,9],buffer_s:785,buffer_store_byt:[0,2,3,4,9],buffer_store_byte_d16_hi:[0,4,9],buffer_store_dword:[0,2,3,4,9],buffer_store_dwordx2:[0,2,3,4,9],buffer_store_dwordx3:[0,2,3,4,9],buffer_store_dwordx4:[0,2,3,4,9,587,590],buffer_store_format_d16_hi_x:[4,9],buffer_store_format_d16_x:[0,3,4,9],buffer_store_format_d16_xi:[0,3,4,9],buffer_store_format_d16_xyz:[0,3,4,9],buffer_store_format_d16_xyzw:[0,3,4,9],buffer_store_format_x:[0,2,3,4,9],buffer_store_format_xi:[0,2,3,4,9,590],buffer_store_format_xyz:[0,2,3,4,9],buffer_store_format_xyzw:[0,2,3,4,9],buffer_store_lds_dword:[3,4,9],buffer_store_short:[0,2,3,4,9],buffer_store_short_d16_hi:[0,4,9],buffer_wbinvl1:[2,3,4,9,590],buffer_wbinvl1_vol:[2,3,4,9,590],buffer_wbl2:[9,590],bug:[589,590,601,602,603,604,605,606,607,608,609,610,611,612,632,639,665,667,674,678,679,682,689,695,701,702,709,710,711,712,716,720,722,741,742,743,745,746,748,751,757,763,771,775,804,808,811],buggi:704,buggy_cod:695,bugpoint:[615,632,667,678,679,704,728,751,782],bugzilla:[600,667,675,678,702,712,745,753,757,764,775],bui:[697,757],buid:663,build:[590,593,597,606,607,609,610,614,616,617,621,624,625,635,657,658,659,660,662,663,665,666,667,669,676,678,682,689,695,709,710,713,7
25,726,728,731,740,741,750,753,756,759,760,761,766,768,769,770,771,773,774,775,777,780,783,802,803,804,805,806,807,808,809,810,811,812],build_arch:744,build_dir:745,build_docker_imag:668,build_shared_lib:[603,605],buildattribut:700,buildbot:[609,616,667,675,678,702,723,742,745,746,766,775],builddir:696,builder:[666,667,678,679,706,726,727,743,746,749,782,790,803,804,805,806,807,808,809,810],buildingajit:679,buildmi:607,buildmod:775,buildmodul:788,buildnumb:731,buildpermoduledefaultpipelin:725,built:[592,597,603,605,624,625,627,644,654,659,662,667,668,669,674,675,677,679,681,682,688,693,694,696,698,699,700,701,703,709,710,711,712,721,723,726,731,738,739,741,743,744,745,746,750,756,759,760,768,769,771,773,774,775,780,784,785,803,804,805,806,807,808,810,811,812],builtin:[594,597,605,616,658,710,724,743,745,749,778,782,807,808,809,810],bulk:[607,667,709,720,743,808],bulki:590,bullet:703,bump:[676,702,743,758],bunch:[592,606,672,743,774,805,807],bundl:[614,630,663,681,709,764,783],bundler:590,bunzip2:679,bur:711,burden:[667,710,721,760,766],burg:711,burgess:[721,757],burk:747,burn:697,burr:619,busi:[667,757],busiest:742,button:[714,742],bvalu:768,bwave:747,bwaves_:747,bwaves_r:747,by_valu:590,byarg:710,bypass:[588,590,639,660,709,710],byproduct:688,byref:[597,710],byte_0:[588,590],byte_1:[588,590],byte_2:[588,590],byte_3:588,bytearrai:710,bytecod:[597,604],byteswap:670,byval:[597,607,710,727],byvalu:590,bz2:679,bzero:684,bzip2:679,bzoo:768,c11:[594,660,731,734,759],c11bytes:[731,734],c11lineinfo:734,c11size:734,c13:[644,731,734],c13bytes:[731,734],c13lineinfo:734,c13size:734,c17:746,c1_1:645,c1_2:645,c1_n:645,c1_u_1:645,c1_u_i:645,c1x:710,c2_1:645,c2_2:645,c2_n:645,c2_u_2:645,c2_u_:645,c2_v_1:645,c2_v_2:645,c2_v_i:645,c2_v_t:645,c5y977rlqpw:609,c89:610,c99:710,c_ctor_bas:611,c_str:[659,724,743,802,803,804,805,806,807,808,809,810],cach:[588,590,592,593,594,603,606,607,614,631,639,685,697,710,715,721,725,726,741,743,756,759,760,762,773,782,810],cachefil:773,cactubssn:747,c
actubssn_r:747,caico:590,calcul:[585,590,593,598,599,607,615,625,639,669,677,684,709,710,711,725,741,743,759,770,775,780,782,806],calcvalu:770,calendar:678,call2:611,call32m:770,call32r:770,call64pcrel32:716,call:[592,594,596,597,598,599,601,603,604,605,606,611,616,619,623,625,632,641,645,658,659,660,661,663,664,665,666,668,669,670,673,674,676,679,684,688,689,690,699,705,706,709,711,712,713,714,715,716,718,720,721,722,723,724,725,726,727,728,730,736,740,742,744,745,746,750,756,757,758,759,762,763,764,765,768,769,770,774,775,777,782,783,784,785,786,788,789,790,802,803,804,805,806,807,808,809,810,811,812],call_branch_weight:599,call_convent:590,call_site_num:669,callabl:[610,709,710,724,726,791,804],callahan:750,callback:[593,607,659,663,676,712,725,743,762,780,788,790],callbackvh:743,callbas:743,callbr:743,callcount:743,calle:[585,596,607,660,663,672,709,712,722,741,760,762,770,777,780,782,803,804,805,806,807,808,809,810],calledcount:601,calledgekind:709,callee_ctxt:663,calleef:[804,805,806,807,808,809,810],caller:[585,590,596,599,601,607,654,660,663,669,671,672,676,710,712,722,741,743,758,760,780,782,806,807],caller_context:663,caller_pc:712,callexprast:[803,804,805,806,807,808,809,810],callgraph:[648,649],callgraphscc:[728,782],callgraphsccpass:728,callingconv:[597,658,780],callinst:743,calllow:[685,690],callloweringinfo:784,callon:743,callpcrel32:770,callq:[758,760,762,764],callseq_end:764,callseq_start:764,callsit:[599,710,716,760,762,764],callsitesforfunct:709,callthroughmgr:726,calltmp1:[804,806],calltmp2:805,calltmp6:808,calltmp:[804,805,806,807,808,809,810],calltwo:743,calm:808,cam4_:747,cam4_r:747,came:[592,640,669,710,760,806,811],camel:[610,748],camelback:748,camelcas:748,camera:719,can:[585,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,603,605,606,607,608,610,611,612,614,616,617,619,621,623,624,625,626,627,628,629,630,631,633,634,637,639,641,642,644,645,647,651,653,654,657,658,659,660,662,663,664,665,666,667,668,669,670,671,673,675,676,678,6
79,681,682,683,684,685,686,687,688,689,691,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,730,731,733,735,736,738,739,741,742,743,744,745,746,747,748,749,750,752,753,754,755,756,757,759,761,762,763,764,765,766,768,769,770,771,773,774,775,776,777,779,780,781,782,783,784,785,786,788,789,790,802,803,804,805,806,807,808,809,810,811,812],canadian:679,canari:[695,710],cancel:669,canconstantfoldcallto:670,candeletealltext:723,candid:[600,667,668,673,710,716,750,753,766],canfoldasload:[770,771],canlosslesslybitcastto:722,cannot:[107,196,301,457,571,585,586,588,589,590,593,594,596,599,600,601,605,606,607,608,610,611,612,621,625,626,639,645,653,654,659,660,663,664,667,669,671,676,677,684,686,700,705,708,710,715,721,725,735,736,741,742,743,745,746,748,755,757,759,760,765,766,768,770,773,776,779,780,782,788,804,811],canon:[585,590,596,669,674,710,726,741,743,745,759,760,778],canonic:[607,611,684,743,782],canonicalis:596,canreserveresourc:607,cantfail:[788,789,790,791],cap:610,capabl:[593,607,658,659,660,661,666,670,679,695,710,713,720,721,724,736,743,749,754,768,780,782,804,805,807,808,811],capac:[733,743],capit:[610,667,748],capston:712,captur:[585,597,601,603,605,607,611,659,670,686,748,750,759,760,762,775,780,783,785,803,804,805,806,807,808,809,810,811],card:[697,764],care:[590,591,593,594,607,608,610,611,660,675,679,695,705,709,710,712,716,718,721,725,726,727,743,745,754,759,760,765,766,771,774,780,782,808,811],carefulli:[585,667,674,676,709,710,759,808],careless:610,caret:[625,710],carol:768,carri:[639,666,677,684,687,710,718,743,757,770,776],carrizo:590,carruth:[748,760],carruthacronym:748,carruthcamelback:748,carruthdistinguish:748,carruthfunct:748,carruthinconsist:748,carruthlow:748,carv:756,cascad:705,case_branch_weight:599,cassert:[610,724,805,806,807,808,809],cast210:[685,710],cast:[585,596,604,610,659,684,703,710,722,726,727,750,764,769,770,771,777,805,806,807,808,810,81
1],cast_or_nul:743,castinst:743,casual:667,cat:[626,652,654,659,676,679,695,709,712,775,781],catapult:783,catastroph:760,catch3:669,catchi:723,catchpad:669,catchret:669,catchswitch:669,categor:[659,714,741,768,780],categori:[610,630,635,644,704,710,741,743,759,766,768,775,780,785],cater:[766,768],caught:[610,669,695,710],caus:[585,590,593,594,596,600,601,605,607,609,610,611,612,614,616,619,623,625,628,634,639,641,645,654,657,659,667,670,671,672,676,679,682,686,687,688,689,694,695,698,699,700,704,709,710,712,715,723,725,727,728,741,743,744,745,756,757,759,760,763,764,765,766,768,770,775,777,779,782,786,803,804,806,807,808,810],caution:[594,764],caveat:[602,660,710,810],cayman:590,cbar:768,cbaz:768,cbe:704,cbpf:607,cbsz:[8,9],cbtw:607,cbw:607,cc_sparc32:780,ccach:605,ccache_dir:605,ccache_maxs:605,ccassigntoreg:780,ccassigntoregwithshadow:780,ccassigntostack:780,ccc:[597,710],ccdelegateto:780,ccif:780,ccifcc:780,ccifinreg:780,ccifnest:780,ccifnotvararg:780,cciftyp:780,ccmake:605,ccomment:770,cconv:710,ccpassbyv:780,ccpromotetotyp:780,ccr:683,cctool:[605,635],cctype:[803,804,805,806,807,808,809,810],cd8_eltsiz:770,cd8_form:770,cd8_scale:770,cdbg_user:590,cde:658,cdecl:710,cdna1:590,cedar:590,cee:743,ceil:[590,735,779],cell:780,center:[743,810],centerpoint:667,central:[669,759,765],centri:768,centric:710,cenum:768,cerr:[724,784],certain:[585,590,593,596,605,607,609,611,616,621,631,639,652,659,663,664,670,671,676,679,689,705,709,710,712,725,729,735,736,740,743,746,756,760,762,768,770,782,783,784,805,807,808,809],certainli:[594,609,674,676,684,714,745,811],cfa:[590,669],cfd:747,cfe:[667,679,719,723,742,745,750,766],cfg:[607,616,669,705,708,710,714,722,725,728,743,750,775,779,780,782,806],cfganalys:725,cfgsimplifi:743,cfguard_checkcc:[597,710],cfi:[585,604,669,709,711,716,760],cfi_instruct:716,cfi_offset:716,cflag:[712,744],cfoo:768,cfrac:774,cg_profil:671,cgft_objectfil:809,cgo:[660,750],cgp_from:671,cgp_to:671,cgp_weight:671,cgpm:725,cgroup:668,cgscc:725,cgsccanalysisman
ag:725,cgsccpassmanag:725,ch9:810,chain:[585,591,592,599,605,607,610,616,630,639,663,681,683,686,710,711,713,714,721,722,741,759,765,768,776,780,782,808],challeng:[667,693,710,760,764,807],champion:757,chanc:[599,663,667,674,709,710,715,727,771,783,786,812],chandler:[748,760],chandlerc:[742,760],chang:[588,589,593,596,597,599,600,601,603,605,607,608,609,610,611,612,616,622,624,628,630,633,639,641,658,659,660,662,663,664,666,670,672,674,675,676,677,678,680,681,682,684,685,688,695,696,702,703,705,706,708,709,710,711,713,714,715,716,720,722,724,725,726,727,728,731,736,740,741,746,748,753,754,760,762,764,766,767,768,772,773,774,776,779,780,782,785,786,788,789,790,791,805,806,807,808,809,810],changebit:712,changebyt:712,changelog:712,channel:[17,121,210,480,588,590,608,662,678,709,760],chao:592,chapter:[585,662,722,787,802,811,812],char16_t:738,char32_t:738,char6:597,charact:[585,588,590,607,616,621,625,626,639,640,641,642,652,653,658,659,664,710,716,718,743,759,768,770,775,802,803,804,805,806,807,808,809,810],character16:738,character32:738,character:[631,710,760],characterist:[67,158,255,349,419,531,585,607,631,670,710,731,743,750,780,790,814],charg:[607,719,757],charscalarti:710,chart:779,charter:667,chase:704,chat:[608,667,678],cheap:[594,610,667,710,743],cheaper:[673,684,743,760],cheapest:691,cheapli:[721,760],cheat:790,check:[585,589,590,592,594,601,604,605,606,607,610,612,614,616,631,639,658,659,663,664,667,668,669,674,675,676,677,679,681,682,684,686,689,694,696,697,699,700,701,702,703,704,705,709,712,713,714,716,717,722,723,724,725,726,728,742,745,750,751,752,753,756,758,759,762,763,764,766,769,773,774,775,776,777,780,781,782,784,786,788,802,803,804,805,806,807,808,809,810,811],checkcudaerror:724,checkcustomhazard:639,checker:[658,677,748,768],checkformat:743,checkout:[605,668,682,698,699,702,740,752,753,761],checkpoint:669,checksum:[585,671,710,712,756],checksumkind:[671,710],chees:610,cherri:702,child:[669,703,710,712,743,807],child_begin:743,child_end:743,chil
d_iter:743,childidx:743,childorerr:743,children:[630,703,729,743],childvalid:743,chill:667,chip:[590,607,617,619,683,697,701,724,775,780],chmod:679,choco:681,chocolatei:681,choic:[585,609,610,617,629,670,683,686,697,710,712,723,742,743,756,762,764,802,806,811],choos:[585,590,596,605,607,608,612,619,631,642,659,660,667,676,695,696,700,701,704,707,710,714,719,721,726,742,743,746,748,755,759,773,780,782,783,784,803,805,806],chop:759,chose:[753,760],chosen:[596,607,610,616,676,695,710,764,770,783],chri:[607,660,667,745,748,782],chriscox:747,chrome:783,chromium:[695,702,757],chronolog:[745,748],chunk:[596,597,607,610,644,709,712,741,756,758,759],churn:790,cie:[585,590,711],cie_id:585,cie_point:585,cimag:807,cin:672,cindex:780,cint2006:773,circl:703,circt:678,circuit:807,circular:[585,610,783],circumst:[600,611,669,709,710,714,743,757,759,780,782,805,808],circumv:659,cis501:595,cis:595,cite:748,citi:[678,719],citizen:743,claim:[594,667,785],clamp:[0,1,2,3,4,5,6,7,8,9,590,710],clampnumel:688,clampscalar:688,clang:[585,594,599,602,603,604,605,609,610,611,614,615,616,617,625,627,639,642,654,662,664,665,667,668,669,672,674,677,678,681,682,694,695,698,702,703,704,705,709,710,711,712,713,723,724,725,726,727,728,739,742,743,744,745,748,749,753,754,757,759,769,770,771,773,775,776,777,779,782,783,784,788,789,790,791,803,804,805,806,807,808,809,810],clang_attr_arg_context_list:768,clang_attr_identifier_arg_list:768,clang_bootstrap_cmake_arg:592,clang_cc1:[611,775],clang_cl:775,clang_cpp:775,clang_enable_bootstrap:592,clangd:723,clangxx:775,clarif:[610,661,667,785],clarifi:[585,723,759],clariti:[710,755,806],clase:658,clash:[610,710,719,791],classic:[602,607,610,695,741,743,760],classid:770,classif:[710,763,765],classifi:[593,602,710],classnam:[658,731,769],classof:768,classpair:769,classrec:769,classref:770,claus:[607,667,669,710,743,770],clean:[607,612,616,667,669,679,686,699,704,710,713,724,727,741,742,743,745,760,782],cleaner:[610,659,743],cleanest:585,cleanli:[610,667,710,782],
cleanup:[663,667,710,711,726,741,759,790,791,805,806],cleanuppad:[663,669],cleanupret:[663,669],clear:[605,609,610,611,657,667,676,677,702,705,708,710,723,725,731,735,738,743,745,748,760,766,769,770,783,803,804,805,806,807,808,809,810],clearer:610,cleargraphattr:743,clearli:[585,600,610,667,714,721,730,745,757,759,762,766],clearresourc:607,clenumv:659,clenumvaln:[659,769],clever:[703,811],cleverli:596,click:[667,681,723,742,761],client:[590,597,605,607,610,659,667,676,703,709,715,718,726,743,745,751,754,759,762,773,777,780,782,788,789,805,808],cling:726,clip:665,clobber:[585,590,593,716,760,770],clock:[639,710,769,782],clone:[605,616,667,679,681,694,723,743,750,768,773,774,782,808],close:[603,607,610,611,663,677,678,679,681,688,702,709,710,711,712,723,742,743,753,760,766,782,803],close_fd_mask:712,closer:[677,711],closest:[585,710,810],closur:811,clr:[731,764],clrb:607,clrl:607,clrq:607,clrw:607,cluster:[631,683,689],cluster_id:631,clutter:[602,610,667],cmake:[592,595,667,668,675,681,682,694,695,696,697,717,723,725,743,745,747,756,761,766,775,778,780,781,784],cmake_:[603,605,606],cmake_asm_compiler_target:700,cmake_asm_flag:700,cmake_build_typ:[603,605,606,679,773],cmake_c_compil:[679,773],cmake_c_compiler_external_toolchain:700,cmake_c_compiler_target:700,cmake_c_flag:[700,773],cmake_c_flags_optim:773,cmake_cflag:700,cmake_cxx_compil:679,cmake_cxx_flags_releas:679,cmake_cxx_flags_relwithdebinfo:679,cmake_cxx_standard:605,cmake_fortran_compil:773,cmake_instal:605,cmake_install_prefix:[592,605,679,681,701],cmake_minimum_requir:[605,606],cmake_module_path:605,cmake_parse_argu:606,cmake_sysroot:700,cmake_toolchain_fil:[605,679],cmake_try_compile_target:700,cmake_verbose_makefil:592,cmakebuildtyp:723,cmakecach:[605,606,784],cmakefil:[679,773],cmakelist:[592,605,606,679,702,769,773,780,781,782],cmakeparseargu:606,cmd:595,cmdsetuserdata:590,cmov:760,cmovcc:760,cmoveq:760,cmovneq:760,cmp32ri8:607,cmp:[607,642,710,741,743,760],cmpb:671,cmpflag:722,cmpinst:[716,743],cmpname:
768,cmpnumber:722,cmpq:[671,758],cmpswap:588,cmptmp:[804,805,806,807,808,809,810],cmpxchg:[594,684],cnt:[120,209,314,479,584],co_await:663,coalesc:[607,619,628,743],coars:[590,616],cocoa:710,code:[27,86,129,177,220,272,378,437,490,550,585,589,591,592,593,594,596,597,599,600,602,603,604,605,606,611,612,615,616,617,620,625,626,633,635,638,640,641,642,643,644,646,648,649,650,652,653,659,662,663,666,668,670,671,673,674,675,677,678,681,683,686,688,689,692,693,694,695,697,698,699,700,701,702,703,705,706,708,709,711,712,713,714,717,719,720,722,724,725,726,727,728,729,730,731,739,740,745,746,747,750,751,752,753,754,757,758,760,762,763,764,766,768,769,770,771,773,774,775,777,779,783,784,786,802,811,812,813],code_alignment_factor:[585,590],code_object_uri:590,code_own:[662,667,742],codebas:[610,667,685,727,742,757],codeblock:676,codeemittergen:768,codegen:[590,591,607,611,612,658,660,663,670,673,674,676,679,684,687,689,705,710,713,716,724,725,727,768,775,778,780,781,782,791,804,805,806,807,808,809,810],codegener:704,codegenopt:728,codegenschedmodel:780,codeid:754,codepath:[594,743],codeprop:590,coder:745,coderecti:769,codereview:723,codes:[770,771],codeview:[644,649,731,736],codeviewdebug:759,coding_style_cpp:748,codlay:790,coerc:[663,722],coerce_offset0:611,coexist:766,coff:[597,607,640,641,661,678,681,698,709,710,726,731,736],cofflinkgraphbuild:709,cohen:765,coher:[588,590,710],coincid:776,col:[710,810],cold:[597,598,645,674,710],coldcc:[597,710,764],colder:598,colfield:707,collabor:[585,667,745],collaps:[710,714,760],collat:709,colleagu:608,collect:[585,593,598,599,605,625,630,639,658,667,674,680,683,695,699,706,709,711,712,713,715,726,738,741,743,745,747,750,754,760,762,763,768,769,770,773,774,779,784,812],collect_and_build_with_pgo:699,collector:[597,658,672,711,764,811],collis:[610,710,743,745,759],colloqui:710,colombet:692,colon:[590,603,605,606,611,695,756,768,770,775,786],color:[78,171,266,542,590,607,611,625,630,635,644,710,723,743,750,784],colour:608,column:[585,59
0,610,611,639,642,654,664,671,679,705,707,754,759,774,779,786,810],columnend:664,columnstart:664,com:[595,602,605,610,634,679,681,682,700,702,710,712,719,723,738,742,743,745,747,748,760,773,774,783,813],com_fir:607,combin:[120,176,209,314,479,584,585,588,590,594,601,603,605,611,623,625,633,637,639,642,653,658,659,660,663,664,666,667,670,671,676,677,679,684,688,689,691,705,709,710,712,716,722,723,725,728,729,739,743,750,756,759,760,764,766,770,776,779,780,782,783,784,786,803,804,811],combine1:607,combine2:607,combinedalloc:756,comdat:[597,671],come:[585,590,597,603,605,606,607,608,610,612,625,639,663,666,667,670,676,677,679,705,707,708,709,710,714,716,721,722,724,725,726,731,738,739,743,744,746,754,756,757,759,760,764,770,773,775,782,783,786,788,790,802,804,805,806,808,811],comfort:[723,755],comma:[120,209,213,314,371,479,483,584,586,587,590,611,631,658,659,710,716,718,743,770,775,786,806],command:[590,592,593,595,601,603,605,607,610,611,612,616,617,619,622,623,626,629,630,631,632,633,634,635,636,639,640,641,643,644,648,649,650,652,653,654,657,658,660,662,665,667,668,675,678,679,686,689,694,696,699,700,702,704,706,709,710,712,713,723,726,728,743,745,753,756,762,764,768,769,770,771,773,774,775,778,780,782,784,802,803,804,805,806,807,808,809,810],commandlin:[616,778],commandlinepars:784,commasepar:659,comment:[590,600,607,609,611,621,639,650,652,658,662,667,676,698,703,710,711,712,721,722,741,742,743,745,748,752,768,769,770,771,775,781,783,786,802,803,804,805,806,807,808,809,810,811,812],commentstr:780,commerci:[585,667,764],commit:[600,610,639,662,674,678,682,696,702,711,717,727,731,735,748,761,764,766,775],committ:667,committe:[608,743],common:[585,590,592,593,594,596,597,603,606,607,609,610,613,618,621,640,641,650,655,656,659,660,662,663,666,667,669,670,672,674,675,676,677,683,686,688,703,705,709,710,711,712,715,717,721,722,723,725,726,731,741,744,745,750,759,760,761,762,764,765,770,771,780,782,785,786,803,805,806,807,808,811],commonli:[597,605,610,621,676,709,710,7
26,743,748,780],commun:[593,602,609,610,660,662,663,667,672,674,681,709,710,711,715,723,724,726,743,745,746,748,755,757,760,762,765,782],commut:[593,607,741,780],compact:[585,607,623,645,654,662,663,669,676,710,743,762,810,811],compactli:[663,743,770],compani:[667,719,757],companion:614,compar:[588,589,592,593,594,598,599,607,611,628,630,638,639,644,666,669,707,710,712,723,741,743,744,750,753,756,759,760,768,769,770,773,774,775,779,786,803,806,807,808,809,810],comparefp:770,comparison:[589,594,599,671,674,677,684,689,703,710,741,743,753,759,764,775,779,790,803,807],compat:[585,588,590,594,596,597,605,606,607,610,615,619,621,625,641,642,643,644,649,652,658,659,660,663,664,669,670,671,675,676,679,681,698,702,706,708,710,724,735,738,741,743,751,756,759,760,762,764,775,776,777,811],compatible_class:607,compbinari:646,compet:667,compil:[24,127,217,375,487,585,590,592,593,594,596,597,601,602,603,604,606,607,609,611,612,613,615,618,619,625,630,631,639,640,644,645,654,655,656,658,659,662,663,664,666,667,668,669,670,674,675,676,677,678,680,681,682,683,685,688,689,693,695,696,697,698,699,702,704,709,711,712,713,714,720,721,722,723,724,725,728,729,731,738,739,741,742,743,744,745,746,747,748,750,751,752,753,754,756,757,759,762,763,764,765,766,768,770,771,774,775,776,777,778,779,780,781,782,783,784,785,787,788,789,791,802,803,804,806,807,808,811,812,813],compiland:[644,731,734,739],compile_command:723,compile_tim:773,compilecallbackmanag:[790,791],compilelay:[726,788,789,790,791],compilemodul:784,compileondemand:790,compileondemandlay:[726,790,791],compiler_rt:667,compiler_rt_test_compile_cflag:700,compiler_rt_test_compiler_cflag:700,compileutil:[788,789,790,791],complain:[611,679,682,694],complaint:811,complement:[585,677,710,743,760],complet:[585,590,593,597,598,600,605,606,607,610,612,615,616,625,630,657,659,663,666,667,669,676,678,679,681,682,685,689,693,696,699,700,702,709,710,711,712,713,714,715,721,722,723,724,725,726,741,743,745,750,754,755,759,760,764,765,768,769,770,78
0,782,786,788,789,790,791,802,803,804,805,806,807,808,809,810],complex128:738,complex16:738,complex32:738,complex32partialprecis:738,complex48:738,complex64:738,complex80:738,complex:[585,606,607,610,639,663,670,676,712,722,725,728,738,741,759,760,764,765,766,768,769,770,771,779,780,782,789,803,805,807,808,810,811],complexpattern:[607,780],complextyp:814,compli:667,compliant:[590,660,746],complic:[592,594,605,606,669,670,676,679,681,700,703,708,709,710,714,723,750,760,768,777,779,780],compliment:585,compon:[585,588,590,603,605,606,609,611,616,621,625,627,651,658,665,666,668,675,676,678,679,681,698,700,702,704,709,710,711,715,724,726,735,736,741,743,744,745,754,756,760,762,766,770,774,778,780,783,785,788,813],compos:[585,587,589,603,664,681,710,716,726,754,757,766,785,789],composit:[710,743,750,754],compound:664,compr:[0,2,3,4,117,205,310,580],comprehens:[679,741,760,775],compress:[117,205,310,580,588,597,605,623,641,645,658,664,679,681,712,743],compris:[585,590,664,710,750],compriz:623,compromis:[710,743,760],comput:[20,120,123,209,212,314,370,479,482,584,585,588,590,593,598,601,607,610,623,639,660,664,666,672,674,687,696,709,710,711,712,714,721,725,731,738,741,743,747,750,756,757,760,762,764,770,780,782,802,804,805,806,807,808,809,810,811],computation:711,compute_20:724,compute_factori:665,compute_pgm_rsrc1_fwd_progress:590,compute_pgm_rsrc1_mem_ord:590,compute_pgm_rsrc1_sgpr:590,compute_pgm_rsrc1_vgpr:590,compute_pgm_rsrc1_wgp_mod:590,compute_pgm_rsrc2_user_sgpr:590,compute_user_data_0:590,compute_xx:[660,724],computearea:703,computeknownbit:685,con:[596,770],concat:710,concat_vector:607,concaten:[585,590,591,596,597,679,684,688,710,770,775],concentr:[639,700],concept:[585,590,596,597,607,674,677,683,703,709,710,714,722,724,726,742,743,754,759,760,765,777,781,788,789,804,806],conceptu:[585,590,596,607,610,611,659,669,677,710,743,761,776],concern:[585,609,667,676,677,710,725,745,748,764,766,789],concis:[607,610,659,667,677],conclud:[639,710,806,807],conclus:[639,72
2,790,810,812,813],concret:[597,607,659,666,683,688,689,710,721,743,749,759,764,769,771,780,782,805,812],concurr:[590,605,616,676,709,712,725,726,743,751,788],concurrentircompil:[726,788,789,790,791],cond1:770,cond2:770,cond:[598,663,672,710,714,721,759,770,780,806,807,808,809,810],cond_fals:808,cond_next:808,cond_tru:808,condbranch:780,condclaus:770,condit:[86,177,272,437,550,585,589,590,594,599,602,604,605,606,607,610,625,659,663,664,667,669,672,674,677,679,683,684,705,711,713,714,716,722,724,728,738,743,756,759,764,765,775,776,780,782,802,803,804,806,807,808,809,810],condition:[606,616,676,760,770],condmovfp:770,condn:770,condoper:770,conduct:[609,639,667,678,719,723],condv:[806,807,808,809,810],confer:[607,676,678,719],confid:[585,667,710,727],confidenti:[667,755],config:[592,605,615,616,631,649,667,675,679,696,700,709,743,744,745,775,780,788,789,790,791,803,804,805,806,807,808,809,810],configur:[590,605,607,610,611,631,667,678,681,682,693,694,695,699,700,709,710,712,724,726,740,743,744,745,753,754,756,764,766,775,778,779,780,781,782,783,784,785,786,788,789],confin:710,confirm:[600,610,679,712],conflict:[590,607,611,649,667,683,710,721,743,745,748,775],conform:[585,590,604,605,610,616,617,662,667,672,676,702,709,710,743,746,786,789,790],confront:[674,677],confus:[602,610,611,621,667,669,676,677,685,705,710,722,723,743,748,750,756,760,769,780],confusingli:594,congrat:699,congruenc:711,congruent:756,conjunct:[585,603,605,611,614,644,645,684,709,710,731,753,765,770],connect:[78,590,666,688,696,711,714,721,741,742,743,750,760,764,773,776,788],consecut:[585,589,590,596,611,710,750,756,759,770,777,779,782],consensu:[609,667,748,749,757,766],consequ:[585,594,601,608,639,660,669,679,682,710,712,714,729,743,760,762,766,775],conserv:[590,593,594,598,610,639,676,709,710,713,725,760,762,764,776,782],conservat:776,consid:[585,590,593,596,597,598,602,607,608,609,610,611,612,616,621,627,640,652,659,660,663,664,665,666,667,669,671,672,673,676,677,679,683,684,685,688,691,693,697
,699,703,705,707,708,710,711,714,716,721,722,725,738,741,743,745,748,753,755,756,759,760,761,762,763,764,765,770,771,773,775,777,779,782,789,803,806,808,810,811],consider:[585,607,608,671,679,712,743,757,759,764,765,771,786],consist:[585,590,593,594,597,606,607,609,610,611,614,616,623,631,640,645,659,664,667,669,671,674,675,676,677,686,689,695,699,702,706,709,710,712,714,716,722,728,735,736,743,745,748,756,759,760,764,766,769,770,771,774,777,780,782,783,804,805,806,807,808,809,810],consol:[607,678,712,717,723,761,783,805],const0:597,const_arg_iter:[743,769],const_global_iter:743,const_iter:[743,769],const_name_iter:769,const_op_iter:743,const_record_iter:769,const_use_iter:743,constant:[10,11,34,54,57,59,60,63,64,66,67,68,69,70,75,77,96,97,120,132,144,146,147,151,152,153,154,157,158,159,160,161,162,167,175,186,187,209,225,240,243,244,249,250,251,252,254,255,256,257,258,261,265,270,286,287,314,317,318,322,323,328,329,330,342,343,344,345,349,384,403,406,407,410,413,418,419,420,421,422,425,429,435,447,448,479,497,516,519,520,522,527,528,529,530,531,532,533,534,537,541,548,560,561,584,585,590,593,594,597,599,607,610,623,632,644,660,663,664,670,672,674,676,679,683,686,687,705,709,711,722,724,726,728,750,762,764,769,776,777,780,782,784,785,804,806,807,808,809,810],constant_tsc:785,constantarrai:743,constantexpr:748,constantfold:670,constantfoldcal:670,constantfp:[743,804,805,806,807,808,809,810],constantindex:762,constantint:[610,710,716,743],constantpoolsect:780,constantstruct:743,constdata:716,constexpr:[658,660,768],constindex:762,constitu:[669,684,806],constitut:[596,597,690,743,755,775,789],constprop:743,constrain:[639,669,672,679,683,686,689,691,709,721,740,770,778,804],constrainedop:591,constraint:[605,607,610,611,657,667,669,670,683,689,709,721,741,743,745,750,757,759,764,770,771,780,782],construct:[590,593,594,597,603,604,605,606,608,610,639,659,663,669,670,671,676,707,708,710,717,725,726,738,743,750,759,761,764,768,770,777,780,781,782,784,786,788,790,802,803,804
,805,806,807,808,810,811,812],constructor:[594,607,641,659,660,663,672,703,706,708,709,710,715,726,759,769,780,782,786,788,789,790,806],constval:743,constvcal:710,consult:[605,610,621,678,679,681,711,721,729,731,736,757,782],consum:[585,590,621,623,631,639,644,659,663,669,671,677,679,681,683,684,688,697,710,712,718,723,731,733,734,735,736,738,739,754,756,762,768,771,775,803,804,805,806,807,808,809,810],consumeerror:743,consumpt:[607,639,712,754,782],cont6:669,cont:[663,669,708,710],conta:710,contact:[608,667,696,702,712,719,723,752,755,757,782],contain:[585,590,592,593,594,596,597,598,599,600,601,603,605,606,607,609,611,614,616,617,621,622,625,627,630,631,634,638,639,640,641,642,645,648,649,652,654,658,659,660,662,663,664,665,666,667,668,669,671,674,676,677,678,679,681,685,686,688,689,694,695,696,697,698,700,701,702,703,704,705,707,709,710,711,713,714,715,716,721,722,723,724,725,726,727,728,729,730,731,733,734,735,736,738,741,742,744,745,747,750,752,753,756,757,759,760,761,762,764,766,768,769,770,771,773,774,775,777,779,780,781,782,783,785,786,788,789,790,791,804,805,806,810],containingtyp:710,containsfoo:610,contb:710,content:[590,593,596,605,610,611,612,621,623,625,630,637,639,641,642,643,648,649,653,659,667,671,673,680,684,709,710,713,717,722,731,738,739,741,743,745,750,754,756,761,770,771,774,775,780,782,784],content_disposition_typ:667,contenti:667,context:[593,594,610,611,645,654,659,663,667,669,672,674,677,683,688,696,703,709,711,714,718,722,724,726,742,743,749,755,760,768,770,771,775,782,785,788,803,804,805,808,810],context_projection_funct:663,context_s:663,contextu:[710,785],contigu:[585,590,688,709,710,733,735,739,743,756,759,785],continu:[585,590,593,597,601,604,607,609,611,612,639,644,659,662,667,669,685,702,703,709,710,712,713,714,723,727,731,742,743,745,758,760,769,775,785,788,803,811],contract:[660,674,709,710,743,762,768],contradict:710,contrari:[607,710],contrast:[589,593,607,610,669,703,723,725,743,745,748,750,782,804],contrib:[644,679],contribut:
[585,590,607,609,614,644,667,674,678,680,700,709,710,723,734,739,741,743,745,756,757,764,770,782,783,787,813],contributor:[592,605,606,609,667,723,745,746,757],contriv:[672,770],control:[19,85,176,213,271,315,320,325,337,352,369,371,436,483,549,588,589,590,594,601,605,607,610,611,616,617,619,628,639,663,666,667,668,669,670,673,674,679,681,708,710,711,712,713,714,715,724,725,741,743,745,750,751,756,758,759,762,764,775,776,777,778,779,780,783,803,804,805,807,808,809,810,811,812,813],conv:672,convei:[667,669,674,676,683,710,776],conveni:[603,605,607,610,611,616,627,659,663,664,667,683,689,703,709,710,721,724,726,741,743,744,770,775,783,805,806,807,811],convent:[585,597,606,610,658,661,663,669,685,703,714,717,722,723,738,743,748,759,761,762,764,768,769,804,810],convention:603,converg:[597,710],convers:[590,591,607,609,611,658,659,673,674,688,689,705,743,770,786],convert:[52,53,143,238,239,294,295,296,309,401,402,514,515,585,588,589,590,591,594,596,607,615,616,621,625,629,641,659,667,670,673,674,677,679,684,688,689,695,699,706,714,722,724,742,743,745,754,759,768,769,770,780,782,783,784,786,802,804,805,806,807,808,809,810,811,814],converttoerrorcod:743,cooki:[710,743],cool:[659,667,782,804,805,806,807,808,809,810],cooper:[676,694,709],coordin:[84,175,270,435,548,676,710,764,786],cope:[659,743],copi:[78,117,171,205,266,310,542,580,585,590,593,594,597,601,605,607,611,615,619,624,625,626,659,663,667,669,675,676,679,683,684,686,688,689,690,691,694,701,702,709,711,714,715,716,717,721,723,724,731,741,743,747,756,757,758,759,760,761,764,769,775,776,779,780,782,812],coprocessor:683,copyabl:710,copycost:[691,780],copyleft:667,copypart:712,copyphysreg:780,copyright:[672,782],core2:775,core:[1,5,6,7,8,590,593,595,605,606,663,667,669,670,678,679,688,690,693,695,696,697,701,710,712,714,723,726,728,745,746,760,768,775,782,788,789,790,791,804,805,806,807,808,810],corei7:[775,779],corner:762,cornerston:677,coroaddr:663,corollari:[710,734,736],corospit:663,coroutin:751,corp:712,corpora:71
2,corpu:675,corpus1:712,corpus2:712,corpus_dir:712,correct:[590,593,594,596,605,607,610,659,660,663,667,670,674,679,681,689,694,696,701,704,710,711,714,723,728,736,741,742,743,753,758,759,760,764,765,773,774,779,782,783,805],correctli:[585,590,594,600,605,607,610,611,659,667,669,672,677,679,698,700,705,710,722,726,736,742,743,744,745,754,758,759,760,764,775,782,786,803,806],correl:[590,639,676,710,777],correspond:[27,129,220,378,490,585,588,590,591,594,596,597,599,601,605,607,610,611,617,624,625,628,631,639,654,659,663,664,666,667,669,670,671,672,676,677,679,681,684,703,704,705,707,710,711,714,716,721,722,723,724,725,729,731,733,734,735,736,741,742,743,745,754,759,764,766,768,769,770,771,775,777,779,780,782,785,786,803,804,806,810],corrupt:[610,669,712,756,811],cortex:[661,697,701,706],cortexa53model:780,cortexa57model:780,cos:[593,779,786,802,803,804,805],cosin:[710,780],cost:[585,603,607,610,639,667,683,690,691,693,695,707,709,710,711,713,719,722,740,743,745,750,754,760,766,779,780,811],costa:768,costli:[748,756,760,785],could:[585,590,593,594,601,602,605,606,607,610,611,616,627,631,639,659,663,667,668,672,673,674,675,676,677,679,683,684,688,691,694,696,701,703,705,706,707,708,709,710,711,714,717,721,723,725,726,728,730,735,741,742,743,745,747,748,754,756,757,759,760,761,762,764,765,766,770,775,776,777,780,783,785,786,789,790,791,803,804,805,806,808,809,811],couldn:[593,684,722,809],counsel:667,count:[120,209,314,479,584,585,589,590,601,610,621,625,631,639,645,659,664,669,670,676,684,706,712,714,726,731,733,743,745,750,754,756,759,762,769,770,773,774,776,780,784,785,803],counter:[20,123,212,370,482,585,589,590,592,594,599,601,605,631,639,645,710,712,728,759,769,785],counteracti:756,counterpart:[585,591,710,726],coupl:[594,609,674,689,710,713,719,725,727,742,743,745,764,781,782,783,806,807,810,811],cours:[593,605,659,670,689,705,710,712,716,722,723,728,743,766,782,789,802,811,812],court:667,courtesi:[609,662,667],cout:[610,672,695,724,786,809],cov:[605,615,638,664,
712],cover:[585,603,607,611,625,627,638,659,661,664,667,674,676,677,679,681,684,691,696,699,702,710,712,717,719,722,723,727,730,738,743,756,759,761,762,763,775,780,782,783,807,812],coverag:[604,605,615,638,645,658,667,675,690,699,710,751,759,778,784],cp0:683,cp1:683,cp2:683,cp3:683,cpi:716,cpp:[591,605,606,607,610,611,616,624,652,654,659,665,670,672,676,679,689,695,709,710,722,723,724,725,726,745,756,759,764,765,768,769,775,779,780,781,782,784,788,789,790,791,803,804,805,806,807,808,809,810],cppflag:744,cppguid:748,cppperformancebenchmark:747,cpprefer:743,cpptypenam:[768,769],cpsr:716,cptmp0:780,cptmp1:780,cpu0:813,cpu1:786,cpu2017:747,cpu2:786,cpu:[585,590,594,595,597,607,614,616,617,619,630,631,639,642,643,660,679,697,701,706,710,712,713,720,722,760,773,780,782,784,785,786,809],cpu_id:785,cpu_nam:631,cpu_powerpc:786,cpu_x86:786,cpu_x86_64:786,cpufreq:[595,697],cpufrequtil:697,cpuinfo:697,cpun:595,cpunam:[617,619,639],cpuset:595,cpusubtyp:637,cputyp:[597,637],cpux:595,cr0:710,cr7:710,craft:743,crafter:757,crash:[602,612,619,676,679,682,695,705,710,741,743,745,747,762,811],crawl:676,crawler:676,crazi:[659,811],crazier:806,crc32:756,crc:712,crd:747,creal:807,creat:[585,590,596,597,603,605,606,607,608,609,610,614,615,616,617,621,624,625,631,632,634,637,639,641,644,646,657,659,660,662,663,664,665,666,667,668,669,671,672,676,677,678,679,681,683,684,686,688,689,690,694,695,696,698,699,701,704,705,706,708,709,710,711,712,713,715,716,717,718,721,722,723,724,725,738,741,742,745,750,753,754,757,759,760,762,764,765,766,768,771,773,774,775,776,777,779,780,781,783,784,786,788,789,790,791,803,804,805,806,807,808,809,810,811,813],createalloca:[808,809,810],createasmstream:607,createbarejitdylib:[788,789,790,791],createbasicaliasanalysispass:810,createbasictyp:810,createbr:[806,807,808,809,810],createcal:[743,804,805,806,807,808,809,810],createcfgsimplificationpass:[789,790,791,805,806,807,808,810],createcgscctofunctionpassadaptor:725,createcompileunit:810,createcondbr:[806,807,80
8,809,810],createcontentblock:709,createdatalayout:[790,805,809],createdefaultmypass:782,createentryblockalloca:[808,809,810],createexpress:810,createfadd:[804,805,806,807,808,809,810],createfcmpon:[806,807,808,809,810],createfcmpult:[804,805,806,807,808,809,810],createfil:810,createfmul:[804,805,806,807,808,809,810],createfsub:[804,805,806,807,808,809,810],createfunct:810,createfunctiontolooppassadaptor:725,createfunctiontyp:810,creategep:727,creategvnpass:[789,790,791,805,806,807,808,810],createindirectstubsmanag:790,createinstructioncombiningpass:[789,790,791,805,806,807,808],createjit:715,createjitdylib:726,createlazycallthroughmanag:[790,791],createlinkgraph:709,createlinkgraph_:709,createload:[727,808,809,810],createlocalcompilecallbackmanag:790,createlocalindirectstubsmanagerbuild:790,createmiroperandcom:716,createmoduletocgsccpassadaptor:725,createmoduletofunctionpassadaptor:725,createmul:743,createmyregisteralloc:782,createparametervari:810,createphi:[806,807,808,809,810],createpromotememorytoregisterpass:[808,810],createreassociatepass:[789,790,791,805,806,807,808],createresourcetrack:[805,806,807,808],createret:[804,805,806,807,808,809,810],createsect:709,createstor:[808,809,810],createstringerror:743,createsubroutinetyp:810,createtargetasminfo:780,createtargetmachin:809,createthin:621,createuitofp:[804,805,806,807,808,809,810],createvirtualregist:607,createzerofillblock:709,creation:[590,605,621,663,666,676,702,706,710,743,745,780],creator:[605,782],creatur:807,credit:[667,760],creduc:[602,704],crhc:747,critedge1:724,criteria:[667,745,756],criterion:745,critic:[593,606,619,639,667,674,676,689,702,710,753,756,760,782,803],cross:[592,610,612,644,689,690,691,697,698,709,710,722,726,739,745,750,757,775,778,809],crosscompil:[607,701],crt:[605,698],crtp:781,crucial:[710,742,760],cruel:[717,761],cruft:745,crypto:712,cryptograph:[756,760],cs1:[593,710],cs2:[593,710],csail:760,cse:[594,670,685,711,728,805],csemirbuild:685,cset:595,csk_md5:710,csk_none:710,csk_sha
1:710,csk_sha256:710,csr_64:716,cst:710,cst_code_integ:597,cst_code_wide_integ:597,cstdint:[805,806,807,808],cstdio:[803,804,805,806,807,808,809,810],cstdlib:[803,804,805,806,807,808,809],cstptr:710,csv:[631,774,783,784],cta:724,ctabl:768,ctag:658,ctaid:724,ctest:773,ctmark:773,ctor:[610,672,708,710,759],ctpop:780,ctrl:[804,809],ctx:[726,788,789],ctxt:[663,786],ctype:731,cube:588,cubemap:588,cubic:712,cucontext:724,cuctxcreat:724,cuctxdestroi:724,cuda:[585,590,607,616,661,668,724,778],cuda_success:724,cudamodul:724,cudevic:724,cudevicecomputecap:724,cudeviceget:724,cudevicegetcount:724,cudevicegetnam:724,cudeviceptr:724,cufunct:724,cuinit:724,culaunchkernel:724,culinkst:724,cull:[590,610],cultur:[608,719],cumbersom:[726,745],cumemalloc:724,cumemcpydtoh:724,cumemcpyhtod:724,cumemfre:724,cumod:590,cumodul:724,cumodulegetfunct:724,cumoduleloaddata:724,cumoduleloaddataex:724,cumoduleunload:724,cumul:[607,641,784],cuobjdump:660,curesult:724,curiou:722,curious:703,curli:[610,710,743,775],curloc:810,curop:780,currenc:[607,609,805],current:[175,270,435,548,585,588,589,590,593,594,596,597,599,602,603,604,605,606,607,608,610,611,612,614,616,617,619,621,625,627,639,641,648,649,652,659,660,663,664,665,666,667,668,669,670,671,672,673,674,676,677,678,679,682,683,684,685,688,689,691,693,694,695,696,698,701,702,707,708,709,710,712,714,715,716,722,723,724,725,727,731,735,740,741,742,743,744,747,749,752,753,754,755,756,758,759,760,762,764,765,768,770,771,775,776,777,778,780,782,783,784,786,788,789,790,802,803,804,805,806,807,808,809,810,811],current_corpus_dir:712,current_valu:663,curs:[612,616],curtok:[803,804,805,806,807,808,809,810],curvar:[808,809,810],custom:[602,605,607,610,611,612,669,670,672,674,679,680,688,689,709,710,712,716,725,741,764,768,770,771,775,783,785,788,791],customalloc:663,customari:[659,667,710],custombehaviour:639,customev:785,customeventunstructuredmemori:785,customfor:688,customfre:663,customif:688,customis:605,customiz:[685,705],cut:[598,659,780,788],cute:8
11,cutoff:645,cv_cpu_type_:731,cv_signature_xx:734,cvdebugrecord:736,cvdump:644,cve:[712,757],cvinfo:734,cvise:704,cvpack:738,cvt:724,cxa_demangl:759,cxx11:747,cxx14:610,cxx:[603,605,610,616,679,694,701,726,753],cxx_fast_tl:710,cxx_fast_tlscc:[597,710],cxx_flag:697,cxx_statu:610,cxxcompilelay:726,cxxcompilinglay:726,cxxfilt:615,cxxflag:[624,788,789,790,791,803,804,805,806,807,808,809,810],cxxlayer:726,cxxmap:[615,645],cycl:[631,639,662,666,669,678,710,713,714,759,784],cycle_frequ:785,cyclic:[585,679,710,714],cyclonemodel:780,cygwin:[607,679,681],cypress:590,cywritev3:780,d02:777,d0o:664,d10:780,d11:780,d12:[777,780],d13:780,d14:780,d15:[710,780],d16:[0,3,4,9,98,112,114,288,306,308,449,470,562,576,578],d16_x:587,d16_xy:587,d16_xyz:587,d16_xyzw:587,d31:710,d43184:698,d57504:749,d57896:748,d58291:723,d7k:664,d88124:698,d89518:698,d90021:698,d_ctor_bas:611,d_libcpp_abi_force_itanium:698,d_no_crt_stdio_inlin:698,dag:[591,658,710,711,738,743,745,764,768,769,771,775,780],dagarg:770,dagarglist:770,dagcombin:670,dagiselemitt:769,dagrecti:769,dagtodag:670,dagtodagisel:670,dai:[605,609,667,678,679,702,723,743,757,766],damag:759,dan:786,danc:682,danger:[611,710,743,755,760,776],dangl:[610,743,770],daniel:[692,759],danila:748,darktabl:747,darwin9:611,darwin:[603,605,607,615,621,640,650,673,710,713,726,762,810],dash:[659,675,786],dasinconsist:748,data16bitsdirect:780,data1:770,data32bitsdirect:780,data64bitsdirect:780,data:[36,37,38,66,68,78,84,93,96,97,98,99,100,101,104,105,106,107,110,111,112,113,114,115,117,157,159,171,175,186,187,188,189,190,191,194,195,196,199,200,201,202,203,204,205,254,256,266,270,279,280,281,282,283,286,287,288,289,290,291,294,295,296,297,298,299,300,301,304,305,306,307,308,309,310,354,355,386,387,388,418,420,435,444,447,448,449,450,451,452,455,456,457,467,468,469,470,471,472,499,500,501,530,532,542,548,557,560,561,562,563,564,565,568,569,570,571,574,575,576,577,578,579,580,587,588,589,592,593,594,595,596,601,603,605,606,609,610,615,616,625,639,640,641,64
2,643,644,646,648,649,650,653,654,658,659,663,669,670,671,673,674,675,676,677,683,688,689,699,709,711,712,713,714,715,716,725,726,727,730,731,735,736,738,739,745,746,747,750,751,754,756,757,759,762,764,768,770,771,773,775,780,782,784,788,804,805,808,809,810],data_alignment_factor:[585,590],data_ti:769,databas:[710,739,751,786,811],datacrc:731,dataflow:[605,710,759,808],datalayout:[597,676,710,724,726,727,777,780,788,789,790,791],datalayoutpass:810,dataloc:710,dataset:[747,773],datastructur:[743,745],datatyp:[607,659,743,770,802],date:[593,601,602,604,621,641,653,679,681,701,702,721,723,725,731,738,742,748,752,753,757,761,766,788,789,790,806],daunt:605,david:[748,750],dbaremetal_armv6m_sysroot:700,dbaremetal_armv7em_sysroot:700,dbaremetal_armv7m_sysroot:700,dbg:[601,705,709,710,716,743,780,810],dbg_instr_ref:716,dbg_valu:[590,705,710,716,759],dbg_value_list:759,dbgentityhistorycalcul:759,dbgopt:759,dbgstreamarrai:731,dbi:[729,734,739],dbi_head:731,dbistreamhead:731,dbistreamvers:731,dbl:773,dblty:810,dbootstrap_cmake_build_typ:668,dbootstrap_cmake_c_flag:592,dbootstrap_cmake_cxx_flag:592,dbscan:631,dbuild_shared_lib:679,dbuilder:810,dbx:759,dce:[612,659,663,670,710,749],dced:743,dclang_bootstrap_passthrough:592,dclang_bootstrap_target:668,dclang_enable_bootstrap:[592,668],dclang_enable_proto_fuzz:675,dclang_enable_static_analyz:679,dclang_tablegen:[699,701],dcmake_ar:700,dcmake_asm_compiler_target:700,dcmake_asm_flag:700,dcmake_build_typ:[662,668,679,697,699,723,775,784],dcmake_c_compil:[699,700,773],dcmake_c_compiler_external_toolchain:700,dcmake_c_compiler_target:700,dcmake_c_flag:[697,698,699,700],dcmake_c_flags_releas:784,dcmake_crosscompil:701,dcmake_cxx_compil:699,dcmake_cxx_flag:[699,701,784],dcmake_cxx_link_flag:679,dcmake_exe_linker_flag:700,dcmake_install_prefix:[605,679,697,698,701],dcmake_nm:700,dcmake_osx_architectur:679,dcmake_ranlib:700,dcmake_sysroot:700,dcmake_toolchain_fil:679,dcmake_try_compile_target_typ:700,dcommit:745,dcompiler_rt_baremetal_buil
d:700,dcompiler_rt_build_builtin:700,dcompiler_rt_build_libfuzz:[700,712],dcompiler_rt_build_memprof:700,dcompiler_rt_build_profil:700,dcompiler_rt_build_sanit:700,dcompiler_rt_build_xrai:700,dcompiler_rt_default_target_onli:700,dcompiler_rt_emul:700,dcompiler_rt_include_test:[700,712],dcompiler_rt_os_dir:700,dcompiler_rt_test_compil:700,dcompiler_rt_test_compiler_cflag:700,ddg:[666,751],ddgbuilder:666,deactiv:766,dead:[593,607,659,672,674,679,686,705,709,710,711,713,716,724,728,743,759,764],deadli:712,deadlin:609,deadtypeelim:743,deal:[594,596,597,605,608,610,660,663,667,669,673,683,688,689,703,725,730,738,741,743,745,750,756,759,760,768,781,782,783,808],dealloc:[676,695,709,710,743,756],dealloc_type_mismatch:756,death:[675,712],deb:701,debat:685,debian8:668,debian:[679,700,701,740,761],debug:[591,601,602,603,605,607,610,612,616,617,624,628,631,632,640,641,642,644,649,653,654,657,658,659,662,664,667,669,671,676,679,681,685,699,702,704,709,711,712,713,720,722,723,725,726,729,730,733,734,736,738,739,740,744,748,754,762,764,771,773,778,780,782,802,808,809,811,812,813],debug_:590,debug_addr:585,debug_arang:[585,590],debug_count:743,debug_fram:[585,642],debug_info:[630,759],debug_inlin:614,debug_level:659,debug_lin:585,debug_line_str:585,debug_loclist:585,debug_metadata_vers:810,debug_mod:590,debug_nam:[585,759],debug_pubnam:[614,729,759],debug_pubtyp:[614,759],debug_rnglist:585,debug_s_crossscopeexport:644,debug_s_crossscopeimport:644,debug_s_inlineelin:644,debug_s_lin:644,debug_str:[585,611,631,759],debug_symbol:740,debug_with_typ:743,debugcount:743,debugflag:[659,743],debugg:[607,610,644,658,665,669,685,695,715,723,726,736,739,748,766,775,778,782,806,810,812],debuginfo:[679,705,710,736,745,775,810],debuginfoforprofil:710,debuginfometadata:759,debuglev:659,debuglevel:659,debuglink:641,debugloc:[607,716,754,810],debugstreamarrai:731,debugtrap:590,dec:[650,743],decent:[670,697,702,743,782,810],decid:[585,590,596,603,605,659,660,667,669,670,688,693,704,710,729,731,742,74
3,757,759,760,764,779,782,786,803,807,808],decim:[589,590,611,620,640,645,650,652,659,671,710,786],decimal_numb:590,decimalinteg:770,deciph:590,decis:[596,601,603,607,608,609,610,667,669,674,684,688,693,710,712,723,726,743,750,755,760,764,779,787,789,805],decl:[658,703,771,804],decl_in_loop:710,declar:[585,589,590,593,597,607,611,658,659,660,663,670,672,698,703,708,710,716,722,724,725,743,762,764,765,768,769,770,771,777,780,781,782,802,803,804,805,806,807,808,809,810],declaring_vari:748,declcontext:703,declet:710,decltyp:[710,790],decod:[597,631,639,648,649,710,759,768,773],decodermethod:770,decodernamespac:770,decompos:[590,667,670,688,709,710,738,764],decomposit:631,decompress:641,decor:[709,759],decreas:[588,603,605,619,699,702,710,730,756],decrement:[590,669,743],dedic:[589,607,667,693,695,702,714,745,780],deduc:[590,674,710,714,728,743,768,776],dedupl:[710,731],deee:639,deeer:639,deem:[667,670,702,704,710,714],deep:[610,666,703,709,741,747],deeper:[606,644,675,717,761,773,788],deepli:664,def:[585,591,607,631,658,663,666,670,683,686,705,707,711,714,716,721,725,750,759,768,769,771,780,781,782,802,803,804,805,806,807,808,809,810],default_branch_weight:599,default_float_round:590,defaultdest:710,defaultopt:659,defeat:760,defect:609,defend:760,defens:667,defer:[585,609,715,722,726,748,788,789,790,805],deferrederr:743,defi:710,defici:676,defin:[20,66,67,123,157,158,212,254,255,349,370,418,419,482,530,531,585,588,589,590,592,593,594,596,597,598,599,602,605,606,607,611,612,616,621,623,639,640,641,645,658,659,660,662,663,664,666,667,668,669,670,671,672,675,676,677,679,683,684,685,686,687,688,695,700,701,703,704,705,707,708,709,712,713,714,716,717,721,722,723,724,726,728,730,731,734,736,739,740,741,742,743,744,746,748,749,750,751,756,759,761,762,763,764,765,768,769,771,773,774,775,776,777,781,782,783,784,785,786,788,789,790,791,802,803,804,805,806,809,810,811,812,813],define_dwarf:590,defined_symbol:709,definingaccess:721,definit:[589,590,594,596,597,599,606,607,611,614,
628,631,640,644,658,659,662,663,666,670,676,683,684,686,687,688,695,698,704,707,709,710,713,716,721,722,726,741,743,756,759,764,768,771,777,780,781,782,788,789,790,791,802,803,804,805,806,807,808,809,810,811],defm:[769,771,780],defreg:631,defunct:726,defvar:769,degen:710,degener:[741,808],degrad:[667,676,766],degrandmaison:719,degre:[594,667,668,674,696,760,779],deiniti:726,delai:[609,702,722,743,748,755,756,757,759,766,770],deleg:[594,639,669,710,746],delet:[585,593,601,605,607,611,621,632,633,672,679,694,699,709,710,714,716,721,722,724,725,726,745,756,759,782,788,790,804,805,806,807,808,810],delete_all_rpath:633,delete_rpath:633,delete_size_mismatch:756,deleteaninstruct:743,deliber:[721,762],delim:770,delimit:[606,669,679,710,718,770],delin:616,delta:[602,639,785],deltalinestart:664,delv:[667,805],demand:[590,597,607,679,726,745,788,808],demangl:[603,615,620,625,640,642,648,649,654,712,759],demo:[659,702,804],demonstr:[639,664,667,679,703,710,735,743,759,780,804,805],denisov:748,denisovcamelback:748,denorm:[660,710,786],denot:[585,597,607,611,663,676,679,684,710,735,786],dens:[590,597,710,743],densemap:610,densemapinfo:743,denser:807,denseset:726,densiti:[610,807],deopt:[710,764],deopt_arg:710,deoptim:764,depart:[621,719,747],departur:745,depend:[19,27,82,84,86,96,97,98,99,112,113,114,129,175,186,187,188,202,220,270,279,280,286,287,288,289,291,294,295,296,306,307,308,315,320,325,337,351,369,378,433,435,447,448,449,450,469,470,490,546,548,560,561,562,563,576,577,578,585,586,588,589,590,591,592,594,595,597,599,600,605,607,608,610,612,616,617,621,623,624,631,633,639,644,645,649,657,658,659,660,663,667,668,669,672,674,675,676,677,679,683,684,687,688,695,696,698,700,701,706,707,709,712,715,721,722,724,725,726,727,728,730,735,742,743,744,745,746,751,753,754,756,759,762,764,765,766,770,773,774,775,776,779,780,782,785,788,803,804,808],dependencegraphbuild:666,depict:[639,666],deplib:[597,671],deploi:[668,698,742,757,760],deploy:760,deprec:[585,590,597,605,611,616,642,667,
697,716,725,726,727,730,743,752,759,770,775,780,783],depth:[78,171,266,542,590,602,607,611,630,644,659,679,694,710,714,722,741,745,747,763,768,771,789,806],deref_bytes_nod:710,derefer:[585,590,606,677,710,723,743,759,788],dereferenc:[585,597,673,674,695,741,743],dereferenceable_byt:710,dereferenceable_or_nul:597,deriv:[590,607,610,667,676,703,707,710,711,715,741,742,759,768,769,770,771,776,782,789,802],derived_ptr:710,derivedtyp:[670,743,804,805,806,807,808,809],desc:[590,659,780,782],descend:[616,669,682,710],descent:[803,807,812],describ:[0,1,2,3,4,5,6,7,8,9,20,27,52,53,120,123,129,143,209,212,213,220,238,239,314,370,371,378,401,402,479,482,483,490,514,515,584,585,586,587,588,589,590,594,596,597,598,599,603,606,607,610,611,615,616,617,627,630,639,641,644,659,660,662,663,664,666,667,669,670,671,673,675,676,677,678,679,682,683,688,689,690,691,694,698,702,703,704,705,707,709,710,711,713,715,716,720,721,722,724,726,728,729,730,731,734,735,736,738,739,741,742,743,744,745,748,750,751,752,754,755,757,759,760,762,764,768,769,770,771,776,777,778,780,781,782,783,785,786,787,788,790,791,803,804,805,806,807,809,810,812],descript:[1,5,6,7,8,17,20,27,29,30,31,78,120,121,123,129,131,171,209,210,212,213,220,222,223,266,314,341,370,371,378,380,381,479,480,482,483,490,492,493,494,542,584,586,587,588,589,590,593,594,597,603,605,606,609,610,615,659,664,668,669,676,679,684,688,691,695,696,703,704,710,711,722,723,724,729,741,742,743,751,756,757,759,762,768,769,770,771,773,778,780,783,785,809,810,814],descriptor:[585,597,639,709,710,726,731,759,762,780,810],deseri:768,deserv:[674,710],design:[593,594,603,609,610,611,612,616,639,659,662,663,664,667,669,672,674,676,678,696,706,709,710,711,712,716,719,723,729,733,735,741,744,745,746,748,751,754,757,759,760,762,764,765,770,771,774,777,778,780,781,782,786,808,811,812],desir:[585,590,594,601,605,610,611,614,616,630,671,674,676,707,710,712,715,727,728,741,743,746,748,754,755,759,760,762,764,768,769,770,775,780,782,811],desktop:[681,740,775],de
spair:723,despit:[594,607,659,699,710,745,748,759,760,764,771,782,785],dest1:710,dest2:710,dest:[710,809],dest_bas:764,dest_offset:764,destabil:667,destarglist:710,destin:[18,122,211,336,368,481,586,587,588,589,590,599,607,621,669,671,674,684,705,710,760,764,775,780,808,809,810],destreg:607,destroi:[607,669,672,693,708,710,716,726,743,780,786],destruct:[663,708,743,759,760,762,811],destructor:[610,660,663,669,672,710,743,748,759,782],detail:[27,129,220,378,490,585,587,588,590,593,594,598,600,601,603,605,606,607,608,609,611,616,617,630,631,639,644,645,648,658,659,660,663,667,668,669,672,675,676,677,678,679,681,684,695,696,699,703,704,709,710,711,712,713,719,720,722,723,724,725,726,728,731,735,738,741,742,743,744,745,747,748,751,755,764,765,766,769,771,773,775,776,780,781,782,783,784,786,787,789,790,791,804,807,808,811,812],detect:[590,605,611,616,625,626,628,631,639,645,701,710,712,714,720,722,723,726,742,743,750,751,756,759,760,763,775,779,783,785,805],detect_leak:712,detecthost:788,detector:[710,720,722],determin:[585,590,593,594,597,602,605,607,616,617,620,621,627,639,641,657,663,664,667,669,686,687,688,691,702,703,704,707,709,710,712,715,722,724,725,726,727,728,729,731,733,738,741,743,744,750,755,757,759,760,762,763,764,765,768,769,770,774,775,776,777,780,782,785,786,803,804,806,807],determinist:[592,593,601,603,607,621,641,653,710,712,743,768],deterministicfiniteautomaton:748,detriment:[610,748],dev:[594,609,617,619,662,667,670,674,676,678,695,701,719,723,727,742,743,745,748,750,757,759,764,766,782,784,809,811],devbuffera:724,devbufferb:724,devbufferc:724,devcount:724,devel:753,develop:[585,590,597,600,603,606,607,608,609,610,641,653,659,661,662,668,669,674,676,677,681,682,688,696,697,700,702,704,710,713,719,720,723,726,740,744,746,747,748,749,750,752,754,755,757,759,760,764,765,766,768,770,771,774,783,784,787,789,790,791,808,813],deviat:[607,610,671],devic:[585,588,590,595,622,629,632,636,657,697,724,726],device_enqueue_symbol:590,device_fn:660,device_onli:660,
devirtu:[710,777],devis:744,devmajor:724,devmeet:690,devminor:724,devoid:607,dexonsmith:759,dfa:[607,658,748,768],dfapacket:607,dform_1:607,dfpreg:780,dfpregsclass:780,dfpregsregclass:780,dfpregsregisterclass:780,dgpu:590,dgwp_asan_default_opt:695,di_index:710,dia:[605,644,654,681],diag:658,diagnos:[611,628,710,779,782],diagnost:[610,611,615,616,617,628,640,658,704,710,712,720,723,743,768,771],diagnosticinfooptim:754,diagnosticsemakind:723,diagram:[666,689,735,779],dialect:[658,710,718,768],diamond:705,diana:[692,748],dibasictyp:[705,759],dibuild:[759,810],dice:743,dicompileunit:[705,759,810],dict:712,dictat:[585,610,743,758],dictionari:[616,710,768],dictionary_fil:712,did:[585,590,593,610,663,667,669,675,703,710,714,722,743,745,754,782,789,805,807,808,811],didn:[607,660,665,674,675,710,722,723,743,759,760,781,782,804,808],die:[585,630,759,810],die_offset_bas:759,diego:602,diegotf:602,dies:790,diexpress:[590,705,716,759],dif:741,diff:[609,610,615,630,662,667,668,679,723,741,742,743,745,773,775],differ:[14,79,172,267,288,306,308,333,350,430,543,585,586,587,588,589,590,591,592,596,597,598,599,601,602,605,606,607,608,609,610,611,612,616,617,619,620,623,625,628,630,631,638,639,641,642,644,650,651,653,654,659,661,663,664,666,667,668,669,670,671,672,674,676,679,681,682,683,684,688,689,693,699,702,703,706,707,709,710,712,713,714,715,716,719,722,723,724,725,726,727,728,729,731,738,741,742,743,745,746,748,751,753,754,755,756,757,759,760,762,764,765,766,768,770,771,773,775,776,779,780,781,782,783,784,785,786,787,789,790,804,805,806,808,809,810,811],differenti:[609,671,710,723,742,746],difficult:[585,593,594,610,659,667,669,688,689,700,710,713,717,726,728,743,745,748,759,761,769,810],difficulti:[747,759],difil:[705,759,810],diflagartifici:710,diflagfwddecl:710,diflagprototyp:[710,759],diflagpubl:759,diflagvector:710,difwddecl:710,digit:[590,611,640,659,710,720,769],diglobalvari:759,digress:807,dijkstra:747,dil:[671,771],dilexicalblock:759,diloc:[611,705,716,759,810],dilocalvar
i:[705,716,759,810],dim:[0,84,710,757],dimens:[66,157,175,254,270,418,435,530,548,588,590,710],dimension:[84,175,270,435,548,588,677,724,807],diminish:759,dimitri:757,dindex:780,dinkumwar:743,dinod:[710,759,810],dinstall_gwp_asan_stub:695,diploma:750,dir1:[590,712],dir2:[590,712],dir3:590,dir4:590,dir:[592,605,616,625,635,675,679,698,700,701,710,712,744,753,759,773],dir_to_mount:595,direct:[585,604,605,606,608,610,612,616,631,639,649,658,659,663,666,667,669,674,675,685,689,700,705,709,711,722,723,725,738,743,745,753,759,760,764,765,769,771,774,776,780,782,786,808],directedgraph:666,directli:[107,196,301,457,571,585,590,594,597,605,607,609,610,615,616,623,624,631,639,654,659,663,667,669,671,674,675,676,677,679,689,691,693,694,696,698,708,709,710,716,721,724,726,728,729,731,733,741,742,743,744,745,750,754,755,759,762,764,765,766,768,769,770,771,773,780,782,783,784,785,788,789,790,791,804,805,806,807,808],directori:[590,592,604,605,606,607,610,614,616,621,624,625,635,639,642,649,654,658,660,664,667,674,678,681,682,694,696,699,700,701,702,705,710,712,716,723,731,736,739,743,744,745,748,752,753,754,759,765,766,771,772,773,774,775,780,781,782,783,810],dirnam:635,dirti:[590,731],dis:[615,622,623,642,679,775,808],disabl:[213,371,483,588,589,590,593,595,596,601,605,610,611,612,616,617,619,625,631,639,640,641,642,653,659,663,664,676,679,681,689,694,701,702,704,705,712,721,723,728,740,743,753,756,764,766,773,775,776,779,781,783,806,810],disable_assert:740,disable_nonforc:776,disable_sanitizer_instrument:[597,710],disableencod:[770,771],disablelazycompil:743,disadvantag:[659,706,713,714,743],disagr:[608,609],disagre:[608,609,667,710,757,759],disallow:[659,677,710,762,764],disambigu:[593,674,683,714,721],disappear:[672,679,697,759],disassembl:[604,615,642,643,646,658,679,681,710,712,743,751],disassembleremitt:768,discard:[585,589,602,607,611,641,653,659,663,671,679,684,688,710,743,745,750,764,791],discharg:674,disclaim:[606,667],disclos:[755,757],disclosur:[667,755],discontigu:7
35,discop:810,discord:[667,678],discount:725,discourag:[610,659,667,710,743,755,765],discours:[667,678],discov:[585,607,616,631,663,676,702,712,736,759,760,765,766,806,808],discover:668,discret:757,discrimin:[607,610,654,703,710,768,785],discriminatori:608,discuss:[585,606,608,609,610,611,639,659,662,663,664,667,675,678,679,680,681,685,688,698,703,705,710,719,723,726,728,731,733,735,738,742,743,745,749,755,759,760,762,764,780,782,783,789,790,803,812],disjoint:[585,590,593,666,684,710,714,721,779],disk:[585,590,610,616,631,665,679,681,709,710,712,713,731,735,736,738,745,759,765,783,788],dislik:667,disp32:607,dispatch1:710,dispatch2:[669,710],dispatch:[669,710,726,743,760,762,803],dispatchwidth:639,dispel:677,dispflagdefinit:759,dispflagdelet:759,dispflagelement:759,dispflagpur:759,dispflagrecurs:759,displac:[585,607,710,770],displai:[606,607,608,610,614,616,621,623,625,626,630,631,633,635,637,639,640,641,642,644,645,648,649,650,652,653,659,664,667,696,703,718,741,742,754,759,768,774,780,802],disposit:667,disproportion:759,disregard:757,disrepair:766,disrupt:[717,760],dissassembl:603,dissemin:757,distanc:[642,710,743,756,786,791],distil:775,distinct:[585,590,593,597,607,611,627,630,666,667,669,677,683,689,695,705,709,710,727,741,743,750,756,759,770,775,776,780,804],distinctli:683,distinguish:[585,596,597,601,663,664,667,683,691,710,714,727,743,748,764,765,811],distract:667,distribut:[592,598,606,610,612,616,639,645,660,662,665,667,668,672,678,679,680,681,700,701,724,745,754,757,760,771,773,782,783],distributionexampl:603,distributor:757,distringtyp:759,distro:[699,701],disturb:766,disubprogram:[705,759,810],disubroutinetyp:[705,759,810],dityp:810,div:[588,607,611,684,710],divari:710,dive:[703,802,812],diverg:[585,590,677,710,728,807],divers:[699,766],divid:[585,590,598,607,611,616,623,639,645,660,663,670,710,739,741,765,766,769,773,779],dividend:[607,660,710],divis:[589,590,607,684,710,807],divisionbyzero:710,divisor:[660,710],django:[608,755],dlc:[0,590],dlclose:[712
,726],dlibcxx_cxx_abi:698,dlibcxx_cxx_abi_include_path:698,dlibcxx_cxx_abi_library_path:698,dlibcxx_enable_shar:698,dlibcxx_enable_stat:698,dlibcxx_enable_static_abi_librari:698,dlibcxx_has_win32_thread_api:698,dlibcxx_install_head:698,dlibcxx_no_vcruntim:698,dlibcxx_use_compiler_rt:698,dlibcxxabi_enable_shar:698,dlibcxxabi_enable_stat:698,dlibcxxabi_libcxx_includ:698,dlibunwind_enable_shar:698,dlibunwind_enable_stat:698,dlibunwind_use_compiler_rt:698,dll:[597,669,681,698,726,731,775],dllc:[705,775],dllexport:[597,698,710,805,806,807,808,809,810],dllimport:[597,698,710],dllstorageclass:[597,710],dllvm_binutils_incdir:694,dllvm_build_instru:699,dllvm_build_runtim:[675,679,699],dllvm_build_stat:595,dllvm_config_path:700,dllvm_default_target_tripl:701,dllvm_dir:605,dllvm_enable_assert:[662,679,697,775],dllvm_enable_backtrac:679,dllvm_enable_doxygen:605,dllvm_enable_doxygen_qt_help:605,dllvm_enable_new_pass_manag:725,dllvm_enable_p:701,dllvm_enable_project:[605,679,681,723,745],dllvm_enable_sphinx:[679,761],dllvm_experimental_targets_to_build:780,dllvm_external_bar_source_dir:605,dllvm_external_foo_source_dir:605,dllvm_external_project:605,dllvm_include_exampl:679,dllvm_include_test:679,dllvm_integrated_crt_alloc:605,dllvm_libdir_suffix:605,dllvm_local_rpath:679,dllvm_optimized_tablegen:679,dllvm_parallel_link_job:[605,679],dllvm_path:698,dllvm_profdata_fil:699,dllvm_tablegen:[699,701],dllvm_target_arch:701,dllvm_targets_to_build:[605,668,679,681,697,701],dllvm_use_crt_releas:605,dllvm_use_link:[605,679],dllvm_use_sanit:675,dllvm_use_sanitize_coverag:675,dllvm_use_split_dwarf:679,dloc:611,dlopen:726,dlsym:805,dmacro1:770,dmacro3:770,dmask:[0,2,3,4,9,96,97,98,99,113,114,186,187,188,202,286,287,288,289,307,308,447,448,449,450,469,470,560,561,562,563,577,578],dmb:594,dmlc:747,dmpqrstx:621,dname:659,do_on:663,do_safepoint:764,do_someth:714,do_something_with_t:673,do_two:663,doc:[591,605,610,658,664,670,678,679,682,699,700,701,702,710,712,717,723,745,748,749,752,761,773,786,
788,809],dockerfil:[681,778],doclist:786,document:[0,1,2,3,4,5,6,7,8,9,585,587,588,589,593,594,596,597,598,600,602,603,605,606,607,608,615,631,642,658,659,660,662,664,666,667,668,669,670,671,675,676,677,678,679,681,682,683,684,685,693,696,697,698,699,700,701,703,704,705,707,709,710,711,712,713,715,716,720,721,723,724,725,726,729,730,733,735,738,740,741,743,745,747,748,751,752,753,755,757,759,762,764,765,766,767,768,769,770,772,773,775,777,778,780,781,782,783,784,785,804,805,810],documentlist:786,documentlisttrait:786,dodg:611,doe:[288,306,308,585,587,588,589,590,592,593,594,595,597,600,601,603,604,605,606,607,609,610,611,616,621,622,625,628,631,639,640,641,642,649,653,654,659,660,663,665,667,668,669,670,671,673,674,676,679,681,682,684,685,688,693,694,695,698,699,700,703,704,705,706,708,709,710,713,714,715,718,720,721,722,724,725,726,727,730,731,736,738,741,742,743,744,745,746,747,754,755,756,757,758,759,760,762,764,765,769,770,773,775,776,779,780,781,783,784,786,788,802,804,805,806,807,808,809,810,811,812],doesn:[590,593,594,596,599,600,601,602,606,607,608,610,611,616,621,639,659,660,662,664,667,668,669,672,674,676,677,679,682,684,687,688,691,694,698,703,704,710,714,716,721,723,727,733,743,745,748,757,759,760,762,764,765,766,775,782,784,786,802,803,804,805,806,807,808,809,810,811,812],dofin:780,dog:[659,748],doing:[592,593,600,605,609,610,611,619,631,636,657,659,662,667,670,673,676,679,686,699,700,703,705,710,713,714,721,743,759,760,764,765,770,776,782,783,784,789,790,802,804,807,810,811],doiniti:[712,780,789,790,791,805,806,807,808,810],dollar:[697,770],domain:[590,607,676,683,710,760,770,771,780,811],domin:[632,660,676,710,714,721,725,743,773,781,782,808],dominatoranalysi:725,dominatorset:782,dominatortre:[725,748,782],dominatortreebas:743,domtreeupdat:725,don:[591,593,600,602,605,606,607,608,611,614,616,619,639,640,644,654,659,660,662,664,667,668,669,674,676,679,682,686,688,697,698,703,704,709,710,712,716,717,721,722,723,724,725,727,729,740,741,742,743,745,753,75
5,757,759,761,763,764,766,774,775,779,781,782,785,788,802,803,804,805,806,807,808,809,810,811],donald:770,done:[0,2,3,4,585,590,594,595,601,602,603,605,607,609,610,631,657,659,662,667,669,670,676,679,684,686,695,697,698,699,702,703,707,709,710,712,713,714,715,721,722,723,725,726,727,741,742,743,745,753,756,757,758,759,760,764,765,768,770,773,774,775,777,781,782,783,785,786,790,791,803,804,805,806,807,808,809,810],dontcal:710,dooneiter:601,door:703,doorbel:590,dootherth:610,dopartialredundancyelimin:659,dorit:[750,779],dosometh:[610,760],dosomethinginterestingwithmyapi:712,dot:[639,702,710,742,743,759,783,784],doubl:[589,590,597,599,607,611,659,660,661,681,683,703,710,716,722,741,743,756,768,780,786,802,803,804,805,806,807,808,809,810,811],doublescalarti:710,doubletyp:743,doubleword:710,doubli:[606,743,758],doubt:[703,717,723,761,766],dovetail:808,down:[594,596,599,601,607,608,612,617,619,659,664,667,669,677,679,689,691,703,704,710,712,721,722,723,725,728,740,741,742,743,745,750,759,771,774,780,782,783,787,788,790,803,806,807,809,810,811,812],downcast:703,downgrad:766,download:[605,667,668,679,681,694,696,700,701,702,710,712,723,743,752,753,775,780,787],download_link:747,download_prerequisit:679,downsid:[667,712,743],downstream:[598,609,628,667,745,766],downward:710,dox:608,doxgyen:605,doxygen:[605,675,679,681,703,743,751,769],dozen:[674,697,743],dpp16:588,dpp16_ctrl:[0,1],dpp32_ctrl:9,dpp64_ctrl:9,dpp8_sel:[0,1],dpp:587,dpp_ctrl:[3,4,7,8],dpp_op_sel:0,draft:[594,602,608,684,710,723,742,755,759],drag:742,dragonegg:[607,667],dram:590,dramat:[610,617,659,679,710,743,760,805],drastic:765,draw:[590,638],drawback:[606,659,667,743,791],drawindex:590,drectv:649,drive:[603,605,681,697,746,750,757,764],driven:[607,611,659,663,741,775,780,805,808,811],driver:[590,625,660,661,694,698,700,701,712,713,724,728,744,775,802,805,806,807,808,809,810],drop:[610,615,633,635,641,645,650,652,653,667,709,710,716,742,743,759,775,776],dropdown:742,droploc:705,drown:774,dry:[699,748],ds_add_f
32:[0,3,4,9],ds_add_f64:9,ds_add_rtn_f32:[0,3,4,9],ds_add_rtn_f64:9,ds_add_rtn_u32:[0,2,3,4,9],ds_add_rtn_u64:[0,2,3,4,9],ds_add_src2_f32:[0,3,4],ds_add_src2_u32:[0,2,3,4],ds_add_src2_u64:[0,2,3,4],ds_add_u32:[0,2,3,4,9,590],ds_add_u64:[0,2,3,4,9],ds_and_b32:[0,2,3,4,9],ds_and_b64:[0,2,3,4,9],ds_and_rtn_b32:[0,2,3,4,9],ds_and_rtn_b64:[0,2,3,4,9],ds_and_src2_b32:[0,2,3,4],ds_and_src2_b64:[0,2,3,4],ds_append:[0,2,3,4,9],ds_atom:590,ds_bpermute_b32:[0,3,4,9],ds_cmpst_b32:[0,2,3,4,9],ds_cmpst_b64:[0,2,3,4,9],ds_cmpst_f32:[0,2,3,4,9,590],ds_cmpst_f64:[0,2,3,4,9],ds_cmpst_rtn_b32:[0,2,3,4,9],ds_cmpst_rtn_b64:[0,2,3,4,9],ds_cmpst_rtn_f32:[0,2,3,4,9],ds_cmpst_rtn_f64:[0,2,3,4,9],ds_condxchg32_rtn_b64:[0,2,3,4,9],ds_consum:[0,2,3,4,9],ds_dec_rtn_u32:[0,2,3,4,9],ds_dec_rtn_u64:[0,2,3,4,9],ds_dec_src2_u32:[0,2,3,4],ds_dec_src2_u64:[0,2,3,4],ds_dec_u32:[0,2,3,4,9],ds_dec_u64:[0,2,3,4,9],ds_gws_barrier:[0,2,3,4,9],ds_gws_init:[0,2,3,4,9],ds_gws_sema_br:[0,2,3,4,9],ds_gws_sema_p:[0,2,3,4,9],ds_gws_sema_release_al:[0,2,3,4,9],ds_gws_sema_v:[0,2,3,4,9],ds_inc_rtn_u32:[0,2,3,4,9],ds_inc_rtn_u64:[0,2,3,4,9],ds_inc_src2_u32:[0,2,3,4],ds_inc_src2_u64:[0,2,3,4],ds_inc_u32:[0,2,3,4,9],ds_inc_u64:[0,2,3,4,9],ds_load:590,ds_max_f32:[0,2,3,4,9],ds_max_f64:[0,2,3,4,9],ds_max_i32:[0,2,3,4,9],ds_max_i64:[0,2,3,4,9],ds_max_rtn_f32:[0,2,3,4,9],ds_max_rtn_f64:[0,2,3,4,9],ds_max_rtn_i32:[0,2,3,4,9],ds_max_rtn_i64:[0,2,3,4,9],ds_max_rtn_u32:[0,2,3,4,9],ds_max_rtn_u64:[0,2,3,4,9],ds_max_src2_f32:[0,2,3,4],ds_max_src2_f64:[0,2,3,4],ds_max_src2_i32:[0,2,3,4],ds_max_src2_i64:[0,2,3,4],ds_max_src2_u32:[0,2,3,4],ds_max_src2_u64:[0,2,3,4],ds_max_u32:[0,2,3,4,9],ds_max_u64:[0,2,3,4,9],ds_min_f32:[0,2,3,4,9],ds_min_f64:[0,2,3,4,9],ds_min_i32:[0,2,3,4,9],ds_min_i64:[0,2,3,4,9],ds_min_rtn_f32:[0,2,3,4,9],ds_min_rtn_f64:[0,2,3,4,9,590],ds_min_rtn_i32:[0,2,3,4,9],ds_min_rtn_i64:[0,2,3,4,9],ds_min_rtn_u32:[0,2,3,4,9],ds_min_rtn_u64:[0,2,3,4,9],ds_min_src2_f32:[0,2,3,4],ds_min_src2_f64:[0,2,3,4],ds_min_src2_i32:[
0,2,3,4],ds_min_src2_i64:[0,2,3,4],ds_min_src2_u32:[0,2,3,4],ds_min_src2_u64:[0,2,3,4],ds_min_u32:[0,2,3,4,9],ds_min_u64:[0,2,3,4,9],ds_mskor_b32:[0,2,3,4,9],ds_mskor_b64:[0,2,3,4,9],ds_mskor_rtn_b32:[0,2,3,4,9],ds_mskor_rtn_b64:[0,2,3,4,9],ds_nop:[0,2,3,4,9],ds_or_b32:[0,2,3,4,9],ds_or_b64:[0,2,3,4,9],ds_or_rtn_b32:[0,2,3,4,9],ds_or_rtn_b64:[0,2,3,4,9],ds_or_src2_b32:[0,2,3,4],ds_or_src2_b64:[0,2,3,4],ds_ordered_count:[0,2,3,4],ds_permute_b32:[0,3,4,9],ds_read2_b32:[0,2,3,4,9],ds_read2_b64:[0,2,3,4,9],ds_read2st64_b32:[0,2,3,4,9],ds_read2st64_b64:[0,2,3,4,9],ds_read_addtid_b32:[0,4,9],ds_read_b128:[0,2,3,4,9],ds_read_b32:[0,2,3,4,9],ds_read_b64:[0,2,3,4,9],ds_read_b96:[0,2,3,4,9],ds_read_i16:[0,2,3,4,9],ds_read_i8:[0,2,3,4,9],ds_read_i8_d16:[0,4,9],ds_read_i8_d16_hi:[0,4,9],ds_read_u16:[0,2,3,4,9],ds_read_u16_d16:[0,4,9],ds_read_u16_d16_hi:[0,4,9],ds_read_u8:[0,2,3,4,9],ds_read_u8_d16:[0,4,9],ds_read_u8_d16_hi:[0,4,9],ds_rsub_rtn_u32:[0,2,3,4,9],ds_rsub_rtn_u64:[0,2,3,4,9],ds_rsub_src2_u32:[0,2,3,4],ds_rsub_src2_u64:[0,2,3,4],ds_rsub_u32:[0,2,3,4,9],ds_rsub_u64:[0,2,3,4,9],ds_store:590,ds_sub_rtn_u32:[0,2,3,4,9],ds_sub_rtn_u64:[0,2,3,4,9],ds_sub_src2_u32:[0,2,3,4],ds_sub_src2_u64:[0,2,3,4],ds_sub_u32:[0,2,3,4,9],ds_sub_u64:[0,2,3,4,9],ds_swizzle_b32:[0,2,3,4,9,588],ds_wrap_rtn_b32:[0,2,3,4,9],ds_write2_b32:[0,2,3,4,9],ds_write2_b64:[0,2,3,4,9],ds_write2st64_b32:[0,2,3,4,9],ds_write2st64_b64:[0,2,3,4,9],ds_write_addtid_b32:[0,4,9],ds_write_b128:[0,2,3,4,9],ds_write_b16:[0,2,3,4,9],ds_write_b16_d16_hi:[0,4,9],ds_write_b32:[0,2,3,4,9],ds_write_b64:[0,2,3,4,9],ds_write_b8:[0,2,3,4,9],ds_write_b8_d16_hi:[0,4,9],ds_write_b96:[0,2,3,4,9],ds_write_src2_b32:[0,2,3,4],ds_write_src2_b64:[0,2,3,4,590],ds_wrxchg2_rtn_b32:[0,2,3,4,9],ds_wrxchg2_rtn_b64:[0,2,3,4,9],ds_wrxchg2st64_rtn_b32:[0,2,3,4,9],ds_wrxchg2st64_rtn_b64:[0,2,3,4,9],ds_wrxchg_rtn_b32:[0,2,3,4,9],ds_wrxchg_rtn_b64:[0,2,3,4,9],ds_xor_b32:[0,2,3,4,9],ds_xor_b64:[0,2,3,4,9],ds_xor_rtn_b32:[0,2,3,4,9],ds_xor_rtn_b64:
[0,2,3,4,9],ds_xor_src2_b32:[0,2,3,4],ds_xor_src2_b64:[0,2,3,4],dsa:711,dsc:784,dse:[594,711],dsl:771,dso:[594,604],dso_loc:[597,710,759],dso_local_equival:710,dso_path:617,dso_preempt:[597,710],dsoloc:710,dsp:585,dst0:[0,2,3,4,9],dst1:[0,2,3,4,9],dst:[0,1,2,3,4,5,6,7,8,9,213,371,483,586,607,684,707,770,771,780],dst_sel:[0,3,4,7,8,9,590],dst_unus:[0,3,4,7,8,9,590],dstindex:780,dstkei:709,dsym:[605,614,630,642,654],dsymtab:649,dsymutil:[615,630],dtest_suite_profile_gener:773,dtest_suite_profile_us:773,dtest_suite_run_typ:773,dtest_suite_subdir:773,dtor:[672,710,759],dtrace:603,dual:709,duck:743,due:[84,585,589,590,593,599,602,603,604,605,607,611,639,660,663,665,667,668,669,673,674,676,679,682,684,697,700,704,710,711,713,714,716,722,725,727,736,743,745,753,756,757,760,764,766,770,776,777,785,790,804,805],dumb:811,dummi:[607,664,672,716,741,775,780],dummy_main:675,dummytargetmachin:780,dump:[590,597,601,607,611,614,615,623,625,631,640,641,642,645,648,649,658,695,704,709,726,731,743,759,768,771,782,786,804,805,806,810],dumpabl:782,dumpattr:768,dumper:[615,645,658,759],dumpmymapdoc:786,duo:590,duplic:[585,593,600,603,605,606,607,610,611,621,631,649,659,663,664,667,671,674,705,709,710,711,713,722,726,736,739,743,759,764,768,771,779,803],durat:[593,607,759,782,783,785,788],dure:[585,590,592,598,599,601,605,606,607,609,631,639,645,659,660,662,663,667,668,669,670,673,674,678,679,681,685,695,699,702,707,709,710,712,713,714,722,728,731,742,743,745,747,750,752,753,754,760,762,764,774,775,780,786,804],dvariabl:605,dw_addr_amdgpu_region:590,dw_addr_llvm_const:[585,590],dw_addr_llvm_gener:585,dw_addr_llvm_glob:[585,590],dw_addr_llvm_group:[585,590],dw_addr_llvm_hi_us:585,dw_addr_llvm_lo_us:585,dw_addr_llvm_priv:[585,590],dw_addr_non:[585,590],dw_apple_property_assign:759,dw_apple_property_atom:759,dw_apple_property_class:759,dw_apple_property_copi:759,dw_apple_property_gett:759,dw_apple_property_nonatom:759,dw_apple_property_nul:759,dw_apple_property_null_resett:759,dw_apple_prope
rty_readonli:759,dw_apple_property_readwrit:759,dw_apple_property_retain:759,dw_apple_property_sett:759,dw_apple_property_strong:759,dw_apple_property_unsafe_unretain:759,dw_apple_property_weak:759,dw_aspace_:585,dw_aspace_amdgpu_gener:590,dw_aspace_amdgpu_loc:590,dw_aspace_amdgpu_private_lan:590,dw_aspace_amdgpu_private_wav:590,dw_aspace_amdgpu_region:590,dw_aspace_non:[585,590],dw_at_addr_bas:585,dw_at_address_class:[585,590],dw_at_address_spac:585,dw_at_align:759,dw_at_apple_properti:759,dw_at_apple_property_attribut:759,dw_at_apple_property_gett:759,dw_at_apple_property_sett:759,dw_at_artifici:759,dw_at_byte_s:590,dw_at_call_data_loc:585,dw_at_call_data_valu:585,dw_at_call_site_parameter_valu:710,dw_at_call_valu:585,dw_at_const_valu:585,dw_at_data_loc:585,dw_at_data_member_loc:585,dw_at_decl_lin:759,dw_at_declar:759,dw_at_delet:759,dw_at_element:759,dw_at_encod:590,dw_at_entry_pc:759,dw_at_export_symbol:710,dw_at_frame_bas:[585,590],dw_at_high_pc:759,dw_at_languag:585,dw_at_llvm_active_lan:585,dw_at_llvm_address_spac:585,dw_at_llvm_augment:585,dw_at_llvm_lan:585,dw_at_llvm_lane_pc:585,dw_at_llvm_proc:585,dw_at_llvm_vector_s:585,dw_at_loc:[585,590,611,759],dw_at_loclists_bas:585,dw_at_low_pc:759,dw_at_mips_linkage_nam:759,dw_at_nam:[590,611,630,759],dw_at_rang:759,dw_at_return_addr:585,dw_at_static_link:585,dw_at_string_length:759,dw_at_typ:759,dw_at_use_loc:585,dw_at_vtable_elem_loc:585,dw_ate_address:[585,710],dw_ate_boolean:[585,710],dw_ate_float:[710,810],dw_ate_hi_us:585,dw_ate_lo_us:585,dw_ate_sign:[585,710,759],dw_ate_signed_char:[585,710],dw_ate_unsign:[585,590,705,710],dw_ate_unsigned_char:[585,710],dw_cfa_aspace_def_cfa:585,dw_cfa_def_cfa:585,dw_cfa_def_cfa_express:585,dw_cfa_def_cfa_offset:585,dw_cfa_def_cfa_offset_sf:585,dw_cfa_def_cfa_regist:585,dw_cfa_def_cfa_sf:585,dw_cfa_express:585,dw_cfa_llvm_def_aspace_cfa:585,dw_cfa_llvm_def_aspace_cfa_sf:585,dw_cfa_nop:585,dw_cfa_offset:585,dw_cfa_offset_extend:585,dw_cfa_offset_extended_sf:585,dw_cfa_offset_
extended_uf:585,dw_cfa_offset_uf:585,dw_cfa_regist:585,dw_cfa_restor:585,dw_cfa_restore_extend:585,dw_cfa_same_valu:585,dw_cfa_undefin:585,dw_cfa_val_express:585,dw_cfa_val_offset:585,dw_cfa_val_offset_sf:585,dw_cfa_val_offset_uf:585,dw_form_:759,dw_form_addr:759,dw_form_block:585,dw_form_data1:759,dw_form_data2:759,dw_form_data4:759,dw_form_exprloc:585,dw_form_flag_pres:759,dw_form_line_strp:585,dw_form_ref1:759,dw_form_ref2:[585,759],dw_form_ref4:[585,759],dw_form_ref8:759,dw_form_ref_addr:585,dw_form_ref_udata:759,dw_form_sec_offset:[585,611],dw_form_strp:[585,611,759],dw_form_strp_sup:585,dw_form_strx1:759,dw_form_udata:585,dw_form_xxx:759,dw_lang_c99:[710,759],dw_lang_c:[705,810],dw_lang_llvm_hip:585,dw_lnct_llvm_is_md5:585,dw_lnct_llvm_sourc:[585,590],dw_lnct_md5:585,dw_lnct_path:585,dw_macinfo_defin:710,dw_macinfo_start_fil:710,dw_macinfo_undef:710,dw_op_:585,dw_op_add:585,dw_op_addr:[585,590,759],dw_op_addrx:[585,590],dw_op_aspace_bref31:585,dw_op_aspace_breg0:585,dw_op_aspace_breg1:585,dw_op_aspace_bregx:585,dw_op_bit_piec:[585,710],dw_op_bra:585,dw_op_breg0:585,dw_op_breg1:585,dw_op_breg31:585,dw_op_breg:[585,710],dw_op_bregx:[585,710],dw_op_cal:585,dw_op_call2:585,dw_op_call4:585,dw_op_call_frame_cfa:585,dw_op_call_ref:[585,590],dw_op_const1:585,dw_op_const1u:585,dw_op_const2:585,dw_op_const2u:585,dw_op_const4:585,dw_op_const4u:585,dw_op_const8:585,dw_op_const8u:585,dw_op_const:585,dw_op_const_typ:585,dw_op_constu:[585,710],dw_op_constx:585,dw_op_convert:[585,710],dw_op_deref:[585,710],dw_op_deref_s:585,dw_op_deref_typ:[585,590],dw_op_drop:585,dw_op_dup:585,dw_op_entry_valu:[585,710],dw_op_eq:585,dw_op_fbreg:[585,759],dw_op_form_tls_address:585,dw_op_g:585,dw_op_gt:585,dw_op_implicit:585,dw_op_implicit_point:[585,710],dw_op_implicit_valu:585,dw_op_l:585,dw_op_lit0:585,dw_op_lit1:585,dw_op_lit31:585,dw_op_lit:585,dw_op_llvm_:585,dw_op_llvm_arg:[710,759],dw_op_llvm_aspace_bregx:585,dw_op_llvm_aspace_implicit_point:585,dw_op_llvm_bit_offset:585,dw_op_llvm_ca
ll_frame_entry_reg:585,dw_op_llvm_convert:710,dw_op_llvm_entry_valu:710,dw_op_llvm_extend:[585,590],dw_op_llvm_form_aspace_address:[585,590],dw_op_llvm_frag:710,dw_op_llvm_implicit_aspace_point:585,dw_op_llvm_implicit_point:710,dw_op_llvm_offset:[585,590],dw_op_llvm_offset_constu:585,dw_op_llvm_offset_uconst:585,dw_op_llvm_piece_end:585,dw_op_llvm_push_lan:[585,590],dw_op_llvm_select_bit_piec:[585,590],dw_op_llvm_tag_offset:710,dw_op_llvm_undefin:[585,590],dw_op_lt:585,dw_op_minu:[585,710],dw_op_n:585,dw_op_nop:585,dw_op_ov:[585,710],dw_op_pick:585,dw_op_piec:585,dw_op_plu:[585,710,759],dw_op_plus_uconst:[585,710,759],dw_op_push_object_address:[585,710],dw_op_reg0:585,dw_op_reg1:585,dw_op_reg31:585,dw_op_reg:585,dw_op_regval_typ:[585,590],dw_op_regx:[585,590],dw_op_reinterpret:585,dw_op_rot:585,dw_op_skip:585,dw_op_stack_valu:[585,590,710,759],dw_op_swap:[585,710],dw_op_xderef:[585,710],dw_op_xderef_s:585,dw_op_xderef_typ:585,dw_opreg:585,dw_tag:759,dw_tag_apple_properti:759,dw_tag_array_typ:[710,759],dw_tag_atomic_typ:[710,759],dw_tag_base_typ:[585,590,710,759],dw_tag_call_site_paramet:585,dw_tag_class_typ:[710,759],dw_tag_compile_unit:585,dw_tag_const_typ:[710,759],dw_tag_dwarf_procedur:[585,590],dw_tag_entry_point:585,dw_tag_enumeration_typ:[710,759],dw_tag_file_typ:759,dw_tag_formal_paramet:585,dw_tag_friend:710,dw_tag_gnu_template_param_pack:710,dw_tag_gnu_template_template_param:710,dw_tag_imported_modul:710,dw_tag_inherit:710,dw_tag_inlined_subroutin:[585,759],dw_tag_interface_typ:759,dw_tag_label:759,dw_tag_llvm_address_class_typ:585,dw_tag_memb:[710,759],dw_tag_namelist:759,dw_tag_namespac:759,dw_tag_packed_typ:759,dw_tag_pointer_typ:[710,759],dw_tag_ptr_to_member_typ:[585,710,759],dw_tag_reference_typ:[710,759],dw_tag_restrict_typ:[710,759],dw_tag_set_typ:759,dw_tag_shared_typ:759,dw_tag_string_typ:759,dw_tag_structure_typ:[710,759],dw_tag_subprogram:[585,759],dw_tag_subrange_typ:759,dw_tag_subroutine_typ:759,dw_tag_template_value_paramet:710,dw_tag_typede
f:[710,759],dw_tag_union_typ:[710,759],dw_tag_unspecified_typ:[710,759],dw_tag_vari:[585,759],dw_tag_volatile_typ:[710,759],dw_tag_xxx:759,dw_virtuality_pure_virtu:710,dwarf2:810,dwarf:[607,615,641,642,654,664,665,669,676,679,705,710,711,713,729,743,762,778,780],dwarfdebug:759,dwarfdump:[611,614,615,638,759],dwarfencod:669,dwarfnumb:780,dwarfregnum:780,dwell:804,dwo:[585,641,679],dwoid:710,dword:[10,11,12,13,15,16,24,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,80,81,82,83,84,85,87,88,89,90,91,92,93,94,95,96,97,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,127,132,133,134,135,136,137,138,139,140,141,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,217,224,225,226,227,228,229,230,231,232,233,234,235,236,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,317,318,319,322,323,324,328,329,330,331,332,334,335,341,342,343,344,345,346,347,348,349,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,375,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,487,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,5
32,533,534,535,536,537,538,539,540,541,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,585,587,588,589,590,642],dwp:654,dx10:590,dx10_clamp:590,dying:705,dyld:715,dyldinfo:640,dylib:[642,709,726,740,775,788],dyn:[590,648,649,663],dyn_cast:[610,703,751,768,780],dyn_cast_or_nul:743,dyn_switch:610,dynam:[585,590,595,597,601,603,612,617,619,625,633,635,639,640,642,648,649,657,663,665,671,672,674,676,677,684,695,703,709,712,714,715,726,731,741,743,744,758,759,760,762,764,776,777,783,786,805,807],dynamic_cast:[610,703,743,808,809,810],dynamic_shared_point:590,dynamiclibrari:790,dynamiclibrarysearchgener:[726,788,789,790,791],dynamicsharedpoint:590,dynstr:590,dynsym:590,e_entri:590,e_id:590,e_machin:590,e_typ:590,ea_r:607,eabi:[617,700],eacc:775,each:[27,96,97,98,99,112,113,114,129,186,187,188,201,202,213,220,286,287,288,289,306,307,308,371,378,447,448,449,450,469,470,483,490,560,561,562,563,576,577,578,585,587,588,589,590,592,593,594,596,597,598,601,603,605,606,607,608,609,610,611,614,616,617,619,621,624,625,626,630,631,632,635,636,637,639,640,641,642,644,645,648,649,650,652,653,654,657,659,660,663,664,666,667,669,670,672,674,675,676,677,678,679,682,683,684,688,689,690,691,695,702,703,705,706,707,709,710,711,712,713,714,715,716,719,720,721,722,724,725,726,727,728,730,731,733,734,735,738,739,740,743,744,745,748,750,753,754,755,756,757,759,760,762,763,764,765,766,768,769,770,771,773,774,775,776,777,780,782,783,784,785,786,788,789,790,802,803,804,805,806,807,808,809,810],eager:[726,743],eagerli:[726,789],eao:743,earli:[645,660,663,682,693,700,710,712,716,724,727,728,731,743,760,780,782,808],earlier:[585,590,594,597,609,610,611,663,664,667,674,682,689,694,698,709,726,727,735,745,749,759,760,770,780,788,804,805],earliest:611,earlyclobb:716,earlycs:674,earlyout:768,earn:745,eas:[679,697,710,764],easi:[597,601,603,605,607,610,611,616,667,676,679,694,701,703,709,710,712,717
,722,726,743,745,746,748,753,759,760,761,766,768,770,774,775,782,786,789,790,802,803,804,805,806,807,808,811,812],easier:[585,590,591,593,594,600,602,607,608,610,611,624,662,663,665,667,670,672,676,679,701,704,710,714,721,724,725,726,741,742,743,745,748,753,768,769,770,771,775,782,803,804,808,810],easiest:[594,686,695,700,717,761,780,782,783,805,811],easili:[585,600,607,659,666,674,676,704,719,721,726,741,743,754,757,759,760,762,764,765,768,769,775,782,786,788,809],eat:[659,802,803,804,805,806,807,808,809,810],eatomtypecuoffset:759,eatomtypedieoffset:759,eatomtypedietag:759,eatomtypenameflag:759,eatomtypenul:759,eatomtypetag:759,eatomtypetypeflag:759,eax:[607,611,639,642,671,710,716,759,760,770,771,780],ebnf:785,ebp:[607,669,771],ebx:[607,671,771],echo:[595,616,631,654,679,682,717,723,745,761],eckel:743,ecma:731,econom:608,ecosystem:[739,743,745,766],ecsubstreams:731,ecx:[607,671,710,770,771],edg:[598,601,607,610,611,660,666,669,671,676,678,709,710,712,714,745,750,770,782,783,784],edi:[607,611,716,760,771],edit:[590,605,615,644,681,698,731,742,743,748],editor:[610,667,679,717,723,761,766],edu:[595,747,760],educ:608,edx:[607,639,710,760,770,771],eeee:639,eeeer:639,ef_amdgpu_feature_sramecc_:590,ef_amdgpu_feature_sramecc_any_v4:590,ef_amdgpu_feature_sramecc_off_v4:590,ef_amdgpu_feature_sramecc_on_v4:590,ef_amdgpu_feature_sramecc_unsupported_v4:590,ef_amdgpu_feature_sramecc_v3:590,ef_amdgpu_feature_sramecc_v4:590,ef_amdgpu_feature_trap_handler_v2:590,ef_amdgpu_feature_xnack_:590,ef_amdgpu_feature_xnack_any_v4:590,ef_amdgpu_feature_xnack_off_v4:590,ef_amdgpu_feature_xnack_on_v4:590,ef_amdgpu_feature_xnack_unsupported_v4:590,ef_amdgpu_feature_xnack_v2:590,ef_amdgpu_feature_xnack_v3:590,ef_amdgpu_feature_xnack_v4:590,ef_amdgpu_mach_amdgcn_gfx1010:590,ef_amdgpu_mach_amdgcn_gfx1011:590,ef_amdgpu_mach_amdgcn_gfx1012:590,ef_amdgpu_mach_amdgcn_gfx1013:590,ef_amdgpu_mach_amdgcn_gfx1030:590,ef_amdgpu_mach_amdgcn_gfx1031:590,ef_amdgpu_mach_amdgcn_gfx1032:590,ef_amdgpu_mach_amdgcn
_gfx1033:590,ef_amdgpu_mach_amdgcn_gfx1034:590,ef_amdgpu_mach_amdgcn_gfx1035:590,ef_amdgpu_mach_amdgcn_gfx600:590,ef_amdgpu_mach_amdgcn_gfx601:590,ef_amdgpu_mach_amdgcn_gfx602:590,ef_amdgpu_mach_amdgcn_gfx700:590,ef_amdgpu_mach_amdgcn_gfx701:590,ef_amdgpu_mach_amdgcn_gfx702:590,ef_amdgpu_mach_amdgcn_gfx703:590,ef_amdgpu_mach_amdgcn_gfx704:590,ef_amdgpu_mach_amdgcn_gfx705:590,ef_amdgpu_mach_amdgcn_gfx801:590,ef_amdgpu_mach_amdgcn_gfx802:590,ef_amdgpu_mach_amdgcn_gfx803:590,ef_amdgpu_mach_amdgcn_gfx805:590,ef_amdgpu_mach_amdgcn_gfx810:590,ef_amdgpu_mach_amdgcn_gfx900:590,ef_amdgpu_mach_amdgcn_gfx902:590,ef_amdgpu_mach_amdgcn_gfx904:590,ef_amdgpu_mach_amdgcn_gfx906:590,ef_amdgpu_mach_amdgcn_gfx908:590,ef_amdgpu_mach_amdgcn_gfx909:590,ef_amdgpu_mach_amdgcn_gfx90a:590,ef_amdgpu_mach_amdgcn_gfx90c:590,ef_amdgpu_mach_non:590,ef_amdgpu_mach_r600_bart:590,ef_amdgpu_mach_r600_caico:590,ef_amdgpu_mach_r600_cayman:590,ef_amdgpu_mach_r600_cedar:590,ef_amdgpu_mach_r600_cypress:590,ef_amdgpu_mach_r600_junip:590,ef_amdgpu_mach_r600_r600:590,ef_amdgpu_mach_r600_r630:590,ef_amdgpu_mach_r600_redwood:590,ef_amdgpu_mach_r600_rs880:590,ef_amdgpu_mach_r600_rv670:590,ef_amdgpu_mach_r600_rv710:590,ef_amdgpu_mach_r600_rv730:590,ef_amdgpu_mach_r600_rv770:590,ef_amdgpu_mach_r600_sumo:590,ef_amdgpu_mach_r600_turk:590,ef_amdgpu_mach_xxx:590,effect:[585,588,590,593,597,605,607,610,611,616,617,630,631,639,641,642,648,649,660,667,676,679,680,684,685,697,705,710,714,718,741,743,745,750,756,759,760,764,765,770,775,776,780,782,783,790,806,807],effici:[585,589,590,597,607,610,658,667,672,676,678,679,703,709,710,712,718,726,741,743,750,756,759,760,764,765,768,775,777,782,803,805],effort:[590,610,667,670,674,705,710,715,723,725,741,752,759,760,762,765,766,783,804],eflag:[716,759,770,771],egregi:[608,667],egrep:679,eh_fram:[650,669],eh_label:716,eh_return:669,eha:710,ehashfunctiondjb:759,ehcleanup:663,ehobj:669,ehptr:669,ehselector:663,ei_abivers:590,ei_class:590,ei_data:590,ei_osabi:590,eieio:710,eight:[
607,616,710,770],einval:775,eip:771,eisdir:775,either:[32,83,84,98,112,114,120,209,288,306,308,314,382,434,449,470,479,495,547,562,576,578,584,585,588,589,590,593,594,596,597,601,603,605,607,609,610,616,617,621,624,625,631,639,641,653,654,657,659,662,663,667,669,670,671,674,676,677,679,683,684,687,688,694,695,704,705,709,710,711,712,714,715,716,721,722,723,724,725,726,728,730,731,733,738,741,742,743,745,748,755,759,760,762,764,766,770,771,773,774,775,776,777,780,782,783,784,785,802,803,804,806,810,811],eject:766,elabor:[743,760],elaps:639,elect:[609,667,766],electr:695,eleg:[803,805,806,811],elem0:710,elem_func:759,element:[96,97,98,99,112,113,114,186,187,188,201,202,286,287,288,289,306,307,308,447,448,449,450,469,470,560,561,562,563,576,577,578,585,588,590,593,596,597,606,607,611,639,663,666,670,676,683,684,688,709,711,717,722,724,727,741,743,750,759,761,764,768,769,770,776,777,779,780,785,786,788],element_s:710,elementtyp:[597,710],elementwis:684,elev:775,elf32:641,elf64:[641,784],elf64_rela:590,elf:[585,597,607,640,642,648,650,652,654,660,661,665,679,709,710,715,726,756,759,762,775,783],elf_cgprofil:671,elf_word:671,elf_x86_64:709,elf_xword:671,elfabiversion_amdgpu_hsa_v2:590,elfabiversion_amdgpu_hsa_v3:590,elfabiversion_amdgpu_hsa_v4:590,elfabiversion_amdgpu_mesa3d:590,elfabiversion_amdgpu_p:590,elfclass32:590,elfclass64:590,elfdata2lsb:590,elflinkgraphbuild:709,elfosabi_amdgpu_hsa:590,elfosabi_amdgpu_mesa3d:590,elfosabi_amdgpu_p:590,elfosabi_non:590,elfv2:661,eli:660,elid:[625,663,676,708],elig:[639,663,741],elimin:[585,590,593,601,602,607,610,617,659,663,667,672,674,676,679,686,694,709,710,711,716,724,728,743,759,760,764,780,790,803,805,806,807,808],eliminatecallframepseudoinstr:780,eliminateframeindex:780,elis:663,els:[590,607,608,609,611,619,630,631,637,659,660,663,664,667,670,681,682,694,695,699,705,710,712,714,716,721,722,723,724,725,741,742,743,745,760,761,764,765,766,770,775,776,779,780,786,789,802,803,804,805,807,808,809,810,811],elsebb:[806,807,808,809
,810],elseif:590,elsev:[806,807,808,809,810],elsewher:[588,589,590,659,664,667,709,743,764,768,780],elt:[684,710],eltsizeparam:[597,710],eltti:[597,810],em_amdgpu:590,emac:[610,679,771],email:[594,608,609,662,667,678,679,696,702,711,723,727,742,755,757,766,811],emast:757,emb:[585,590,597,605,614,710,759,811],embargo:757,embed:[585,597,614,641,664,667,675,676,689,700,710,730,731,733,739,741,743,750,759,760,770,806,807],embodi:769,embrac:667,emerg:[590,665,667,757],emerson:748,emersonconcern:748,emiss:[614,676,679,710,759,762,780,808],emissionkind:[705,710,759],emit:[585,590,594,596,597,601,605,610,611,614,615,617,619,630,639,641,642,644,645,653,658,659,660,663,664,665,669,671,672,674,677,679,681,698,704,707,709,710,711,715,716,723,724,729,731,736,738,741,743,759,760,762,764,768,770,776,780,782,784,789,791,803,804,806,807,808,810,811],emit_22:780,emitaddressmod:769,emitalign:676,emitbyt:780,emitcal:743,emitconst:780,emitconstantpool:780,emitconstpooladdress:780,emitepilogu:780,emitexternalsymboladdress:780,emitfnstart:607,emitfunctionstub:780,emitglobaladdress:780,emitinstruct:[607,780],emitint32:676,emitjumptableaddress:780,emitjumptableinfo:780,emitlabel:607,emitlabelplusoffset:676,emitleadingf:594,emitloadlink:594,emitloc:810,emitmaskedatomiccmpxchgintrins:594,emitmaskedatomicrmwintrins:594,emitnam:610,emitobject:715,emitprologu:[669,780],emitpseudoexpansionlow:768,emitsourcefilehead:769,emitstacksizesect:607,emitstorecondit:594,emitsymbolattribut:607,emitt:[658,665,710,754,768,769,806],emittrailingf:594,emitvalu:607,emmc:697,emphas:[610,717,761,770],emploi:[710,743,746,750,757,760,770],empti:[213,371,483,585,590,605,606,610,616,631,654,658,659,663,667,669,676,679,709,710,712,716,722,733,743,745,757,759,760,764,769,770,775,776,780,786,803,804,806,810],empty_nod:710,empty_subregsset:780,emul:[607,619,639,670,674,700,710,726,743,745],enabl:[213,371,483,585,588,589,590,592,596,603,605,606,607,610,611,612,614,616,617,619,620,621,622,625,629,631,632,636,639,641,642,645,
649,653,657,658,659,660,662,663,664,665,667,672,673,674,675,676,679,681,686,687,693,694,695,700,705,709,712,713,714,718,721,725,726,728,740,741,743,745,748,750,756,758,759,764,766,769,771,773,775,776,779,780,782,783,784,786,788,789,790,805,809],enable_dx10_clamp:590,enable_exception_address_watch:590,enable_exception_fp_denorm:590,enable_exception_fp_denormal_sourc:590,enable_exception_ieee_754_fp:590,enable_exception_ieee_754_fp_division_by_zero:590,enable_exception_ieee_754_fp_inexact:590,enable_exception_ieee_754_fp_invalid_oper:590,enable_exception_ieee_754_fp_overflow:590,enable_exception_ieee_754_fp_underflow:590,enable_exception_int_divide_bi:590,enable_exception_int_divide_by_zero:590,enable_exception_memori:590,enable_ieee_mod:590,enable_if:703,enable_mem_ord:590,enable_optim:740,enable_private_seg:590,enable_sgpr_:590,enable_sgpr_dispatch_id:590,enable_sgpr_dispatch_ptr:590,enable_sgpr_flat_scratch:590,enable_sgpr_flat_scratch_init:590,enable_sgpr_kernarg:590,enable_sgpr_kernarg_segment_ptr:590,enable_sgpr_priv:590,enable_sgpr_private_seg:590,enable_sgpr_private_segment_buff:590,enable_sgpr_private_segment_s:590,enable_sgpr_queue_ptr:590,enable_sgpr_workgroup:590,enable_sgpr_workgroup_id:590,enable_sgpr_workgroup_id_i:590,enable_sgpr_workgroup_id_x:590,enable_sgpr_workgroup_id_z:590,enable_sgpr_workgroup_info:590,enable_tg_split:590,enable_trap_handl:590,enable_vgpr:590,enable_vgpr_workitem_id:590,enable_wavefront_size32:590,enable_wgp_mod:590,enc:768,encapsul:[590,703,706,710,743,750],enclos:[585,588,590,597,611,614,654,663,716,743,762,770,775,810],encnorm:770,encod:[175,270,435,548,589,590,594,596,617,619,623,627,639,645,661,663,669,671,676,683,705,710,722,739,750,751,759,762,764,768,770,771,773,777,778,780,785,786,808,810],encodecompactunwindregisterswithoutfram:607,encodingbyhwmod:770,encodinginfo:770,encompass:[697,706,766],encount:[585,590,597,607,611,626,640,669,674,700,710,730,743,745,780,784,789],encourag:[600,603,608,609,610,667,678,679,702,704,7
09,741,743,745,746,748,756,811,812],end:[585,589,590,592,593,596,597,600,601,602,603,605,607,608,609,611,614,621,622,628,631,635,639,641,644,652,659,660,664,667,669,671,673,674,676,679,681,688,695,697,699,701,702,703,705,709,711,712,713,714,717,720,721,722,725,726,727,728,731,735,741,742,744,745,753,754,755,757,760,761,762,765,766,768,769,770,771,774,775,778,779,780,782,783,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],end_amd_kernel_code_t:590,end_amdgpu_metadata:590,end_amdhsa_kernel:590,endcod:610,endcond:[806,807,808,809,810],endexpr:[806,807,808,809,810],endforeach:606,endfunct:606,endia:710,endian:[585,590,597,607,688,709,710,739,754,759,778,780,785],endif:[590,606,610,659,660,664,695,712,765,768,770,781,788,789,790,791,805,806,807,808,809,810,811],endl:809,endloop:[806,807,808,809,810],endmacro:606,endors:667,endsess:[788,789,790,791],endur:667,enforc:[594,607,610,611,639,659,667,676,679,689,710,743,745,755,756,762],engag:[609,667],engin:[624,661,666,672,677,711,712,722,741,745,748,760,778,804,806,811,812],enginebuild:[715,790],english:610,enhanc:[610,661,667,759,778,779,782,804,805,806,807,808,810,811],enian:585,enjoi:696,enoent:775,enough:[585,594,600,605,607,609,610,631,639,644,659,667,674,677,689,695,702,705,709,710,718,722,723,743,744,758,759,762,765,766,775,780,782,788,803,806,810],enqueu:590,enqueue_kernel:590,enrich:750,ensu:[592,780],ensur:[585,590,593,594,597,599,604,605,607,608,609,610,611,623,624,639,644,657,659,662,667,669,671,672,673,674,676,679,686,695,698,702,705,709,710,714,723,725,726,738,741,742,743,745,756,757,759,760,762,764,775,779,780,781,782,784,807,808,810],enter:[590,597,601,605,607,616,663,669,674,679,702,710,714,721,726,742,743,759,760,784,785,788,803,804,805],entir:[585,590,593,594,596,597,607,609,610,611,616,621,623,625,630,644,652,659,660,663,667,674,676,677,681,684,688,689,704,709,710,712,714,721,724,726,728,731,734,735,736,738,739,741,743,756,759,760,764,770,773,774,775,776,779,780,782,783,786,804,80
5,811],entireti:[667,736,757,764],entiti:[589,597,610,710,736,743,750,757,762,770],ento:711,entomolog:711,entosa:711,entranc:710,entri:[593,597,598,605,607,611,625,628,630,631,638,639,640,642,643,645,648,649,650,663,664,669,670,671,673,674,676,677,679,684,685,703,705,708,709,711,712,714,715,716,717,721,722,724,726,733,734,738,742,743,745,750,754,756,757,759,760,761,764,765,768,777,780,782,783,785,786,788,789,790,803,804,805,806,807,808,809,810],entry_arg:785,entry_point:590,entrypoint:[663,710,712],entrysym:726,entrytoken:607,enumattr:658,enumcas:786,enumer:[587,589,597,607,644,658,659,667,676,710,714,722,731,736,738,743,756,759,760,769,770,780,785,786],env:753,environ:[585,594,605,607,610,611,616,620,625,654,661,668,671,676,679,681,694,695,698,709,726,740,741,743,756,757,765,773,775,778,783,784,805],environment:[709,710],envis:746,envp:665,eof:[611,712,770,785,802,803,804,805,806,807,808,809,810],eor:[710,716],ep_earlyasposs:782,ep_fulllinktimeoptimizationlast:782,epc:[788,789,790,791],epcindirectionutil:[790,791],epciu:[790,791],epilog:[710,780],epilogu:[676,710,759,776,780],episod:806,epoch:786,epsilon:631,eptr:710,equal:[585,588,589,590,593,597,598,599,607,616,644,645,659,660,669,672,674,679,683,684,695,703,709,710,722,726,731,734,735,738,743,760,770,780,803,806,807,808,809,810],equal_rang:743,equival:[585,588,590,594,596,597,605,606,607,610,611,614,619,621,625,627,639,641,642,645,647,648,649,650,653,659,660,669,671,676,677,683,684,686,688,700,711,716,722,724,725,726,729,741,743,749,756,759,770,789,811],eras:[594,743,759,806,807,808,809,810],erasefrompar:[743,780,804,805,806,807,808,809,810],erasur:743,eref:661,erefrm:661,ericsson:710,erlang:710,erol:773,err2:743,err:[608,724,726,743,757,781,782,788,789,790,791,804,805,806,807,808,809,810],errat:705,errata:661,errc:743,errc_:775,errc_eno:775,errcod:775,erred:710,erring:609,errno:[593,710,779],erron:[659,679,710],error:[585,589,590,593,601,605,606,607,611,612,614,616,617,619,621,622,623,624,625,626,629,631,632,63
3,635,636,639,641,643,648,649,650,652,653,654,657,658,659,660,667,669,670,671,675,676,679,694,695,698,700,701,702,704,705,710,711,712,713,714,720,724,725,726,741,744,747,751,753,754,768,770,771,775,780,782,783,788,789,790,791,802,803,804,805,806,807,808,809,810,811],error_cod:809,error_exitcod:712,errorasoutparamet:743,errorcodetoerror:743,errorhandl:788,errorinfo:743,errorlist:743,errorloc:769,errorortoexpect:743,errortoerrorcod:743,es_gs_lds_siz:590,escal:[667,757],escap:[593,611,616,641,653,669,676,710,713,716,718,770,775,807],eschnett:747,escudo:756,esgsldssiz:590,esi:[607,716,760,771],esp:[607,611,771],especi:[585,594,599,605,608,609,610,616,631,639,645,659,660,667,668,674,679,697,709,710,711,714,722,723,725,727,741,743,744,750,759,760,761,765,766,770,780,788],espresso:774,essenc:[667,760],essenti:[594,597,609,639,657,676,700,719,722,727,743,745,746,756,759,760,770,780],establish:[607,663,665,669,674,676,710,726,745,760,764,770,780],estim:[598,639,709,722,750,779],et_dyn:590,et_rel:590,etc:[20,123,212,370,482,587,590,592,593,594,597,602,605,607,610,611,612,630,639,644,658,662,663,667,668,676,677,678,679,688,699,701,702,706,709,710,711,712,713,716,721,722,726,728,731,734,740,741,742,745,751,753,754,755,756,759,760,765,766,768,769,770,771,773,776,778,780,782,783,784,802,803,804,805,806,807,808,809,810,811,812],ethnic:608,eurollvm:675,european:750,evalu:[589,590,603,605,606,611,664,670,676,677,682,684,708,710,712,722,724,726,750,757,760,770,780,803,804,805,806,807,808,809,810,811],even:[585,589,590,593,595,597,598,602,603,607,608,609,610,611,614,616,619,627,640,641,644,653,660,663,667,668,673,674,679,683,691,697,699,703,705,709,710,712,713,714,717,718,722,723,725,726,729,735,736,739,742,743,745,748,764,765,766,768,770,771,774,775,779,782,786,788,789,790,805,806,807,808,811],evenli:[710,779],event:[605,608,639,646,709,712,719,743,755,756,762,764,783,784,785,787],event_s:785,eventu:[591,611,667,669,683,691,703,708,710,712,714,721,741,759,760,764,774,788,806],ever:[5
90,593,610,667,682,710,734,743,768,789,790,811],evergreen:590,everi:[585,590,593,594,596,597,598,599,601,603,606,607,609,611,616,625,631,639,641,645,657,659,663,664,667,669,671,672,674,676,678,682,683,696,697,702,703,704,705,709,710,711,712,714,716,719,720,721,722,724,726,729,730,735,738,741,742,743,745,753,756,757,758,759,760,762,763,764,765,766,768,769,770,771,773,774,775,777,781,782,785,789,803,805,806,808,810,811],everybodi:[609,723],everyon:[667,755,811],everyth:[594,601,603,606,607,659,660,662,677,679,680,681,694,697,699,701,702,710,723,724,727,745,766,775,782,789,803,804,806,807],everywher:[705,721,722,723,775,786,805],evex2vex:658,evex2vexoverrid:770,evex:[658,768],evex_w1_vex_w0:770,evid:[748,760,766],evidenc:677,evil:[596,743,771],evl:710,evlmask:710,evolut:[714,782],evolutionari:712,evolv:[585,600,608,667,713,757],evt:683,ex2:724,exact:[594,610,611,630,639,659,669,676,679,695,709,710,713,716,728,729,736,741,759,760,764,768,770,774,780,782],exact_artifact_path:712,exactli:[585,588,590,593,596,597,605,610,611,616,635,645,659,663,667,669,671,676,684,689,703,705,710,722,723,731,735,741,742,743,746,754,755,759,764,765,768,769,782,784,786,805],exactmatch:710,examin:[585,601,607,625,644,663,702,710,715,733,759,760,764,770,775,780,788,789],exampl:[17,20,24,27,117,120,121,123,127,129,205,209,210,212,213,217,220,310,314,370,371,375,378,479,480,482,483,487,490,580,584,586,587,588,589,592,593,594,595,597,598,599,600,601,603,605,606,607,609,610,611,612,627,628,639,641,642,644,650,653,657,659,660,664,666,667,668,669,670,671,672,673,674,675,677,681,682,683,684,685,688,689,696,697,698,700,701,702,703,704,705,706,708,709,711,714,716,718,721,722,723,724,725,726,727,729,730,731,735,736,740,741,742,744,745,746,748,750,751,753,754,756,757,759,760,762,764,765,768,769,773,774,775,776,777,778,779,780,781,783,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],exampletest:616,exce:[27,120,129,209,220,314,378,479,490,584,585,588,590,610,710,759,760],exceed:756
,exceedingli:760,excel:[593,606,672,679,719,743,780],except:[585,586,587,588,590,593,594,596,597,603,605,607,608,611,616,617,619,622,623,626,641,653,659,660,661,663,667,671,672,674,677,683,684,687,695,698,709,714,716,720,722,724,725,726,728,731,743,745,751,759,762,765,769,770,775,776,780,782,786,788,789,790,791,804,805,807,808,811,812],excerpt:724,excess:[610,611,617,619,712],exchang:[679,743,756],exchange2_:747,exchange2_r:747,excis:710,excit:[667,702,811],exclam:710,exclud:[585,595,602,607,611,612,616,621,630,639,641,644,645,658,667,682,707,710,748,754,776],exclus:[594,607,637,659,710,726,756,764,770,777],exclusionari:608,excp_en:590,excp_en_msb:590,exctyp:669,exctype1:669,exctypen:669,excus:[600,608],exe:[615,654,696,698,731,775,810],exec:[10,11,12,13,48,49,57,58,59,60,61,63,64,65,69,70,73,75,77,139,140,144,146,147,148,149,151,152,153,154,155,156,160,161,162,163,166,167,168,169,234,235,240,243,244,245,246,248,249,250,251,252,253,257,258,261,262,263,265,317,318,322,323,328,329,330,331,332,342,343,344,345,347,348,397,398,403,406,407,410,411,412,413,421,422,425,426,427,429,510,511,516,519,520,521,522,523,524,526,527,528,529,533,534,537,538,539,541,588,590,595,612,712],exec_hi:589,exec_lo:589,exec_mask_32:590,exec_mask_64:590,exec_tim:773,execinfo:695,execut:[30,223,380,493,585,589,593,594,598,599,601,604,606,607,610,612,614,615,617,621,625,626,631,633,640,641,642,645,648,649,650,652,653,654,657,659,660,661,662,663,664,665,667,668,669,671,673,674,676,679,680,681,682,686,688,695,700,704,710,711,712,714,715,725,726,728,731,741,744,750,757,758,759,760,762,764,765,768,769,773,774,776,778,779,780,781,782,783,784,785,788,789,790,803,804,805,806,807,808,810,811],executable_format_error:743,executeprogramandwait:765,executionengin:[607,665,679,709,726,743,748,788,789,790,791],executionsess:[709,726,788,789,790,791],executionutil:[788,789,790,791],executorprocesscontrol:[788,789,790,791],execv:765,execz:[10,11,12,13,54,57,58,59,60,61,63,64,65,69,70,75,77,144,146,147,148,149,1
51,152,153,154,155,156,160,161,162,163,166,167,240,243,244,245,246,248,249,250,251,252,253,257,258,261,265,317,318,322,323,328,329,330,331,332,342,343,344,345,347,348,403,406,407,410,411,412,413,421,422,425,429,516,519,520,521,522,523,524,526,527,528,529,533,534,537,541],exedepsfix:691,exedomain:770,exeext:775,exegesi:[615,658],exempt:610,exercis:[675,756,764,775,788,811],exhaust:[603,607,608,610,674,676,679,688,743,756],exhibit:[594,601,710,760],exig:743,exist:[585,588,590,594,597,602,603,605,606,607,609,610,611,614,616,617,621,625,629,631,639,640,644,645,660,663,667,669,670,671,674,676,677,681,682,688,689,691,694,696,704,705,708,709,710,712,714,716,720,721,725,726,731,736,739,741,742,743,744,745,747,748,749,750,755,756,757,759,760,761,762,764,765,766,769,770,771,775,776,778,780,781,786,788,804,805,806,807,809,810,811],exit5:724,exit:[585,590,598,602,607,637,642,665,669,672,674,679,681,682,710,712,714,722,724,726,743,745,750,759,760,774,776,782,783,784,785,790,791,806,808,809,810,811],exitcod:616,exitcond:[710,776],exitonerr:[709,726,743,805,806,807,808,809,810],exitonerror:[805,806,807,808,809,810],exn:[663,710],exp0:773,exp1:773,exp2:[773,779],exp:[117,205,310,580,599,779],exp_cnt:[120,209,314,479,584],expand:[585,591,594,602,607,610,616,623,625,648,649,658,664,667,670,672,683,698,705,706,722,742,745,748,757,759,770,771,775,783,786],expandatomicrmwinir:594,expandinlineasm:780,expandop:670,expans:[594,606,607,625,658,664,674,688,706,710,712,716,780],expansionregiontag:664,expcnt:[120,209,314,479,584,590],expcnt_sat:[120,209,314,479,584],expect:[585,588,589,590,593,594,595,596,597,598,600,603,606,607,609,610,611,616,625,639,659,663,667,673,674,676,677,678,679,681,684,688,706,709,712,714,720,721,722,730,734,738,741,743,748,749,754,756,757,758,759,760,762,763,764,765,768,770,773,774,775,781,782,785,786,788,789,790,791,803,804,805,806,807,808,809,810],expected_v:710,expectedli:681,expectedtoerroror:743,expens:[594,603,605,610,639,659,660,669,673,685,688,691,710,712,72
2,725,741,743,760,780,782],expensive_check:610,experi:[585,605,608,609,697,705,717,721,722,723,748,759,764,773,788,789,805,812],experienc:601,experiment:[590,604,605,667,670,696,712,716,742,748,751,760,764,766],expert:757,expertis:[709,746],expir:[745,757],explain:[0,1,2,3,4,5,6,7,8,9,588,589,590,593,596,605,607,609,610,611,621,639,658,667,668,672,710,714,722,723,742,743,748,757,758,766,778,782,783,785,808,812],explan:[603,605,668,672,679,703,710,722,755],explanatori:[667,731],explicit:[585,590,594,607,608,609,610,631,645,659,666,671,672,673,676,677,679,684,688,703,710,711,722,723,724,725,743,749,750,755,760,781,782,788,806,807,808,809,810,811],explicitli:[585,590,592,593,603,605,607,609,610,611,659,660,663,665,667,672,674,676,677,682,708,710,712,715,716,725,726,727,741,742,743,745,759,760,764,766,768,770,774,776,779,804,811],exploit:[673,750,760,805],explor:[631,663,681,742,743,757,764,783],expon:[589,710],exponenti:[659,684,710,741],exportsymbol:710,expos:[585,590,593,594,601,607,610,616,639,644,659,676,679,710,713,725,726,741,743,760,764,768,782,808],exposesreturnstwic:716,exposit:710,expr0lh:664,expr0rh:664,expr1lh:664,expr1rh:664,expr:[589,611,632,658,665,709,710,748,770,803,806,807,808,809,810],exprast:[791,803,804,805,806,807,808,809,810],express:[20,27,120,123,129,209,212,220,314,370,378,479,482,490,584,588,593,599,607,608,609,610,611,616,625,627,630,632,639,641,644,653,658,659,663,670,672,674,676,677,679,687,709,711,712,714,716,726,743,748,754,759,762,768,775,780,782,785,786,788,802,805,807,808,809,810,811],exprloc:585,exprprec:[803,804,805,806,807,808,809,810],exprproc:585,exprsymbol:[805,806,807,808],ext:[596,710,782],extbinari:[645,646],extend:[585,588,589,594,604,610,611,616,631,641,653,659,666,671,674,676,679,684,703,709,710,726,739,743,745,750,760,763,764,770,771,778,780,788,789,802,803,805,811,812,813],extens:[590,594,604,605,607,609,610,614,616,617,625,634,645,661,667,669,670,672,674,676,678,679,683,693,709,710,717,720,722,741,743,746,748,749,751,75
3,760,761,762,764,765,768,770,778,779,780,803,807,808,814],extent:[667,676,677,710,748],exterior:764,extern:[585,590,596,597,604,605,607,614,619,621,640,644,654,660,667,674,684,694,695,697,700,709,710,712,713,715,719,722,724,726,729,740,743,747,748,753,754,756,760,764,766,770,777,779,780,781,782,802,803,804,805,806,807,808,809,810,811],extern_weak:[597,710],external_symbol:709,externallinkag:[804,805,806,807,808,809,810],externally_initi:[597,710],externallyiniti:710,externalstorag:659,extra:[585,590,597,599,605,606,609,610,644,659,668,669,674,675,679,700,701,704,709,710,712,714,716,720,722,727,735,741,743,745,748,753,754,755,758,760,764,766,768,773],extra_sourc:606,extract:[607,611,615,621,637,641,664,669,681,684,688,689,743,759,760,768,769,770,771,775,780,783],extract_subreg:716,extract_subvector:607,extractel:596,extractloop:741,extrem:[593,594,597,601,607,667,673,679,695,707,710,712,743,759,766,780,782,790,808,813],exynosm3model:780,f00ba:682,f107:724,f108:724,f110:724,f128:[710,780],f16:[0,4,9,19,52,238,315,320,325,337,369,401,514,586,587,588,589,710],f16x2:[0,1,3,4,7,8,9,587],f16x4:[8,9],f1f2:722,f2_1:780,f2_2:780,f31:[710,780],f32:[0,1,2,3,4,7,8,9,19,53,143,239,315,320,325,337,369,402,515,586,588,589,607,660,683,705,710,724,780],f32x16:[8,9],f32x2:[0,2],f32x32:[8,9],f32x4:[8,9],f3_12:780,f3_1:780,f3_2:780,f3_3:780,f4rc:607,f64:[0,2,9,588,589,607,710,724,780],f64x2:[0,2],f64x4:9,f80:710,f88:724,f89:724,f90:724,f91:724,f92:724,f934:780,f93:724,f94:724,f95:724,f96:724,f97:724,f98:724,f99:724,fab:779,face:[610,674,758,760,765,768,782],facil:[585,593,610,616,659,672,676,688,719,744,745,746,759,765,769,788],facilit:[604,610,667,710,743,744,750,759,786],fact0:722,fact1:722,fact:[585,593,602,607,609,610,639,659,660,663,667,669,672,674,677,679,703,710,711,721,726,743,750,755,760,763,770,782,790,805,807,808,811],factor:[585,607,610,667,710,733,743,750,756,757,770,771,776,779],factori:[610,688,741,743],fadd:[591,607,670,683,724,780,804,805,806,808],fail:[590,594,602,605
,610,611,612,616,619,621,645,658,659,663,667,669,673,679,681,682,683,688,695,697,704,709,710,723,726,742,743,745,753,756,757,759,760,763,765,766,770,774,775,779,788,789,804],failedisel:689,failmateri:789,failtkind:673,failur:[585,601,602,609,611,612,616,640,645,662,667,679,681,693,696,697,704,709,710,712,723,726,728,743,745,751,753,754,756,759,760,774,775,779,782,786],failure_ord:594,fairli:[593,594,609,666,672,674,683,695,704,723,727,743,756,780,803,806,807],faith:669,fake:619,falkormodel:780,falkorwr_1vxvy_2cyc:780,falkorwr_2vxvy_2cyc:780,fall:[594,598,610,611,650,654,659,663,676,688,695,710,714,726,741,743,757,760,764,766,780,805,806,807,808,809,810],fallback:[597,654,676,688,693,710,756,760,776],fallible_iter:743,falliblechilditer:743,fallthrough:[604,710,760],fals:[590,593,599,601,610,611,612,616,619,625,631,645,654,658,659,663,664,672,679,684,695,701,703,705,707,710,712,713,714,716,722,728,731,741,743,756,759,763,768,769,770,780,782,783,790,804,805,806,807,808,809,810],false_branch_weight:599,falsebr:759,falsedest:710,fam:725,famili:[590,608,658,709,710,743,759,770,811],familiar:[585,590,607,609,644,660,662,669,674,679,683,703,709,722,724,782,788,806,810],fan:699,fanci:[723,774],fancier:[717,782],fancyaa:782,fancyobject:760,faq:[667,710,811],far16:730,far32:730,far:[585,593,606,659,670,709,710,719,730,731,735,738,743,746,757,759,760,782,784,789,790,803,804,807,808,811],farawai:610,farm:616,farpoint:738,farpointer32:738,fashion:[609,670,671,710,746,760,780],fast:[607,610,617,658,660,674,682,689,691,693,697,709,712,717,720,722,723,739,743,745,756,759,760,761,768,780,782,808,812],fast_filter_branch:745,fast_path:710,fastcal:607,fastcc:[597,607,663,672,710],fastemit:768,faster:[585,601,603,610,621,645,660,669,675,679,689,696,697,710,712,721,722,723,725,743,756,759,760],fastest:[681,696,723],fastisel:710,fastiselshouldignor:770,fastlink:736,fastmath:724,fat:[590,637,660],fatal:[658,756,769],fatbin:660,fault:[590,596,609,612,642,667,669,695,710,751,760],faultingload
:673,faultingloadstor:673,faultingpcoffset:673,faultingstor:673,faultkind:673,faultmap:[710,751],favor:[609,610,667,705,736,760,770,771],favorit:[711,723,746,806],fbb:780,fca:711,fcc_g:780,fcc_u:780,fcc_ug:780,fcc_val:780,fcf:710,fcmp:[804,806,808],fcmpinst:743,fco:780,fcomi:607,fconst:[346,408,409,414,415,416,417],fcontext:810,fcoverag:[625,664],fcuda:660,fcur:722,fde:[585,590,607,711],fdebug:[614,654],fdiv:749,fdr:783,fear:667,feasibl:[667,710,759,760,784],featur:[0,1,2,3,4,5,6,7,8,9,585,588,589,602,603,604,605,606,609,610,616,621,659,663,664,665,667,671,675,678,679,685,686,695,702,703,707,708,710,713,716,719,722,723,728,735,741,742,743,744,745,753,756,759,764,765,768,769,770,771,780,782,784,785,786,788,803,805,806,807,808,809,811],featurea:786,featureb:786,featurec:786,featurefparmv8:771,featurev8deprec:780,featurev9:780,featurevi:780,februari:748,fed:[607,750],fedora:668,fee:719,feed:[592,593,710,712,726,745,759,806],feedback:[585,662,667,696,702,710,711,723,748,754,755,782,789,790,791],feel:[585,601,602,608,609,610,662,667,674,710,719,723,752,755,757,781,784,802,804,811],feh_asynch:710,femul:710,fenc:[590,594,639,674,695,721],fermi:607,ferrant:666,fesetround:710,fetch:[585,588,589,590,599,607,639,687,710,745,756,782],fetchshaderptr:590,few:[592,594,597,601,603,605,606,607,608,610,611,621,639,657,660,667,668,669,675,676,682,683,688,699,700,702,710,714,723,724,725,741,743,744,745,746,759,760,764,765,769,770,775,779,780,782,784,785,790,803,804,805,807,810,811],fewer:[590,667,676,741,748,759,760,764],fewerelementsfor:688,fewerelementsif:688,fexist:722,ffast:[660,779],ffi:[605,672],ffi_include_dir:605,ffi_library_dir:605,ffile:625,ffmpeg:712,ffp:660,fgr:683,fib:[741,802,806,808,810],fibi:808,fibonacci:[681,802],fiddl:[701,717,743,761],fidel:759,field1:611,field2:611,field3:611,field4:611,field:[585,590,593,597,600,606,607,611,616,639,640,641,644,650,653,662,664,674,676,677,698,702,707,710,721,730,731,735,736,739,742,743,754,756,759,760,762,764,768,780,785,786,803,81
1],field_offset:611,fieldn:611,fieldti:769,fifth:[663,710,724,780],figur:[596,601,607,659,666,704,710,721,727,743,757,774,775,780,782],fiji:590,file1:[590,638],file2:590,file:[585,590,591,592,599,601,602,603,605,606,612,613,615,617,618,619,621,622,623,624,625,626,628,629,630,631,632,634,635,636,637,638,639,641,643,645,646,648,649,650,652,653,654,655,656,657,658,660,661,662,665,667,668,670,671,672,675,678,681,682,683,694,696,698,699,701,702,704,705,707,709,710,711,712,715,717,718,721,722,723,724,726,729,730,733,734,736,738,742,743,744,745,746,748,751,752,753,757,760,761,764,765,766,768,769,771,773,774,777,779,780,781,782,784,785,786,788,789,802,803,804,805,806,807,808,809,810,812,814],file_nam:641,file_path:590,file_uri:590,filecheck:[605,612,615,644,689,705,716,759,775,781],filecheck_opt:611,filedesc:709,fileinfosubstream:731,fileit:659,filelist:635,filemag:735,filenam:[605,610,611,612,613,614,617,618,619,621,622,623,624,625,629,630,631,632,635,636,637,638,639,640,641,642,645,648,649,650,651,654,655,656,657,658,659,660,664,671,705,743,754,759,761,771,773,783,809,810],filename0:664,filename1:664,filenameindex0:664,filenameindex1:664,filenameoffset:731,filenotfound:743,filenumb:671,fileorerr:743,filepo:659,files:651,filesizepars:659,filesystem:[616,668,701,809],filetyp:[617,759,809],fill:[588,589,590,593,600,610,631,659,660,668,696,702,704,709,710,723,742,743,756,759,760,764,770,782,784,785,786,802,803,804,805,806,807,808,809,810],filt:[625,626],filter:[68,159,256,420,532,590,605,611,616,625,631,640,645,658,667,707,710,741,754,770,773,775,784],filterclass:[707,768,769],finalize_statu:783,finalizeasync:709,finalizecontinu:709,finalizememori:715,finalizeobject:[715,810],financi:745,find:[585,590,592,593,600,601,602,603,605,607,609,610,611,612,614,616,625,628,630,644,658,659,664,667,668,669,670,677,678,679,682,683,684,689,695,697,698,700,701,704,709,710,712,713,714,717,721,722,726,736,744,745,751,753,755,757,759,760,761,762,764,765,770,771,773,774,780,782,783,784,786,787
,788,790,791,803,804,805,806,807,808,809,810],find_a:743,find_packag:605,find_program:605,findanalysispass:784,finder:751,findings_dir:712,findlead:743,findsectionbynam:709,findsymbol:[790,805],fine:[590,596,600,605,609,610,644,659,674,677,679,681,682,703,710,723,725,726,745,765,785,808],finer:593,fini:[590,665],finish:[588,590,592,601,605,607,610,663,665,667,668,669,710,715,717,722,723,725,742,753,761,770,779,782,784,804,805,806,807,808,809,810],finishassembl:676,finit:[607,710,768],finliz:590,fire:[601,610,659,710],firefox:[702,748],firepro:590,first:[20,27,85,117,123,129,176,205,212,220,271,310,352,370,378,436,482,490,549,580,585,587,588,589,590,593,594,596,597,599,601,602,603,605,606,607,609,610,611,614,616,619,621,625,629,630,631,632,637,639,641,642,654,657,659,663,664,666,667,668,669,671,672,676,679,681,682,683,684,687,688,692,696,698,699,702,703,704,705,707,708,709,711,712,713,714,715,716,717,718,721,722,723,724,725,726,733,735,736,738,741,742,743,744,745,747,748,749,750,754,755,757,759,760,761,762,763,764,765,766,768,770,771,773,774,775,776,780,781,782,783,784,785,786,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,813],first_wavefront:590,firstcondit:743,firstlett:665,firstli:710,fish:726,fit:[590,596,607,610,659,660,667,677,684,700,710,723,725,735,741,742,743,756,759,766,785,789,806],five:[585,594,607,659,666,669,683,712,721,743,757,769,770],fix:[585,586,587,594,598,603,610,611,616,659,663,665,667,677,695,698,702,704,709,711,712,717,720,723,725,726,727,730,731,736,739,745,746,748,753,754,756,757,758,760,761,762,764,766,768,770,771,776,780,782,783,786,790,803,804,805,808,811],fixabl:674,fixedstack:716,fixedt:759,fixit:723,fixm:[611,668,755,774,780],fixup:[709,731],flag1:[712,722],flag2:[712,722],flag:[588,589,590,592,602,604,605,606,607,611,612,624,625,631,637,639,641,643,645,649,653,654,663,665,667,668,671,674,675,676,679,681,682,684,694,697,699,700,701,704,705,707,709,712,720,722,723,724,725,729,730,731,735,
736,743,749,753,754,756,759,762,764,775,780,782,783,784,786],flagflat:786,flaghollow:786,flagpointi:786,flagprototyp:810,flaground:786,flagscpu1:786,flagscpu2:786,flagscpumask:786,flagsfeaturea:786,flagsfeatureb:786,flagsfeaturec:786,flagsflat:786,flagshollow:786,flagspointi:786,flagsround:786,flakypass:616,flame:783,flamegraph:784,flang:[678,682,773],flat32:730,flat:[31,32,81,82,83,110,111,174,199,200,269,304,305,341,351,355,381,382,432,433,434,467,468,494,495,545,546,547,574,575,589,614,730,786],flat_atom:590,flat_atomic_add:[0,2,3,4,9],flat_atomic_add_f64:9,flat_atomic_add_x2:[0,2,3,4,9],flat_atomic_and:[0,2,3,4,9],flat_atomic_and_x2:[0,2,3,4,9],flat_atomic_cmpswap:[0,2,3,4,9,590],flat_atomic_cmpswap_x2:[0,2,3,4,9],flat_atomic_dec:[0,2,3,4,9],flat_atomic_dec_x2:[0,2,3,4,9],flat_atomic_fcmpswap:[0,2],flat_atomic_fcmpswap_x2:[0,2],flat_atomic_fmax:[0,2],flat_atomic_fmax_x2:[0,2,590],flat_atomic_fmin:[0,2],flat_atomic_fmin_x2:[0,2],flat_atomic_inc:[0,2,3,4,9],flat_atomic_inc_x2:[0,2,3,4,9],flat_atomic_max_f64:9,flat_atomic_min_f64:9,flat_atomic_or:[0,2,3,4,9],flat_atomic_or_x2:[0,2,3,4,9],flat_atomic_smax:[0,2,3,4,9],flat_atomic_smax_x2:[0,2,3,4,9],flat_atomic_smin:[0,2,3,4,9],flat_atomic_smin_x2:[0,2,3,4,9],flat_atomic_sub:[0,2,3,4,9],flat_atomic_sub_x2:[0,2,3,4,9],flat_atomic_swap:[0,2,3,4,9,590],flat_atomic_swap_x2:[0,2,3,4,9],flat_atomic_umax:[0,2,3,4,9],flat_atomic_umax_x2:[0,2,3,4,9],flat_atomic_umin:[0,2,3,4,9],flat_atomic_umin_x2:[0,2,3,4,9],flat_atomic_xor:[0,2,3,4,9],flat_atomic_xor_x2:[0,2,3,4,9],flat_load:590,flat_load_dword:[0,2,3,4,9,590],flat_load_dwordx2:[0,2,3,4,9],flat_load_dwordx3:[0,2,3,4,9],flat_load_dwordx4:[0,2,3,4,9,587],flat_load_sbyt:[0,2,3,4,9],flat_load_sbyte_d16:[0,4,9],flat_load_sbyte_d16_hi:[0,4,9],flat_load_short_d16:[0,4,9],flat_load_short_d16_hi:[0,4,9],flat_load_sshort:[0,2,3,4,9],flat_load_ubyt:[0,2,3,4,9],flat_load_ubyte_d16:[0,4,9],flat_load_ubyte_d16_hi:[0,4,9],flat_load_ushort:[0,2,3,4,9],flat_scratch:[35,133,134,136,139,140,1
44,145,146,147,148,149,151,152,153,154,155,156,160,161,162,163,164,165,166,167,168,169,170,224,226,227,229,231,234,235,236,240,241,243,244,245,246,248,249,250,251,252,253,257,258,259,260,261,262,263,264,265,317,318,322,323,328,329,330,331,332,341,342,343,344,345,347,348,381,382,383,385,386,387,389,390,392,394,397,398,399,403,404,405,406,407,410,411,412,413,421,422,423,424,425,426,427,428,429,494,495,496,498,499,500,502,503,505,507,510,511,512,516,517,518,519,520,521,522,523,524,526,527,528,529,533,534,535,536,537,538,539,540,541,590],flat_scratch_hi:[20,589,590],flat_scratch_lo:[20,589,590],flat_stor:590,flat_store_byt:[0,2,3,4,9],flat_store_byte_d16_hi:[0,4,9],flat_store_dword:[0,2,3,4,9,590],flat_store_dwordx2:[0,2,3,4,9],flat_store_dwordx3:[0,2,3,4,9,590],flat_store_dwordx4:[0,2,3,4,9],flat_store_short:[0,2,3,4,9],flat_store_short_d16_hi:[0,4,9],flatten:[607,710,779],flavor:[699,710,743,759,771],flavour:689,flaw:[683,748,760],fldcw:607,fldcww:607,fleet:668,flegaci:725,flexibl:[607,615,616,659,676,688,689,703,709,710,721,743,770,771,775,782,807],flight:[639,669,710],flip:710,float128:738,float16:738,float32:738,float32partialprecis:738,float48:738,float64:738,float80:738,float_denorm_mode_16_64:590,float_denorm_mode_32:590,float_denorm_mode_flush_dst:590,float_denorm_mode_flush_non:590,float_denorm_mode_flush_src:590,float_denorm_mode_flush_src_dst:590,float_mod:590,float_round_mode_16_64:590,float_round_mode_32:590,float_round_mode_minus_infin:590,float_round_mode_near_even:590,float_round_mode_plus_infin:590,float_round_mode_zero:590,floatscalarti:710,floattyp:743,floor:779,floorf:779,flow:[590,593,601,607,610,628,660,663,666,667,669,673,674,678,710,711,713,714,715,717,721,724,725,741,743,750,751,759,760,764,777,778,779,780,782,803,804,805,807,808,810,811,812,813],floyd:747,flt_round:710,flto:[605,694,713,728],flush:[590,610,639,660,697,710,715,724,783,785,809],flush_statu:783,flux:695,flwend:747,fly:[607,733,788,803,805],fma3d:747,fma:[607,660,705,724,779],fmad
d:607,fmax:684,fmaximum:684,fmaxnum:684,fmaxnum_iee:684,fmin:684,fminimum:684,fminnum:684,fminnum_iee:684,fmod:710,fmt:[2,3,4,9,611],fmt_align:743,fmt_pad:743,fmt_repeat:743,fmtspec:611,fmul:[607,724,804,805],fmuladd:779,fnaddr:663,fnast:[791,804,805,806,807,808,809,810],fnentri:597,fnf:743,fnir:[804,805,806,807,808,809,810],fnloc:810,fnname:[803,804,805,806,807,808,809,810],fno:[592,779],fnptrval:710,fnstart:607,fntree:722,fnty:710,focu:[607,608,686,689,713,717,726,761,779,788,812],focus:[585,590,607,628,676,709,726,743,746,760,764,775,780,789,812],fold:[593,594,658,670,673,674,685,686,705,710,741,743,751,759,762,770,782,804],foldabl:685,folder:[605,668,710,761,805],foldgenregform:770,foldingsetnod:743,foldingsetnodeid:743,foldl:770,foldmemoryoperand:[607,780],folk:[667,712,742],follow:[20,24,27,112,120,123,127,129,209,212,213,217,220,306,314,370,371,375,378,479,482,483,487,490,576,584,585,586,587,588,589,590,591,592,593,594,596,597,600,601,602,603,605,606,607,609,610,611,612,615,616,621,622,623,625,627,630,631,633,634,635,637,639,640,641,642,644,645,646,649,653,654,659,660,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,678,679,682,684,686,688,689,693,694,695,696,699,700,701,702,703,704,705,707,709,710,712,713,714,716,717,718,719,720,723,724,725,726,727,728,729,730,731,733,734,735,736,738,739,740,741,742,743,744,745,746,747,748,750,753,754,755,756,757,759,760,761,762,764,765,766,768,769,770,771,774,775,776,777,779,780,781,782,783,784,785,786,787,788,790,802,803,806,807,808,810],followup:776,followup_al:776,followup_coincid:776,followup_epilogu:776,followup_fallback:776,followup_inn:776,followup_out:776,followup_remaind:776,followup_remainder_inn:776,followup_remainder_out:776,followup_sequenti:776,followup_unrol:776,followup_vector:776,fom:741,fomit:607,foo1:[694,710,713],foo2:[694,713],foo3:[694,713],foo4:[694,713],foo:[593,596,598,599,605,606,607,610,611,626,627,639,645,652,654,659,660,663,664,669,671,672,676,677,689,703,704,705,708,709,710,712,716,7
21,723,724,725,726,730,743,754,759,764,768,770,775,777,779,781,786,803,804,805,806],foo_bar:627,foo_bodi:726,foo_ctor:708,foo_dtor:708,foo_in_a_cc:599,foo_test:775,foo_var:606,fooanalysi:725,fooanalysisresult:725,foobar:[610,716,786],food:748,foof:660,foolproof:782,foomodulepass:725,foopass:725,footnot:[683,688,714],footprint:[683,756],fopen:746,for_each:610,forbidden:765,forc:[585,587,588,590,592,605,606,610,619,639,644,659,660,669,671,679,701,708,710,725,726,738,743,756,759,760,762,764,768,776,779,782,786],force_off:605,force_on:605,forcedisassembl:770,forcefulli:677,forcibl:710,fore:776,foreach:[606,771],foreachiter:770,foreign:[605,672],forens:615,forese:667,forest:714,forev:719,forexpr:[806,807,808,809,810],forexprast:[806,807,808,809,810],forget:[608,610,676,682,703,742,743,810],forgiv:722,forgotten:809,fork:[745,748,750,760,765],form:[588,590,592,593,597,604,605,606,610,611,612,626,627,630,642,645,654,659,666,667,669,671,674,675,676,677,678,679,684,688,695,699,702,703,704,705,707,709,710,711,712,713,715,719,721,722,724,731,738,742,743,748,750,751,754,755,757,759,760,762,764,768,769,770,771,773,774,775,778,780,782,783,785,786,789,802,803,804,805,806,808,809,811,812],formal:[585,590,630,638,660,710,714,722,743,755],format:[66,96,97,157,175,186,187,254,270,286,287,294,295,296,309,418,435,447,448,530,548,560,561,587,588,589,596,604,611,613,614,617,618,619,621,623,625,626,628,631,639,640,642,644,645,646,648,650,652,653,655,656,657,658,661,662,665,667,670,671,672,673,676,679,681,698,707,709,710,712,715,717,719,724,726,729,730,731,734,736,738,741,742,745,748,750,751,754,757,768,769,770,771,774,775,778,780,783,784,786,810],format_int_custom:743,format_provid:743,formatadapt:743,formatstr:590,formatt:710,formattedfil:743,formatv:769,formbit:[770,771],former:[594,607,667,674,688,710,716,759,764,777,803],formerli:[709,743],formfe:770,formmask:780,formul:[710,765],formula:590,forth:[585,590,594,596,683,727,770],fortran:[585,710,747,773],fortun:[743,805,806,808,809,811],f
orum:[608,667,678,710],forward:[585,590,593,594,597,607,610,639,660,667,669,670,676,695,705,706,710,725,739,741,743,745,757,759,764,765,770,783,803,806],fosdem17:692,foster:[667,745,765],fotonik3d_:747,fotonik3d_r:747,found:[0,1,2,3,4,5,6,7,8,9,587,589,590,593,595,601,605,607,611,612,614,616,621,623,630,631,635,636,642,654,659,660,667,668,669,675,676,678,679,681,683,684,693,698,700,701,702,704,705,707,709,710,712,713,716,717,719,721,722,724,726,727,728,731,739,741,743,744,747,749,752,753,759,760,764,765,768,770,773,775,779,780,782,783,786,804,805,807,808,809,810],foundat:[585,590,608,667,745,755,782],foundfoo:610,four:[84,175,270,435,548,588,590,593,594,596,597,603,607,616,639,652,659,677,689,690,699,710,724,726,769,770,780,804,805],fourinarow:774,fourteen:757,fourth:[663,677,710,759,762,780,782],foz:654,fp0:[770,771],fp128:[597,710,722],fp16:[590,658,752],fp16_ovfl:590,fp1:[770,771],fp2:[770,771],fp3:[770,771],fp4:[770,771],fp5:[770,771],fp6:[770,771],fp_to_sint:780,fpexcept:710,fpform:[770,771],fpformat:[770,771],fpformbit:771,fpga:[585,607],fpic:[605,756],fpimmleaf:686,fpinst:771,fpm1:735,fpm2:735,fpm:[644,725,735,789,790,791],fpmad:759,fpo:[731,739],fpo_data:731,fppassmanag:784,fpr:683,fpreg:780,fpregsclass:780,fpregsregclass:780,fpregsregisterclass:780,fprintf:[803,804,805,806,807,808,809,810],fprofil:[605,625,664],fptr:810,fptype:710,fpu:[701,710],fputc:[805,806,807,808,809,810],fqa:610,fra:607,fraction:[589,598,703,707,710,712],fragil:[611,745],fragment1:627,fragment2:627,fragment:[607,627,675,676,710,759,764,770,774],fragmentkind:627,frame:[617,620,630,642,648,649,654,665,674,676,684,708,709,710,711,715,716,731,741,743,758,759,760,762,764,780,785],frameindex:780,frameinfo:[716,780],frameless:607,framemap:676,frameptr:663,frames:676,framework:[593,604,607,670,676,692,693,695,710,741,743,759,774,781,782,787,804,807,808],francesco:741,frb:607,frc:607,free:[590,593,594,605,607,608,609,610,639,644,661,662,667,674,676,686,691,709,710,711,712,719,726,742,743,745,75
2,755,756,760,764,768,775,781,782,784,785,790,802,804,805,806,807,808,811],freea:708,freebench:774,freeblockmapblock:735,freebsd5:607,freebsd:[595,607,641,679,757,783],freed:[585,590,593,663,710,720,726,743,758],freedom:[585,602,683,710,743],freeli:[594,667,710,770],freelist:756,freescal:661,freestand:710,freetyp:712,freez:[702,745],frequenc:[595,599,678,710,751,784,785],frequent:[587,590,610,631,659,667,676,677,679,681,710,741,743,745,751,764,765,771,789,790,791,810],fresh:[710,745,766],frexp:710,friend:[672,710,743],friendli:[608,630,667,718,741,745,783,788,803],frighten:712,from:[14,24,79,80,84,98,99,104,105,106,107,113,114,115,117,127,172,173,175,188,194,195,196,202,203,204,205,217,267,268,270,288,289,294,295,296,297,298,299,300,301,306,307,308,309,310,333,350,375,430,431,435,449,450,455,456,457,469,470,471,472,487,543,544,548,562,563,568,569,570,571,577,578,579,580,585,586,587,588,589,591,592,593,594,596,597,599,600,601,602,603,604,605,606,608,609,610,611,612,614,615,616,617,620,621,622,623,624,625,626,627,628,629,630,631,633,634,635,637,639,640,641,642,643,644,645,646,648,649,650,652,653,654,657,658,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,678,680,681,683,684,685,686,688,689,690,691,693,694,695,696,698,699,700,702,703,704,705,707,708,709,711,712,713,714,715,716,719,720,721,722,723,724,725,727,728,729,731,734,736,738,739,740,744,745,746,748,749,750,752,753,754,755,756,757,758,759,761,762,763,764,765,766,768,769,770,771,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,802,803,804,805,806,807,808,809,810,811,813],front:[607,609,610,641,667,669,676,679,681,705,709,710,728,741,743,744,754,757,766,771,778,789,790,805,806,807,808,810,811,812],frontend:[590,594,628,631,639,658,663,664,669,671,672,673,676,679,680,708,710,725,747,754,759,764,775,776,792,793,794,795,796,797,798,799,800,801,809,813],frontier:[590,714,808],frown:667,frozen:[665,710],frt:607,frustrat:608,fsanit:[604,695,704,712,720,756],fsave:779,fsin:[617,780],
fsjlj:698,fstream:724,fstrict:710,fsub:808,ftest:625,fthinlto:710,ftl:[710,762],ftoi:780,ftp:679,ftz:724,fuchsia:756,fucomip:607,fucompi:607,fudg:701,fulfil:[700,703,765],full:[585,588,590,593,594,603,605,606,607,610,611,616,621,624,639,640,642,644,654,658,660,665,667,672,679,699,701,702,703,708,709,712,716,717,722,724,726,729,731,741,742,743,744,745,748,759,760,761,764,768,775,776,777,784,786,802,811],full_corpus_dir:712,full_nam:757,fulldebug:[705,710,759],fuller:785,fullest:610,fulli:[585,590,592,597,598,604,605,606,607,619,639,660,663,675,677,679,681,684,686,709,710,726,727,729,741,750,757,759,760,764,771,776,779,788,789,803,807,808,811],fun:[671,783,784,811,812],func1:590,func:[632,644,664,710,716,741,743,782,784],func_typ:710,funcflag:710,funcid:784,funclet:663,funcresolv:743,function1:590,function_entry_arg:785,function_entry_count:599,function_id:785,function_old:597,function_other_typ:785,function_pass:781,function_to_mock:709,function_to_test:709,functionaddress:673,functionanalysismanag:[725,781],functionanalysismanagercgsccproxi:725,functionanalysismanagermoduleproxi:725,functionast:[791,803,804,805,806,807,808,809,810],functioncalle:743,functiondecl:610,functionend:671,functionfaultinfo:673,functionfoopass:725,functionid:671,functionindex:706,functioninfo:673,functionlisttyp:743,functionnam:[654,676,709,710],functionnod:722,functionpass1:725,functionpass2:725,functionpass:[593,728,741,743,780],functionpassctor:782,functionpassmanag:[725,789,790,791,805,806,807,808],functionproto:[805,806,807,808,809,810],functionptr:743,functionstart:671,functiontyp:[743,804,805,806,807,808,809,810],fundament:[585,606,607,610,659,669,674,677,706,722,743,760,764,780,782],funnel:710,funni:808,furi:590,further:[585,594,597,600,602,603,605,609,610,611,616,631,658,663,667,669,674,676,677,683,688,689,695,700,702,709,710,712,721,722,728,731,733,735,743,748,756,759,760,766,773,776,780,785,788,790,805],furthermor:[585,600,602,611,667,677,683,695,705,710,714,741,764,775],furyx:59
0,fuse:[605,619,660,684,694,698,700,710,741],fusion:710,futur:[585,590,597,604,605,609,610,639,667,675,676,677,679,691,692,695,705,706,708,710,712,716,723,725,743,745,753,756,757,759,762,763,764,776,779,781,785,804,805,806],fuzz:[710,746,751,757],fuzz_target:712,fuzzer:751,fuzzer_no_main:712,fuzzercli:675,fuzzi:627,fuzzing_build_mode_unsafe_for_product:712,fuzzinglibc:712,fuzzmut:675,fval:759,fverifi:705,fwd_progress:590,fwdargreg:716,fxrai:[783,784],g_add:[683,685,688,705],g_and:[687,688],g_anyext:688,g_ashr:688,g_bitcast:688,g_block_addr:688,g_build_vector:688,g_build_vector_trunc:688,g_concat_vector:688,g_constant:[683,685,687,688,705],g_extract:688,g_fconstant:[683,685],g_fmul:683,g_fneg:683,g_foo:688,g_fpext:688,g_frame_index:688,g_icmp:716,g_implicit_def:688,g_inttoptr:688,g_load:[683,688],g_merge_valu:688,g_mul:688,g_or:688,g_phi:688,g_ptrtoint:688,g_sext:688,g_sext_inreg:683,g_shl:688,g_store:688,g_sub:[688,705],g_trunc:688,g_unmerge_valu:688,g_vecreduce_add:684,g_vecreduce_and:684,g_vecreduce_mul:684,g_vecreduce_or:684,g_vecreduce_smax:684,g_vecreduce_smin:684,g_vecreduce_umax:684,g_vecreduce_umin:684,g_vecreduce_xor:684,g_xor:[683,688],gabi:661,gadget:760,gain:[600,659,667,695,710,714,755],galina:[667,696],game:667,gap:[585,639,664,710,743],gapend:671,gapstart:671,garbag:[597,672,711,738,762,812],gas:671,gather4:588,gather:[590,659,667,669,699,722,743,744,748,750,768,775,785],gave:690,gazonk:759,gc_transit:710,gc_transition_end:764,gc_transition_start:764,gcal:678,gcc:[585,594,606,607,610,612,625,645,646,659,660,661,667,668,669,678,679,694,695,700,701,704,710,721,723,741,743,759,773,774,779,780,783],gcc_version:679,gcda:625,gcfunctioninfo:676,gcfunctionmetadata:[676,762],gcmetadaprint:676,gcmetadata:676,gcmetadataprinterregistri:676,gcmetataprint:764,gcn3:590,gcn:590,gcname:597,gcno:625,gcodeview:759,gcolumn:779,gcov:645,gcov_prefix:625,gcov_prefix_strip:625,gcregistri:676,gcroot:[676,762,764],gcse:[741,743],gcstrategi:[676,764],gdb:[585,607,630,694,743,75
9,766,778,780],gdbinit:743,gdbregistrationlisten:726,gds:[0,2,3,4,9],ge_missing_jmp_buf:610,ge_missing_sigjmp_buf:610,gear:[603,605],geforc:724,gemb:590,gen:[614,645,658,686,766,768,780,789,810],genaddressmod:769,gender:608,gendfapacket:607,gener:[0,1,2,3,4,5,6,7,8,9,587,589,590,592,593,594,596,597,599,602,604,605,606,608,610,611,612,614,615,617,623,625,630,631,634,638,639,644,645,657,659,660,662,663,664,665,667,670,673,674,677,679,680,681,685,686,688,689,690,691,692,693,695,696,699,700,702,703,705,708,711,712,713,714,718,720,721,722,723,724,725,726,728,729,730,731,735,736,738,739,742,744,745,748,750,751,753,754,755,756,757,758,759,760,762,764,765,769,770,771,773,775,776,777,779,780,782,783,786,788,789,803,805,807,808,809,810,811,812,813],generalis:807,genericdomain:770,genericenum:768,generict:[768,769],genericvalu:665,genregisternam:607,gensubtargetinfo:780,genuin:743,geomean:779,geometr:811,geometri:590,geordi:678,georg:757,gep:[672,684,710,711,722,727,741,751,759,764],gep_index:710,get:[36,37,38,55,56,93,96,97,100,101,145,186,187,189,190,191,241,242,281,282,283,286,287,354,386,387,388,404,405,444,447,448,451,452,499,500,501,517,518,557,560,561,564,565,585,588,590,591,592,593,594,596,600,601,605,606,607,608,609,610,611,615,639,659,660,662,663,664,667,668,670,672,674,676,677,683,685,687,693,694,696,699,700,701,702,703,704,706,711,713,714,715,717,719,721,722,724,725,726,736,738,741,742,743,745,753,754,755,757,759,760,761,764,765,766,768,770,771,774,775,776,778,780,781,782,783,786,787,788,789,790,803,804,805,806,807,808,809,810,811,812],get_atable_decl:768,get_atable_impl:768,get_bvalues_decl:768,get_cenum_decl:768,get_ctable_decl:768,get_ctable_impl:768,get_instrinfo_named_op:780,get_instrinfo_operand_enum:780,get_instrinfo_operand_types_enum:780,get_matcher_implement:768,get_ptr:673,get_reginfo_target_desc:768,get_register_match:768,get_subtarget_feature_nam:768,getactiondefinitionsbuild:688,getaddend:709,getaddress:[726,788,805,806,807,808],getaddressingmod:780,g
etadjustedanalysispoint:593,getalign:709,getalignmentoffset:709,getallderiveddefinit:769,getallocatedtyp:808,getanalysi:687,getanalysisusag:[593,687],getargumentlist:743,getattribut:722,getattributespellinglistindex:768,getbasicblock:607,getbasicblocklist:[743,806,807,808,809,810],getbinarycodeforinstr:780,getbinarypreced:[807,808,809,810],getbit:[659,769],getbitwidth:743,getbuff:786,getcachedresult:725,getcalledfunct:743,getcalleesavedreg:780,getchar:[802,803,804,805,806,807,808,809,810],getcheck:725,getclass:769,getclobberingmemoryaccess:721,getcol:810,getcont:709,getcontext:[726,788,810],getdagop:770,getdata:610,getdatalayout:[726,780,788,789,790,791,805,806,807,808,810],getdatasect:676,getdebugloc:759,getdef:769,getdefaultdatalayoutfortarget:[788,789,790,791],getdefaultresourcetrack:[788,789,790,791],getdefaulttargettripl:809,getdirectori:[759,810],getdirectsuperclass:769,getdoubleti:[804,805,806,807,808,809,810],getehframesect:715,getelementptr:[593,607,611,623,663,664,676,677,708,711,722,724,743,759,764,777,811],getelementptrinst:743,getelementtyp:[727,743],getend:709,getendina:709,getendview:639,getentryblock:[743,808,809,810],getenv:765,getexceptionpointerregist:669,getexceptionselectorregist:669,getexecutionsess:[789,791],getexecutorprocesscontrol:[788,789,790,791],getfil:726,getfilenam:[759,810],getfixupaddress:709,getfoo:710,getforcurrentprocess:[726,788,789,790,791],getfourdatapoint:760,getframeinfo:[607,780],getframeinstruct:716,getframes:676,getfunct:[676,728,743,782,804,805,806,807,808,809,810],getfunctionlist:743,getfunctiontyp:743,getgeneralcategori:659,getglob:780,getgloballist:743,getglobalprefix:[726,788,789,790,791],getgotentrytarget:709,getimm:780,getindex:780,getiniti:743,getinsertblock:[806,807,808,809,810],getinstlist:743,getinstralternativemap:691,getinstrinfo:[607,780],getinstrmap:691,getint32ti:610,getinterfac:791,getjitdylib:791,getjitinfo:[607,780],getjmp_buftyp:610,getkei:743,getkind:[703,709,768],getknownbit:687,getlazycallthroughmana
g:790,getlazyresolverfunct:780,getlin:[759,810],getlinkag:709,getllvmcontext:610,getlock:726,getloopanalysisusag:782,getloopid:776,getloopinfo:782,getmainjitdylib:[726,788,789,790,791,805,806,807,808],getmanag:725,getmbb:780,getmemb:743,getmodul:[726,789],getmodulematchqu:780,getmyobject:760,getnam:[672,709,743,781,782,791,803,804,805,806,807,808,809,810],getnamedoperandidx:780,getnameinitasstr:769,getnexttoken:[803,804,805,806,807,808,809,810],getnod:[670,780],getnullvalu:[743,806,807,808,809,810],getnumel:743,getnumoperand:743,getnumparam:743,getnumsuccessor:610,getnumvirtreg:607,getobjfilelow:676,getoffset:[709,780],getop:770,getopcod:[743,780],getoperand:[607,610,687,743,780],getoperationnam:670,getoperatornam:[807,808,809,810],getopt:659,getorcreatefoo:743,getorcreatetypearrai:810,getordin:709,getorinsertfunct:743,getpar:[743,806,807,808,809,810],getparamtyp:743,getpoint:710,getpointerelementtyp:727,getpointers:[676,709],getpointertofunct:[715,743,810],getpointertonamedfunct:715,getpostinstrinfoview:639,getpredopcod:707,getprocaddress:805,getprocesstripl:810,getprotectionflag:709,getproto:791,getrawpoint:706,getrawsubclassoptionaldata:722,getreg:[607,687,780],getregclass:607,getreginfo:607,getregisterinfo:[607,780],getreservedreg:780,getresult:725,getreturntyp:743,getscev:714,getscevatscop:714,getschedclass:780,getscop:709,getsect:709,getsextvalu:743,getsigjmp_buftyp:610,getsiz:709,getsourc:679,getsrc:679,getstart:709,getstartview:639,getsubtarget:780,getsubtargetimpl:[780,784],getsuperclass:769,getsymboladdress:674,getsymbolnam:780,getsymbolt:743,gettarget:709,gettargetlow:780,gettargetmachin:805,gettargetmemori:709,gettargettripl:[709,788,789,790,791],getter:[669,710,759,768],gettermin:743,getthesparctarget:780,gettingstart:700,gettok:[802,803,804,805,806,807,808,809,810],gettoknam:810,gettokpreced:[803,804,805,806,807,808,809,810],gettraget:709,gettyp:[610,743],gettypenam:743,getunqu:743,getvalu:[610,743,769],getvalueaslistofint:769,getvalueasoptionaldef:769
,getvalueid:722,getvalueinit:769,getvaluetyp:780,getvaluevt:685,getview:639,getworkingmemori:709,getx86regnum:780,getzextvalu:743,gfx1010:590,gfx1011:590,gfx1012:590,gfx1013:590,gfx1030:590,gfx1031:590,gfx1032:590,gfx1033:590,gfx1034:590,gfx1035:590,gfx10:[1,587,588,589],gfx600:590,gfx601:590,gfx602:590,gfx700:590,gfx701:590,gfx702:590,gfx703:590,gfx704:590,gfx705:590,gfx7:[587,588,589,590],gfx801:590,gfx802:590,gfx803:590,gfx805:590,gfx810:590,gfx8:[279,280,288,291,294,295,296,306,308,587,588,589,590],gfx900:590,gfx902:590,gfx904:590,gfx906:590,gfx908:[588,590],gfx909:590,gfx90a:[588,589],gfx90c:590,gfx9:[5,6,7,8,587,588,589],gfx:590,gfxip:590,ghash:649,ghc:[607,710],ghostli:743,giant:745,gicombin:658,gicomplexoperandmatch:686,gicomplexpatternequiv:686,gid:[621,635,641,653],gif:712,gigabyt:760,gil:750,ginodeequiv:686,gird:590,gisel:[658,675],giselknownbit:687,giselknownbitsanalysi:687,gist:679,git:[605,662,667,668,674,678,681,694,702,704,723,740,742,752,766,773,774],gitattribut:679,github:[590,595,605,667,678,679,681,702,710,723,739,742,743,747,748,757,773,774,783,784,813],gitlab:[745,747],give:[585,590,592,593,597,601,607,609,610,639,659,660,664,667,674,679,683,696,699,701,709,710,717,718,721,722,724,725,726,731,741,742,743,748,754,759,760,761,764,768,769,776,778,780,782,783,784,786,789,790,802,804,805,807,809,810,811,812],given:[585,590,593,594,597,598,599,600,602,605,606,607,609,610,611,614,616,621,623,625,628,631,639,644,645,646,654,657,658,659,660,663,666,669,670,671,672,674,676,679,681,683,684,688,691,693,702,703,709,710,714,715,721,725,726,729,735,738,741,743,745,748,750,754,756,757,759,760,762,764,765,768,769,770,773,777,780,782,783,784,786,788,789,790,791,803,804,805,807],gladli:[679,681],glanc:[610,709,811],glasgow:710,glb2:710,glb3:710,glb:710,glbi:710,glc:[0,2,3,4,9,18,28,36,37,38,93,96,97,100,101,110,111,122,130,186,187,189,190,191,199,200,211,221,281,282,283,286,287,304,305,336,340,354,355,368,379,386,387,388,444,447,448,451,452,467,468,481,491,499,50
0,501,557,560,561,564,565,574,575,586,590],glibc:712,glibcxx_3:679,gline:779,glob:632,global:[31,82,341,351,381,433,494,546,585,588,589,590,593,594,597,602,603,607,609,610,611,616,621,623,628,632,639,640,641,642,644,649,658,659,660,663,664,665,669,672,674,675,676,677,679,684,686,691,692,698,709,711,712,713,724,725,726,728,731,734,736,739,743,744,748,751,756,760,763,764,768,769,770,777,780,781,782,788,802,804,805,806,807,808,809,810,811,812],global_atom:590,global_atomic_add:[0,4,9],global_atomic_add_f32:[8,9],global_atomic_add_f64:9,global_atomic_add_x2:[0,4,9],global_atomic_and:[0,4,9],global_atomic_and_x2:[0,4,9],global_atomic_cmpswap:[0,4,9],global_atomic_cmpswap_x2:[0,4,9],global_atomic_dec:[0,4,9],global_atomic_dec_x2:[0,4,9],global_atomic_fmax:0,global_atomic_fmax_x2:0,global_atomic_fmin:0,global_atomic_fmin_x2:0,global_atomic_inc:[0,4,9],global_atomic_inc_x2:[0,4,9],global_atomic_max_f64:9,global_atomic_min_f64:9,global_atomic_or:[0,4,9],global_atomic_or_x2:[0,4,9],global_atomic_pk_add_f16:[8,9],global_atomic_smax:[0,4,9],global_atomic_smax_x2:[0,4,9],global_atomic_smin:[0,4,9],global_atomic_smin_x2:[0,4,9],global_atomic_sub:[0,4,9],global_atomic_sub_x2:[0,4,9],global_atomic_swap:[0,4,9],global_atomic_swap_x2:[0,4,9],global_atomic_umax:[0,4,9],global_atomic_umax_x2:[0,4,9],global_atomic_umin:[0,4,9],global_atomic_umin_x2:[0,4,9],global_atomic_xor:[0,4,9],global_atomic_xor_x2:[0,4,9],global_begin:[610,743],global_buff:590,global_empti:743,global_end:743,global_iter:743,global_load:590,global_load_dword:[0,4,9],global_load_dwordx2:[0,4,9],global_load_dwordx3:[0,4,9],global_load_dwordx4:[0,4,9],global_load_sbyt:[0,4,9],global_load_sbyte_d16:[0,4,9],global_load_sbyte_d16_hi:[0,4,9],global_load_short_d16:[0,4,9],global_load_short_d16_hi:[0,4,9],global_load_sshort:[0,4,9],global_load_ubyt:[0,4,9],global_load_ubyte_d16:[0,4,9],global_load_ubyte_d16_hi:[0,4,9],global_load_ushort:[0,4,9],global_s:743,global_store_byt:[0,4,9],global_store_byte_d16_hi:[0,4,9],global_sto
re_dword:[0,4,9],global_store_dwordx2:[0,4,9],global_store_dwordx3:[0,4,9],global_store_dwordx4:[0,4,9],global_store_short:[0,4,9],global_store_short_d16_hi:[0,4,9],globalalia:782,globalbuff:590,globaldc:743,globalisel:[607,658,683,684,685,686,687,688,689,692,693,751],globallayoutbuild:777,globallisttyp:743,globalref:734,globalrefss:734,globalsaa:[721,725],globalstreamindex:731,globalt:590,globalvar:597,globalvari:[677,782,811],globalvarnam:710,globl:[589,590,671,724,764],glue:[607,672],gmail:742,gmake:[774,782],gmir:[683,685,688,689],gninja:[773,784],gno:590,gnu:[585,592,605,607,610,616,617,620,621,626,630,631,640,641,642,647,649,650,652,653,654,668,669,671,675,679,694,695,696,697,698,700,710,780,782,809],gnu_debuglink:641,gnu_hash:759,gnueabi:700,gnueabihf:[700,701],gnuwin32:605,goal:[585,600,607,609,610,620,631,639,664,666,667,669,677,693,695,709,713,717,722,723,733,757,760,762,765,768,769,770,779,785,788,808],goat:610,goe:[594,599,605,607,663,667,669,672,674,676,682,699,704,710,712,718,721,723,724,725,743,745,748,759,780,786,805,808,810],gofmt:610,going:[588,593,602,605,606,607,609,610,644,666,672,676,683,689,699,710,723,725,726,739,743,745,752,753,760,763,782,783,805,806,807,808,810,812],goingn:743,gold:[605,607,679,697,728,754,778],goldberg91:676,goldberg:676,golden:610,gone:789,good:[593,600,601,607,609,610,611,612,639,659,660,661,662,667,670,674,675,679,682,689,697,699,702,709,710,711,716,718,719,722,723,724,741,742,743,746,748,752,753,756,759,760,764,765,770,771,780,782,804,805,806,807,808],goodby:[717,761],goodwil:609,googl:[602,660,667,668,742,748,757,760,773,775],googleblog:712,googlegroup:712,googleprojectzero:760,googletest:616,gori:667,got:[590,607,645,659,679,682,709,710,743,810],gotload:709,gottarget:709,gov:747,govern:710,gpg:679,gpgpu:660,gpl:[667,672],gplv2:667,gplv3:[667,694],gpr64:716,gpr:[590,607,669,683,710,716,718,770],gpr_idx:[213,371,483],gprc:607,gpu:[279,280,291,294,295,296,585,587,589,590,607,660,668,710,724,768,778,780],gpucc:660,gpz:7
60,gr16:607,gr1:710,gr32:[607,759,770,771],gr64:[607,716,759],gr64_nosp:759,gr8:[607,780],grab:[743,753],gracefulli:[712,743,766],grad:662,gradient:747,gradual:[683,689],graduat:667,grain:[590,710,723,725,785],grammar:[606,670,770,803,807],grammat:803,grant:[602,667,699,722,788],granular:[590,693,736,760,765],granulated_lds_s:590,granulated_wavefront_sgpr_count:590,granulated_workitem_vgpr_count:590,graph:[590,598,601,604,605,607,630,644,658,669,679,684,709,710,711,714,725,738,750,751,759,769,774,780,782,783,804,805,806,807,808,810],graphic:[590,712,714,807,811],graphviz:[741,743,784],gratitud:739,grayscal:747,grbm:590,great:[585,592,607,610,663,667,674,676,688,699,709,719,743,781,782,807,812],greater:[585,589,590,596,597,599,607,623,625,644,645,676,677,684,710,712,722,724,731,733,738,741,756,770,780],greatest:585,greatli:[605,674,678,679,705,710,743,756,759,764],greedi:[607,617,659,691],green:[667,745,748,766],greenedistinguish:748,greenegitblam:748,greet:770,gregg:784,grei:[689,753],grep:[605,607,611,659,679,697,723,745,774,775,809],grid:590,griddim:724,gridsizei:724,gridsizex:724,gridsizez:724,grok:[672,804],grokabl:610,grosser:750,ground:[608,757],groundwork:749,group:[585,588,590,597,603,607,610,611,621,630,631,635,637,639,648,649,658,664,666,667,678,689,709,719,722,725,729,731,741,745,750,755,765,766,768,770,771,774,780,783,787,803,811],group_segment_align:590,group_segment_fixed_s:590,groupsegmentfixeds:590,grow:[590,607,608,609,659,667,708,710,712,718,743,770,806],grown:[602,743,807,811],growth:[607,710,743,756],grpc:712,grpid:597,grunt:659,gs_op_cut:[27,129,220,378,490],gs_op_emit:[27,129,220,378,490],gs_op_emit_cut:[27,129,220,378,490],gs_op_nop:[27,129,220,378,490],gstess:590,gtcasl:747,gte:710,gtest_filt:723,gtu:724,gtx:724,guarante:[593,594,597,607,610,611,639,659,663,667,669,671,672,674,677,679,684,688,695,709,710,711,721,726,728,736,741,743,755,759,760,762,764,768,769,777,782,808],guaranteedtailcallopt:710,guard:[671,674,712,714,743,756,768,782,788],g
uard_cond:[710,714],guard_condit:710,guardedpoolalloc:695,guelton:757,guess:[605,641,709,712,748,805],gui:[605,681],guid:[585,592,599,601,603,605,607,608,616,646,660,661,662,664,666,667,677,678,679,680,681,697,700,702,710,723,736,742,743,744,748,751,753,759,768,770,771,780,781,787,803,809,813],guidanc:[602,610,662,667,674,731],guidelin:[667,701,743,748,760],gunzip:679,gupta:757,gvn:[594,674,711,782,789,790,791,805,806,807,808],gvneedslazyptr:780,gvnhoist:721,gvreg:[686,688],gwp:[751,756],gwp_asan:695,gwp_asan_opt:695,gwpasanalloc:695,gzip:[679,784],h264ref:773,habit:[610,741],hac:760,hack:[738,742,745,782,810,811,812],hackathon:719,hacker:[662,667],had:[585,590,596,599,609,616,623,639,667,681,683,709,710,711,717,719,724,743,745,750,757,759,761,762,765,782,786],hainan:590,half:[588,590,597,607,610,625,644,684,712,717,743,761,780,789],halfton:747,halfwai:[710,723],halt:[590,709,760],halv:670,han:702,hand:[585,594,607,610,660,665,666,667,676,677,679,682,703,706,709,710,714,716,721,722,723,726,730,741,743,753,755,760,765,768,770,771,780,786,803,804,805,807,812],handi:[726,743,761,804],handili:610,handl:[24,127,175,217,270,375,435,487,548,585,588,589,590,591,594,596,605,611,619,634,659,661,663,667,668,671,674,675,676,679,684,686,688,695,698,703,705,706,708,713,714,716,724,726,728,735,738,745,750,751,755,757,759,760,762,764,765,770,779,780,782,783,788,802,803,804,805,806,807,808,809,810,811],handle_extens:782,handleallerror:743,handleattr:610,handleattrondecl:610,handledefinit:[803,804,805,806,807,808,809,810],handleerror:743,handleextern:[803,804,805,806,807,808,809,810],handlefunctiondecl:610,handlelazycallthrougherror:[790,791],handlelazycompilefailur:726,handleotherdecl:610,handler0:710,handler1:710,handler2:710,handler:[589,594,607,610,658,663,665,669,673,695,710,726,785,788],handlerpcoffset:673,handletoplevelexpress:[803,804,805,806,807,808,809,810],handlevardecl:610,hang:684,hao:750,happen:[585,589,590,591,594,600,601,607,608,609,610,611,612,663,667,673,676,682,683
,684,685,688,689,705,709,710,712,714,715,719,721,722,723,725,726,727,731,736,741,742,743,745,746,748,759,760,762,763,764,766,768,782,786,789,803,804,805,806,810],happi:[723,742,743,744,745,808],happili:805,har:744,harass:608,hard:[601,607,610,667,669,672,674,682,697,700,701,710,738,740,742,744,748,759,765,771,786,807,809],hardcod:[705,707,780],hardcodedcontiguousstorag:743,hardcodedsmalls:743,harddriv:697,harden:[695,710,720,751,757],harder:[610,672,710,712,741,760,764,765,766,773,803],hardwar:[20,123,212,370,482,585,594,605,607,609,619,631,639,660,667,674,711,720,724,756,760,780],hardware_map:590,hardware_stag:590,hardwareassistedaddresssanit:720,hardwareloop:710,harfbuzz:712,harm:[607,610,645],harmless:[699,741],harri:747,harvard:710,has:[288,306,308,585,586,587,588,589,590,591,592,593,594,596,597,598,599,601,602,604,605,606,607,608,609,610,611,612,614,616,621,622,630,631,634,637,639,641,642,644,645,648,649,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,682,683,684,685,687,688,689,691,693,695,699,700,701,702,703,704,705,707,708,709,710,711,712,713,714,715,716,717,718,721,722,723,724,725,726,727,728,729,731,733,734,735,736,738,739,741,742,743,745,746,747,748,749,750,752,754,755,756,757,758,759,760,762,764,765,766,767,768,769,770,771,772,773,775,776,777,779,780,781,782,783,784,785,786,788,789,790,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811],hasactivevectorlength:710,hasadsizeprefix:771,hasard:676,hascompletedecod:770,hasconflictingtyp:731,hasctrldep:[770,771],hasdelayslot:[770,771],haserror:754,hasevex_b:770,hasevex_k:770,hasevex_l2:770,hasevex_rc:770,hasevex_z:770,hasexternallinkag:743,hasextradefregallocreq:770,hasextrasrcregallocreq:770,hasfp:780,hasfparmv8:771,hasgc:722,hash:[590,593,607,614,625,644,648,649,664,667,668,710,712,722,729,736,738,739,742,743,745,773,783,786,811],hash_data_count:759,hash_funct:759,hash_map:743,hash_set:743,hashadjbufferlength:738,hashadjbufferoffset:738,hash
auxstreamindex:738,hashbufferlength:738,hashdata:759,hashes_count:759,hashfunctiontyp:759,hashkeys:738,hashstreamindex:738,hashvaluebufferlength:738,hashvaluebufferoffset:738,hasiniti:743,hasinternallinkag:743,hasjit:780,haskel:[710,811],haslockprefix:770,hasn:[609,677,743,757,782],hasnam:[709,743],hasnoschedulinginfo:770,hasnotrackprefix:770,hasoneus:610,hasopsizeprefix:771,haspostiselhook:770,hasrepprefix:770,hasrex_wprefix:[770,771],hassideeffect:[639,770,771],hassse2:770,hassse3:770,hasvex_4v:770,hasvex_l:770,hasvex_w:770,haswel:631,hat:786,hatsiz:786,have:[20,27,120,123,129,209,212,220,314,370,378,479,482,490,584,585,586,587,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,614,616,621,623,625,628,631,635,639,640,641,644,654,657,659,660,662,663,664,666,667,668,669,670,671,672,673,674,675,676,678,679,681,682,683,684,685,686,688,689,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,712,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,733,736,738,741,742,743,744,745,746,747,748,749,750,752,753,754,755,756,757,758,759,760,761,762,764,765,766,768,769,770,771,774,775,776,778,779,780,781,782,783,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],haveactivevectorlength:749,haven:[610,685,710,725,757,766,780,782,783,784],havoq:747,hawaii:590,hazard:[639,711,760],hblcnsviw:659,hd2xxx:590,hd6xxx:590,hdl:663,head:[592,610,662,676,679,682,692,710,717,723,742,745,761,766,770],headach:753,header0:710,header:[594,597,605,624,634,641,642,643,644,648,649,653,658,659,660,666,667,668,671,672,673,674,678,679,681,687,698,701,703,709,710,714,715,716,730,741,743,744,754,762,768,769,774,776,780,781,783,784,788,790,806,807,808,809,810,814],header_data:759,header_data_len:759,headerblock:739,headerdata:759,headers:738,headlight:610,heal:673,healthi:667,heap:[593,695,710,711,751,756,758,760,762,764,786,808,811],heard:[660,668,746],heart:764,heartble:712,heartfelt:739,heartwal:747,h
eavi:[743,764,789],heavier:667,heavili:[590,644,676,743,759,760,778,811],heavyweight:743,heffernan:660,held:[585,596,667,710,742,756],helllo:775,hello:[606,664,679,709,710,768,769,770,775],hello_world:590,hellooooooooooooooo:695,helloworld:[606,700,781],helloworldpass:781,helo:775,help:[585,589,590,591,593,594,596,600,601,602,603,605,608,609,610,611,612,614,615,616,617,619,621,622,623,624,625,626,629,630,631,632,633,635,636,637,639,640,641,642,643,645,648,649,650,652,653,654,657,658,663,666,667,668,670,674,676,679,680,688,689,695,696,697,699,702,704,706,709,710,712,713,718,721,723,742,745,746,747,748,753,755,756,757,759,760,761,763,764,765,766,770,771,773,775,782,784,785,786,788,809,810,811],helper:[593,594,610,616,658,659,661,675,695,710,715,722,725,728,743,749,759,766,768,769,775,780,803,804,805,806,807,808,809,810],henc:[590,610,659,663,679,710,713,714,762,770],henceforth:[734,738,758],henderson2002:676,henderson:748,hendersonprefix:748,henrik:765,her:722,here:[0,1,2,3,4,5,6,7,8,9,52,53,143,238,239,401,402,514,515,585,588,589,590,593,594,600,601,605,606,607,610,611,620,621,639,644,645,659,660,664,665,667,668,669,672,674,676,677,678,679,681,682,683,687,689,695,696,697,698,699,700,702,703,705,707,709,710,711,712,713,714,715,716,719,721,722,724,726,730,731,736,739,742,743,745,748,752,755,756,757,758,759,760,762,764,765,768,769,770,771,773,774,776,778,780,781,782,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812,814],herebi:610,hertz:785,heterogen:[590,778],heurist:[607,710,741,776,783,785,808],hex16:786,hex32:786,hex64:786,hex8:786,hex:[611,639,640,642,644,648,649,650,710,712],hex_numb:590,hex_valu:631,hexadecim:[589,590,620,637,640,644,648,649,650,652,659,710,786],hexagon:[607,679,710,748,768],hexinteg:770,hfc:673,hh567368:610,hi16:607,hidden:[585,590,597,607,610,615,625,641,643,659,706,708,709,710,723,731,743,745,759,776,777,782],hidden_completion_act:590,hidden_default_queu:590,hidden_global_offset_i:590,hidden_global_offset_x:590,hidden_glo
bal_offset_z:590,hidden_hostcall_buff:590,hidden_multigrid_sync_arg:590,hidden_non:590,hidden_printf_buff:590,hiddencompletionact:590,hiddendefaultqueu:590,hiddenglobaloffseti:590,hiddenglobaloffsetx:590,hiddenglobaloffsetz:590,hiddenhostcallbuff:590,hiddenmultigridsyncarg:590,hiddennon:590,hiddenprintfbuff:590,hide:[631,663,726,745,764,770],hideaki:607,hierarch:[597,666,741,750],hierarchi:[616,639,670,679,704,710,714,725,750,751,770,777,804],hiet:757,high:[0,3,4,120,288,306,308,479,584,585,589,590,592,593,595,597,602,609,639,644,660,662,663,665,667,668,669,670,672,674,676,677,678,684,695,699,702,709,715,719,721,722,724,726,735,738,741,743,744,751,759,764,769,770,775,776,777,779,780,783,785,787,790,808,811,812],higher:[585,589,590,593,599,605,607,609,631,667,675,678,681,689,709,710,720,721,728,741,743,762,764,766,776,779,788,790,803,811],highest:[585,590,596,684,710,803,804,805,806,807,808,809,810],highli:[590,607,609,660,667,674,676,699,710,712,719,743,744,756,760,776,790,808],highlight:[606,607,639,664,674,679,683,717,743,761,784,804,810],highproduct:747,hihihi:743,him:722,hint:[590,601,610,642,643,654,674,677,776,804],hip:[585,590],hipe:[607,710],hiraditya:747,his:743,histogram:[638,639,648,649,747],histor:[592,606,610,677,710,743,764,775],histori:[667,679,682,723,739,742],hit:[598,639,712,721,743,760],hitherto:597,hmmer:773,hoc:[585,607,741,743],hoist:[590,593,594,610,705,710,714,721,741,743,762,776,782],hoistcommoncodeinsucc:705,hold:[585,589,590,592,603,607,608,659,667,669,671,676,679,709,710,712,715,716,722,738,741,743,744,750,754,756,759,760,768,769,770,771,775,780,782,785,802,803,804,805,806,807,808,809,810],holder:[585,667],hole:[659,710,746,760,764],holidai:609,hollow:786,home:[674,679,697,743,747,756,759,774,775],homeless:743,homepag:[679,681],honour:671,hood:664,hook:[585,594,601,607,669,685,710,716,725,744,745,806],hope:[609,667,676,722,740],hopefulli:[593,597,610,659,660,741,782,789,805],horizont:[611,639,684,710],horn:760,horribl:804,host:[590,592,60
3,605,607,610,619,621,626,631,639,667,676,678,681,699,701,709,710,715,724,726,742,743,745,747,757,773,775,788,809,810],host_fn:660,host_install_dir:700,host_onli:660,hosta:724,hostb:724,hostc:724,hostcal:590,hostnam:773,hot:[598,645,674,710,754,760,764],hotfix:745,hotspot3d:747,hotter:598,hottest:[645,710],hour:[592,609,667,682,755],hous:711,how:[27,129,220,378,490,585,588,590,594,595,596,597,598,599,601,602,603,605,606,610,611,615,616,631,644,654,659,660,663,664,665,666,667,668,669,670,674,676,678,679,680,681,682,688,690,709,710,711,714,716,717,721,723,724,725,727,728,731,739,741,742,743,744,745,746,748,750,751,755,756,760,762,764,766,771,774,775,776,777,778,780,781,782,783,784,785,786,787,788,789,790,803,804,805,806,807,808,809,810,811,812,813],howev:[585,590,591,593,594,596,597,601,606,607,609,610,611,616,621,639,659,660,663,666,667,669,671,672,673,676,677,679,683,684,685,688,694,698,701,702,703,705,707,708,709,710,712,714,715,716,717,725,726,727,730,738,740,742,743,745,746,748,750,757,758,759,760,761,762,763,764,765,766,768,770,771,774,775,777,779,782,790,804,805,806,807,808,811],howto:[701,717,761,813],howtosubmitabug:679,howtousejit:624,howtouselljit:726,hpc:585,hpgmg:747,hpp:669,hresult:738,hrf:590,hsa:585,hsail:590,hsail_major_vers:590,hsail_minor_vers:590,hsatext:590,hsl:747,ht206167:712,htm:747,html:[605,610,625,631,658,664,670,679,699,700,701,702,705,710,712,724,745,747,748,749,753,754,759,760,761,768,773,774,788],http:[595,605,609,610,633,634,635,637,641,642,643,650,652,653,664,675,678,679,681,682,696,698,699,700,701,702,703,710,712,723,742,745,747,748,749,753,760,773,774,783,788,789,790,791,813],huge16:730,huge:[610,667,679,681,712,727,730,738,768,771,775],hugepoint:738,hull:590,human:[604,608,610,622,623,625,629,630,636,638,645,654,679,695,705,710,716,741,760,771,782,786],hundr:674,hundt:660,hung:743,hunt:757,hurdl:672,hurt:610,hw_reg_flat_scr_hi:20,hw_reg_flat_scr_lo:20,hw_reg_gpr_alloc:[20,123,212,370,482],hw_reg_hw_id:[20,123,212,370,482],hw_reg_ib_
st:[20,123,212,370,482],hw_reg_lds_alloc:[20,123,212,370,482],hw_reg_mod:[20,123,212,370,482],hw_reg_pops_pack:20,hw_reg_sh_mem_bas:[20,370,482],hw_reg_statu:[20,123,212,370,482],hw_reg_tba_hi:20,hw_reg_tba_lo:20,hw_reg_tma_hi:20,hw_reg_tma_lo:20,hw_reg_trapst:[20,123,212,370,482],hw_reg_xnack_mask:20,hwaddresssanit:710,hwreg:[0,2,3,4,9],hwreg_enc:[20,123,212,370,482],hybrid:760,hydra:592,hydro:747,hyper:748,hypersparc:780,hyphen:[770,780],hypothes:734,hypothet:[585,607,669,683,722,726,735,743,764,782],i100:710,i16:[0,3,4,9,589,607,705,710,724,780],i16x2:[1,7,8,9],i17:683,i1942652:710,i19:710,i20:710,i24:710,i256:710,i29:710,i32:[0,1,2,3,4,7,8,9,589,596,598,599,607,611,663,664,669,672,673,676,683,684,685,687,688,705,707,708,710,714,716,717,722,724,727,741,743,759,761,762,764,771,775,777,780,781,783,808,811],i32imm:[770,780],i32mem:770,i32x16:[8,9],i32x32:[8,9],i32x4:[8,9],i386:[607,641,654,710],i48:710,i4x8:[1,7,8,9],i64:[0,2,3,4,9,589,596,599,607,611,663,664,677,710,724,727,762,764,771,777,780],i64imm:780,i65:710,i67:710,i686:[607,611,775],i8086:809,i8mem:607,i8x4:[1,7,8,9],iOS:[679,710,773],i_intr_llvm_ptrmask:684,iaca:639,iaddroff:607,iamcu:641,iat:698,ibc:698,ibm:666,icc:[679,759,779],icc_:780,icc_g:780,icc_n:780,icc_val:780,iceland:590,icmp:[597,663,673,714,743,759],icmp_eq:716,icmpinst:743,icon:742,iconst:[12,13,58,61,65,76,148,149,156,166,170,245,246,248,253,264,331,332,346,347,348,408,409,411,412,414,415,416,417,428,521,523,524,526,540],ics:678,icu:712,id1:684,id2:684,idata:640,idea:[592,597,598,610,639,659,663,666,667,669,670,674,682,697,710,719,722,723,741,742,747,748,753,759,760,784,788,803,804,805,806,808,811,812],ideal:[639,667,682,710,712,719,743,764,780],ident:[585,590,592,596,607,608,610,611,621,627,663,676,688,701,705,710,711,716,722,731,738,741,743,755,756,759,768,775,777,784,805,806,808],ident_t:710,identif:[633,679,701,710,759],identifi:[27,129,220,378,490,585,597,601,602,604,605,607,608,609,610,611,616,639,658,659,663,666,669,671,677,689,695,703
,705,709,711,714,716,718,724,731,736,738,741,743,745,748,750,753,754,756,759,760,762,764,768,775,776,777,779,780,782,783,788,789,790,791,802,803,804,805,806,807,808,809,810],identifierexpr:[803,804,805,806,807,808,809,810],identifierstr:[802,803,804,805,806,807,808,809,810],identifycudaprefer:660,identitytransform:789,idiom:[659,660,663,703,743,750,760,804],idiomat:[610,743,750,775],idiosyncrasi:766,idiv:607,idnam:[803,804,805,806,807,808,809,810],ids:[644,664,785],idx0:710,idx1:[677,710],idx2:677,idx3:677,idx:[610,631,677,684,710,716,768,784,804,805,806,807,808,809,810],idxen:[0,2,3,4,8,9,85,176,271,352,436,549,590],idxmask:710,ieee754:710,ieee:[590,617,660,674,684,710,768],ieee_mod:590,if_1_then:590,ifbodi:770,ifcond:[806,807,808,809,810],ifcont:[806,807,808,809,810],ifconvers:780,ifconvert:780,ifdef:[659,660,664,695,712,765,768,770,805,806,807,808,809,810,811],ifequ:710,ifexpr:[806,807,808,809,810],ifexprast:[806,807,808,809,810],iff:[688,710],iffals:710,ific:659,ifloc:810,ifndef:[660,769,770,781,788,789,790,791],ifstream:724,iftmp:[806,807,808,809,810],iftru:710,ifunc:597,ifuncti:710,ifunequ:710,ignor:[35,107,145,196,301,385,457,498,571,585,589,590,593,597,598,607,611,616,620,621,623,625,627,628,630,631,633,634,635,638,639,641,642,644,650,652,653,659,663,666,667,673,678,679,682,710,712,716,718,722,731,736,743,748,759,776,779,780,783,789,802,803,804,805,806,807,808,809,810],ignore_crash:712,ignore_oom:712,ignore_remaining_arg:675,ignore_timeout:712,ignoresvex_l:770,ignoresvex_w:770,ihex:641,ii32:770,ilbdc:747,ill:[585,621,695],illeg:[176,585,590,607,659,672,677,688,689,710,724,741,743,745,779],illinoi:[667,747],illustr:[590,596,654,659,663,666,710,713,714,743,745,759,770,779,782,802,803,804,808],ilp32:811,ilp:[639,779],imac:779,imag:[66,68,84,96,97,98,99,112,113,114,157,159,175,186,187,188,201,202,254,256,270,286,287,288,289,306,307,308,418,420,435,447,448,449,450,469,470,530,532,548,560,561,562,563,576,577,578,588,589,607,638,640,641,642,644,671,710,715,736,768,
807],image_atomic_add:[0,2,3,4,9],image_atomic_and:[0,2,3,4,9],image_atomic_cmpswap:[0,2,3,4,9],image_atomic_dec:[0,2,3,4,9],image_atomic_fcmpswap:[0,2],image_atomic_fmax:[0,2],image_atomic_fmin:[0,2],image_atomic_inc:[0,2,3,4,9],image_atomic_or:[0,2,3,4,9],image_atomic_smax:[0,2,3,4,9],image_atomic_smin:[0,2,3,4,9],image_atomic_sub:[0,2,3,4,9],image_atomic_swap:[0,2,3,4,9],image_atomic_umax:[0,2,3,4,9],image_atomic_umin:[0,2,3,4,9],image_atomic_xor:[0,2,3,4,9],image_comdat_select_associ:710,image_comdat_select_largest:710,image_debug_directori:736,image_debug_type_codeview:736,image_debug_type_except:731,image_debug_type_fixup:731,image_debug_type_fpo:731,image_debug_type_omap_from_src:731,image_debug_type_omap_to_src:731,image_file_32bit_machin:814,image_file_aggressive_ws_trim:814,image_file_bytes_reversed_hi:814,image_file_bytes_reversed_lo:814,image_file_debug_strip:814,image_file_dl:814,image_file_executable_imag:814,image_file_large_address_awar:814,image_file_line_nums_strip:814,image_file_local_syms_strip:814,image_file_machine_am33:814,image_file_machine_amd64:814,image_file_machine_arm64:814,image_file_machine_arm:814,image_file_machine_armnt:814,image_file_machine_ebc:814,image_file_machine_i386:814,image_file_machine_ia64:814,image_file_machine_m32r:814,image_file_machine_mips16:814,image_file_machine_mipsfpu16:814,image_file_machine_mipsfpu:814,image_file_machine_powerpc:814,image_file_machine_powerpcfp:814,image_file_machine_r4000:814,image_file_machine_sh3:814,image_file_machine_sh3dsp:814,image_file_machine_sh4:814,image_file_machine_sh5:814,image_file_machine_thumb:814,image_file_machine_unknown:814,image_file_machine_wcemipsv2:814,image_file_net_run_from_swap:814,image_file_relocs_strip:814,image_file_removable_run_from_swap:814,image_file_system:814,image_file_up_system_onli:814,image_gather4:[0,2,3,4,112,201,306,576],image_gather4_b:[0,2,3,4],image_gather4_b_cl:[0,2,3,4],image_gather4_b_cl_o:[0,2,3,4],image_gather4_b_o:[0,2,3,4],image_gather4_c:
[0,2,3,4],image_gather4_c_b:[0,2,3,4],image_gather4_c_b_cl:[0,2,3,4],image_gather4_c_b_cl_o:[0,2,3,4],image_gather4_c_b_o:[0,2,3,4],image_gather4_c_cl:[0,2,3,4],image_gather4_c_cl_o:[0,2,3,4],image_gather4_c_l:[0,2,3,4],image_gather4_c_l_o:[0,2,3,4],image_gather4_c_lz:[0,2,3,4],image_gather4_c_lz_o:[0,2,3,4],image_gather4_c_o:[0,2,3,4],image_gather4_cl:[0,2,3,4],image_gather4_cl_o:[0,2,3,4],image_gather4_l:[0,2,3,4],image_gather4_l_o:[0,2,3,4],image_gather4_lz:[0,2,3,4],image_gather4_lz_o:[0,2,3,4],image_gather4_o:[0,2,3,4],image_get_lod:[0,2,3,4],image_get_resinfo:[0,2,3,4,9],image_load:[0,2,3,4,9],image_load_mip:[0,2,3,4,9],image_load_mip_pck:[0,2,3,4,9],image_load_mip_pck_sgn:[0,2,3,4,9],image_load_pck:[0,2,3,4,9],image_load_pck_sgn:[0,2,3,4,9],image_rel_amd64_addr32nb:671,image_rel_amd64_secrel:671,image_rel_amd64_sect:671,image_rel_arm64_secrel_high12a:671,image_rel_arm64_secrel_low12a:671,image_rel_arm64_secrel_low12l:671,image_rel_i386_dir32nb:671,image_rel_i386_secrel:671,image_rel_i386_sect:671,image_sampl:[0,2,3,4,9],image_sample_b:[0,2,3,4],image_sample_b_cl:[0,2,3,4],image_sample_b_cl_o:[0,2,3,4],image_sample_b_o:[0,2,3,4],image_sample_c:[0,2,3,4],image_sample_c_b:[0,2,3,4],image_sample_c_b_cl:[0,2,3,4],image_sample_c_b_cl_o:[0,2,3,4],image_sample_c_b_o:[0,2,3,4],image_sample_c_cd:[0,2,3,4],image_sample_c_cd_cl:[0,2,3,4],image_sample_c_cd_cl_g16:0,image_sample_c_cd_cl_o:[0,2,3,4],image_sample_c_cd_cl_o_g16:0,image_sample_c_cd_g16:0,image_sample_c_cd_o:[0,2,3,4],image_sample_c_cd_o_g16:0,image_sample_c_cl:[0,2,3,4],image_sample_c_cl_o:[0,2,3,4],image_sample_c_d:[0,2,3,4],image_sample_c_d_cl:[0,2,3,4],image_sample_c_d_cl_g16:0,image_sample_c_d_cl_o:[0,2,3,4],image_sample_c_d_cl_o_g16:0,image_sample_c_d_g16:0,image_sample_c_d_o:[0,2,3,4],image_sample_c_d_o_g16:0,image_sample_c_l:[0,2,3,4],image_sample_c_l_o:[0,2,3,4],image_sample_c_lz:[0,2,3,4],image_sample_c_lz_o:[0,2,3,4],image_sample_c_o:[0,2,3,4],image_sample_cd:[0,2,3,4],image_sample_cd_cl:[0,2,3,4],im
age_sample_cd_cl_g16:0,image_sample_cd_cl_o:[0,2,3,4],image_sample_cd_cl_o_g16:0,image_sample_cd_g16:0,image_sample_cd_o:[0,2,3,4],image_sample_cd_o_g16:0,image_sample_cl:[0,2,3,4],image_sample_cl_o:[0,2,3,4],image_sample_d:[0,2,3,4],image_sample_d_cl:[0,2,3,4],image_sample_d_cl_g16:0,image_sample_d_cl_o:[0,2,3,4],image_sample_d_cl_o_g16:0,image_sample_d_g16:0,image_sample_d_o:[0,2,3,4],image_sample_d_o_g16:0,image_sample_l:[0,2,3,4],image_sample_l_o:[0,2,3,4],image_sample_lz:[0,2,3,4],image_sample_lz_o:[0,2,3,4],image_sample_o:[0,2,3,4],image_scn_align_1024byt:814,image_scn_align_128byt:814,image_scn_align_16byt:814,image_scn_align_1byt:814,image_scn_align_2048byt:814,image_scn_align_256byt:814,image_scn_align_2byt:814,image_scn_align_32byt:814,image_scn_align_4096byt:814,image_scn_align_4byt:814,image_scn_align_512byt:814,image_scn_align_64byt:814,image_scn_align_8192byt:814,image_scn_align_8byt:814,image_scn_cnt_cod:[641,814],image_scn_cnt_initialized_data:[641,671,814],image_scn_cnt_uninitialized_data:[641,671,814],image_scn_gprel:814,image_scn_lnk_comdat:814,image_scn_lnk_info:[640,814],image_scn_lnk_nreloc_ovfl:814,image_scn_lnk_oth:814,image_scn_lnk_remov:[641,671,814],image_scn_mem_16bit:814,image_scn_mem_discard:[641,671,814],image_scn_mem_execut:[641,814],image_scn_mem_lock:814,image_scn_mem_not_cach:814,image_scn_mem_not_pag:814,image_scn_mem_preload:814,image_scn_mem_purg:814,image_scn_mem_read:[641,814],image_scn_mem_shar:[641,814],image_scn_mem_writ:[641,814],image_scn_type_no_pad:814,image_section_head:731,image_stor:[0,2,3,4,9,98,99,188,288,289,449,450,562,563],image_store_mip:[0,2,3,4,9],image_store_mip_pck:[0,2,3,4,9],image_store_pck:[0,2,3,4,9],image_sym_class_argu:814,image_sym_class_automat:814,image_sym_class_bit_field:814,image_sym_class_block:814,image_sym_class_clr_token:814,image_sym_class_end_of_funct:814,image_sym_class_end_of_struct:814,image_sym_class_enum_tag:814,image_sym_class_extern:814,image_sym_class_external_def:814,image_sym_cla
ss_fil:814,image_sym_class_funct:814,image_sym_class_label:814,image_sym_class_member_of_enum:814,image_sym_class_member_of_struct:814,image_sym_class_member_of_union:814,image_sym_class_nul:814,image_sym_class_regist:814,image_sym_class_register_param:814,image_sym_class_sect:814,image_sym_class_stat:814,image_sym_class_struct_tag:814,image_sym_class_type_definit:814,image_sym_class_undefined_label:814,image_sym_class_undefined_stat:814,image_sym_class_union_tag:814,image_sym_class_weak_extern:814,image_sym_dtype_arrai:814,image_sym_dtype_funct:814,image_sym_dtype_nul:814,image_sym_dtype_point:814,image_sym_type_byt:814,image_sym_type_char:814,image_sym_type_doubl:814,image_sym_type_dword:814,image_sym_type_enum:814,image_sym_type_float:814,image_sym_type_int:814,image_sym_type_long:814,image_sym_type_mo:814,image_sym_type_nul:814,image_sym_type_short:814,image_sym_type_struct:814,image_sym_type_uint:814,image_sym_type_union:814,image_sym_type_void:814,image_sym_type_word:814,imagick:747,imagin:[722,726,759],imaginari:689,imagmag:807,imagstart:807,imask:[3,4,9],imbu:783,imgrel:671,imit:[717,761,770],imm16:[0,2,3,4,9],imm32:607,imm:[607,639,642,644,710,718,770,780],imm_eq0:771,immarg:[597,670,710],immateri:722,immatur:704,immedi:[55,404,517,585,588,590,593,594,597,604,607,610,611,625,631,635,639,642,644,663,669,670,671,672,683,684,685,686,688,705,709,710,712,714,715,718,721,724,726,731,735,741,743,745,748,755,756,759,760,762,764,770,780,785,790,805,808,811],immediatepar:710,immigr:608,immin:702,immleaf:686,immsext16:607,immsubreg:780,immt:[770,771],immtyp:[770,771],immtypebit:771,immun:760,immut:[706,710,725,743,776,782],immutablepass:593,impact:[590,603,604,606,609,610,639,667,674,676,695,705,710,743,747,748,750,759,760,766],imped:710,imper:[610,676,759,806,808],impl:[658,743,782],implement:[585,590,591,594,603,604,606,609,611,616,617,621,639,641,648,649,653,654,658,659,660,663,665,667,669,670,671,675,677,678,679,684,685,686,688,690,694,695,703,708,709,711,712,714,
718,721,724,730,738,741,742,743,745,746,748,749,751,753,756,759,762,763,764,766,768,769,770,777,778,781,783,784,785,786,788,789,790,791,802,804,805,806,807,808,809,810,812],implementor:[676,771,805,806],impli:[14,79,172,267,333,350,430,543,585,586,587,590,593,607,611,616,642,643,644,654,658,659,660,667,669,676,677,710,712,714,721,731,743,745,748,762,765,770,780],implic:[604,607,658,710,714,731,745,756,760],implicit:[607,610,611,669,684,709,710,716,725,743,751,759,760,768,770,780,784,806],implicit_def:705,implicitarg:590,implicitcod:759,implicitdef:607,implicitdefin:716,implicitli:[585,590,597,606,607,610,659,667,671,684,709,710,725,726,743,745,760,762,779,786,788,802,803,804,805,806,807,808,809,810,811],implicitus:607,importantli:[593,610,667,806],impos:[607,667,709,710,716,743,755,762,766],imposs:[593,594,596,688,710,760,780,788,811],impract:[667,695,728,729,743,760],improp:725,improv:[585,590,593,602,603,605,609,610,616,667,669,674,676,679,685,689,693,702,709,710,712,714,716,722,723,726,741,742,743,745,748,752,757,759,760,764,770,780,782,788,790,805,811],impur:759,imul16rmi8:607,imul16rmi:607,imul:607,in0:710,in1:[607,710],in2:607,in32bitmod:607,in64bitmod:607,inaccess:695,inaccessiblemem_or_argmemonli:710,inaccessiblememonli:[597,710],inaccessiblememonly_or_argmemonli:597,inaccur:[606,639],inaccuraci:[590,710],inact:[585,588,590,710],inadvert:[611,705,765,776],inadvis:677,inalloca:[597,663,710,751],inapplic:[600,697,760],inappropri:[755,757],inbound:[663,664,674,677,710],inc32r:[716,759],inc4:611,inc:[607,610,658,663,679,707,710,716,743,757,765,768,769,770,780,782],incant:660,incap:[585,597,811],incarn:670,incid:755,includ:[20,84,123,175,212,213,270,370,371,435,482,483,548,585,587,588,589,590,591,592,593,594,596,599,601,603,604,605,606,607,608,609,611,612,613,616,617,618,621,623,624,625,634,635,636,637,642,644,650,654,655,656,657,658,659,660,661,662,663,664,667,668,669,670,671,674,675,676,677,678,680,681,687,690,694,695,697,698,700,701,702,703,705,707,708,709,710
,711,712,713,715,716,718,721,723,724,725,726,728,729,730,731,735,736,739,741,742,743,744,745,747,748,750,752,754,755,756,757,759,760,762,763,764,766,768,769,771,773,774,775,776,778,779,780,781,782,783,784,785,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],include_directori:605,includedir:624,inclus:[585,588,589,590,599,610,625,639,658,667,671,698,710,744,757,765,768,780,788],incom:[590,607,667,684,693,710,714,743,757,760,780,806,808],incompat:[641,653,669,695,710,740,768],incomplet:[585,590,668,672,743,760,768,789,790,791],inconsist:[610,631,677,748,759],inconveni:[712,808],inconvertibleerrorcod:743,incorpor:[585,607,609,703,743,770,812],incorrect:[590,599,667,710,727,728,742,756,759,760,804,805,806,807,808,809,810],incorrectli:[590,639,700,710,748,760,802,804],incq:611,incr:641,increas:[585,590,603,609,610,619,639,642,663,667,674,683,688,689,704,705,710,712,713,714,727,741,745,754,756,759,760,766,768,771,776,779,804],incred:[667,699,745],increment:[585,590,601,606,607,610,639,676,679,712,723,727,731,735,736,738,741,743,749,758,766,806,808,809,810],incur:720,ind1:710,ind4:710,ind:810,indefinit:712,indemn:667,indent:[642,654,717,743,761,786,810],independ:[585,590,594,609,611,619,659,662,666,667,674,677,679,683,688,701,706,708,710,713,718,720,726,743,746,749,754,760,762,765,770,771,778,779,780,782,784,785],index2:760,index2virtreg:607,index:[85,176,213,271,352,371,436,483,549,588,589,590,593,596,597,607,615,621,630,631,639,644,645,648,649,658,659,663,664,669,671,674,676,679,681,684,688,702,706,709,731,734,735,736,738,739,743,747,754,759,760,762,763,764,769,770,780,786,787,810],indexedmap:607,indexloc:743,indexoffsetbufferlength:738,indexoffsetbufferoffset:738,indexreg:607,indextyp:768,indic:[20,30,82,96,97,117,123,186,187,205,212,223,286,287,310,351,370,380,433,447,448,482,493,546,560,561,580,585,586,587,588,589,590,593,596,597,598,599,600,605,607,609,610,611,616,621,623,627,639,654,658,659,663,664,667,669,670,671,676,687,688,689,701,703,706,710,711,712,
724,728,729,730,731,733,736,739,741,742,743,745,750,756,759,760,762,764,768,769,770,771,774,779,780,782,784,785,803,804,806,810],indir:710,indirect:[585,590,604,607,641,642,643,649,663,671,672,684,712,716,735,759,762,764,769,780,790],indirectli:[590,594,610,616,710,721,727,770],indirectstubmanag:[790,791],indistinguish:770,individu:[585,587,590,596,597,601,607,610,611,616,621,623,625,639,641,644,650,653,654,664,666,670,674,681,684,693,700,710,712,718,721,727,728,731,739,741,745,746,748,755,757,760,764,769,770,775,783,810,812],indivis:708,induc:[695,714,750,751],induct:[593,674,679,710,714,782,804,806,808],industri:[585,667,787],indvar:[710,743,782],indx:786,ineffect:[663,760],ineffici:[597,607,674,712,743,763,779,808],ineg:607,inequ:[589,607,743],inevit:[639,748],inexact:[590,710],inf:[590,617,619,659,710,759],infeas:[711,727,760],infecti:743,infer:[585,590,596,604,607,610,611,617,619,660,674,676,703,710,727,728,764,770,773,775,782,786],inferattr:782,inferenc:607,inferior:665,infin:[590,639,710],infinit:[585,593,607,688,710,712,714,723,741],infix:770,influenc:[596,602,710,711,757,759,776,782],info:[590,597,603,605,607,614,616,630,631,637,639,642,644,645,648,649,654,658,661,665,667,669,676,679,691,699,701,710,711,712,713,728,730,734,739,745,754,755,768,779,782,785,786,802,807,808,810,811,812],inform:[587,588,589,591,593,597,599,600,603,604,605,608,611,612,614,615,616,621,623,631,636,637,638,639,640,641,642,644,645,648,649,654,657,658,659,660,663,664,665,669,671,672,673,674,676,677,678,679,680,681,683,685,687,688,691,693,694,695,696,697,698,699,700,701,702,703,704,705,707,709,711,712,713,714,715,716,717,718,722,723,724,727,729,731,733,735,736,738,739,742,744,745,748,750,751,753,754,755,756,757,760,761,762,763,764,765,768,769,770,771,773,776,778,779,780,781,782,783,784,785,786,788,789,790,791,803,804,805,807,808,809,811,812,813],informativemailnotifi:696,infrastructur:[585,601,605,607,644,662,667,670,676,678,679,680,697,710,725,726,728,743,744,745,750,751,754,757,759,7
64,766,773,778,779,781,782,787,805],infrequ:782,infring:667,ing:[610,662,714,741,804,808],ingredi:750,inher:[606,609,672,710,725,736,743,760,770],inherit:[590,593,607,610,659,666,703,726,740,743,765,768,769,770,771,776,777,781,782,789],inhibit:[645,710,764],init:[590,632,659,665,702,712,745,770,782,808,809,810],initact:591,initi:[585,597,598,605,609,616,619,623,639,642,644,659,663,669,670,672,679,684,693,703,709,710,714,715,721,722,724,726,727,741,742,745,746,748,750,755,756,757,758,760,763,768,769,770,780,782,783,786,788,789,790,804,805,806,808,809,810],initial_instruct:[585,590],initial_loc:585,initialc:725,initialexec:[597,710],initialis:[659,771,783,809],initialize_ag_pass:782,initialize_pass_begin:687,initialize_pass_depend:[687,782],initialize_pass_end:687,initializealiasanalysi:593,initializeallasmpars:809,initializeallasmprint:809,initializealltarget:809,initializealltargetinfo:809,initializealltargetmc:809,initializemodul:[804,810],initializemoduleandpassmanag:[805,806,807,808,809],initializenativetarget:[805,806,807,808,810],initializenativetargetasmpars:[805,806,807,808,810],initializenativetargetasmprint:[805,806,807,808,810],initializepass:593,initializerconst:710,initialse:651,initid:597,initsynclibcal:594,initv:[808,809,810],inject:[610,663,705,710,725,760],inlin:[585,590,594,599,604,620,625,639,640,645,654,659,660,663,667,669,671,672,674,679,684,698,705,716,724,728,734,742,743,745,754,759,760,762,764,782,786,789,802,805,806,811],inline:[644,710,759],inline_hd1:660,inline_hd2:660,inlineasm:722,inlinebit:710,inlined_at:671,inlinedat:[611,710],inlinedinfo:640,inlinehint:[597,710],inner:[603,606,612,669,705,708,710,712,714,725,741,743,750,770,776,778,779,786],inner_a:710,innerloop:750,innerloopvector:748,innermost:[750,779],innerstructti:710,innoc:806,innov:667,inoperandlist:[770,771,780],inorderissu:639,inorderissuestag:639,inplac:614,inpredsens:707,inprocessmemorymanag:709,input:[10,11,12,13,16,18,36,37,38,39,40,41,57,58,59,60,61,62,63,64,65,69,70,71,7
2,73,74,75,76,77,87,88,89,90,91,92,93,94,95,96,97,100,101,116,118,119,122,146,147,148,149,150,151,152,153,154,155,156,160,161,162,163,164,165,166,167,168,169,170,178,179,180,181,182,183,184,185,186,187,189,190,191,206,207,208,211,226,227,228,243,244,245,246,247,248,249,250,251,252,253,257,258,259,260,261,262,263,264,265,273,274,275,276,277,278,281,282,283,284,285,286,287,311,312,313,317,318,322,323,328,329,330,331,332,335,336,343,344,345,346,347,348,353,354,361,362,363,364,365,366,367,368,386,387,388,389,390,391,406,407,408,409,410,411,412,413,414,415,416,417,421,422,423,424,425,426,427,428,429,438,439,440,441,442,443,444,445,446,447,448,451,452,473,474,475,476,477,478,481,499,500,501,502,503,504,519,520,521,522,523,524,525,526,527,528,529,533,534,535,536,537,538,539,540,541,551,552,553,554,555,556,557,558,559,560,561,564,565,581,582,583,585,586,588,589,590,591,593,595,601,602,606,607,610,611,612,614,616,617,619,621,622,623,625,626,628,629,630,631,632,633,635,637,638,639,640,642,644,646,648,649,650,652,653,654,657,658,659,660,666,672,679,684,685,688,689,693,702,707,709,711,712,713,714,716,723,724,727,731,741,743,750,754,757,759,760,764,768,770,771,773,775,779,780,782,784,788,789,802,803,804,805,806,807,808,809,810,811,814],input_vector:710,inputcont:786,inputfilenam:659,inrang:710,inreg:[590,597,607,710,780],inresumepart:663,ins:[596,607,707,770,771,780],insan:610,insect:711,insensit:[593,611,630],insert:[590,593,594,596,601,610,611,616,621,659,663,676,684,689,695,705,714,722,730,741,759,760,761,765,780,782,783,804,805,806,807,808,809,810],insert_subreg:716,insert_subvector:607,insertbefor:743,insertbranch:780,insertbyt:712,insertdeclar:810,insertel:[596,611],insertvalu:663,insid:[597,603,605,606,607,610,611,616,621,664,665,666,668,669,676,678,679,689,701,703,709,710,712,714,721,722,725,731,738,741,743,750,753,759,760,761,763,764,768,770,774,775,776,777,782,786,805],insidebundl:607,insight:[585,664,710,754,784],insignific:[617,628,710,762],insn:642,inspect:[590,597,
604,607,631,674,677,709,710,745,759,781,782,803],inspir:[639,710,741,759,784],inst:[658,705,710,741,743,770,780],inst_begin:743,inst_cal:597,inst_end:743,inst_invok:597,inst_iter:743,instal:[590,592,605,606,615,624,660,662,668,670,675,679,694,695,696,697,698,700,702,709,712,723,724,740,743,744,745,753,761,771,773,774,775,782,783,803,804,805,806,807,808,809,810],instalia:607,instalias:607,install_gwp_asan_stub:695,install_name_tool:633,install_prefix:605,installeddir:668,installsignalhandl:695,instanc:[585,590,591,605,607,610,611,616,621,659,663,668,676,678,683,686,693,703,708,709,710,714,715,716,722,726,728,733,742,743,745,753,759,762,768,769,770,773,775,776,780,782,783,786,788,789,803,804,811],instanti:[616,625,658,659,660,703,715,743,750,768,770,771,780,782,786],instantiatetemplateattribut:768,instantli:743,instcombin:[601,675,714,743,789,790,791,805,806,807,808],instead:[35,385,498,585,586,590,593,594,597,601,603,605,606,607,608,610,611,614,616,619,620,621,625,630,631,632,636,639,640,642,644,654,657,659,660,663,664,665,667,669,671,672,674,675,677,678,679,681,683,688,689,693,695,696,697,699,702,703,707,708,710,711,712,716,722,723,724,726,731,741,743,745,750,755,757,758,759,760,762,764,765,769,770,773,775,778,780,782,783,784,785,786,789,790,803,804,805,807,808,811,812],instinct:590,institer:743,instlisttyp:743,instnam:601,instprint:768,instr0:722,instr1:722,instr2:722,instr:[590,605,607,625,645,658,664,716,770],instr_iter:607,instr_map:784,instrinfo:[607,716,780],instritinclass:[770,771],instrmap:780,instrpostprocess:639,instrprofvaluekind:710,instrsch:743,instrsdrm:770,instrsdrr:770,instrssrm:770,instrssrr:770,instruct:[10,11,12,13,15,16,19,20,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,57,58,59,60,61,62,63,64,65,69,70,71,72,73,74,75,76,77,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,118,119,120,123,134,135,136,137,138,139,140,141,146,147,148,149,150,151,152,153,154,155,156,160,161,162,163,164,165,166,
167,168,169,170,175,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,206,207,208,209,212,213,226,227,228,229,230,231,232,233,234,235,236,243,244,245,246,247,248,249,250,251,252,253,257,258,259,260,261,262,263,264,265,270,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,311,312,313,314,315,317,318,319,320,322,323,324,325,328,329,330,331,332,334,335,337,343,344,345,346,347,348,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,369,370,371,386,387,388,389,390,391,392,393,394,395,396,397,398,399,406,407,408,409,410,411,412,413,414,415,416,417,421,422,423,424,425,426,427,428,429,435,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,482,483,499,500,501,502,503,504,505,506,507,508,509,510,511,512,519,520,521,522,523,524,525,526,527,528,529,533,534,535,536,537,538,539,540,541,548,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,581,582,583,584,591,593,597,601,602,604,605,610,611,615,617,619,621,623,642,644,645,651,658,659,660,661,662,663,666,667,668,671,673,674,675,676,678,679,681,684,685,686,688,689,690,691,692,694,700,702,704,708,709,711,714,720,721,722,724,727,728,742,745,750,751,752,754,755,758,762,764,768,770,771,775,776,777,778,779,782,783,804,805,806,807,808,809,810,811],instructioncount:754,instructionencod:770,instructionselect:[685,688,693],instructionselector:690,instructor:780,instrument:[592,599,605,625,645,658,664,677,699,709,710,712,725,728,731,741,751,763,769,773,785],instrumentationmap:783,instsimplifi:659,instsimplifypass:725,instsp:780,insttoreplac:743,instvisitor:[670,743],insuffici:[660,663,697,710,759,765,780],insul:760,insult:608,int128:738,int128oct:738,int16:738,int16_t:[780,786],int16short:738,int2006:7
73,int32:[738,762],int32_t:[676,731,738,786],int32long:738,int32ti:743,int32x4_t:596,int64:738,int64_t:[743,769,786],int64quad:738,int8_t:786,int_amdgcn_buffer_load_format:769,int_amdgcn_image_atomic_swap_1d:769,int_get_dynamic_area_offset:710,int_max:710,int_min:710,intd:611,integ:[20,24,26,27,120,123,127,129,209,212,217,218,220,314,327,339,370,375,376,378,479,482,487,488,490,584,585,587,588,590,594,598,607,610,611,616,623,639,645,659,660,663,664,669,670,671,674,676,683,691,712,716,722,726,727,731,736,738,743,745,747,752,754,759,764,768,769,770,775,779,780,786,804],integer_numb:[20,21,22,23,24,27,30,51,52,53,120,123,124,125,126,127,129,142,143,209,212,213,214,215,216,217,220,223,237,238,239,314,370,371,372,373,374,375,378,380,400,401,402,479,482,483,484,485,486,487,490,493,513,514,515,584],integertyp:743,integr:[585,590,604,605,615,659,660,662,667,676,679,712,713,717,728,742,743,745,747,749,759,764,766,768,777,778,783,813],intel:[594,605,619,639,642,646,661,701,710,750,756,760,770,775,779,809],intel_pst:595,intel_syntax:639,inteldialect:710,intellig:[710,743,760],intend:[585,590,592,593,594,603,605,606,607,608,609,610,611,628,634,650,652,659,663,667,669,670,674,676,679,683,689,691,693,710,713,715,718,723,728,736,741,743,745,755,756,760,761,762,765,770,778,780,782],intens:[603,674,743],intent:[585,596,606,607,610,667,710,713,728,748,757,759,808],intention:[610,667,710,760,782,803],inter:[590,593,599,610,667,674,710,711,726,763,765,770],interact:[588,594,601,605,607,626,659,663,669,676,679,699,710,715,721,742,745,754,755,759,764,780,788,803,810,811],intercept:712,interchang:[607,610],interest:[585,590,593,594,596,602,603,607,609,610,611,612,615,616,623,639,659,662,664,667,669,672,674,675,676,679,681,696,703,709,710,712,715,716,719,723,736,741,743,746,748,757,759,760,764,769,774,778,781,782,784,786,789,802,803,804,805,806,807,808,810,811],interf:759,interfac:[590,605,607,610,616,621,649,658,659,661,665,667,668,669,675,676,678,679,681,694,703,706,707,709,710,712,713,71
4,722,723,724,726,741,751,754,756,759,762,768,770,778,780,781,782,789,790,802,803,804,811],interfer:[639,669,674,677,710],interim:[604,667],interior:[676,703,710,711,764],interleav:[585,590,642,723,745,750,770,779],interleave_count:779,intermedi:[597,605,632,636,657,660,662,668,669,672,676,679,683,684,688,693,706,708,710,716,721,743,751,753,754,759,764,770,773,778,780,803],intermediari:743,intermingl:710,intermitt:679,intermix:590,intermodular:[713,778],intern:[590,593,597,605,606,607,611,612,616,639,641,644,654,660,662,663,664,667,668,669,672,674,677,678,679,684,695,709,710,711,713,714,715,716,724,726,727,728,729,735,739,743,746,757,764,765,768,769,770,771,773,774,777,780,782,786,788,804,808,810],internal_pipeline_hash:590,internallinkag:743,internalread:716,internet:[661,745],interoper:[594,669,676,710,739,788,811],interpol:[17,29,121,131,210,222,480,492,590],interpret:[585,596,597,605,607,609,612,616,619,620,624,628,644,645,658,659,660,664,671,675,679,683,709,710,726,740,759,762,764,768,770,771,775,776,803,804,805,806,807,808,809,810,811],interprocedur:[593,710,713,728,782,789],interrel:710,interrupt:[610,616,673,710,712,760,765],intersect:766,interspers:785,intertwin:711,interv:[590,735,743,756],interven:[590,597,639,745,762],intim:610,intimid:667,intimmleaf:686,intpr:716,intptr_t:[779,805,806,807,808,810],intptrsiz:676,intra:[593,709,763],intraprocedur:[601,721],intrecti:769,intreg:[707,780],intregsclass:780,intregsregclass:780,intregsregclassid:780,intregsregisterclass:780,intregssubclass:780,intregssubregclass:780,intregssuperclass:780,intregssuperregclass:780,intregsvt:780,intrepid:803,intric:668,intrins:[594,609,645,658,661,674,684,689,705,711,714,716,725,727,743,758,769,777,778,779,780],intrinsicsamdgpu:769,intrinsicsnvvm:724,intrnodupl:710,intro:[661,662,710,787,808],introduc:[585,593,594,595,607,610,611,639,663,664,671,683,686,688,689,691,693,702,703,708,709,710,714,721,722,723,727,735,741,743,745,749,750,754,759,760,762,764,771,777,780,788,789,791,806,8
07,808,812],introduct:[609,658,662,679,680,681,787,813],introductori:787,introspect:[706,710,713,718,764],intrus:[710,743],intscalarti:710,intti:710,inttoptr:[674,762,764],inttyp:710,intuit:[710,714,721],inv:725,invalid:[585,588,589,590,593,594,610,616,631,641,645,653,657,659,663,669,677,679,684,710,711,712,714,715,731,741,743,756,759,760,762,764,766,770,780,782,786,803,804,805,806,807,808,809,810],invalu:709,invari:[593,674,679,689,705,711,714,743,764,776,782],invas:[667,702,786],invent:[739,768,769],invers:[588,631,710,741],inverse_throughput:631,invert:[674,683,710,775],invest:[670,757],investig:[644,667,688,748,757,760,788,811],invis:746,invit:[678,789,790,791],invoc:[585,592,601,605,607,611,612,633,645,663,668,675,679,694,699,700,705,710,712,728,748,773,782,783,784,785],invok:[585,590,593,597,599,601,605,606,607,610,611,625,654,659,663,669,674,676,679,694,705,708,712,713,715,716,722,726,728,743,762,764,768,769,773,775,780,782,783,785,803,808],invoke:710,invoke_normal_weight:599,invoke_unwind_weight:599,involv:[585,590,593,594,598,605,607,639,666,669,670,674,676,677,685,689,698,702,707,709,710,726,743,745,750,755,756,760,764,765,766,769,770,780,781,784,786,787,803,806,808,810],iommu:590,iostream:[695,724,784,809],ipa:711,ipc:[639,765],iphoneo:773,ipi:[644,729,730,736,739],ipo:[711,722,782],ipra:590,ipsk_covmap:664,iptr:[683,710],ipv:585,irbuild:[705,727,743,804,805,806,807,808,809,810],irc:[608,609,662,674,742,755],ircompil:788,ircompilelay:[726,788,789,790,791],irgen:674,irgenandtakeownership:791,irlay:[789,791],irpath:726,irread:605,irreduc:[710,714,741,780],irrelev:[593,611,677,710,768],irrelevant_extern:709,irrelevant_funct:709,irrespect:605,irtest:605,irtransformlay:[790,791],irtransl:[689,693],is64bitmod:780,is_base_of:703,is_const:590,is_int_min_poison:710,is_nul:673,is_open:724,is_pip:590,is_ptr64:590,is_restrict:590,is_stmt:671,is_trivially_copy:743,is_volatil:590,is_zero_undef:710,isa:[588,590,594,607,610,661,667,674,676,703,710,749,751,770,780],isa_an
d_nonnul:743,isa_impl:703,isabsoluteaddress:731,isadd:770,isaddresstaken:716,isalnum:[802,803,804,805,806,807,808,809,810],isalpha:[802,803,804,805,806,807,808,809,810],isascheapasamov:770,isascii:[803,804,805,806,807,808,809,810],isasmparseronli:770,isatleastorstrongerthan:594,isauthent:770,isbarri:[770,771],isbasewithconstantoffset:686,isbinaryop:[807,808,809,810],isbitcast:770,isbranch:[770,771],iscal:[709,770,771],iscodegenonli:770,iscommut:[770,771],iscompar:770,isconst:[590,597,743],isconstantpoolindex:780,isconverg:770,isconvertibletothreeaddress:[770,771],iscxxclass:759,isd:[591,594,607,669,670,780],isdeclar:743,isdef:607,isdefinit:[705,710,759],isdigit:[802,803,804,805,806,807,808,809,810],isdopcod:[591,607,670],isdoubl:770,isectcoff:731,isehscopereturn:770,isel:[607,617,658,693,711,759,769,780],iseldagtodag:768,isellow:[594,768],isempti:743,isextern:759,isexternalsymbol:780,isextractsubreg:770,isfirstclasstyp:722,isfloatingpointti:743,isfoo:610,isfunct:759,isglobaladdress:780,isgroup:[659,731],ish:807,isimm:684,isimmedi:780,isimplicitcod:759,isimplicitdef:771,isindirect:716,isindirectbranch:[770,771],isinlin:759,isinrang:709,isinsertsubreg:770,isintegerti:743,isjumptableindex:780,island:590,islandingpad:716,islazi:780,isload:607,isloadfromstackslot:780,isloc:[705,710,759],isloopinvari:743,islvalu:610,ismemoryfold:770,ismod:770,ismoveimm:770,ismovereg:770,isn:[592,594,602,606,607,608,610,611,639,645,654,660,667,672,677,689,690,700,709,710,716,721,723,741,743,755,756,757,759,761,765,766,782,802,803,804,805,806,809,810],isnotduplic:[770,771],isnul:610,isnullvalu:722,isobjcclass:759,isobviouslysafetofold:686,isol:[595,598,601,605,607,639,662,666,668,710,764,770,775,788,791,808],isomorph:672,isoper:[807,808,809,810],isoperandimmequ:686,isopt:659,isoptim:[705,710,759],isosdarwin:810,ispack:597,isphysicalregist:607,isphysreg:743,ispic:780,ispip:590,ispoint:722,ispointertyp:610,ispredic:[770,771],isprefix:659,ispreiselopcod:770,isprint:712,isproto:597,ispseudo:770
,isref:770,isregist:607,isregsequ:770,isrematerializ:[770,771],isrequir:781,isrestrict:590,isreturn:[770,771],issafetospecul:764,isselect:770,isselector:731,isset:659,issimpl:594,issiz:743,isspac:[712,802,803,804,805,806,807,808,809,810],isstor:607,isstoretostackslot:780,isstrongerthan:594,issu:[585,590,594,596,600,606,607,608,609,611,616,630,641,659,660,667,671,674,675,679,689,693,695,697,702,704,705,710,742,743,745,747,756,759,760,764,766,768,775,779,782,786,787,805,806,808,811],issuedinst:639,issuedset:639,issuewidth:639,istargetspecif:716,istermin:[610,770,771],istrap:770,istreambuf_iter:724,istruncatingstor:780,isunaryop:[807,808,809,810],isunord:594,isunpredic:770,isunpredicatedtermin:780,isus:607,isv9:780,isvalids:770,isvalueunset:769,isvararg:743,isvector:776,isvi:780,isvolatil:[590,710],iszerofil:709,ital:585,italian:756,itanium:[626,627,671,675,777,778],itanium_abi_tripl:775,item:[585,596,597,606,623,659,669,671,674,710,712,722,743,745,748,759,764,769,770,786,808],iter:[593,598,601,605,606,607,609,610,631,639,659,663,667,676,688,691,693,709,711,712,713,714,715,722,723,735,741,750,769,780,783,786,803,806,807,808,812],iterator_rang:743,ith:743,iti:[695,770],itinerari:[607,770,771,780],its:[20,27,66,123,129,157,212,220,254,370,378,418,482,490,530,585,588,589,590,592,593,596,597,598,599,601,603,605,606,607,608,610,611,612,614,616,617,621,622,623,628,629,630,632,636,638,639,640,642,645,650,657,658,659,660,662,663,664,666,667,668,669,670,674,675,676,677,678,679,681,688,689,694,695,696,698,702,703,705,707,708,709,710,711,712,713,714,715,716,717,720,721,722,725,726,727,730,741,743,745,747,748,750,754,755,756,757,759,760,761,762,764,765,766,768,769,770,771,772,774,775,776,777,778,779,780,782,788,789,790,802,803,804,805,806,807,808,809,810,811,812],itself:[585,590,592,593,596,597,605,607,610,611,612,616,625,659,663,666,667,669,670,672,674,675,676,677,679,684,688,699,701,703,704,708,709,710,712,713,714,722,723,725,735,743,745,747,748,750,753,754,755,756,757,759,760,7
61,762,763,764,768,769,771,775,776,781,782,786,788,789,791,803,804,805,806,807,808,809,810,811],iuml:607,ival:[12,13,58,61,65,331,332,347,348,411,412,521,523,524,526],ivanov:748,ivanovicdistinguish:748,ivar:759,jacqu:660,jag:735,jaguar:639,jalu01:639,jalu0:639,jalu1:639,jam:[710,750],jame:748,jan:750,jann:760,jarvi:747,java:[594,673,676,710,741,743,764,811],javascript:[710,762,811],jazz:744,jb_1:759,jd1:726,jd2:726,jdiv:639,jdoe:[667,742],jeff:765,jemalloc:756,jessi:701,jfpa:639,jfpm:639,jfpu01:639,jfpu0:639,jfpu1:639,jfpuprf:639,jge:642,jingl:750,jingyu:660,jintegerprf:639,jit:[601,605,612,619,624,631,667,672,676,679,681,704,710,724,726,740,762,764,787,802,804,806,807,808,809,810,811,812],jit_dlclos:726,jit_dlopen:[709,726],jitcompilecallbackmanag:790,jitcompilerfn:780,jitcompilerfunct:780,jitdylib:[709,788,789,790,791],jite:[665,726,778,788,790,791],jitevaluatedsymbol:[788,789,790,791],jiteventlisten:726,jitfail:726,jitlink:[726,778],jitlinkcontext:709,jitlinkdylib:709,jitloadergdb:665,jitstack:726,jitsymbol:[788,789,790,791],jitsymbolflag:[726,791],jittargetaddress:[709,790],jittargetmachinebuild:[788,789,790,791],jlagu:639,jlsagu:639,jmp:[710,760],jmp_1:759,jmpq:760,jmul:639,jne:[607,760],job:[605,679,703,712,721,726,765,782,789],joe:666,joerg:745,john:[610,667,742],join:[590,619,678,710,712,714,719,743,782,784],joke:608,jonathan2251:813,jone:[748,750],jonesdistinguish:748,journal:750,journei:808,jpg:712,jsagu:639,json:[625,630,639,654,658,705,723,771,773],jsonnrevnum:745,jstc:639,jti:684,jtmb:[788,789,790,791],judg:[630,723],judgement:[667,760],judgment:667,juggl:[660,723],juli:[676,745,783],julia:726,jump:[589,597,604,659,663,669,671,684,704,705,710,714,723,726,743,744,760,777,780,790,806,810],jumpi:705,jumptabl:[597,710,716],junip:590,jurisdict:667,just:[585,592,593,594,595,596,597,598,601,602,603,605,606,607,609,610,611,614,616,619,621,623,625,628,639,640,642,644,654,659,660,663,664,667,671,672,677,679,681,682,683,687,688,694,697,701,702,703,704,705,709,710,
712,714,716,717,719,721,722,723,724,725,726,741,742,743,744,745,746,753,758,759,760,761,762,764,765,768,769,770,771,773,774,775,780,781,782,783,786,788,789,790,802,803,804,805,806,807,808,809,810,811],justifi:[713,766],justin:692,jvalu0:639,jvalu1:639,jvimul:639,jvm:[726,811],jyknight:745,kabini:590,kaleidoscop:[679,722,726,743,788,789,790,791,812],kaleidoscopeastlay:791,kaleidoscopeastmaterializationunit:791,kaleidoscopejit:[789,790,791,805,806,807,808,810,813],kate:757,kaveri:590,kawahito:607,kdtree:747,keep:[590,593,607,611,614,632,641,653,658,659,660,663,667,668,672,676,695,703,709,710,713,714,716,719,720,721,722,723,725,727,741,743,749,750,754,759,766,771,775,778,779,782,783,790,802,803,804,805,806,807,808,809,810,811,812],kei:[590,597,603,607,610,630,631,667,695,698,703,707,710,714,716,722,725,726,733,736,738,743,754,757,760,764,768,771,782],kele:692,kept:[590,593,663,667,709,710,711,754,755,759,782],kern0:590,kern1:590,kernarg:590,kernarg_address:590,kernarg_s:590,kernarg_segment_align:590,kernarg_segment_s:590,kernargsegmentalign:590,kernargsegments:590,kernel1:590,kernel:[594,595,607,619,639,660,668,697,710,712,747,760,778,779,780,811],kernel_code_entry_byte_offset:590,kernel_param_0:724,kernel_param_1:724,kernel_param_2:724,kernelparam:724,kevin:607,keycol:707,keyr:679,keystrok:610,keyt:743,keytyp:[759,768],keyword:[593,600,607,659,662,664,675,677,710,712,748,759,768,769,770,771,775,779,782,802,806,807,808,809,810],kib:659,kick:[667,709],kill:[601,607,612,711,712,716,759,782],killal:712,kind:[585,587,588,589,590,594,597,599,607,608,609,610,611,616,617,621,623,658,659,664,665,669,670,672,673,675,678,702,703,705,706,709,710,711,712,713,716,721,722,723,725,727,729,730,731,738,741,743,745,755,759,760,761,762,768,770,777,784,785,787,788,807,808,809,810],kistanova:696,klabnik:757,klau:745,klimek:742,kmean:747,kmp:616,knock:610,know:[585,590,593,594,596,600,601,605,606,607,609,610,616,639,659,662,663,664,665,667,669,672,674,676,679,681,682,696,703,704,709,710,712
,713,714,717,723,724,727,735,739,742,743,745,748,755,757,759,760,761,763,764,768,775,777,779,780,782,786,789,803,804,805,806,807,808,809,810,811,812],knowledg:[585,597,607,609,610,664,676,685,710,717,718,723,728,739,741,761,775,776,811],known:[585,590,593,594,597,605,607,610,612,621,641,660,663,666,669,674,676,677,678,679,681,693,702,703,705,709,710,724,741,743,744,748,753,758,760,764,774,775,777,780,782,786,802,803,804,805,806,807,808,809,810],knownbit:[687,710],knownsafevalu:743,knuth:770,koblenz:750,kocher:760,komatsu:607,kostanova:667,kripk:747,kristof:[748,757],kruskal:747,kryomodel:780,kryowrite_2cyc_xy_norsv_67ln:780,kryowrite_2cyc_xy_xy_150ln:780,krzysztof:748,ksdbginfo:810,kuck:666,kuhn:666,kw1:712,kw2:712,kw3:712,kwalifi:814,l10:784,l1d:639,lab:[678,696,723],label0:710,label:[0,2,3,4,9,585,586,589,590,597,598,607,642,662,663,669,671,672,673,676,705,708,714,721,722,724,741,743,759,776,780,784,806,808],label_1:[24,127,217,375,487],label_2:[24,127,217,375,487],label_3:[24,127,217,375,487],label_4:[24,127,217,375,487],label_branch_weight:599,labeltyp:743,labf:659,labor:659,labori:743,lack:[604,607,608,639,676,677,688,698,710,727,740,756,760,766,771],lad:659,laden:811,lagho:747,lah:773,lahf:760,lai:[607,610,677,709,710,711,743,777,810],laid:[596,607,667,710,734,735,739,743,749],lako:610,lambda:[590,686,726,743,748,788],lammp:747,lanai:[679,748],lanczo:747,land:[609,663,667,669,708,710,716,723,757],landingpad:[663,669,741],landscap:745,lane:[585,588,683,684],lang:[603,659,748,773],langref:[591,594,670,674,684,714,724,749,759],languag:[594,605,606,607,608,609,611,617,622,629,632,636,657,659,660,661,662,663,667,668,669,671,673,675,676,677,679,680,683,685,700,705,709,711,712,716,724,726,741,743,746,747,750,751,757,762,764,768,770,771,775,779,780,785,786,788,789,792,793,794,795,796,797,798,799,800,801,803,804,805,809,810],language_vers:590,languagevers:590,larg:[585,594,597,601,605,607,610,612,616,619,623,631,639,658,659,660,667,668,671,676,679,682,683,689,695,699,7
02,707,708,709,710,712,720,721,723,724,726,727,729,733,735,739,741,743,744,746,747,748,759,762,764,768,769,770,771,773,775,779,780,783,785,790,805],largeconst:762,larger:[585,590,597,603,607,609,623,631,632,639,664,666,668,670,671,681,684,688,699,710,712,723,727,731,743,747,748,756,759,764,768,780],largest:[120,209,314,479,584,585,588,590,607,611,614,671,674,688,710,731],last:[585,588,590,594,597,599,607,610,611,616,621,633,639,659,662,663,667,669,679,681,682,702,703,705,710,714,717,721,723,728,735,738,742,743,745,757,761,764,770,775,776,779,780,782,785,788,802,805,807,810],lastchar:[802,803,804,805,806,807,808,809,810],lastinst:780,lastli:[706,775,782,810],lastopc:780,latch:[710,714],late:[590,594,663,674,709,710,780],latenc:[607,619,631,639,710,726,760,784],later:[585,589,590,592,599,604,605,606,607,610,611,616,625,663,664,667,669,674,679,681,682,688,689,698,709,710,712,715,716,717,722,723,725,735,741,742,743,744,745,750,754,757,759,761,762,764,766,770,775,776,780,782,783,788,803,804,805,810,812],latest:[611,614,668,679,681,692,723,745,752,753,773],latter:[589,594,607,663,667,681,688,691,710,714,722,743,748,759,769,776,780,803],lattner:[745,748],lattneracronym:748,lattneragre:748,lattnerfunct:748,lattnerlow:748,lattnerrevnum:745,lattnertransit:748,launch:[590,603,610,660,724,726,790],launcher:681,lava:747,lavamd:747,law:[610,755],lawsuit:667,lawyer:667,layer:[593,639,675,689,698,709,716,726,735,745,754,788,790,791,811,813],layout:[596,603,605,606,644,660,662,674,676,677,680,701,709,726,730,731,733,736,738,745,764,780,782,785,805,809],lazi:[593,642,709,710,788,789,805,813],lazier:789,lazili:[597,687,709,713,743,788,789,790,791],lazycallthrough:[790,791],lazyemittinglay:726,lazyreexport:[726,813],lazyresolverfn:780,lbar:607,lbb0_1:[671,760],lbb0_2:[758,760],lbb0_3:760,lbb0_4:760,lbb0_5:760,lbb0_n:760,lbb_end0_0:671,lbb_end0_1:671,lbd:813,lbl:747,lbr:631,lc_id_dylib:633,lclang1:745,lclang2:745,lcov:625,lcovmap:664,lcpi0_0:775,lcssa:711,lcuda:724,lcudart:660,lcudart_s
tat:660,ld64:[664,728],ld_preload:756,ldflag:[624,694,788,789,790,791,804,805,806,807,808,809,810],ldl:660,ldm:710,ldpxi:716,ldpxpost:716,ldr:[671,718,771],ldrd:594,ldrex:594,ldri:780,ldrr:780,lds:[0,2,3,4,9,107,196,301,457,571,590],lds_direct:[10,12,59,61,62,64,77,146,149,150,152,155,156,162,243,246,247,250,253,317,322,328,330,331,343,345,347,519,521,523,525,528],lds_size:590,ldststoreupd:607,lea:[607,631,760,771],lead:[594,606,607,611,616,626,631,641,642,643,659,663,667,677,684,685,694,700,705,710,712,714,723,725,741,743,748,756,759,760,763,766,775,786,790,803,810],leader:[609,610,748],leaf:[617,669,710,729,764,784],leak:[616,671,676,708,710,758,760,775],leaki:712,leaksanit:712,leap:742,leaq:[709,758],leari:660,learn:[608,610,667,679,710,712,717,722,739,742,743,747,748,761,781,787,789,808,811],least:[585,590,593,594,595,596,597,603,607,609,610,616,621,631,633,637,639,642,644,646,659,663,667,669,675,683,684,688,697,699,701,710,712,714,719,721,729,741,743,745,748,753,757,760,762,764,768,770,780,783,785,803,804,805,806,807,808,809,810],leasur:666,leav:[585,590,595,600,602,609,616,641,653,659,663,667,672,676,679,684,694,710,712,714,722,723,724,726,741,742,745,750,759,760,764,780,789],leb128:[585,607,617],leb:664,left:[585,588,589,590,597,601,606,607,610,663,667,674,682,684,695,710,714,722,741,743,770,771,774,779,780,786,788,803,804,806,810],left_op:710,leftr:722,legaci:[597,602,605,611,676,710,726,727,752,766,776,781,782,784,788,789,790,791,805,806,807,808,809],legacy_stdio_definit:698,legacypassmanag:[782,784,789,790,791,805,806,807,808,809,810],legal:[585,590,591,594,660,667,669,670,677,684,686,689,690,693,705,710,716,718,721,724,729,743,749,750,775],legalact:780,legalfor:688,legalforcartesianproduct:688,legalfortypeswithmems:688,legalif:688,legalitypred:688,legalityqueri:688,legaliz:688,legalizeact:780,legalizedag:[591,670],legalizemut:688,legalizeop:[591,670],legalizerhelpertest:689,legalizerinfo:690,len:[710,759],len_control:712,length:[585,590,596,597,607,616,62
5,627,652,659,661,664,677,710,712,729,730,731,734,735,736,738,741,743,749,757,759,760,761,764,768,769,770,775,779],lengthen:631,less:[585,589,590,595,596,597,602,607,610,611,616,625,639,659,664,667,672,674,676,679,683,684,697,708,709,710,712,714,717,722,738,741,743,745,748,750,753,759,764,770,771,775,803,804,805,806,807,808,809,810,811,812],lesser:[596,710],let:[592,602,607,611,645,659,660,662,663,664,677,683,689,696,703,704,707,710,713,714,717,721,722,723,724,725,726,735,740,742,743,745,755,759,760,761,763,764,768,771,774,780,782,802,803,804,805,806,807,808,809,810,811,812],letitem:770,letlist:770,letter:[597,603,610,649,659,702,710,748,757,780,807,808,809,810],leukocyt:747,level:[585,588,590,592,593,594,597,599,600,603,605,608,611,617,623,639,642,644,648,649,660,661,662,663,665,667,669,673,674,675,676,677,679,689,691,699,708,709,711,714,715,721,722,724,725,728,735,739,743,744,745,751,762,764,766,770,774,775,776,777,778,779,780,782,783,785,786,787,788,790,802,803,804,805,806,807,808,809,810,811],leverag:[609,667,669,676,679,689,743,756,785],lex:[770,802,810],lex_1_1_els:590,lex_1_1_end:590,lex_1_1_start:590,lex_1_1_then:590,lex_1_els:590,lex_1_end:590,lex_1_start:590,lex_1_then:590,lex_end:590,lex_start:590,lexer:[675,679,784,803,804,805,807,808,809,810,811,812,813],lexic:[585,607,610,616,672,710,759,803,805,810],lexicalblock:810,lexicograph:[610,722],lexidentifi:[670,784],lexloc:810,lf_arglist:738,lf_arrai:738,lf_bitfield:738,lf_buildinfo:738,lf_class:738,lf_endprecomp:738,lf_enum:738,lf_fieldlist:738,lf_func_id:738,lf_interfac:738,lf_label:738,lf_methodlist:738,lf_mfunc_id:738,lf_mfunction:738,lf_modifi:738,lf_pad0:730,lf_pointer:738,lf_precomp:738,lf_procedur:738,lf_string_id:738,lf_structur:738,lf_substr_list:738,lf_typeserver2:[738,759],lf_udt_mod_src_lin:738,lf_udt_src_lin:738,lf_union:738,lf_vftabl:738,lf_vtshape:738,lfoo:607,lfunc1_end:590,lfunc_begin0:671,lfunc_beign0:671,lfunc_end0:590,lgkm:590,lgkm_cnt:[120,209,314,479,584],lgkmcnt:[120,209,314,479,584,5
90],lgkmcnt_sat:[120,209,314,479,584],lgtm:[662,711,742],lhse:[808,809,810],lib64:[605,660,679],lib:[590,591,605,607,615,623,624,635,639,644,648,649,659,660,668,670,671,676,681,694,695,698,701,712,716,722,726,743,744,745,748,754,756,759,765,768,769,778,780,781,782,788,789,790,791,804,805,806,807,808,809,810],liba:726,libb:726,libc:[605,610,627,660,665,667,669,678,679,695,700,710,712,743,756,811],libcal:[688,774],libcallfor:688,libcallif:688,libclang_rt:[699,712,783],libclc:[605,667,745],libcuda:724,libcxx:[603,605,679,698,745,748,753],libcxxabi:[603,605,679,698,745],libdir:624,liber:709,libffi:[605,740],libfil:624,libfoo:726,libfuzz:675,libgcc1:701,libgcc:[594,669,758],libit:659,libllvm:[603,605,679,740],libllvmcor:743,libllvmir:743,liblto:[694,748],liblzma:701,libm:[804,805],libnam:[624,782],libobject:709,libomp:[616,753],libomptarget:616,libopag:740,libpartit:671,libpath:[634,671],libpfm:605,libpo:659,librar:681,librari:[585,593,594,595,597,605,606,612,615,619,621,623,624,625,633,636,640,642,643,644,648,649,657,661,667,668,672,674,676,677,678,681,689,695,698,699,700,701,704,709,713,723,724,728,734,741,751,754,759,760,766,770,773,775,778,779,780,784,786,788,802,803,804,805,806,807,808,809,810,811,812],librarynam:[676,744],libremark:754,librt:746,libsampl:744,libscudo:756,libsfgcc1:701,libstdc:[605,627,660,668,669,679,701],libsupport:[709,745],libtinfo:701,libtool:615,libunwind:[603,605,669,679,745,748],libx:635,libxml2:[701,712],libxml:712,libxxx:701,libz:710,lic:741,licens:[610,679,702,746,747,782,788,789,790,791],license:667,licm:[594,663,674,705,711,714,743,782],lie:[659,764],lies:[676,710,762],life:[631,660,667,674,678,712,720,726,727],lifelong:787,lifetim:[585,607,663,667,674,676,695,720,726,743,782,783,788,790],lift:[674,750,757,789],light:[610,710],lightli:[710,746,753,788],lightweight:[610,616,668,743],like:[585,588,590,592,593,594,597,599,601,603,605,607,609,611,619,628,631,638,639,642,644,659,660,662,663,664,667,668,669,670,671,673,674,675,676,677,679,681
,682,683,684,686,687,688,689,697,698,699,701,703,704,705,709,710,712,713,714,715,716,717,718,721,722,723,724,725,726,727,728,729,730,731,735,736,739,740,741,742,745,746,747,748,750,753,754,756,757,758,759,760,761,762,764,765,766,768,769,770,771,773,774,775,776,777,779,780,781,782,783,784,786,788,789,790,802,803,804,805,806,807,808,809,810,811,812],likeli:[599,710],likewis:[710,769],lim:712,limit:[27,84,117,129,175,205,220,270,310,378,435,490,548,580,585,588,589,590,594,601,603,605,607,608,609,610,612,616,621,639,641,644,653,659,666,667,671,674,676,677,678,679,683,688,698,702,706,709,710,712,721,725,729,731,741,743,745,748,751,755,756,757,758,759,760,762,766,770,786,805,806,807,810],linaro:[697,700],line:[592,594,601,605,606,607,610,611,612,616,617,619,621,622,623,625,626,627,629,630,631,632,633,635,636,638,639,640,641,642,643,644,645,648,649,650,652,653,654,657,658,660,664,665,667,669,671,675,676,678,679,694,695,698,700,704,705,706,709,710,711,712,716,717,721,723,725,726,728,731,734,739,740,743,744,745,748,753,754,756,759,760,761,768,769,770,771,773,774,775,778,780,782,784,786,788,802,803,804,805,806,807,808,809,810,811,812],linear:[585,590,593,607,610,617,619,677,679,682,710,723,731,738,743,745,750,782],linearscan:[607,619,782],linebegin:770,lineend:770,lineno:810,linger:[609,764],link:[585,586,587,590,593,594,595,601,603,605,607,610,612,614,615,621,624,625,634,640,641,642,653,654,657,659,660,664,667,671,675,676,678,680,695,698,700,701,702,704,711,712,717,720,721,722,726,728,729,731,736,738,740,741,742,743,744,745,753,755,756,758,759,760,764,767,772,773,775,777,778,780,782,783,784,789,790,803,804,805,807,809],link_compon:744,linkabl:[585,590],linkag:[597,607,628,654,674,679,695,709,722,724,729,743,777,804,810],linkagenam:[705,710],linkagetyp:743,linkallcodegencompon:782,linker:[24,127,217,375,487,589,590,601,603,604,605,607,610,615,617,624,642,643,648,649,659,664,675,676,678,679,694,698,700,701,709,724,725,726,728,731,736,738,743,754,756,759,766,773,775,778,780,788
,807],linkinfo:739,linkonc:[597,679,710,724,743],linkonce_odr:[597,664,674,710,724],linkonceanylinkag:743,linkonceodrlinkag:743,linpack:[773,779],lint:748,linux:[590,594,605,607,616,631,646,660,665,668,669,675,679,694,695,697,698,699,700,701,710,712,720,726,746,760,762,773,775,778,783,804,805,809],lipo:[605,615,713],liquid:590,lisp:[676,811],list1:770,list2:770,list:[213,371,483,589,590,592,593,594,597,599,601,602,603,605,607,608,609,611,612,614,615,616,617,619,621,623,624,625,627,628,630,631,633,635,639,641,642,644,645,648,649,650,657,658,662,663,664,665,667,668,669,670,672,674,675,676,679,680,682,683,685,688,701,702,703,704,706,707,709,711,712,713,714,715,716,717,718,719,721,722,723,724,725,726,733,735,736,740,741,742,744,745,748,753,754,755,756,757,758,759,762,764,766,768,769,770,771,773,775,778,779,780,782,783,784,787,802,811],list_nam:606,list_of_list:606,listconcat:770,listen:709,listfil:635,listsplat:770,lit:[592,605,611,615,681,689,699,705,723,744,745,759,763,766,773,775,781],lit_arg:775,lit_config:616,lit_filt:616,lit_filter_out:616,lit_num_shard:616,lit_opt:616,lit_run_shard:616,lit_test_tim:616,lit_xfail:616,lit_xfail_not:616,litconfig:616,liter:[10,11,12,13,59,60,61,63,65,69,70,77,146,147,149,151,160,161,162,166,243,244,246,248,249,257,258,328,343,406,410,411,421,422,519,522,523,526,527,533,534,590,597,606,609,610,611,620,639,658,659,673,710,716,718,743,768,786,802,803,804,805,806,807,808,809,810],literatur:[607,714],lithium:602,litloc:810,litter:667,littl:[590,596,597,607,659,666,668,672,676,677,679,688,689,693,697,710,714,717,722,723,728,739,741,743,754,759,761,765,777,780,782,785,789,802,803,804,805,806,807,808,809,810,811,812],littleaarch64:641,littlearm:641,littleriscv:641,litvalu:597,live:[585,590,593,596,605,610,631,660,663,669,676,679,685,704,708,709,711,713,714,719,720,723,741,743,750,759,760,762,764,781,782,804,805,808],live_begin:676,live_end:676,live_iter:676,live_s:676,livecount:676,livedebugvalu:710,livedebugvari:759,livein:[631,716,759],li
veinterv:[607,619],liveintervalanalysi:607,liveli:631,liveoffset:676,liveonentri:721,liveout:[762,764],ljti0_0:760,lkern0_end:590,lkern1_end:590,llazyjitbuild:726,llc:[601,607,611,612,615,619,673,675,676,679,699,704,705,710,716,724,728,754,759,764,775,780,782,784,809],lld:[590,605,610,634,679,698,699,700,728,745,748,754],lldb:[605,610,615,667,679,681,719,723,726,743,745,748,759,766],llegacy_stdio_definit:698,llgo:745,lli:[612,615,617,665,679,704,728,782],llibnam:659,lljit:709,lljitbuild:[709,726],lljitexampl:726,lljitwithlazyreexport:726,lljitwithobjectlinkinglayerplugin:709,lllazyjitbuild:726,llld1:745,llld2:745,lllexer:[670,784],lllvm1:745,lllvm2:745,llnl:747,llocat:695,llparser:[670,784],llt:683,lltok:670,llvm1:745,llvm2:745,llvm:[585,591,592,604,611,612,613,614,618,656,659,660,665,673,674,680,684,685,687,688,689,690,692,693,695,698,700,706,708,715,716,717,720,722,725,726,727,728,729,730,731,735,736,738,739,742,747,748,753,755,756,760,761,769,770,771,773,774,776,777,786,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,807,809,810],llvm_:[603,605,606,782],llvm_abi_breaking_check:605,llvm_addr2line_opt:620,llvm_addrsig:650,llvm_all_target:[679,780],llvm_analysis_utils_local_h:610,llvm_ani:670,llvm_any_ti:769,llvm_append_vc_rev:605,llvm_attribute_deprec:727,llvm_attribute_weak:782,llvm_bb_addr_map:671,llvm_build_32_bit:605,llvm_build_benchmark:605,llvm_build_dir:679,llvm_build_doc:605,llvm_build_exampl:605,llvm_build_instrumented_coverag:605,llvm_build_llvm_dylib:[603,605,679],llvm_build_root:605,llvm_build_test:605,llvm_build_tool:605,llvm_ccache_build:605,llvm_ccache_dir:605,llvm_ccache_maxs:605,llvm_cmake_dir:605,llvm_code_coverage_target:605,llvm_compiler_job:605,llvm_coverage_source_dir:605,llvm_create_xcode_toolchain:605,llvm_debug:[601,657,659],llvm_definit:605,llvm_definitions_list:605,llvm_dependent_librari:671,llvm_dir:[605,722],llvm_distribut:603,llvm_distribution_compon:603,llvm_doxygen_qch_filenam:605,llvm_doxygen_qhelpgenerator_path:605,
llvm_doxygen_qhp_cust_filter_nam:605,llvm_doxygen_qhp_namespac:605,llvm_doxygen_svg:605,llvm_dylib_compon:[603,605,679],llvm_enable_abi_breaking_check:743,llvm_enable_assert:[605,743],llvm_enable_bind:605,llvm_enable_dia_sdk:605,llvm_enable_doxygen:[605,679],llvm_enable_doxygen_qt_help:605,llvm_enable_eh:605,llvm_enable_expensive_check:605,llvm_enable_ffi:605,llvm_enable_id:605,llvm_enable_libcxx:605,llvm_enable_libpfm:605,llvm_enable_lld:605,llvm_enable_lto:[603,605],llvm_enable_modul:605,llvm_enable_p:605,llvm_enable_pedant:605,llvm_enable_project:[603,605,679,681,775],llvm_enable_rtti:605,llvm_enable_runtim:[603,605,773],llvm_enable_sphinx:[605,679,761],llvm_enable_thread:605,llvm_enable_unwind_t:605,llvm_enable_warn:605,llvm_enable_werror:605,llvm_enable_z3_solv:605,llvm_enable_zlib:605,llvm_executionengine_orc_kaleidoscopejit_h:[788,789,790,791],llvm_experimental_targets_to_build:605,llvm_external_:605,llvm_external_project:605,llvm_externalize_debuginfo:605,llvm_force_use_old_toolchain:605,llvm_gc_root_chain:676,llvm_gcc_dir:774,llvm_include_benchmark:605,llvm_include_dir:605,llvm_include_exampl:605,llvm_include_test:605,llvm_include_tool:605,llvm_install_binutils_symlink:605,llvm_install_cctools_symlink:605,llvm_install_doxygen_html_dir:605,llvm_install_ocamldoc_html_dir:605,llvm_install_sphinx_html_dir:605,llvm_install_toolchain_onli:603,llvm_install_util:605,llvm_integrated_crt_alloc:605,llvm_jitlink:709,llvm_lib:605,llvm_libdir_suffix:605,llvm_library_vis:676,llvm_link_llvm_dylib:[603,605],llvm_linker_opt:671,llvm_lit_arg:605,llvm_lit_tools_dir:605,llvm_map_components_to_libnam:605,llvm_map_components_to_librari:605,llvm_obj_root:[744,774,775],llvm_on_unix:765,llvm_on_xyz:765,llvm_optimized_tablegen:[605,679],llvm_package_vers:605,llvm_parallel_:605,llvm_parallel_compile_job:605,llvm_parallel_link_job:605,llvm_profdata_fil:[592,605],llvm_project:726,llvm_reverse_iter:605,llvm_revis:605,llvm_runtime_distribution_compon:603,llvm_site_config:775,llvm_source_p
refix:605,llvm_src_dir:697,llvm_src_root:[744,774],llvm_stackmap:762,llvm_static_link_cxx_stdlib:605,llvm_symbolizer_opt:[620,654],llvm_sympart:671,llvm_tablegen:605,llvm_target_arch:605,llvm_target_definit:780,llvm_targets_to_build:[603,605,668,679,681],llvm_temporarily_allow_old_toolchain:605,llvm_tools_binary_dir:605,llvm_transforms_hellonew_helloworld_h:781,llvm_tripl:631,llvm_ubsan_flag:605,llvm_unreach:[610,709,722,743,791],llvm_use_crt_:605,llvm_use_intel_jitev:605,llvm_use_link:605,llvm_use_newpm:605,llvm_use_oprofil:605,llvm_use_perf:605,llvm_use_relative_paths_in_debug_info:605,llvm_use_relative_paths_in_fil:605,llvm_use_sanit:605,llvm_yaml_is_document_list_vector:786,llvm_yaml_is_flow_sequence_vector:786,llvm_yaml_is_sequence_vector:786,llvm_yaml_strong_typedef:786,llvmanalysi:744,llvmasmpars:744,llvmbb:678,llvmbc:597,llvmbitcod:670,llvmbitread:744,llvmcfiverifi:604,llvmconfig:605,llvmcontext:[710,726,728,754,784,788,789,790,791,804,805,806,807,808,809,810],llvmcontextimpl:784,llvmcore:[702,744,753],llvmcreatedisasm:718,llvmcreatesimplemcjitmemorymanag:762,llvmdevmeet:692,llvmdisasminstruct:718,llvmdisassembler_option_usemarkup:718,llvmdummi:780,llvmdummyasmprint:780,llvmdummycodegen:780,llvmenableassert:723,llvmenableproject:723,llvmfuzzeriniti:712,llvmfuzzerrundriv:712,llvmfuzzertestoneinput:712,llvmgccdir:774,llvmgetpassplugininfo:782,llvmgettypekind:670,llvmgold:694,llvmgrep:679,llvmhello:782,llvminitializesparcasmprint:780,llvminitializesparctargetinfo:780,llvmir:658,llvmlab:745,llvmlib:744,llvmlibthin:634,llvmmemorymanagerallocatedatasectioncallback:762,llvmname:769,llvmorg:[679,702,719,745],llvmpassnam:605,llvmpush:745,llvmremarkentrydispos:754,llvmremarkentryref:754,llvmremarkparsercreateyaml:754,llvmremarkparserdispos:754,llvmremarkparsergetnext:754,llvmremarkparserhaserror:754,llvmremarkparserref:754,llvmrock:610,llvmscalaropt:744,llvmsetdisasmopt:718,llvmsupport:[679,744],llvmtarget:744,llvmtargetmachin:780,llvmtop:782,llvmtransformutil:744,llv
mtype:769,llvmtypekind:670,llvmtyperef:670,llvmweekli:719,lma:[642,784],lmalloc:659,lmerg:745,lmsvcrt:698,lmyproj1:745,lnt:[701,745,753],lo12:671,lo16:607,load:[112,113,114,201,202,306,307,308,469,470,576,577,578,585,588,593,594,595,596,597,601,605,610,611,612,616,617,619,621,628,641,643,644,645,649,657,659,663,665,667,669,671,673,676,677,683,684,688,689,691,695,704,705,709,711,712,713,714,716,718,721,722,724,726,727,733,741,743,745,749,750,751,756,759,762,763,764,775,777,780,783,784,808,809,810],loadabl:[641,676],loadable_modul:676,loaded1:759,loaded2:759,loader:[590,665,679,716,760,805],loadfromdisk:709,loadinst:610,loadlal:710,loadlibraryperman:790,loadmodul:726,loadmoduleoncontext:726,loadobject:715,loadregfromaddr:780,loadregfromstackslot:[607,780],loadsdnod:743,loc0:762,loc1:762,loc:[630,671,721,759,764,780,810],local:[585,590,593,597,598,607,610,611,617,619,625,628,630,635,638,640,641,642,653,659,660,663,667,669,672,676,685,697,699,700,705,709,711,715,716,720,722,724,725,729,741,742,743,748,750,756,759,760,761,763,764,766,768,770,775,778,782,786,804,805,812],local_buff:760,local_release_x:745,local_unnamed_addr:[597,671,710],localaddress:710,localdynam:[597,710],localescap:669,localexec:[597,710],localrecov:669,localrepo:745,locat:[19,66,84,157,175,254,270,315,320,325,337,369,418,435,530,548,588,590,593,594,603,605,607,610,614,615,616,620,621,639,642,644,659,660,664,665,669,671,673,674,676,677,678,681,685,701,704,710,711,712,715,718,724,729,736,741,743,744,751,753,754,755,756,761,762,764,765,769,775,779,780,786,807,808],lock:[594,605,674,710,726,743,745,756,782],lockstep:745,loclist:[585,630],loclistproc:585,loclistsptr:585,loclistsptrproc:585,locstat:615,lod:588,lodg:726,log10:779,log2:779,log:[591,593,601,611,616,667,679,682,705,709,712,723,726,738,741,742,743,745,760,774,779,783,784,785,789],log_arg1:783,log_detail:726,log_fast:726,logarithm:[597,710,743],logcount:731,logerror:[803,804,805,806,807,808,809,810],logerrorp:[803,804,805,806,807,808,809,810],lo
gerrorv:[804,805,806,807,808,809,810],logfil:783,logger:[591,755],logic:[589,590,607,610,611,616,639,663,664,667,677,686,703,710,714,722,731,741,743,750,760,770,788,803,806,807,808],logist:757,longer:[585,590,594,600,603,607,609,645,660,672,674,676,684,685,699,706,710,712,717,725,726,741,743,744,745,748,757,766,782,789,805,808],longjmp:[710,811],look:[591,593,594,597,603,605,606,607,609,610,611,616,630,639,644,652,654,659,660,662,663,664,665,667,669,670,671,676,677,679,682,683,684,688,694,702,703,709,710,711,713,714,716,717,719,721,722,723,724,725,726,727,728,731,735,736,739,741,742,743,744,745,746,748,754,758,759,760,761,762,764,768,769,770,775,777,778,780,781,782,783,784,785,786,788,789,790,803,804,805,806,807,808,809,810],lookup:[610,630,654,679,709,710,722,726,729,738,739,743,768,770,780,785,788,789,790,791,804,805,806,807,808,811],lookupatablebyvalu:[768,769],lookupcentri:768,lookupcentrybyencod:768,lookupcentrybynam:768,lookuptarget:809,loooong:745,loop:[590,593,594,607,611,631,639,660,663,666,674,676,678,679,688,705,711,712,721,722,725,728,743,750,764,770,773,775,778,780,783,802,803,804,805,807,808,809,810,811,812],loop_header_weight:710,loopanalysi:666,loopbb:[806,807,808,809,810],loopcond:[806,807,808,809,810],loopdistribut:776,loopend:[806,807,808,809,810],loopendbb:[806,807,808,809,810],loopfoopass:725,loopfullunrol:776,loophead:[710,806,807],loopid:776,loopinfo:[748,782],loopinfobas:743,loopinfowrapperpass:782,loopinterchang:776,looppass:[714,728],looppassmanag:714,looprerol:773,looprot:714,looprotatepass:725,loopsimplifi:714,loopstandardanalysisresult:725,loopunrol:776,loopunrollandjam:776,loopunswitch:[674,741],looputil:782,loopvector:[750,776],loopvectorizationplann:750,loos:[683,771],lore:592,lose:[693,745,759,760,768,782,811],loss:[589,676,705,743,745,760],losslessli:722,lost:[589,669,672,705,710,759,760,776,811],lot:[592,601,602,605,606,607,610,659,667,670,672,676,677,679,683,689,697,703,706,709,710,716,723,727,740,741,743,745,759,760,771,774,780,7
82,783,786,790,803,804,805,807,808,810,811],love:743,low:[30,120,223,288,306,308,380,479,493,584,585,588,589,590,595,597,607,623,639,644,648,649,659,663,670,676,677,678,679,688,691,695,709,710,720,722,724,725,735,743,750,760,764,765,769,770,780,785,803,807,808],lower16:671,lower:[585,588,590,593,594,596,607,610,639,640,658,660,669,670,672,674,679,684,685,688,690,708,716,720,725,726,727,738,748,749,756,760,762,766,768,779,780,803,807,808,811],lower_bound:[743,768],lower_cas:748,loweralloc:782,lowerbound:710,lowercal:784,lowercas:[603,780,805],lowercasemod:769,lowerfor:688,lowerfp_to_sint:780,lowerif:688,loweroper:780,lowertypetest:777,lowest:[585,590,594,596,664,684,710,803,804,805,806,807,808,809,810],lpad:[663,669,710],lpae:594,lpc0:716,lpc:585,lpechacek:595,lpm:782,lqueue:639,lround:684,lrt:660,lsampl:744,lsb:[145,607,684,785],lsb_extract:710,lsb_one:684,lsb_two:684,lsda:711,lse:594,lsl:671,lsunit:639,ltmp1:[716,764],ltmp:671,lto2:710,lto:[597,603,663,711,713,728,754,776,777],lto_codegen_add_modul:713,lto_codegen_add_must_preserve_symbol:713,lto_codegen_compil:713,lto_codegen_cr:713,lto_codegen_set_debug_model:713,lto_codegen_set_pic_model:713,lto_module_cr:713,lto_module_create_from_memori:713,lto_module_dispos:713,lto_module_get_macho_cputyp:713,lto_module_get_num_symbol:713,lto_module_get_symbol_attribut:713,lto_module_get_symbol_nam:713,lto_module_is_object_fil:713,lto_module_is_object_file_for_target:713,lto_module_is_object_file_in_memori:713,lto_module_is_object_file_in_memory_for_target:713,ltopostlink:710,lua:676,luck:723,luckili:676,lud:747,lump:769,lvalu:743,lvaluerefer:730,lvaluerefthispoint:730,lvl:[748,784],lvm:810,lwe:[0,2,3,4,9],lying:711,m35qpb:784,m3writenmisc1:780,m64:668,m7170:590,m_cache_count_max:756,m_cache_size_max:756,m_decay_tim:756,m_func:743,m_memtag_tun:756,m_op_sel:[0,5,6,7,8,9,19,315,320,325,337,369],m_op_sel_hi:[0,5,6,7,8,9,19,315,320,325,337,369,586],m_purg:756,m_thread_disable_mem_init:756,m_tsds_count_max:756,mac:610,mach:[597,61
5,626,633,637,650,661,710],mach_universal_binari:654,machin:[585,590,596,604,605,611,615,616,617,621,625,630,634,654,658,660,665,667,668,676,679,681,682,686,688,693,696,697,700,701,705,709,710,711,712,715,722,724,726,731,743,745,748,751,759,768,770,773,778,779,782,785,805,810,811,814],machine_model:590,machinebasicblock:[716,743,780],machineblockfrequencyinfo:598,machinecodeemitt:780,machineconstantpool:[607,716],machineconstantpoolvalu:716,machineframeinfo:607,machinefunct:[685,686,687,691,716,743,748,759],machinefunctioninfo:[607,716],machinefunctionpass:[728,780,784],machineinstr:[673,683,685,686,687,748,768,775,780],machineinstrbuild:683,machineir:683,machineirbuild:683,machinememoperand:[594,684,688],machinemoduleinfo:716,machineoperand:[607,684,748,780],machinepassregistri:782,machinepassregistrynod:782,machineregisterinfo:[607,683,693,748],machineri:[698,703,774],macho2:650,macho:[597,607,617,641,642,649,650,709,715,726,754,762],macholinkgraphbuild:709,machoobjectfil:743,machoplatformplugin:709,macinfo:710,maco:[605,643,660,665,679,712,728,743,753,773,775,782],macosx10:710,macosx_deployment_target:753,macport:740,macro:[605,610,616,625,630,657,658,659,660,664,669,698,710,712,768,770,771,780,782,805],macronam:[658,770],made:[585,590,593,596,597,600,604,607,610,660,666,667,669,672,673,674,677,678,695,699,702,703,709,710,716,717,721,722,723,725,726,730,741,743,744,745,748,752,754,756,757,758,759,760,764,766,768,771,777,782,786,805,806,808,811],mag:710,magic:[594,679,710,712,735,743,754,759,807],magnif:[802,807],magnitud:[605,684,710],mai:[0,1,2,3,4,5,6,7,8,9,18,20,24,25,26,27,36,37,38,84,85,93,96,97,98,99,100,101,113,114,120,122,123,127,128,129,175,176,186,187,188,189,190,191,202,209,211,212,213,217,218,219,220,270,271,281,282,283,286,287,288,289,307,308,314,316,321,326,327,336,338,339,352,354,368,370,371,375,376,377,378,386,387,388,435,436,444,447,448,449,450,451,452,469,470,479,481,482,483,487,488,489,490,499,500,501,548,549,557,560,561,562,563,564,565,577,578
,584,585,586,587,588,589,590,591,594,596,597,599,600,601,603,604,605,606,607,608,609,610,611,612,614,616,617,619,621,625,630,631,632,639,640,641,642,645,648,649,653,659,660,662,663,664,666,667,669,670,671,672,674,675,676,677,679,681,683,684,685,688,689,694,695,696,697,699,700,702,703,704,705,706,708,709,710,711,712,713,714,715,716,719,720,721,723,724,725,726,727,728,729,731,735,736,738,739,741,742,743,744,745,746,748,750,752,755,756,757,759,760,762,763,764,765,766,769,770,771,773,774,775,776,777,779,780,781,782,783,784,785,786,789,790,791,804,805,806,807,808,809,810,811],mail:[608,609,662,667,675,679,696,702,711,712,717,719,723,742,744,752,753,755,757,764,766,783,784,787,811],mailer:667,main:[585,596,598,600,602,603,605,607,610,625,631,639,641,648,649,652,654,659,663,664,665,666,667,669,675,678,679,680,681,682,683,687,689,694,695,696,700,702,708,709,710,712,713,714,717,721,723,724,726,741,742,743,745,748,752,754,759,761,766,769,770,771,775,777,778,780,782,783,784,788,789,790,791,803,804,805,806,807,808,809,810,811],mainjd:[726,788,789,790,791],mainli:[602,639,660,666,683,710,714,722,744,768],mainlin:[665,667,702,745],mainloop:[803,804,805,806,807,808,809,810],mainsym:[709,726],mainsymbolnam:726,maintain:[585,590,593,594,597,600,602,605,606,607,609,610,611,621,659,660,662,663,667,668,670,676,688,702,710,722,723,726,733,735,743,745,748,752,756,757,759,762,765,766,769,770,771,782,785],mainten:[610,667,707,713,766],maj:753,major:[585,593,607,609,610,611,664,669,672,674,679,689,690,693,695,702,709,714,722,726,727,728,729,731,740,741,743,745,746,750,752,757,759,766,771,775,780,808],major_vers:590,majorvers:731,make:[585,588,590,591,592,593,594,596,597,600,601,602,604,605,606,607,608,611,612,614,616,617,620,621,624,625,631,638,639,641,658,659,660,662,663,664,665,668,669,674,675,676,677,678,679,680,681,682,684,687,688,689,694,695,696,697,698,699,700,701,702,703,704,705,708,709,710,711,712,713,714,717,719,721,722,724,725,726,727,728,731,738,740,741,742,744,745,746,748,750,75
1,753,755,756,757,759,761,762,763,764,765,766,768,769,770,771,773,774,775,776,779,780,781,782,784,785,786,788,789,790,802,803,804,805,806,807,808,809,810,811,812],make_error:743,make_error_cod:743,make_fallible_end:743,make_fallible_itr:743,make_pair:[722,808,809,810],make_rang:743,make_shar:790,make_uniqu:[709,726,788,789,790,791,803,804,805,806,807,808,809,810],makearrayref:768,makeextern:709,makefil:[605,607,624,676,678,679,704,713,775,803,804],makelight:610,makestruct:660,maketir:610,makeup:743,makevehicl:610,malform:[601,660,712,738,741,743],malici:[757,760],malloc:[663,676,695,712,720,743,751,756,782,811],malloc_limit_mb:712,mallocbench:774,mallopt:756,malyutin:748,malyutindistinguish:748,mamproxi:725,man:[605,659,751,775],manag:[585,590,601,605,608,611,612,639,660,662,672,673,675,676,677,679,681,695,696,702,710,711,714,715,724,728,743,752,753,757,762,764,766,776,778,780,781,782,788,789,790,791,805,806,807,808,811],mandat:[607,710,745],mandatori:[588,611,674,681,683,688,709,710,743,754,780],mandel:807,mandelbrot:[802,807],mandelconverg:807,mandelhelp:807,mangl:[611,615,626,642,654,667,710,726,729,754,759,788,789,790,791],mangleandintern:[726,788,789,790,791],manglednamestream:790,mangler:[780,788],mani:[585,592,593,594,595,596,597,598,599,601,603,606,607,609,610,611,628,631,639,641,653,659,660,664,669,670,672,673,675,676,677,679,681,683,684,685,687,688,689,700,701,703,704,705,708,709,710,712,714,721,723,724,725,726,727,731,735,741,742,743,745,747,748,753,757,759,760,762,764,765,766,769,770,774,775,779,780,782,783,785,786,788,803,804,805,806,807,808,811],manifest:[710,727],manifesto:667,manipul:[585,590,607,610,615,621,641,644,672,706,709,720,743,750,756,766,777,808],manner:[585,588,590,594,607,608,611,621,664,667,669,677,710,716,721,726,728,743,746,760,764,766,770,780],manpag:606,manual:[590,601,604,605,607,610,657,662,667,674,676,677,679,680,682,685,696,702,703,709,723,725,726,739,742,745,751,759,760,764,769,771,773,775,778,779,780,782,783,788],map:[585,588,5
93,597,610,614,616,625,627,639,642,644,648,649,659,663,670,683,684,686,690,691,695,709,711,715,721,722,724,726,729,734,738,739,745,751,756,759,760,768,769,770,777,778,779,783,784,785,803,804,805,806,807,808,809,810,814],mapopt:786,mappingnorm:786,mappingnormalizationheap:786,mappingtrait:786,maprequir:786,mapsectionaddress:715,maptag:786,mapvector:610,march:[611,617,619,639,700,701,704,720,750,759,760,809],margin:760,mark:[585,590,592,594,597,604,607,610,616,631,639,641,645,660,663,664,669,671,673,674,676,679,682,698,702,709,710,717,718,722,723,725,730,741,743,760,762,764,766,770,771,775,777,780,783,785],markasignor:610,markdown:761,markdownquickstarttempl:717,marker:[607,610,616,659,667,743,764,785],markup:[590,610,712,717,761],marshal:[663,762],mask:[30,213,223,371,380,483,493,585,588,589,590,594,607,610,611,674,683,684,706,750,760,768,780,785,786],maskedbitset:786,maskedbitsetcas:786,maskedir:749,maski64:710,maski:710,maskipopcnt:710,masm:[731,738],mass:[667,727],massag:699,massiv:[628,678,712,768],mast:757,master:[605,667,678,696,739,744,745,747,770],match:[585,588,589,590,591,594,597,599,601,603,605,606,610,612,615,616,621,625,627,628,630,632,639,641,644,645,653,658,659,660,663,664,669,670,675,676,677,679,683,684,688,703,709,710,712,713,716,720,723,725,729,731,739,741,743,745,749,754,756,759,768,770,775,780,782,784,785,786,802,804,805],matcher:[607,611,658,712,749,768,769],matchinstructionimpl:607,materi:[608,683,685,722,726,750,759,791],materializationrespons:[709,726,789,790,791],materializationunit:[726,789,791],math:[606,610,617,619,659,674,724,759,778,779,802,805,807],mathemat:[672,710,722,724,803],matplotlib:638,matrix:[588,676],matt:748,mattdr:757,matter:[594,641,653,667,670,687,710,757,762,782],matthew:[745,757],matthewsrevnum:745,mattr:[611,617,619,642,780,809],matur:[659,674,676,757,760],max:[590,597,611,616,631,639,645,659,660,664,710,712,752,760,773,784],max_dx10:590,max_flat_workgroup_s:590,max_fp16:590,max_int_bit:743,max_len:[675,712],max_total_t
im:712,max_vgpr:590,maxatomicsizeinbitssupport:594,maxflatworkgroups:590,maxim:[597,603,619,666,709,710,712,714,726],maximum:[590,593,594,598,605,607,614,616,623,630,631,639,659,684,688,695,712,714,722,743,756],maximum_operations_per_instruct:590,maxscalar:688,maxsimultaneousalloc:695,maxsiz:610,may_return_nul:756,may_throw:669,mayalia:[593,710],mayb:[639,670,682,712,760,783,811],maybe_instru:783,maybeoverridden:722,maybevalu:659,mayfail2:743,mayfail:743,mayload:[639,770,771],maynot:660,mayraisefpexcept:770,mayreadfrommemori:594,maystor:[639,770,771],maytrap:710,maywritetomemori:[594,743],mbb:[607,780],mbbi:607,mca:615,mcasmpars:607,mcasmstream:607,mccfiinstruct:716,mccodeemitt:[607,768],mcdesc:780,mcdisassembl:607,mce:780,mcexpr:607,mcinn:757,mcinst:[639,768],mcinstlow:607,mcinstrdesc:683,mcinstrinfo:639,mcjit:[674,709,726,778,788],mclabel:607,mcmodel:671,mcobjectstream:607,mcode:590,mcoperand:607,mcpu:[590,617,619,631,639,642,643,697,701,724,775,780],mcregaliasiter:607,mcsectioncoff:607,mcsectionelf:607,mcsectionmacho:607,mcstreamer:676,mcsubtargetinfo:639,mcsymbol:676,mctargetdesc:768,mctargetstream:607,mcumod:590,md5:[585,625,645,664],md_prof:599,mdnode:[599,710,776],mdstring:[599,710],mdtupl:710,meabi:617,mean:[20,27,120,123,129,209,212,213,220,288,306,308,314,370,371,378,479,482,483,490,584,585,586,588,589,590,593,595,596,597,599,600,603,605,606,607,608,609,610,611,616,621,631,639,640,641,645,653,659,663,667,669,671,674,676,677,679,683,684,688,689,694,701,703,708,710,712,713,714,716,720,721,722,723,725,726,728,731,734,735,736,738,741,742,743,744,745,746,748,749,755,756,759,760,764,765,766,768,770,771,773,776,777,780,781,782,786,789,802,804,805,806,808,810,811],meaning:[585,590,602,610,619,641,645,660,676,677,710,717,743,746,761,766,768,785],meaningless:743,meant:[610,657,664,678,679,681,699,705,710,717,721,743,753,755,756,761,766],meantim:752,measur:[595,603,605,631,639,714,743,760,773,785],mechan:[590,597,604,607,616,667,669,674,676,694,695,698,710,711,715,72
3,724,725,726,743,746,755,759,760,764,765,770,774,777,779,782],med:784,median:760,mediat:[593,667,755],medium:[619,710],meet:[585,590,607,608,609,667,678,679,688,720,722,750,755,757,766,782],meetup:787,megabyt:612,mehdi:748,meltdown:760,mem2ref:764,mem2reg:[674,676,743,759,808],mem:[663,718,741,776],mem_address:607,mem_not_nul:663,mem_ord:590,memalign:756,memarg:708,member:[585,590,606,607,608,609,610,621,634,639,641,642,644,650,653,660,667,671,698,703,710,713,715,726,729,738,741,748,766,769,773,777,779,780,785,786,788,789,790],memberlist:780,membership:[669,743],memcheck:[616,775],memcpi:[593,594,684,760,764],memcpyopt:594,memdep:593,memfenc:590,memmov:[684,764],memop:645,memor:610,memori:[20,33,34,36,37,38,55,56,66,67,80,93,96,97,100,101,104,105,106,107,115,120,132,133,145,157,158,173,186,187,189,190,191,194,195,196,203,204,209,224,225,241,242,254,255,268,281,282,283,286,287,294,295,296,297,298,299,300,301,309,314,349,354,370,383,384,386,387,388,404,405,418,419,431,444,447,448,451,452,455,456,457,471,472,479,482,496,497,499,500,501,517,518,530,531,544,557,560,561,564,565,568,569,570,571,579,584,588,589,594,596,605,607,612,614,616,628,631,641,642,645,660,663,665,666,669,670,672,673,676,677,679,683,695,697,705,708,711,712,713,715,716,718,720,721,724,726,727,728,751,754,758,759,760,762,763,764,768,777,779,780,782,783,785,786,788,789,790,791,803,804,805,806,807,809,810,811],memory_manag:616,memory_order_acq_rel:[594,710],memory_order_acquir:[594,710],memory_order_consum:594,memory_order_relax:[594,710],memory_order_releas:[594,710],memory_order_seq_cst:[594,710],memory_uri:590,memoryaccess:721,memorybuff:[610,726],memorydef:721,memorydependenceanalysi:721,memorydependencyanalysi:594,memoryloc:721,memoryphi:721,memorysanit:[710,712],memoryssa:[593,678,778],memoryssawalk:721,memoryus:721,memorywithorigin:605,memri:[607,780],memrr:780,memset:[594,684,741],memtag:720,memtagsanit:[710,751],mental:[603,608,743],mention:[585,590,596,597,605,607,610,611,659,666,667,679,684,69
9,703,710,714,719,722,725,727,729,734,743,745,755,757,758,759,764],mentorship:609,mere:[677,681,710,741,742,760,764,804],merg:[592,593,605,607,611,625,641,649,660,664,667,671,676,679,682,691,699,710,713,714,721,743,753,754,759,760,766,770,773,778,779,803,804,805,806,807,808,809,810],merge_control_fil:712,mergeabl:741,mergebb:[806,807,808,809,810],mergedloadstoremot:705,mergefunct:778,merger:722,merit:667,mesa3d:590,mesa:590,mesh:590,mesh_scratch_memory_s:590,mess:781,messag:[27,120,129,209,220,314,378,479,490,584,590,601,605,606,609,611,616,621,631,636,639,659,669,678,679,700,704,711,712,723,725,728,742,743,745,756,757,768,770,771,775,782,786,809],messi:607,met:[585,607,639,676,710,722],meta:[607,642,676,702,710,716,743,754,759,762,768,771],metadata:[597,598,602,607,617,667,671,674,676,677,685,695,705,709,711,716,722,727,741,742,751,775,802,810],metadatarecord:785,metal:[700,710],metarenam:601,metasyntact:601,metatada:599,method:[585,601,603,604,606,607,611,639,659,666,667,670,676,677,679,686,691,695,698,700,702,703,704,706,708,709,710,711,715,722,725,726,727,742,757,759,760,762,764,768,770,774,780,781,783,786,788,789,790,803,804,805,806,808],methodbodi:780,methodolog:669,methodproto:780,metric:[598,630,689,712,760,773,782,784],meyer:610,mfc:731,mfctypeserverindex:731,mfenc:594,mfloat:[700,701],mflop:779,mfpu:700,mft:739,mgrid331:747,mi100:590,mi25:590,mi50:590,mi60:590,mi6:590,mi8:590,mib:671,michigan:666,micro:[585,590,610,639,747,760],microbenchmark:[760,773],microcod:590,microcontrol:661,microopbuffers:639,microphon:719,microprocessor:[607,710,780],microsecond:785,microsoft:[607,610,634,644,659,661,671,680,696,708,710,731,735,738,739,751,757,759],mid:[674,708,710,759],middl:[606,610,667,676,708,710,725,735,741,764,766],midli:605,might:[585,593,594,595,599,600,601,605,607,608,609,610,611,621,627,631,639,642,659,667,668,672,674,675,676,679,681,686,698,701,702,703,710,711,712,714,717,718,722,723,726,735,736,738,742,743,744,745,747,756,757,759,760,761,764,765,766,76
8,769,776,777,779,780,786,806,809,811],migrat:[590,593,611,667,676],mileag:719,mileston:722,mill:659,million:[710,743],millisecond:784,mimalloc:605,mimic:[586,610,648,649,726,745],min:[120,209,314,479,584,590,597,611,631,644,645,649,660,710,712,753,760,773,784],min_dx10:590,min_int_bit:743,mind:[609,610,667,725,743,746,756,803,808],mingw32:[607,696,775],mingw32msvc:607,mingw64:679,mingw:[605,607,681,698],mini:719,minim:[590,592,594,602,603,607,610,614,619,621,631,664,667,677,683,689,710,712,713,726,743,744,752,759,760,762,769,777,782,785,803,807],minimaldebuginfo:736,minimis:766,minimize_crash:712,minimum:[590,605,611,612,652,667,670,679,684,702,704,709,723,746,754,756,757,759,760,762,766,773,780,784,790,803],minimum_instruction_length:590,minor:[585,607,609,610,667,722,723,731,740,743,811],minor_process:760,minor_vers:590,minorvers:731,minscalar:688,minscalarsamea:688,minsiz:[597,710],minsizerel:[603,605,679],minu:[585,589,590,599,710,758],minut:[678,679,682,721,723],mioperandinfo:780,mip:[594,607,674,679,683,688,701,710,748,780,783],mips16:594,mips32:594,mips64:783,mips64el:783,mipsel:783,mir:[590,683,686,689,690,693,710,725,751],mirdebugifi:705,mirror:[588,675,676,806,807],mirstripdebugifi:705,mis:[590,612,704,760],misalign:756,misattribut:705,misbehav:753,miscellan:[710,727],miscommun:667,miscompil:[600,612,667,679,710,747],misinterpret:[588,601],mislead:[610,705,759],mismatch:[590,596,610,645,699,700,710,718,756,804,805,806,807,808,809,810],misnam:607,misoptim:599,mispredict:760,misrepres:[705,759],miss:[594,601,610,611,639,674,694,700,701,705,709,710,726,743,747,748,754,759,763,764,774,776,779,803,807,808],mission:667,misspecul:760,mistak:[608,610,674],mistaken:805,misunderstood:[672,751],misus:775,mit:[667,760],mitig:[639,676,685,710,720,745,748,751,756,757,766],mitr:757,mix:[585,594,606,607,610,611,677,683,694,710,713,726,743,746,750,759,760,770,781],mixtur:[585,654,689,760,770],mjansson:605,mkdir:[605,679,694,712,723,745,761,773,775,784],mlimit:612,mlir:[61
5,678,682,768],mllvm:[639,728,779],mm0:[607,770,771],mm1:[770,771],mm2:[770,771],mm3:[770,771],mm4:[770,771],mm5:[770,771],mm6:[770,771],mm7:[770,771],mmap:646,mmi:716,mmio:710,mmx:[607,710,780],mnemon:[642,780,783],mnemonicalia:607,mno:590,mo1:780,mo_cimmedi:716,mo_registermask:607,mobil:590,mock:[709,743,775],mod:[593,644,710,713,770,780],mode:[20,31,82,123,212,213,341,351,370,371,381,433,482,483,494,546,588,589,595,597,599,605,611,616,621,624,625,631,636,641,644,645,653,659,660,667,675,676,681,689,697,700,710,720,723,730,738,743,753,756,760,770,771,773,775,778,780,784,785,808,809],model:[585,594,597,600,606,607,616,619,631,659,663,667,668,669,672,676,677,678,683,698,704,707,708,709,713,726,743,749,750,751,756,757,759,768,776,779,780,786,803,804,808,809],moder:[610,667,742,743,755],modern:[606,610,660,661,665,672,697,719,726,729,730,731,734,736,743,749,760,779,811],modest:[585,610],modfilecount:731,modfilecountarrai:731,modi:[644,734],modif:[590,601,604,621,639,641,645,653,659,668,698,709,710,722,760,780,810],modifi:[0,1,2,3,4,5,6,7,8,9,19,25,26,85,117,128,176,205,218,219,271,288,306,308,310,315,316,320,321,325,326,327,337,338,339,352,369,376,377,436,488,489,549,580,585,591,593,594,601,605,606,607,610,616,639,641,653,662,663,669,670,679,681,682,686,689,703,705,707,709,712,716,718,721,723,724,725,727,728,730,735,738,741,742,743,745,747,750,759,760,764,766,768,769,770,773,774,780,781,782,784,788,805,808],modifier0:[587,590],modifier1:[587,590],modifypassconfig:709,modindic:731,modinfo:731,modinfos:731,modistream:734,modr:607,modref:[593,594,770],modrefbit:770,modrefresult:593,modrefv:770,modul:[593,597,601,605,607,610,614,615,616,623,628,639,657,672,674,676,679,702,704,705,711,712,713,715,722,723,724,725,726,728,739,747,759,762,763,764,768,773,774,775,777,780,781,784,788,790,791,804,805,806,807,808,810],modular:[593,602,657,693,726,746,760,782],moduleanalysismanag:725,moduleanalysismanagercgsccproxi:725,modulebitcodewrit:670,modulehandl:790,modulehandlet:790,modulei
d:[775,804],moduleindex:731,modulelevelpass:782,modulenam:[654,731],modulepass:[593,728],modulepassmanag:725,modulepath:726,modulesummaryindex:710,modulesymstream:731,modulo:[585,607,710,759],moll:721,moment:[605,639,663,676,710,716,757,764,790],mondai:678,monitor:[593,710,745,773],mono:667,monolith:[667,745,746,758],monorepo:[723,742,782],monospac:[717,761],monoton:[590,710,745],month:[667,678,702,712,757],monthli:[678,757],more:[84,201,585,586,587,588,589,590,593,594,597,599,600,601,602,603,605,606,607,608,609,611,612,616,617,621,623,625,630,631,638,639,644,645,646,648,649,652,657,659,660,662,663,664,667,668,669,670,672,673,674,675,676,677,679,681,683,685,687,688,689,691,693,695,696,697,698,699,700,701,703,704,705,706,708,709,710,711,713,716,717,718,719,721,722,723,724,725,726,727,731,735,738,739,740,741,742,744,745,746,747,748,750,752,753,754,755,756,757,758,759,760,761,762,764,765,766,768,769,770,771,773,775,776,777,778,779,780,781,782,784,785,786,787,788,789,790,802,803,804,805,806,807,808,810,811],moreelementsfor:688,moreelementsif:688,moreelementstonextmultipl:688,moreelementstonextpow2:688,moreov:[589,607,609,611,710,741,746],most:[585,587,590,591,592,593,594,596,600,602,603,605,606,607,609,610,611,613,616,618,623,624,633,635,639,641,644,650,653,655,656,658,659,660,662,663,665,666,667,669,670,672,674,675,676,677,678,679,683,686,688,691,695,697,699,700,701,702,703,704,706,709,710,712,714,715,716,717,718,719,720,721,722,724,726,727,731,738,740,742,743,744,745,746,747,748,751,753,756,757,759,760,761,762,764,765,768,769,770,771,773,774,776,779,780,781,782,784,785,789,803,804,805,806,807,808,810,811],mostli:[594,607,639,659,660,679,682,691,711,714,721,722,725,727,741,756,759,764,773,780,785,806,810,811],motion:[593,679,710,711,714,721,782],motiv:[707,709,710,719,743,789,806,808],motohiro:607,mount:[595,668,679],mous:784,mov32mr:716,mov32r0:759,mov32ri:[607,716],mov32rm:[716,759],mov64ri:[716,780],mov64rm:716,mov64rr:716,mov:[594,607,611,671,710,724],movabsq:[758,
762],movapd:611,move:[585,590,593,594,595,600,605,607,610,611,621,639,663,667,675,676,678,679,691,702,705,709,710,714,725,726,741,743,750,759,760,762,764,771,782,788,789,790,791,792,793,794,795,796,797,798,799,800,801,803,804,805,806,807,808,809,810,811],movhpd:611,movl:[611,710,760,762,764],movnt:710,movq:[709,760,762,764],movsd:611,movslq:762,movsq:607,movsx16rm8w:607,movsx16rr8w:607,movsx32rm16:607,movsx32rr16:607,movsx32rr8:607,movsx64rr16:607,movsx64rr32:[607,631,759],movsx64rr8:607,movsx:607,movt:671,movw:[671,710],movz:[596,710],mozilla:748,mpm:725,mppassmanag:784,mri:[607,683,748],mrm0m:[770,780],mrm0r:[770,780],mrm1m:780,mrm1r:780,mrm2m:[770,780],mrm2r:[770,780],mrm3m:780,mrm3r:780,mrm4m:780,mrm4r:780,mrm5m:780,mrm5r:780,mrm6m:780,mrm6r:780,mrm7m:780,mrm7r:780,mrmdestmem:780,mrmdestreg:[770,771,780],mrminitreg:780,mrmsrcmem:780,mrmsrcreg:780,mrt:[78,171,266,542],mrtz:[78,117,171,205,266,310,542,580],mrv:770,ms_abi_tripl:775,msa:[683,688,710],msan:712,msb:[589,607,785],msb_extract:710,msbuild:[605,681],msdia140:681,msdn:[610,634],msec:779,msf:[731,736],msg:[0,2,3,4,9,704,710,769],msg_early_prim_dealloc:[378,490],msg_g:[27,129,220,378,490],msg_get_ddid:27,msg_get_doorbel:[27,378,490],msg_gs_alloc_req:[27,378,490],msg_gs_done:[27,129,220,378,490],msg_halt_wav:[27,378,490],msg_interrupt:[27,129,220,378,490,590],msg_ordered_ps_don:[27,378,490],msg_savewav:[27,220,378,490],msg_stall_wave_gen:[27,378,490],msg_sysmsg:[27,129,220,378,490],msgpack:590,msp430:[607,679,710],mspdb:731,mspdbsrv:731,mspdbxxxx:731,msr:760,msse4:756,msvc:[605,607,610,669,671,679,698,710,731,743,759,760],mte:[710,720],mthumb:700,mtripl:[611,617,619,639,675,710,716,759],mtype:590,much:[585,593,594,595,601,602,603,607,610,623,634,644,659,660,667,668,670,672,674,679,681,685,688,694,696,699,701,703,704,705,708,709,710,713,717,720,721,723,724,725,726,728,739,740,741,742,743,744,745,746,747,748,753,755,759,760,761,762,764,765,766,770,775,780,782,783,784,786,789,803,804,805,808,811],mul:[588,611,67
7,705,724,741,770,804],mul_ri:770,mul_rr:770,mullin:590,multi:[588,590,610,648,659,668,693,699,712,739,743,770,787],multi_v:659,multiarch:[700,701],multiclass:[769,771,780],multiclassid:770,multiclassstat:770,multicor:594,multidimension:710,multidistributionexampl:603,multilib:[668,701],multilin:616,multimap:743,multipl:[585,589,590,591,593,594,595,597,598,602,603,605,606,607,609,610,611,614,625,627,630,632,633,639,640,641,653,654,659,660,662,666,667,671,674,677,678,679,684,685,688,691,696,698,703,705,708,709,710,712,714,715,716,721,722,723,726,727,731,739,741,742,744,750,754,759,760,762,764,765,766,768,769,771,773,775,776,777,779,780,781,782,783,784,786,803,804,805,806,807,810],multipli:[588,590,598,607,639,645,660,670,684,711,741,743,759,760,770],multiplicand:710,multiprocessor:[676,782],multirepo:745,multiset:743,multisourc:[667,704,747,759,773,774],multithread:[676,743,784],multmp1:804,multmp2:804,multmp3:804,multmp4:805,multmp:[804,805,806,807,808,809,810],mummergpu:747,mung:[677,741],munger_struct:677,musl:712,must:[20,21,22,23,24,27,28,30,31,32,83,84,85,110,111,117,120,123,124,125,126,127,129,130,176,199,200,205,209,212,213,214,215,216,217,220,221,223,271,304,305,310,314,340,341,352,355,370,371,372,373,374,375,378,379,380,381,382,434,436,467,468,479,482,483,484,485,486,487,490,491,493,494,495,547,549,574,575,580,584,585,588,589,590,591,592,594,596,597,599,603,605,607,610,611,616,621,625,631,635,639,641,660,663,665,667,669,670,671,672,675,676,677,679,683,684,688,689,695,698,700,701,702,703,704,705,708,709,710,711,712,713,714,715,716,721,722,723,724,725,726,728,735,736,741,742,743,744,745,748,750,753,756,757,759,760,761,762,764,765,766,768,770,771,774,775,776,777,780,781,782,785,786,788,789,804,805,806,807,808,809,810],must_tail_call_return:663,mustalia:[593,710],mustprogress:597,mustquot:786,musttail:[663,710],mutabl:[710,725,806,807,809,810,812,813],mutablearrayref:709,mutat:[591,610,676,686,709,741,743,789,807,809,810],mutatestrictfptofp:591,mutex:[594,788],
mutual:[637,659,764,802],mve:658,mvt:[607,683,780],my86_64flag:786,my86flag:786,my_addit:759,my_build_dir:754,my_fil:728,my_fmad:724,my_funct:724,my_function_fast:724,my_function_precis:724,my_fuzz:712,my_gwp_asan_error:695,my_kei:610,my_kernel:724,my_list:606,my_list_of_numb:606,my_local_branch:745,my_local_tag:745,my_opt_diff:754,my_opt_yaml1:754,my_opt_yaml2:754,my_other_fil:728,my_project:745,my_str:606,my_valu:610,my_yaml_fil:754,myalloc:663,myapp:726,myawesomeproject:710,mybarflag:786,mybison:774,myblock:716,myboolean:759,mybranch:745,mybuilddir:605,mycustomtyp:786,mydevic:773,mydoclist:786,mydoclisttyp:786,mydoctyp:786,myfirstpatch:723,myfirsttypofix:680,myflag:786,myfoobar:743,myfooflag:786,myfunct:724,mygc:676,mygcprint:676,myglob:759,myinitprng:712,mylist:786,mylistel:786,mymachinefunctionpass:728,mymaptyp:786,mynewpass:612,myobject:759,myocyt:747,myownp3sett:759,mypass:[687,743,782],mypassnam:743,mypassopt:782,mypatch:742,myplugin:709,myproj:745,myregalloc:782,myregisteralloc:782,myrepo:745,myseq:786,myseqel:786,myset:610,mysteri:677,mystic:811,mystringtyp:786,mystruct:677,mytarget:712,mytyp:710,mytype1:786,mytype2:786,myvar:677,n16:724,n32:710,n4absl11string_view:627,n_bucket:759,n_hash:759,n_stab:640,nab:747,naddit:659,nail:806,naiv:[631,741,760],nakatani:607,nake:[597,710],name1:770,name2:770,name3:770,name:[20,27,123,129,212,220,370,378,482,490,586,589,591,592,594,597,599,601,602,603,605,606,611,612,614,615,616,617,619,620,621,622,623,624,625,628,629,630,631,632,635,636,639,640,641,642,643,644,645,648,649,652,653,654,658,660,663,664,665,666,667,668,669,670,671,672,673,675,676,677,678,679,684,689,696,699,701,702,703,705,707,709,711,712,713,716,717,718,722,723,724,725,726,728,729,731,739,742,743,744,745,753,754,755,756,762,764,765,768,770,771,774,775,777,781,782,783,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,814],namedindex:780,namedvalu:[804,805,806,807,808,809,810],namedvar:810,namefield:768,nameflag:759,namesbuff:731,namespac
:[585,597,605,630,659,660,668,676,710,711,743,759,769,770,771,780,781,782,784,788,789,790,791,803,804,805,806,807,808,809,810],namestr:658,namesuffix:770,namevalu:770,nan:[589,590,617,619,684,710,759],nand:710,nandakumar:692,nano:590,narrow:[594,601,612,670,677,679,684,688,704,705,712,743,760],narrowaddr:710,narrowcharact:738,narrowscalarfor:688,narrowscalarforcartesianproduct:688,narrowscalarfortypeswithmems:688,narrowscalarif:688,nas:747,nasa:747,nation:608,nativ:[585,587,590,594,601,603,605,606,612,617,621,624,654,668,669,670,672,674,677,679,694,701,704,710,713,724,745,749,759,762,780,786,788,789,790,791,804,805,806,807,808,809,810,811],native_command:605,natur:[585,590,594,609,610,611,645,659,666,667,669,674,676,677,679,703,709,710,713,714,717,738,743,745,746,748,753,759,760,761,765,766,770,782,786,804,806,808],natvi:739,naveen:741,navig:[659,742,784],nbranch:631,nbsp:607,nc_nv:590,ncsa:667,nctaid:724,ndebug:[605,659,740],ndk:679,near16:730,near32:730,near64:730,nearbi:[673,682,719],nearbyint:779,nearest:[684,695,710,721,810],nearli:[605,610],nearpoint:738,nearpointer128:738,nearpointer32:738,nearpointer64:738,neat:789,nec:749,necess:585,necessari:[585,590,593,594,596,598,605,607,609,610,611,625,659,663,665,667,669,670,674,676,679,683,686,688,691,699,700,701,702,704,705,706,710,711,712,713,715,716,717,723,725,726,736,738,739,742,743,746,748,756,757,759,760,761,762,764,765,766,768,770,773,775,777,778,779,780,781,782,784,804,809,810,812],necessarili:[610,619,660,667,672,677,679,688,710,714,727,729,735,739,743,753,756,757,759,760,764,785,810],necessit:698,need:[585,589,590,591,592,593,594,596,597,598,599,600,601,603,605,606,607,609,610,611,613,617,618,619,621,624,625,631,648,649,655,656,657,658,659,660,663,665,667,668,669,670,671,672,673,674,675,676,678,679,681,682,683,685,686,688,689,693,694,695,696,697,698,699,700,701,702,703,704,706,707,708,709,710,712,713,714,716,718,720,721,722,723,724,725,726,729,731,735,736,741,742,743,744,745,746,750,753,755,757,758,759,760
,761,762,764,765,766,768,769,770,771,773,774,775,777,778,780,781,782,783,784,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],needlessli:705,needstub:780,neelakantam:741,neg:[25,128,219,316,321,326,338,377,489,585,589,590,597,610,611,616,639,663,667,669,674,684,710,716,750,756,759,760,766,770,780],neg_hi:[0,1,4,7,8,9],neg_lo:[0,1,4,7,8,9],negat:[588,589,590,597,607,641,653,683,684,710,807],negeightkind:710,neglect:774,neglig:[673,712],negoti:757,neighbor:588,neighbour:631,neither:[585,593,596,597,654,669,674,709,710,712,722,731,741,762],nekbon:747,nemanja:748,neon:[607,658,661,683,688,701,710,749,768,778],ness:[602,709],nest:[585,590,593,597,605,606,607,610,616,639,666,669,676,710,714,717,725,728,731,743,745,750,760,770,776,779,780,782,803,806],nestabl:770,net:[590,662,673,678,696,702,747],netbsd:[679,783],network:[607,712,743],neumann:710,neural:774,neutral:[585,710],never:[590,593,594,596,603,607,610,611,621,625,639,660,663,664,668,672,673,676,677,682,683,693,694,703,710,713,714,722,726,728,729,730,736,741,743,745,754,759,760,766,768,776,780,782,783,784,786,788,790,811],never_instru:783,nevertheless:610,new_cond:710,new_corpus_dir:712,new_guard_cond:710,new_install_nam:633,new_potentially_interesting_inputs_dir:712,new_rpath:633,newabbrevlen:597,newaddr:684,newcom:[771,808],newer:[594,660,665,667,679,681,730,747,773],newest:[671,805],newinst:743,newli:[600,610,684,694,709,710,712,726,741,743,748,764,789,804,805,806,807,810],newlin:[597,610,616,625,635,645,770,807],newoffset:710,newptr2:710,newptr:710,newti:710,newtoset:610,newversionformat:731,next:[585,588,590,592,597,600,601,610,639,641,653,659,663,667,669,676,677,679,688,693,698,699,702,703,707,709,710,713,714,715,719,722,728,729,735,743,750,752,753,757,759,760,766,770,775,777,780,781,782,788,789,790,791,802,803,804,805,806,807,808,809,810],nextindvar:710,nextprec:[803,804,805,806,807,808,809,810],nextvar:[806,807,808,809,810],nextvari:[806,807],nfc:[667,711],ngg:590,nggcullingdata:590,nggsubgro
ups:590,nggtess:590,nice:[611,659,679,683,699,721,727,741,743,770,782,802,803,804,805,806,807,808,811,812],nicer:[659,743,786,810],nicknam:755,nicolai:748,nifti:[782,806,811],nightli:[667,678,702,774,775],nikhgupt:757,nikhil:757,nineti:757,ninf:710,ninja:[592,603,605,679,682,686,697,698,699,700,701,712,723,742,745,747,773,781,784],nmake:605,nnan:710,nnnn:667,nnnnnn:[600,607],no_dead_strip:710,no_merg:597,no_switch:779,no_turbo:595,no_warning_for_no_symbol:635,noalia:[593,597,639,663,674,677,721],nobodi:747,nobuiltin:[597,710],nocaptur:[597,710,741],nocf_check:[597,710],node:[593,598,599,607,623,658,666,669,673,684,703,709,711,714,716,721,722,724,743,750,751,759,764,768,770,776,777,779,780,782,786,803,804,805,806,807,808,809,810],nodebuginfo:659,nodedupl:[597,710],nodefinit:754,nodenam:769,nodetail:623,nodetyp:[591,780],nodupl:710,noencod:607,noexcept:669,noexec:709,nofre:[597,710],nohup:753,noimm:[770,771],noimplicitfloat:[597,710],noinlin:[597,706,710,754],nois:[595,610],noisi:[667,741,743],noitin:619,noitinerari:[770,771,780],nolink:659,noload:641,nologo:634,nomenclatur:592,nomerg:710,nomin:710,nomine:757,nomodref:593,non:[585,588,589,591,593,594,597,601,603,604,605,606,607,611,612,616,617,619,621,622,623,624,625,626,628,629,631,632,633,635,636,637,639,640,641,642,643,644,645,648,649,650,652,653,654,657,658,659,660,661,663,664,667,669,670,673,674,677,678,679,683,684,685,688,694,697,703,705,707,708,709,712,713,714,715,716,719,720,722,724,725,726,730,731,736,743,745,746,748,753,756,757,759,760,762,766,770,775,776,777,780,781,782,785,786,803,805,806,807,808,809,810,811,812],non_recurs:759,nonan:684,nonatom:759,nondot:600,none:[85,176,271,352,436,549,585,590,595,607,610,616,617,619,621,644,654,659,663,667,669,676,677,678,679,699,700,704,707,710,725,728,730,738,743,754,757,760,764,765,770,782],nonempti:710,nonetheless:[585,710,789,790,791],nonfat:770,nongnu:747,nonintuit:778,nonlazybind:[597,710],nonneg:710,nonnul:[597,669,674,710],nonparameter:770,nonsens:[717,761,808
],nonstandard:770,nonstop:784,nonstop_tsc:785,nontemp_nod:710,nontempl:659,nontempor:[590,710],nonzero:[597,602],noop:710,nop:[671,710,762,783],noprfx:770,noprofil:710,nor:[585,590,597,607,669,672,676,677,709,710,712,722,731,741,762,765,766],norecurs:[597,710],noredzon:[597,710],noreg:[590,705,716,759],noreturn:[597,674,710,714],norm:[585,667,755],normal:[585,587,588,590,594,596,597,605,607,610,611,616,621,622,625,629,632,636,640,641,644,648,649,657,659,660,667,668,669,673,674,676,679,682,683,684,695,699,702,705,709,710,713,714,718,728,730,742,743,746,756,757,758,759,760,761,762,764,765,768,770,775,777,779,782,784,807],normalformat:659,normalizedpolar:786,norman:607,northern:590,northov:692,nosanitize_coverag:[597,710],nosync:[597,710],not_inline_hd:660,not_mangl:626,not_nul:673,notabl:[605,607,660,670,677,688,695,699,709,710,719,743,759,760],notail:710,notat:[588,589,590,659,710,770,786],note:[96,97,107,175,186,187,196,270,286,287,301,435,447,448,457,548,560,561,571,585,587,588,589,593,594,595,596,597,601,602,605,609,610,611,612,615,616,617,621,623,631,639,641,648,649,650,653,659,660,662,663,664,665,666,667,668,669,670,671,673,674,675,676,677,678,679,687,688,689,691,693,694,695,699,700,701,702,703,704,705,706,707,709,710,712,713,715,720,721,722,723,724,726,729,730,731,735,736,738,741,742,743,744,745,747,750,756,757,759,760,762,764,765,766,768,769,770,771,773,774,777,778,779,780,782,783,784,788,802,803,804,805,806,807,808,809,810,811],noteligibletoimport:710,notevex2vexconvert:770,noteworthi:714,notfp:770,noth:[78,171,266,542,590,607,611,639,663,667,676,677,682,688,702,709,710,726,741,743,745,755,765,770,775,782,808],nothidden:659,nothrow:594,notic:[605,606,607,609,660,666,667,672,674,677,703,705,710,714,722,723,724,742,770,804,808],notif:[593,696,742,757,766],notifi:[590,593,600,639,696,702,709,723,726,755,757,779],notifyemit:709,notifyfail:709,notifyfin:709,notifyload:709,notifyremovingresourc:709,notifyresolv:709,notifytransferringresourc:709,notion:[607,610,664,
667,703,710,729,759,782,805,806],notori:765,nottransl:738,notw:611,notwithstand:667,notypemerg:736,noun:[610,748],noundef:[597,710],nounwind:[597,611,669,674,710,724,759,775,777],nouserdataspil:590,novel:[676,710,743],novic:605,now:[585,590,601,605,607,608,610,611,616,659,660,664,667,668,669,670,681,683,699,701,703,705,708,709,710,712,714,716,721,722,723,724,726,735,738,742,743,745,747,756,759,760,762,764,772,774,777,781,782,784,786,787,788,789,790,803,804,805,806,807,808,809,810,811],nowadai:[701,745],nozero:619,npb3:747,npb:747,nproc:679,nsa:[84,589],nsstring:759,nsw:[663,667,674,677,710,714],nsz:710,nt_amd_hsa_code_object_vers:590,nt_amd_hsa_hsail:590,nt_amd_hsa_isa_nam:590,nt_amd_hsa_isa_vers:590,nt_amd_hsa_metadata:590,nt_amdgpu_metadata:590,nth:[610,625],ntid:724,ntohl:607,ntradbigmip:641,ntradlittlemip:641,nuisanc:667,nul:[590,743,807],null_pointer_is_valid:[597,710],nullari:803,nullpointerexcept:673,nullptr:[610,709,710,768,788,789,790,791,803,804,805,806,807,808,809,810],nullptr_t:738,nullunknown:710,num:[589,590,611,614,616,625,631,645,676,710,745,764,780],num_arg:710,num_el:710,num_interpol:590,num_repetit:631,numabbrevop:597,numarg:[762,810],number1:722,number2:722,number:[20,27,30,117,120,123,129,205,209,212,220,223,310,314,370,378,380,479,482,490,493,580,584,588,593,594,596,598,599,600,601,605,606,607,609,610,611,614,616,621,623,624,625,630,631,637,638,639,641,642,644,645,652,653,654,658,660,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,683,684,688,695,697,701,702,705,706,709,710,711,712,713,714,716,717,721,722,726,728,730,731,733,735,736,738,739,740,742,743,744,750,754,756,758,759,760,761,762,764,766,768,769,770,771,774,775,776,779,780,782,783,785,786,802,803,804,805,806,807,808,809,810,811],numberexpr:[803,804,805,806,807,808,809,810],numberexprast:[803,804,805,806,807,808,809,810],numberofauxsymbol:814,numberofcpucor:712,numbit:743,numblock:735,numbyt:762,numconst:762,numdirectorybyt:735,numel:710,numelt:597,numeltsparam:[597,7
10],numentri:597,numer:[20,27,123,129,212,220,370,378,482,490,588,601,602,639,640,642,659,674,681,710,716,738,739,743,768,770,771,780,783,785,802,803,804,805,806,807,808,809,810],numerical_limit:760,numexpress:664,numfaultingpc:673,numfilenam:664,numfunct:[673,762],numfunctionsinmodul:726,numhashbucket:738,numindic:664,numlin:664,numliveout:762,numloc:762,nummemb:743,nummeta:676,nummodul:731,nummodulevalu:597,numop:[597,780],numpoint:631,numrecord:762,numregion:664,numregionarrai:664,numroot:676,numsgpr:590,numshadowbyt:762,numsourcefil:731,numspilledsgpr:590,numspilledvgpr:590,numstr:[802,803,804,805,806,807,808,809,810],numstream:735,numthread:726,numval:[802,803,804,805,806,807,808,809,810],numvar:611,numvgpr:590,numword:754,numxform:743,nutshel:743,nuw:[674,710],nuzman:[750,779],nval:710,nvcall:604,nvcc:607,nvcl:724,nvidia:[585,607,616,660,668,724,757],nvptx64:[616,724],nvptx:[679,710,748,778],nvt:780,nvvm:710,nvvmreflect:724,nxv16i1:710,nxv46i32:710,nxv4f32:710,nxv4i32:710,nxv4i64:710,nxv8f64:710,nxv8i16:710,obei:710,obit:710,obj:[617,624,650,654,669,676,710,753,759,764,773,774],obj_root:679,objc:[630,642,717,759,761,778],objc_autoreleas:710,objc_autoreleasepoolpop:710,objc_autoreleasepoolpush:710,objc_autoreleasereturnvalu:710,objc_copyweak:710,objc_destroyweak:710,objc_initweak:710,objc_loadweak:710,objc_loadweakretain:710,objc_moveweak:710,objc_properti:759,objc_releas:710,objc_retain:710,objc_retainautoreleas:710,objc_retainautoreleasedreturnvalu:710,objc_retainautoreleasereturnvalu:710,objc_retainblock:710,objc_storestrong:710,objc_storeweak:710,objc_unsafeclaimautoreleasedreturnvalu:710,objcopi:[615,633,653],objdump:[615,640,643,648,649,784],object:[585,593,594,596,601,603,607,609,610,612,614,615,616,617,619,621,624,625,630,631,634,637,639,643,644,652,654,657,659,661,663,664,665,667,669,671,672,673,675,676,681,683,684,686,694,698,700,701,703,704,706,708,709,711,712,713,716,722,724,726,728,730,731,734,736,738,739,745,756,757,760,762,765,766,768,770,773,774
,777,780,782,785,786,788,789,803,804,806,808,810,811,812,813,814],object_addr:676,object_error:743,objectbuff:715,objectbufferstream:715,objectcach:715,objectfil:715,objectimag:715,objectivec:710,objectlay:[788,789,790,791],objectlinkinglay:[778,789],objfilenam:731,objlinkinglay:726,objptr:710,oblivi:596,obscur:[710,743],observ:[594,607,610,639,710,712,714,730,731,734,736,738,759,760,764,770,784,811],obsolet:[600,730],obstacl:748,obstruct:766,obtain:[585,590,602,607,663,664,665,679,681,686,700,704,710,714,715,720,721,731,741,743,757,765,769,770,780,784,785,805],obtus:677,obviat:744,obviou:[593,594,596,601,607,610,666,667,672,710,722,736,741,745,756,759,760,762,768,803,805,807,811],obvious:[593,607,659,667,670,677,702,710,722,731,741,743,753,782,803],ocaml:[605,679],ocamldoc:605,occas:[610,733],occasion:[597,660,667,676,685,709,738,757],occupi:[96,97,98,99,112,113,114,186,187,188,201,202,286,287,288,289,306,307,308,447,448,449,450,469,470,560,561,562,563,576,577,578,588,590,644,710,735],occur:[585,588,589,590,597,599,607,610,611,612,616,617,621,622,623,624,629,631,632,636,639,657,658,667,669,676,677,686,689,695,704,705,710,711,714,716,726,741,743,744,745,755,756,757,759,760,762,764,765,770,775,776,782,803,809],occurr:[597,611,658,683,710,770],oct:[650,720,745],octal:[589,590,640,650,652,659],octal_numb:590,octopu:745,odd:[585,659,667,704,710],oddli:770,odr:[614,709,710],oeq:710,of_non:809,off:[31,32,82,83,85,117,176,205,271,310,341,351,352,381,382,433,434,436,494,495,546,547,549,580,588,590,601,603,605,607,610,621,642,644,660,667,675,679,685,697,698,699,700,709,710,721,723,724,725,726,740,743,755,760,764,771,773,775,784,785,788,789,804,805,806,807,808,809,810,811],offen:[0,2,3,4,8,9,85,176,271,352,436,549,590],offens:757,offer:[585,594,676,698,705,743,745,746,756,764,786],offici:[606,607,660,667,668,694,719,722,739,742,747,753,755,766,773,804,808],offlin:[607,667,724,766],offload:[590,616,756],offset0:[0,2,3,4,9,590],offset11:0,offset12:[0,2,3,4,8,9,82],offset13:[4,8
,9,351,433,546],offset1:[0,2,3,4,9,590,710],offset2:710,offset:[0,2,3,4,9,20,24,32,54,55,56,80,82,83,85,123,127,144,145,173,176,212,217,240,241,242,268,271,342,351,352,370,375,382,403,404,405,431,433,434,436,482,487,495,516,517,518,544,546,547,549,585,587,589,590,597,607,611,621,630,642,644,648,649,650,652,654,663,669,671,676,677,684,686,695,709,716,720,722,729,731,735,736,738,743,756,758,759,762,764,771,775,780,811],ofstream:659,oftc:[662,678],often:[590,591,593,594,596,601,605,607,609,610,611,639,659,660,667,669,670,672,674,676,679,682,688,695,698,699,700,703,704,705,709,710,712,714,716,723,724,725,741,743,745,751,754,757,759,760,764,766,769,770,775,779,780,784,786,789,803,805,808,811],oge:710,ogt:710,ojhunt:757,okai:[610,667,677,710,711,742,765,803,804,805,806,807,808,809,810],oland:590,old:[585,593,594,600,611,625,641,667,676,679,706,710,723,725,730,738,739,743,745,748,754,756,757,759,773,776,787,808,809,810,811],old_install_nam:633,old_rpath:633,oldbind:[808,809,810],older:[588,590,594,639,659,661,667,674,676,679,688,697,710,712,718,727,780],oldest:[590,639,805],oldval:[710,806,807,808,809,810],ole:710,oliv:757,oll:709,olt:710,omap:731,omiss:[590,610,667,710],omit:[585,586,587,590,607,610,614,617,622,623,627,629,632,639,640,642,643,644,645,654,657,658,660,664,681,705,710,712,726,741,743,754,759,762,777,782,783,803,806],omod:[0,2,3,4,7,8,9],omp:[741,747,776],on_fals:710,on_tru:710,onc:[213,371,483,589,590,593,597,601,602,605,606,607,609,610,612,614,616,625,630,632,635,639,645,657,659,660,662,665,667,669,670,672,673,676,677,678,679,681,682,688,689,694,700,702,704,705,709,710,711,712,713,714,715,717,722,723,725,726,728,731,738,742,743,745,746,750,753,755,756,757,759,760,761,764,765,766,770,771,774,777,780,782,783,784,786,788,790,803,804,805,806,808,811],one:[20,24,27,84,112,120,123,127,129,175,201,209,212,213,217,220,270,294,295,296,306,314,370,371,375,378,435,479,482,483,487,490,548,576,584,585,587,588,589,590,592,593,594,595,596,597,598,599,600,601,602,603,604,6
05,606,607,608,609,610,611,612,616,621,625,627,628,630,631,632,633,635,637,639,640,642,644,645,646,648,649,659,660,663,664,666,667,668,669,670,674,675,676,678,679,682,683,684,687,688,689,691,694,695,696,699,700,703,704,705,707,708,709,710,711,712,713,714,715,716,719,720,721,722,723,724,725,726,727,728,729,730,731,734,735,738,739,742,743,744,745,746,747,748,750,752,753,754,755,756,757,758,759,760,761,762,764,765,766,768,769,770,771,773,774,775,776,777,780,782,783,784,785,786,788,789,790,802,803,804,805,806,807,808,809,810,811],one_onli:671,oneargfp:770,oneargfprw:770,onelin:679,oneormor:659,ones:[590,601,602,605,606,607,610,612,659,660,667,669,674,676,689,701,702,703,710,722,741,749,759,760,764,765,768,770,771,775,780,789],onfin:709,ongo:[667,726,755,764,783],onli:[20,27,28,30,85,110,111,123,129,130,176,199,200,212,213,220,221,223,271,304,305,340,352,355,370,371,378,379,380,436,467,468,482,483,490,491,493,549,574,575,585,586,588,589,590,592,593,594,596,597,599,600,601,602,604,605,606,607,609,610,611,613,614,616,617,618,619,621,623,625,627,628,630,631,633,634,635,638,639,640,641,644,645,648,649,650,653,655,656,658,659,660,661,662,663,664,666,667,668,669,670,671,673,674,675,676,677,679,681,682,683,684,686,687,688,695,697,698,699,700,701,702,703,707,708,709,710,711,712,713,714,716,719,720,721,722,723,724,725,726,729,730,731,733,734,735,736,738,742,744,746,747,748,749,750,752,754,755,756,757,759,760,762,764,765,766,768,769,770,771,773,774,775,776,777,778,779,780,781,782,783,785,786,788,789,790,802,803,804,806,807,808,809,810,811],onlin:[585,590,595,605,606,808],only_ascii:712,onlyfailsforsomexvalu:743,onlyfailsforsomexvalues2:743,onon:710,ons:753,onto:[585,590,607,667,676,679,690,702,710,743,759,770,777],ontwo:710,onward:[590,605,699,714,759],onzero:710,oom:[605,712],oop:[709,764],op0:[597,754],op1:[597,710,754],op2:[710,754],op3:[710,754,780],op3val:780,op4:754,op_begin:743,op_end:743,op_iter:743,op_sel:[0,4,9],op_sel_hi:[0,4,9],opaqu:[596,597,671,683,726,741,743,764,78
9,804],opaquepoint:710,opc:[770,807,808,809,810],opclass:658,opcod:[14,79,84,172,175,267,270,333,350,430,435,543,548,585,588,590,594,607,610,631,639,643,658,688,693,703,707,710,716,722,743,750,770,771,780,803,804,807,808,809,810],opcode_nam:631,opcstr:780,open:[585,600,605,607,610,612,660,662,667,678,681,693,700,709,710,711,717,722,742,743,745,754,757,761,762,764,765,768,784,787,804,805,806,807,808,809,810],openbsd:[679,783],openc:770,opencbit:770,opencl:[585,658,724],openfil:610,openfileforwrit:765,openformattedfil:743,opengl:811,openmp4:750,openmp:[585,590,605,611,658,667,678,710,745,748,766,776],openorcreatefileforwrit:765,openpow:661,opensourc:712,openssl:712,openwork:764,oper:[27,33,34,36,37,38,93,96,97,100,101,120,129,132,133,186,187,189,190,191,209,220,224,225,281,282,283,286,287,314,354,378,383,384,386,387,388,444,447,448,451,452,479,490,496,497,499,500,501,557,560,561,564,565,584,587,588,593,594,596,597,599,602,604,605,607,610,611,617,619,635,638,639,641,648,649,653,658,660,661,663,664,669,670,672,673,676,677,679,683,689,691,693,695,705,709,712,721,722,724,725,726,727,728,741,745,749,750,754,756,759,764,765,766,768,769,771,777,779,780,782,783,784,786,788,789,802,803,804,805,806,809,810,811,812,813],operand0:[587,590],operand1:[587,590],operand:[10,11,12,13,14,15,16,18,20,24,25,26,27,28,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,122,123,127,128,129,130,132,133,134,135,136,137,138,139,140,141,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,211,212,213,217,218,219,220,221,223,224,225,226,227,228,229,230,231,232,233,23
4,235,236,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,316,317,318,319,321,322,323,324,326,327,328,329,330,331,332,333,334,335,336,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,370,371,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,481,482,483,487,488,489,490,491,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,591,597,599,607,610,611,623,639,642,643,669,670,671,677,683,684,685,686,688,690,691,705,721,722,727,741,743,748,750,759,764,768,769,770,804,807,808,809,810],operand_type_list_end:780,operandlist:770,operandmap:780,operands:770,operandti:607,operandtyp:780,operandv:[807,808,809,810],operating_system:710,opmap:770,opmapbit:770,opnam:780,opnod:780,opportun:[592,603,663,666,667,674,689,693,710,726,741,745,746,748,779,780,789,805],oppos:[610,659,667,671,679,683,688,702,710,711,725,734,744,748,756],opposit:[610,621,711,743,748,759,768,783,786],opprefix:770,opprefixbit:770,oprofil:[605,740],ops:[607,658,709,760,764,770,780],opsiz:770,opsize32:770,opsizebit:770,opt1:642,opt2:642,opt:[0,2,3
,4,8,9,586,593,594,601,610,612,615,642,658,667,672,676,679,701,704,705,710,714,721,724,741,743,751,760,764,774,775,779,789,805,806,808,809],opta:659,optb:659,optbisect:[704,728],optforfuzz:[597,710],optim:[585,587,592,593,596,597,599,601,602,605,610,611,612,614,615,616,617,619,639,642,643,645,646,658,659,661,663,664,666,667,669,670,671,673,674,675,676,677,678,679,680,681,689,691,692,693,696,702,703,707,708,709,711,714,722,723,724,725,726,727,740,743,744,748,749,750,751,752,762,763,764,766,769,774,775,776,777,779,780,781,782,785,787,788,790,791,804,806,807,808,809,810,812,813],optimis:[710,741,777],optimist:[639,693,710],optimizationbit:659,optimizationlevel:[659,725],optimizationlist:659,optimizationremarkemitt:748,optimizefunct:790,optimizelay:[789,790,791],optimizemodul:[789,790,791],option:[18,27,28,31,32,36,37,38,68,83,85,93,96,97,100,101,110,111,117,120,122,129,130,159,176,186,187,189,190,191,199,200,205,209,211,220,221,256,271,281,282,283,286,287,304,305,310,314,336,340,341,352,354,355,368,378,379,381,382,386,387,388,420,434,436,444,447,448,451,452,467,468,479,481,490,491,494,495,499,500,501,532,547,549,557,560,561,564,565,574,575,580,584,585,586,587,588,592,593,596,597,599,601,606,607,610,613,615,618,620,628,634,637,647,655,656,660,663,664,668,672,675,676,681,686,687,688,689,694,697,699,700,701,704,706,709,710,711,713,715,716,722,723,725,726,727,728,740,741,742,744,745,746,749,750,751,753,754,759,760,762,764,766,768,769,770,771,776,779,780,782,783,784,786,803,804,805,806,807,808,809,810],optional_dir:744,optionaldbgheaders:731,optiondefinit:658,options_pars:695,optlevel:659,optnon:[597,704,710,728,781],optnum:659,optsiz:[597,686,710],optyp:780,optzn:[704,805,806,807,808],opval:780,orang:723,orbit:807,orc:[665,679,778,788,790,791,805,806,807,808,810,813],orcjit:[788,789,790,791,805,806,807,808,810],orcrpctpcjitlinkmemorymanag:709,orcv1:778,orcv2:778,orcv2exampl:709,ord:710,order:[120,209,314,479,584,585,586,587,593,597,602,603,605,607,609,611,616,628,635,640,6
41,644,653,657,659,662,663,664,665,667,668,669,670,671,672,675,676,677,679,681,684,688,695,703,705,707,709,711,712,713,714,716,721,722,723,725,726,727,728,731,735,736,739,741,743,744,745,748,750,754,759,760,761,762,764,765,768,769,770,774,775,777,779,780,781,782,790,803,804,805,806,808,812],ordered_append_term:590,ordin:[659,709],ordinari:[585,663,664,710,726,786],org:[600,605,608,610,633,635,637,641,642,643,650,652,653,664,667,675,678,679,694,696,698,699,700,701,702,703,712,716,719,723,742,745,748,749,753,755,773,786,788,789,790,791,804],organ:[585,588,590,606,667,669,678,719,722,738,744,755,757,759,764,805],ori:607,orient:[608,610,703,743,747,759,761,811],orig:710,origin:[585,589,602,607,608,609,610,611,625,644,663,667,669,677,679,682,709,710,714,723,726,731,736,742,743,745,748,750,755,756,757,759,760,761,764,765,766,770,771,776,777,783,788,789,810],originput:659,orl:760,orq:760,orr:710,ors:711,orthogon:[594,726],oscil:785,oso:614,oss:[675,702,712,745],ostream:[610,743],osuosl:696,other:[0,1,2,3,4,5,6,7,8,9,84,120,175,176,209,270,314,435,479,548,584,587,588,589,590,591,592,594,596,597,598,600,601,602,603,605,607,608,609,610,611,613,614,616,617,618,619,625,627,628,631,633,635,639,640,641,642,650,652,653,654,655,656,657,658,660,662,663,664,666,667,668,669,670,671,676,678,679,681,683,686,687,688,689,691,693,694,695,697,698,699,701,702,703,704,705,706,707,708,709,711,712,713,714,715,716,717,719,720,721,722,723,725,726,727,728,729,730,731,734,735,736,738,739,741,742,744,745,746,747,748,750,753,754,755,756,757,759,760,761,763,764,765,766,768,769,770,771,773,774,776,777,779,780,781,782,783,784,785,786,788,789,790,803,804,805,806,807,808,809,810,812,813],other_funct:760,otherspecialsquar:703,othervt:780,otherwis:[42,74,585,587,590,592,594,597,598,600,601,602,605,609,610,611,612,614,616,617,619,621,622,623,624,625,628,629,630,631,632,633,635,636,638,639,641,642,643,648,649,650,652,653,654,657,658,659,660,663,664,667,669,674,676,694,696,697,701,702,703,704,709,710,711,713,7
14,722,725,726,728,731,733,739,741,742,743,745,755,759,760,764,769,770,776,782,786,802,803,804,805,806,807,808,809,810],otool:[615,642],otp:710,ottenstein:666,ought:610,our:[592,603,605,606,607,608,609,610,659,660,663,667,668,676,679,704,709,713,722,723,724,726,736,739,740,743,745,755,756,757,759,760,764,766,779,781,782,784,788,789,790,791,802,803,804,805,806,807,808,809,810,811],ourfpm:810,ourfunctionpass:743,ourselv:[610,724,739,759,781,788],out:[585,590,592,593,594,595,596,597,600,601,606,607,609,610,611,614,616,619,621,623,625,630,631,634,638,639,640,642,650,659,660,663,665,666,667,668,669,670,675,676,679,681,682,684,688,695,697,699,700,701,702,704,705,707,709,710,711,712,714,716,721,722,723,726,727,734,735,739,741,742,743,745,749,752,753,755,756,757,759,760,763,764,765,766,768,770,771,773,774,775,776,777,780,781,782,783,784,785,786,789,790,791,803,804,805,806,807,808,809,810,811,812,813],out_cpusubtyp:713,out_cputyp:713,out_of_bound:606,outcom:[705,757],outdat:[701,774],outer:[590,610,669,708,710,712,714,725,730,741,750,769,770,776,782,786,806,808,809,810],outeranalysismanagerproxi:725,outercolumn:710,outermost:[585,710],outerrow:710,outerstructti:710,outfil:651,outgo:[598,710],outli:669,outlin:[604,607,663,667,669,760,782,786],outliv:[709,710,712,741],outloop:[806,807,808,809,810],outoperandlist:[770,771,780],output:[15,36,37,38,42,43,44,45,46,47,48,49,50,93,96,97,100,101,102,103,104,105,106,107,108,109,115,134,135,136,137,138,139,140,141,186,187,189,190,191,192,193,194,195,196,197,198,203,204,229,230,231,232,233,234,235,236,281,282,283,286,287,292,293,294,295,296,297,298,299,300,301,302,303,309,319,324,334,354,356,357,358,359,360,386,387,388,392,393,394,395,396,397,398,399,444,447,448,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,471,472,499,500,501,505,506,507,508,509,510,511,512,557,560,561,564,565,566,567,568,569,570,571,572,573,579,588,590,592,593,595,598,601,604,605,607,610,611,612,613,614,617,618,619,620,621,622,625,626,627,628,629,631
,632,634,635,636,637,638,639,640,642,643,644,645,646,648,649,650,651,652,653,654,655,656,657,658,660,666,670,671,676,679,681,682,684,688,695,699,704,705,707,716,717,721,723,724,741,743,750,751,753,754,756,759,761,768,770,771,773,775,776,780,781,782,783,784,785,789,803,805,806,807,808,809,810,814],outputdebuginfo:659,outputfilenam:659,outright:[677,766],outsid:[590,593,608,610,659,663,667,671,677,678,694,695,700,709,710,711,712,714,721,722,723,741,743,745,753,759,760,762,764,765,770,774,777,789,804],outstand:[20,120,123,209,212,314,370,479,482,584,609,702,709],outstream:676,outweigh:748,over:[585,590,593,596,597,600,601,602,605,606,607,609,611,631,639,644,654,658,660,663,665,666,667,677,678,688,689,695,698,699,703,709,710,711,712,713,714,719,721,723,725,727,741,742,745,757,759,760,763,764,769,783,786,789,790,791,802,804,806,807,808,810,811,812],overal:[600,603,607,609,642,666,667,669,676,677,699,710,723,739,742,760,766,768,769,806],overapproxim:714,overboard:610,overestim:710,overflow:[585,589,590,611,667,674,712,760,785],overhaul:702,overhead:[672,676,695,707,709,710,720,726,743,745,750,756,785,790,805],overlai:[590,668,731],overlaid:721,overlap:[585,593,606,607,611,639,684,708,710,714,759,762,766,779,782],overli:[590,667,748],overload:[610,670,703,710,724,727,743,764,769,780,782,807],overrid:[590,594,603,605,606,607,616,617,619,625,629,639,641,645,659,660,666,686,695,698,703,709,710,712,716,722,723,724,741,743,756,759,760,768,780,782,789,791,804,805,806,807,808,809,810],overridden:[593,611,659,679,709,710,712,754,770,780],overrun:695,overs:743,overst:585,overtak:766,overview:[586,604,605,607,639,658,659,661,662,678,682,690,709,715,717,720,723,770,772,780,782,806],overwhelm:[602,812],overwrit:[659,744,760,762,783],overwritten:[607,669,710,756,762],overwrot:590,ovl:731,own:[585,590,597,598,603,605,606,607,609,610,621,659,660,664,666,667,668,669,670,676,679,694,695,700,702,703,705,709,710,712,715,721,726,727,729,730,739,741,742,744,745,748,750,754,755,756,757,759,762,
765,766,768,769,770,772,774,775,780,781,782,789,804,805,807,808,811],owner:[607,609,621,662,702,742,745],ownership:[667,726,743,791,805,806,807,808,809,810],oxid:757,p0928:760,p0a10s_union:710,p0f_isvoidf:764,p0i32:611,p0i64:611,p0i8:[710,724],p0s_:710,p0s_struct:710,p0s_union:710,p0v16f32:710,p0v2f64:710,p0v4i32:710,p0v4p0f_i32f:710,p0v8f64:710,p0v8i32:710,p0v8p0f64:710,p0v8p0f_i32f:710,p10:[29,131,222,492],p15:[710,724],p16:724,p17:724,p18:724,p19:724,p1i8:[724,764],p20:[29,131,222,492,724],p2align:[590,760],p3i8:724,p4i8:724,p5i8:724,p_arch:660,p_reg:607,pac:725,pack:[98,112,114,279,280,288,291,294,295,296,306,308,449,470,562,576,578,588,597,607,639,664,674,702,709,710,745,750,753,804],packag:[598,603,605,660,662,667,668,678,679,681,696,697,700,701,702,744,746,753,757,773,778],package_str:605,packet:[590,658,768],pact:750,pad:[588,590,597,644,663,664,669,683,685,708,709,710,716,731,762,785],padding1:731,padding2:731,padparam:669,padua:666,page:[590,592,603,605,610,615,644,659,661,662,667,671,673,676,679,681,695,702,704,705,709,715,716,720,723,725,735,739,742,743,745,748,751,752,754,756,757,759,760,764,767,770,772,775,779,787,804],pai:[610,667,687,702,760,775,780],paid:766,paill:757,pain:[659,660,665,667,742,743,766],painlessli:659,pair:[585,587,588,589,590,593,594,595,596,605,607,610,611,617,627,630,632,639,648,649,650,669,671,684,705,706,710,712,716,721,722,726,738,741,743,745,757,759,760,764,769,770,780,785,786,790,803,806,808,809,810],pairwis:593,pal:590,pamela:747,pan:760,panda:773,pandaboard:697,pane:742,paper:[595,607,610,660,661,666,721,743,760,787],papertrail:614,par:[747,749],paradigm:[677,743],paragraph:[610,667,717,722,761],parallel:[585,592,605,607,611,616,631,639,679,696,710,711,721,725,740,744,745,748,750,776,779],parallel_dir:744,parallel_loop_access:776,param1:743,param2:743,param:[0,2,3,4,78,171,266,542,610,616,630,676,696,724],paramattr:597,paramet:[29,78,131,171,222,266,492,542,585,588,597,599,602,605,606,607,610,616,621,630,631,638,639,659,660
,663,669,671,676,677,706,712,715,722,725,741,743,749,756,759,764,768,770,771,780,782,785,786,804],parameter:[686,770],parameterless:716,paramidx0:597,paramidx1:597,paramidx:597,paramti:597,parenexpr:[803,804,805,806,807,808,809,810],parent:[590,598,606,610,616,630,682,703,709,710,743,745,757,768,769,770,806],parent_scop:606,parentclasslist:770,parentclasslistn:770,parenthandl:710,parenthes:[588,611,616,710,769,770,803],parenthesi:[610,768,803],parentmulticlasslist:770,parentti:710,pariti:695,parlanc:[606,721,777,802],pars:[585,597,606,611,616,620,630,638,639,654,658,660,670,672,675,695,707,709,710,711,712,713,715,716,743,754,756,759,762,764,768,769,770,771,778,780,785,786,804,805,806,807,808,809,810,812],parsabl:[645,764],parse_fail:743,parseabl:[710,764],parsebinoprh:[803,804,805,806,807,808,809,810],parsecommandlineopt:784,parsedattrinfo:768,parsedefinit:[803,804,805,806,807,808,809,810],parseexpress:[803,804,805,806,807,808,809,810],parseextern:[803,804,805,806,807,808,809,810],parseforexpr:[806,807,808,809,810],parseidentifierexpr:[803,804,805,806,807,808,809,810],parseifexpr:[806,807,808,809,810],parsenumberexpr:[803,804,805,806,807,808,809,810],parseparenexpr:[803,804,805,806,807,808,809,810],parsepasspipelin:675,parsepath:726,parseprimari:[803,804,805,806,807,808,809,810],parseprototyp:[803,804,805,806,807,808,809,810],parser:[603,639,642,658,672,675,679,709,710,712,716,754,768,769,770,802,804,805,807,808,809,810,811,812,813],parserclass:659,parsesubtargetfeatur:780,parsetoplevelexpr:[803,804,805,806,807,808,809,810],parsetypetablebodi:670,parseunari:[807,808,809,810],parsevalid:784,parsevarexpr:[808,809,810],part:[585,588,589,590,591,593,594,596,597,600,601,603,604,605,606,607,610,611,612,613,614,616,618,626,635,639,655,656,658,659,660,663,664,666,667,669,674,675,676,677,679,685,686,695,698,699,700,703,710,712,713,714,719,720,721,722,723,725,726,727,730,735,736,741,742,743,745,746,747,750,755,757,759,760,763,764,766,768,771,773,775,776,777,780,781,782,783,78
4,786,788,789,790,791,803,804,805,806,807,808,810,811,812],parti:[585,610,667,674,679,744,746,755,757,764],partial:[585,588,590,607,645,658,709,710,711,712,743,747,756,759,762,776,785,786],partialalia:593,particip:[590,608,609,666,667,710,742,743,745,746,757,775],particlefilt:747,particular:[585,588,593,594,595,597,600,601,603,605,607,609,610,616,625,644,658,659,660,663,664,667,669,674,676,677,679,682,683,686,688,689,699,703,704,705,708,709,710,712,714,715,721,722,724,726,727,730,731,733,735,736,741,743,745,747,750,752,754,755,757,759,760,762,764,769,770,771,775,777,780,782,783,785,803,805,808],particularli:[585,594,605,610,611,630,674,679,684,686,689,696,710,738,743,745,757,760,764,769,780,782,783,785],partit:[601,607,616,617,641,697,711,721,790],partner:743,parzyszek:748,parzyszekacronym2:748,parzyszekacronym:748,parzyszekdistinguish:748,pascal:710,pass1:725,pass2:725,pass:[585,590,592,594,595,596,598,601,602,606,607,609,610,611,612,615,616,617,619,621,625,628,631,639,642,645,657,658,659,660,662,665,667,668,669,670,671,672,675,676,677,679,681,682,685,686,687,688,689,690,691,693,694,699,702,703,704,706,707,708,710,711,712,713,714,715,718,721,724,726,727,731,738,744,745,748,750,751,752,753,754,756,758,759,762,763,768,769,770,771,773,774,775,777,778,780,784,786,788,789,790,791,803,804,806,807,808,809,810,811],passag:610,passbuild:[675,725,781,782],passcon:663,passconfigur:709,passinfomixin:781,passinstrument:781,passiv:728,passmanag:[659,715,743,781,784,789,805,809],passmanagerbas:[780,782],passmanagerbuild:[724,782],passmanagerimpl:784,passnam:[657,743],passopt:782,passpluginlibraryinfo:782,passregistri:[725,781],passt:781,passthru:710,password:[696,745,757],past:[585,590,610,677,692,710,711,742,743,745,750,759,780,785,807,810],pat:[607,771,780],patch:[595,610,642,673,674,676,678,681,696,698,699,709,710,711,723,726,742,743,749,751,757,764,766,775,783,784,785,805,810],patch_premain:[783,784],patch_statu:783,patchabl:[676,710,762,764],patcher:762,patchpoint:[710,764],
patfrag:780,path:[585,591,592,593,594,602,603,605,606,610,612,614,616,617,619,621,624,625,630,631,634,635,641,642,645,646,654,658,660,663,664,667,669,671,673,674,676,679,681,694,697,698,699,700,701,704,709,712,714,723,724,726,731,741,743,744,745,754,756,759,760,764,765,766,773,774,775,779,784,808],path_to_clang:592,path_to_llvm:679,path_to_sample_profil:645,pathnam:[624,679],patholog:743,pathsep:[616,775],patient:[608,723,782],patleaf:780,pattern:[0,2,3,4,9,585,592,594,596,606,607,615,616,625,631,641,653,658,659,663,666,670,677,683,686,688,689,695,710,723,724,735,743,750,756,759,760,766,768,769,770,771,775,780,784,790,804],pattern_fill_cont:756,patternmatch:749,paul:[748,757,760],paus:[593,676,745,789],payload:[607,664,710,785],pbqp:[607,617],pc_32:590,pc_64:590,pch:[658,759],pcie:590,pcre2:712,pcre:712,pcrel32:709,pct:644,pdata:[671,731],pdb:[605,615,681,729,730,734,735,751,759],pdbdllrbld:731,pdbdllversion:731,pdbfilepathnameindex:731,pdbraw_featuresig:736,pdbstreamhead:736,pdbstreamvers:736,pdbutil:615,pdf:[595,760],pdg:666,pdgbuilder:666,pear:768,peculiar:770,pedant:605,peek:725,peephol:[607,670,705,743,805,806,807,808],peform:596,penalti:[603,710,743],pend:[662,665,679,709,757,766,803,804,805,806,807,808,809,810],peng:757,pennant:747,pentium:[679,780],penultim:780,peopl:[600,603,607,608,609,610,659,662,667,670,672,677,678,679,702,709,710,719,723,742,745,753,757,760,765,782,788,811],pep:748,per:[20,96,97,123,186,187,212,286,287,370,447,448,482,560,561,585,589,594,597,598,605,607,623,625,627,631,635,639,640,644,659,663,664,667,669,676,683,684,691,695,702,703,709,710,712,713,716,720,721,724,742,743,745,754,756,759,760,762,764,766,768,769,774,775,784,786,788,789,805,813],perceiv:[666,766,811],percent:[593,625],percentag:[623,625,638,644,645,695,743],perf:[592,595,605,646,671,773],perf_data_buffer_s:590,perf_test:747,perfect:[596,645,667,743],perfectli:[607,609,610,611,639,644,672,677,695,710,759,803],perfectlyrightalign:695,perforc:585,perform:[585,589,590,592,593,
594,595,596,601,602,603,604,605,606,607,609,610,611,616,617,621,627,631,635,641,642,645,654,657,658,659,663,667,668,669,670,673,675,676,677,679,680,683,684,688,689,693,695,697,699,702,703,708,709,710,711,712,713,714,715,716,722,723,724,726,728,731,741,743,744,745,746,748,750,753,754,756,757,759,763,764,765,768,770,773,775,776,777,780,781,782,783,788,789,790,804,808,809,811,812],perfscript:646,perfunctionst:784,perhap:[585,593,676,691,710,722,736,743,745,759],period:[601,604,609,610,631,667,676,710,712,757,766,775,783],peripher:667,perl:676,perldoc:679,perman:[585,667,688,716,755,757,790],permiss:[30,223,380,493,621,667,679,696,709,710,715,755,788],permit:[605,607,611,617,650,671,676,684,688,709,710,711,743,748,775,808,809,810],permlan:588,permut:[588,710],perpetu:[667,783],pershaderperfdata:590,pershadert:590,persist:[606,608,663,668,710,712,743],person:[597,608,609,667,669,704,711,717,742,755,757,761,770,786,789],personalityfn:597,personlist:786,personnam:770,perspect:[593,608,663,669,703,710,755,764,782],pertain:[585,667,669,710,757,759],pertin:766,pervas:[703,743,812],pervert:771,pessim:[673,741,743],peter:757,pf0:722,pf1:722,pg0:722,pg1:722,pgo:[603,645,726,739,754,760,778],phab:745,phabric:[609,662,667,678,679,723,745,748,749,757,766],phabricator_usernam:757,phase3:[702,753],phase:[612,658,669,676,688,702,709,710,728,743,746,749,753,789,803,807],phi:[597,607,610,623,663,669,683,705,714,741,759,779,806,807,808,809,810],phid:610,phielimin:607,phieliminationid:607,philip:748,philosophi:[705,746,778],phinod:[610,721,806,807,808,809,810],phoni:709,phoronix:747,php:[742,747],phrase:[606,610],physic:[585,590,603,608,639,683,685,709,710,716,731,743,755,759,764,780],physreg:607,pi8:777,pic:[607,619,701,704,709,716,741,759,780],pick:[585,596,605,607,610,619,639,676,677,682,691,700,701,702,704,710,723,742,745,746,764,773,780],pictur:[677,739,785,804],picu:[692,748],picusacronym:748,pid:590,pie:756,piec:[585,597,601,603,606,607,609,610,631,667,674,677,681,699,704,709,710,7
46,759,764,770,775,803,804,806,807,808,810,812],pienaar:660,pietro:757,pietroalbini:757,pifft:774,pilipenko:757,pin:710,ping:[609,662,722,723],pinpoint:745,pinsrd:611,pinsrd_1:611,pinst:743,pip3:681,pip:[681,761,773,775],pipe:[590,611,616,639,765,775,784],pipefail:616,pipelin:[597,616,623,639,663,674,675,683,685,690,693,705,709,710,719,724,741,752,754,764,775,776,780,781,782,789],pipelinemetadata:590,pipermail:[745,748],pitcairn:590,pixel:[78,96,97,171,186,187,266,286,287,447,448,542,560,561,590],place:[585,590,591,592,594,595,596,601,607,609,610,614,616,619,621,639,641,653,659,663,664,667,668,669,670,671,674,676,677,679,685,688,689,694,696,700,709,710,716,721,722,723,725,726,727,741,742,743,744,745,750,752,753,754,759,760,761,762,764,770,771,773,775,776,779,780,781,782,786,803,804,805,807,808,811,812],placehold:[621,668,669],placement:[671,674,759,770],placesafepoint:676,plai:[722,743,782,811],plain:[611,709,716,717,730,743,761,770,782,788],plan:[585,607,639,667,676,678,699,708,712,720,724,726,743,746,757,759,763,764,771,779],plane:[783,807],plant:782,plate:[659,743],platform:[585,590,592,594,606,607,626,644,663,667,669,676,679,680,696,697,699,701,702,710,713,726,736,739,740,743,760,762,765,782,785,788,805,807],plausibl:600,pldi:[676,750],pleas:[592,593,594,600,605,607,608,609,610,613,618,633,635,637,639,641,642,643,650,652,653,655,656,658,662,667,674,676,678,679,681,695,696,697,699,701,702,704,705,707,709,710,717,719,720,723,724,726,727,742,743,752,753,755,756,757,761,764,767,772,775,780,784,804,811,813],plethora:[602,672,679,743],plist:642,plot:[638,807],plotter:807,plt:[709,710],plu:[585,589,590,596,597,611,667,669,676,709,710,731,743,757,759,769,780,789,803,806],plug:[728,746,804],plugin:[612,619,621,657,665,699,710,728,754,778],plugin_tool:782,pluginfilenam:619,plugininfo:782,plusten:759,plx:585,pmbuilder:724,pmdatamanag:784,pmtoplevelmanag:784,png:[605,712],pni8:669,point:[25,128,219,316,321,326,338,377,489,585,587,588,593,594,597,598,599,600,601,604,605,607,
610,611,617,625,631,639,644,659,660,667,668,669,670,674,677,679,683,686,689,691,696,700,701,702,703,705,708,709,711,713,714,715,716,721,722,723,724,726,727,731,735,738,743,745,746,751,753,754,757,759,760,764,765,769,770,771,777,778,779,780,782,783,784,785,786,789,790,802,803,804,805,806,807,808,809,810],point_numb:[52,53,143,238,239,401,402,514,515],pointcount:676,pointe:[590,597,710],pointee_align:590,pointeealign:590,pointer1:760,pointer2:760,pointer:[585,589,594,597,607,617,660,663,669,672,673,674,676,678,683,684,698,708,709,711,713,715,716,720,722,724,725,726,730,738,741,756,759,762,768,769,777,780,782,785,786,788,790,803,805,806,808,810,811,812],pointer_offset:710,pointerflag:730,pointerkind:730,pointermod:730,pointermodifi:730,pointertodatamemb:730,pointertojittargetaddress:[726,790,791],pointertomemberfunct:730,pointertyp:[727,743],pointi:786,pointless:[669,721,764],pointstoconstantmemori:710,pointtoconstantmemori:593,poison2:710,poison3:710,poison4:710,poison5:710,poison:760,poison_yet_again:710,poke:597,polar:786,polaris10:590,polaris11:590,polici:[588,590,593,597,610,662,678,679,723,743,745,748],polit:[607,608,609],poll:[676,719,764],polli:[605,667,679,745,748,750,776],pollut:[610,742,781],polyhedr:750,polymorph:[604,762,781],pong:722,pool:[590,597,607,623,709,726,756,762,780,782,788],poolalloc:593,poor:[608,610,764,806],poorli:[598,610],pop2_:747,pop:[585,589,592,597,607,676,710,743,785,806,808,809,810],pop_back:[659,743,810],popcnt:712,popf:760,popfq:760,popq:[760,762,764],pops_exiting_wave_id:589,pops_pack:20,popul:[616,669,670,679,709,710,724,728,757,760,768,804],popular:[593,606,607,668,679,697,710,812],pornin:760,porpoda:750,port:[607,631,672,686,688,693,696,709,746,760,765,779,783,811],portabl:[585,603,605,607,616,640,676,710,726,743,778,788],portion:[585,590,601,603,606,607,612,659,664,667,669,679,685,689,710,716,718,721,741,747,759,760,768,770,780,785,805],pos1:659,pos2:659,pos:[78,171,266,542,710],pose:[676,711,714,760],posit:[20,27,78,84,123,129
,171,175,212,220,266,270,370,378,435,482,490,542,548,585,588,589,590,597,607,611,619,621,631,645,654,660,663,667,669,671,679,684,701,705,707,710,712,713,714,716,723,731,743,750,757,759,760,762,764,770,786,806,807],positionaleatsarg:659,posix:[605,611,621,640,641,653,668,698,746,754],possess:[664,743],possibl:[585,589,590,593,594,595,596,597,601,604,605,607,609,611,616,631,639,641,644,660,663,664,665,666,667,670,671,672,674,676,677,679,683,686,688,689,691,694,695,696,698,700,701,703,705,708,709,710,712,713,714,715,716,717,718,719,720,721,724,725,726,729,731,735,741,742,743,745,746,748,750,753,755,758,759,760,761,764,765,766,768,769,770,773,775,776,779,782,785,786,802,803,805,806,808,810,811],post:[607,608,609,619,639,662,667,678,684,693,709,716,719,725,742,744,754,755,756,757,759,760,764,769],postencodermethod:770,poster:608,postfix:585,postincr:610,postord:711,postprunepass:709,postrapseudo:716,postscript:741,potenti:[585,588,593,600,601,607,610,616,631,639,659,667,669,672,674,676,679,683,699,710,712,719,721,725,741,743,748,749,750,755,756,759,760,762,764,766,776,777,784,802,803,806],pound:786,povray31:774,pow:[724,779],power8:661,power9:661,power:[585,590,593,603,607,611,637,659,661,684,688,695,697,708,709,710,713,714,716,741,742,743,761,770,771,785,803,804,805,806,807,808],powerpc64:[661,710],powerpc64l:783,powerpc:[594,605,641,670,679,710,711,748,762,775,780,786],powerpcl:641,ppa:679,ppc32:607,ppc64:607,ppc_fp128:[597,710,722],ppcf128:710,ppcinstrinfo:670,ppcisellow:670,pr26161:685,pr26576:683,pr34636:675,practic:[585,594,598,603,610,659,662,667,672,673,676,677,678,683,703,710,711,712,714,723,726,730,734,736,738,741,743,745,746,759,760,761,762,764,766,770,776,778,789,804,805,806,812],pragma:[585,658,660,741,776],pre:[588,607,609,619,667,670,684,702,704,705,709,711,726,741,743,744,745,749,759,771,776],pre_stor:607,prealloc:[597,743],prebuilt:668,prec:[807,808,809,810],precaut:[743,757],preced:[585,590,593,597,610,611,616,640,654,663,686,710,721,741,743,756,759,762
,764,770,775,785,788,803,804,805,806,807,808,809,810,812],precis:[585,589,590,593,594,607,611,617,619,676,679,683,703,705,716,725,759,762,768,780,802,803,804],preclud:[596,709,710,760,764],precompil:[709,724,788],precomput:593,precondit:610,precursor:667,pred:[669,710,724,743,759,806,808],predecessor:[598,607,674,705,710,714,741,750,759,806],predefin:[607,611,616,675,764,773,782,807],predetermin:593,predic:[594,596,678,683,707,726,743,750,768,770,771,780],predicate_st:760,predicate_stor:780,predicatedbinaryoper:749,predicatedinstruct:749,predicateinfo:710,predict:[599,610,616,639,674,743,751,756,760,782],predictor:760,predreg:707,predrel:707,predsens:707,preemptabl:712,preemptibl:741,preemption:597,preemptionspecifi:[597,710],preexist:[610,741],pref:710,prefac:606,prefer:[593,594,606,631,639,654,658,660,667,669,679,696,697,699,703,705,709,710,716,719,722,723,742,743,748,753,759,765,771,775,780,784],prefetch:661,prefix1:611,prefix2:611,prefix:[585,586,590,591,597,603,605,607,610,614,616,620,624,625,627,641,642,654,658,659,664,669,679,694,712,716,726,728,729,730,736,744,745,748,759,770,771,773,775,807],prefixdata:597,preformat:[717,761],preg:607,prehead:[705,710,714,741,806],preheaderbb:[806,807],preinstal:668,preorder:703,prepar:[625,669,677,678,701,710,719,722,723,735,738,741,742,745,753,755,759,805],prepass:607,prepend:[614,625,635,642,705,710,743,748,759,770,773],preprocess:[672,704],preprocessor:[605,625,659,660,664,672,743,768,770,780,805,811],prerequisit:[610,782],prescrib:683,presenc:[594,607,610,611,639,669,703,705,710,728,736,743,759,766,780,785],present:[585,590,597,602,604,607,609,611,616,619,625,626,637,639,644,652,654,659,660,663,664,669,671,673,676,677,679,683,692,700,704,710,728,730,731,734,735,736,738,743,745,747,754,759,760,764,766,770,776,787,806,807,808,809,810],preserv:[585,588,590,593,596,605,607,610,625,632,641,653,659,663,669,677,679,687,689,709,712,714,725,745,759,762,775,776,781,782,785,811],preserve_allcc:[597,710],preserve_mostcc:[597,710],
preserveal:710,preservedanalys:[725,748,781],preservedanalysescheck:748,preservedset:725,preservedwhenstateless:725,preservemost:710,preserveset:725,press:809,pressur:[607,639,679,741,760,766,779],presum:[585,710,731,736,738,745,770],pretti:[595,638,654,677,706,712,717,718,721,722,741,743,760,761,765,774,782,802,803,804,805,806,807,808],prettyprint:743,prev:743,prevail:621,prevent:[590,593,607,609,611,641,653,659,660,663,666,667,672,676,685,689,698,705,709,710,726,731,741,743,745,756,757,758,759,760,762,764,765,775,779,781,782,791,804,808,809,810],previou:[585,590,594,596,597,607,610,611,616,621,639,645,663,664,667,670,679,695,699,702,705,710,714,717,722,723,724,725,729,739,742,743,752,753,758,760,764,766,769,770,780,782,785,786,788,790,804,805,806,807,808,809,810,812],previous:[585,590,597,605,611,616,625,641,659,667,669,671,684,695,702,710,712,734,738,742,745,760,764,770,774,780,805,806],prf:639,prim:[78,747],primari:[590,593,606,607,608,616,667,669,676,709,710,712,720,728,731,741,743,756,760,763,768,769,770,802,803,804,805,806,807,808,809,810],primarili:[607,616,628,632,654,663,676,679,697,708,710,712,716,741,743,760,807],primaryexpr:[589,803],primaryfunctionid:671,primarykei:[768,769],primarykeyearlyout:[768,769],primarykeynam:[768,769],prime:[631,803,804,805,806,807,808,809,810],primer:[603,605,761],primid:590,primit:[78,594,610,659,677,710,716,722,730,743,756,760,785,807,811],princeton:747,principl:[661,700,722,743,748,759,765,812],print:[606,607,610,611,612,614,615,616,617,619,620,621,622,623,625,626,629,630,631,632,633,636,638,639,640,641,642,643,645,646,653,654,657,658,659,663,670,672,676,679,681,699,704,709,710,712,716,718,721,723,725,728,743,759,770,771,773,774,775,779,780,781,786,803,804,805,806,807,808,809,810,812],print_arg:709,print_final_stat:712,print_list:606,print_pc:712,printabl:[652,710,716,768,780],printallsymbol:709,printccoperand:780,printd:[805,806,807,808,809,810,811],printdatadirect:780,printdeclar:780,printdens:807,printer:[607,676,718,74
3,754,768,775,782,809],printerror:769,printf:[590,601,611,664,672,679,694,709,710,713,726,743,759,805,806,807,808,809,810],printfatalerror:769,printfatalnot:769,printformattedfil:743,printfunctionpass:741,printimplicitdef:780,printinformationalmessag:659,printinlineasm:780,printinstruct:[768,780],printlabel:780,printmemoperand:780,printmethod:780,printnextinstruct:743,printnot:769,printoperand:780,printout:[657,743],printstar:806,printsth:741,printwarn:769,prior:[585,590,597,607,616,641,659,663,664,667,669,679,681,708,710,748,760,776,782],priorit:639,prioriti:[589,590,644,660,677,702,710,755,759],priority_queu:743,priv:590,privat:[585,589,597,642,663,667,674,696,703,710,722,724,731,743,755,757,759,760,768,780,788,789,790,791],private_bas:589,private_limit:589,private_segment_align:590,private_segment_fixed_s:590,privatesegmentfixeds:590,privileg:[590,710,760,765,788],prng:712,pro:[590,596],proactiv:[601,667],prob:710,probabilist:695,probabl:[590,607,609,610,623,625,639,659,660,667,668,672,676,677,695,697,716,723,743,744,745,747,763,764,803,806,811],probe:[0,3,4,9,590,607,710,743],probinson:757,problem:[585,590,593,600,601,602,607,609,610,611,612,659,663,667,672,674,675,676,677,681,682,689,693,697,700,701,702,704,705,708,710,711,714,723,728,731,736,740,741,743,745,746,748,756,759,764,765,766,775,804,806,807,811],problemat:[610,639,667,745,766],proc:[585,595,679,697,780],proce:[605,663,667,669,676,683,702,709,711,722,743,750,753,760],procedur:[585,590,596,599,659,661,674,681,698,702,709,710,711,722,743,744,760,763,802],proceed:[590,607,609,660,676],process:[585,590,592,593,594,595,598,601,602,603,605,606,609,610,611,614,630,638,639,642,644,645,658,659,660,661,662,664,665,667,668,669,671,676,679,680,689,695,697,699,704,707,709,710,712,713,715,728,729,741,742,743,745,748,751,754,756,759,760,764,765,769,770,771,775,779,780,782,783,786,787,788,790,791,802,803,805,811],process_id:590,processformattedfil:743,processor:[585,596,605,607,619,639,659,676,679,683,710,759,771,779
,780,809],processrelocationref:715,procnoitin:771,procsym:671,produc:[585,587,588,589,590,594,595,596,597,601,602,605,607,610,611,612,614,616,617,623,625,627,628,631,635,639,641,644,650,654,657,663,664,667,668,669,672,675,676,677,679,684,688,694,701,704,705,708,709,710,712,718,721,722,726,731,733,735,736,738,741,743,750,754,759,763,764,765,768,769,770,773,775,779,780,782,783,785,786,788,789,804,805,806,808,810],product:[590,598,607,608,611,614,639,645,667,676,695,709,710,720,722,751,754,757,766,770,782,790,803,807],prof:[598,599,645,699],profdata:[592,605,615,625,646,699,710,773],profession:[609,662],profgen:615,profil:[590,592,599,603,605,615,625,631,648,649,654,664,673,674,700,716,726,742,744,754,759,778,785],profiledata:710,profit:[607,660,673,674,710,719,741,760,776,779],profraw:[592,625,699],progbit:[671,760],program:[589,590,593,594,595,601,602,605,606,610,611,612,613,615,616,617,618,621,625,632,635,637,639,641,644,648,649,650,652,653,654,655,656,657,658,659,660,661,664,667,668,669,673,674,675,676,677,678,679,681,683,694,695,698,700,702,703,704,705,709,710,711,712,713,714,716,720,723,724,726,729,731,736,738,739,740,741,743,747,751,754,756,759,760,762,763,764,765,770,773,774,775,776,777,778,779,780,781,782,784,785,786,787,788,789,790,803,805,806,808,809,810,811],programm:[590,594,601,607,610,657,659,660,661,662,676,680,703,709,760,768,769,771,776,779,782,788,807,810],programmat:[607,645,676,724,783,786,807],programmersmanu:[610,788],progress:[590,591,594,605,607,616,639,661,663,667,676,702,710,711,716,743,748,752,766,773,783],prohibit:[594,709,713,721,760],proj_install_root:744,proj_obj_root:744,proj_src_root:744,project:[585,592,603,608,609,610,616,661,662,663,665,668,670,672,674,675,678,680,681,682,698,699,700,701,702,709,710,711,712,723,726,727,742,743,746,747,748,754,755,756,757,759,760,761,762,766,769,770,773,774,775,782,783,787,788,789,790,791,812],project_nam:744,projector:719,prolang:774,prolifer:748,prolog:[710,711,760,780],prologepilog:754,prologu:[59
0,597,607,617,676,758,759,780,810],prologue_end:671,prologuedata:597,promin:773,promis:[609,667,766,783],promot:[585,593,607,660,667,670,674,709,710,711,728,743,750,759,808,810],promoteloopaccessestoscalar:705,promoteop:670,prompt:[681,723,775],promptli:[609,667,757],prone:[607,659,697,743,771],proof:[599,714,716,726,742,760],propag:[590,605,607,660,669,679,684,709,710,711,716,728,742,754,759,763,764,765],proper:[594,610,616,639,660,664,667,686,703,705,710,725,743,780,781,810],properli:[589,591,593,594,600,601,604,607,614,659,660,662,671,678,679,710,727,742,770,805,808],properti:[585,586,591,593,594,596,597,602,607,610,614,616,639,641,658,659,663,664,667,670,676,681,687,689,707,710,713,714,716,721,722,731,738,741,742,743,745,757,760,762,763,764,768,769,771,775,776,780,805,808],propertydefinit:658,propon:748,proport:[645,659,686],propos:[585,598,609,611,667,674,692,711,712,746,748,749,750,757,760,766,784,787],prose:610,prospect:667,protect:[594,597,604,607,610,641,667,709,710,712,745,755,765,780],protectfilecheckoutput:775,protectionflag:709,protector:[710,759],proto:[791,803,804,805,806,807,808,809,810],protoast:[804,805,806,807,808,809,810],protobuf:675,protocol:[663,675,710,745,764],prototyp:[585,610,632,663,670,674,695,709,710,749,751,756,769,780,782,803,804,805,806,807,808,809,810],prototypeast:[791,803,804,805,806,807,808,809,810],prove:[593,667,674,695,708,710,714,722,741,760],proven:[590,640,667,674,689,710,720,741,764,808],provid:[34,132,225,384,497,585,586,588,589,590,591,592,593,594,597,599,602,603,604,605,606,607,609,611,612,616,623,625,630,631,638,639,641,645,649,654,657,659,660,662,663,664,665,666,667,668,669,671,674,676,677,678,679,681,683,686,688,691,693,695,696,697,698,700,703,705,709,710,711,712,713,714,715,716,718,719,720,721,724,725,726,728,733,738,739,741,743,744,745,746,751,754,755,756,757,759,760,762,764,765,768,769,770,774,778,779,780,782,783,784,785,786,787,788,789,790,803,804,805,806,807,808,809,810,811],provis:667,provision:[585,590],provis
o:667,proxi:[607,725],proxyapp:747,prune:[602,667,709,721,745,750,757,783],pruneexpir:745,psabi:661,pse:748,pseudo:[590,596,598,607,642,658,663,686,710,716,726,743,759,760,768,780,782],pseudocod:[695,710,780],pseudonym:755,psl:585,pstl:[605,745],psubu:775,psubusw:775,psutil:[681,775],pte:590,pthread:[660,710,746,756],pthread_attr_t:710,pthread_creat:710,ptr0:[684,710],ptr1:710,ptr2:710,ptr3:710,ptr64:590,ptr7:710,ptr:[590,594,610,642,663,673,684,727,730,743,762,777,780],ptr_is_nul:673,ptr_rc:607,ptra:724,ptrb:724,ptrc:724,ptrloc:[676,710],ptroff:607,ptrreg:607,ptrtoint:[674,764],ptrtoreplacedint:743,ptrty:710,ptrval:710,pts:747,ptx30:607,ptx31:607,ptx:[607,660,661],ptxa:660,ptxstring:724,pty2:710,pty:710,pub:759,publicli:[667,743,755,757,759,766],publicstreamindex:731,publish:[660,739,743,748,753,757],pubnam:[630,759],pubtyp:[630,759],pugixml:712,pull:[588,594,600,607,679,698,723,742,745,757,766],puls:712,punctuat:[610,710,770],pure:[594,601,607,665,667,710,711,714,741,750,759,780,804],purpos:[585,590,596,603,607,610,611,616,639,641,644,658,659,660,663,666,667,669,674,677,679,683,686,705,709,710,714,722,724,726,727,728,731,735,736,741,742,743,748,753,756,757,760,763,764,765,768,769,770,771,782,785,788,804,805,810],pursu:760,push:[585,590,601,604,607,610,676,702,710,723,742,745,810],push_back:[610,709,743,780,786,803,804,805,806,807,808,809,810],pushf:[607,760],pushfl:607,pushfq:[607,760],pushq:[760,764],put:[585,590,593,594,595,601,605,607,608,610,611,659,664,665,668,669,673,676,679,683,684,685,700,702,710,717,722,726,741,742,743,745,757,759,760,775,782,786,805,806,808,810,811],putchar:[805,806,807,808,809,810],putchard:[805,806,807,808,809,810,811],puzzl:704,pwd:[654,679,701],pygment:804,pynam:747,python3:681,python:[616,663,676,679,681,701,712,723,743,745,748,753,761,768,770,773,775,811],python_execut:679,q15:710,qch:605,qeaa:669,qemu:712,qhelpgener:605,qmcpack:747,qnan:710,qspectr:760,qt_coding_styl:748,quad:[588,589,590,671,709,710,760,764,780],quad_perm:[588,59
0],quadrant:590,quadrat:[607,617,721,743],qualifi:[590,607,609,677,710,716,729,730,759],qualiti:[602,607,609,630,639,674,689,697,702,710,725,766,775,782,788,790],qualtyp:610,quantiti:[639,710],quarantin:756,quarantine_max_chunk_s:756,quarantine_size_kb:756,quarter:783,quartil:760,quentin:692,queri:[585,590,593,594,607,610,639,644,659,663,674,693,706,707,709,710,721,723,725,726,728,739,743,759,779,780,782,784,789,811],query_sampl:590,question:[585,593,600,609,639,662,667,674,677,678,679,681,683,703,710,712,717,722,742,743,744,746,748,751,752,759,761,770,776,803,806,808,811],queu:590,queue:[639,726,743,780,782,783],queue_ptr:590,quick:[593,601,609,610,621,639,674,677,701,720,741,742,753,759,805,811],quicker:[697,721,755,789],quickest:695,quickli:[593,601,607,609,610,621,659,662,667,669,679,680,681,704,710,712,721,729,740,743,748,759,765,775,783,789],quicksilv:747,quickstart:[678,701],quiet2:659,quiet:[590,616,630,640,659,684,705,710],quieta:659,quirk:[606,764],quit:[593,607,611,639,660,665,670,674,676,677,679,710,717,722,743,760,761,762,765,770,787,790,804,806,811],quot:[590,605,621,659,679,710,712,716,759,768,775,786],quotient:[611,710],quotingtyp:786,quux:610,qux:710,r10:[607,611,631,724,758,771],r10b:771,r10d:771,r10w:771,r11:[710,758,762,771],r11b:771,r11d:771,r11w:771,r123456:745,r12345:702,r1234:745,r128:[66,157,254],r12:[607,671,710,771],r12b:771,r12d:771,r12w:771,r13:[607,642,771],r13b:771,r13d:771,r13w:771,r14:[607,642,710,771],r14b:771,r14d:771,r14w:771,r15:[607,642,710,716,771],r15b:771,r15d:771,r15w:771,r258109:745,r31:[607,710],r341082:712,r390:590,r390x:590,r600:590,r630:590,r6xx:590,r700:590,r7xx:590,r8b:771,r8d:771,r8w:771,r9b:771,r9d:771,r9w:771,r_386_8:671,r_amdgpu_abs32:590,r_amdgpu_abs32_hi:590,r_amdgpu_abs32_lo:590,r_amdgpu_abs64:590,r_amdgpu_gotpcrel32_hi:590,r_amdgpu_gotpcrel32_lo:590,r_amdgpu_gotpcrel:590,r_amdgpu_non:590,r_amdgpu_rel16:590,r_amdgpu_rel32:590,r_amdgpu_rel32_hi:590,r_amdgpu_rel32_lo:590,r_amdgpu_rel64:590,r_amdgpu_relative64:590
,r_arm_thm_movw_abs_nc:701,r_offset:590,r_x86_64_8:671,race:[590,594,608,663,710,743,756,775],racist:608,radamsa:712,radare2:712,radeon:[585,590],radic:[607,693],radiu:703,radix:[607,610,640,650,652,710],rage:623,raii:663,rais:[585,600,601,635,669,676,684,710,741,743,745,757,760,764,766,770],rajaperf:747,ralia:632,ram:[605,681,696,712],ramif:677,ramp:663,ramsei:607,ran:[625,682,760,781,782],rand:712,random:[595,601,610,612,615,616,667,710,712,720,743,747],randomize_va_spac:595,randomli:[610,651,695,783],rang:[20,21,22,23,24,27,84,120,123,124,125,126,127,129,175,209,212,213,214,215,216,217,220,270,314,370,371,372,373,374,375,378,435,479,482,483,484,485,486,487,490,548,584,585,588,589,590,593,598,599,601,607,608,616,617,630,638,639,644,645,659,663,666,669,671,674,676,677,678,682,684,685,703,709,714,741,750,759,764,768,769,770,779,782,783,787,805,807,812],range_specifi:590,rangecheck:658,rangeend:671,rangelist:770,rangepiec:770,rangestart:671,rank:[710,741],ranlib:[615,621,679,694,700],rapaport:750,rapid:[667,760],rare:[597,607,608,610,611,669,673,674,676,683,701,710,738,743,760,780],rarer:685,rat:639,rate:[597,609,623,639,662,695,712,745],rather:[585,590,593,597,598,602,607,608,610,611,614,616,621,625,639,652,654,660,667,668,672,674,676,677,681,683,688,695,697,709,710,712,714,725,726,727,728,731,733,741,743,745,746,750,753,759,760,764,769,770,775,779,780,788,789,790,803,804,810],ratio:[590,598,645,760],rational:[607,667,683,757,778],rauw:[705,711,722,743],raw:[607,622,625,629,632,636,639,641,642,644,646,648,649,657,659,663,671,699,706,710,715,718,743,774,784,804],raw_fd_ostream:809,raw_ostream:[607,670,743,769,782,784,786,809,810],rawfrm:[770,780],rax:[607,710,716,760,762,764,771,780],rbp:[607,716,771],rbx:[607,771],rc1:[702,753],rc2:[702,753],rc3:702,rcn:[702,753],rcp_iflag_f32:590,rcu:639,rcx:[607,710,760,771],rdi:[607,631,671,710,716,759,760,762,771],rdna1:590,rdna2:590,rdna:590,rdtsc:[631,710],rdx:[607,760,762,764,771],rdynam:[805,807],reach:[585,590,610,616,639,6
63,667,669,673,702,709,710,712,714,719,721,722,726,728,741,743,750,755,756,757,760,762,766,780],reachabl:[605,676,682,709,710,711,714,760,764],react:[600,709],read:[20,29,30,68,104,105,106,107,115,123,131,159,194,195,196,203,204,212,222,223,256,294,295,296,297,298,299,300,301,309,370,380,420,455,456,457,471,472,482,492,493,532,568,569,570,571,579,585,588,589,590,593,594,596,597,601,605,606,607,610,611,617,619,621,622,623,625,626,628,629,630,631,632,635,639,640,641,642,643,644,645,648,649,650,652,653,654,657,658,659,661,663,665,667,668,670,671,672,673,678,679,681,699,704,709,710,712,716,717,722,723,731,733,739,741,742,743,752,759,760,761,762,763,764,769,770,771,775,779,783,785,786,789,802,803,804,805,806,807,808,809,810],read_async:663,read_onli:590,read_writ:590,readabl:[589,591,594,604,607,609,622,623,625,629,630,636,638,645,654,658,671,679,695,705,709,710,716,717,723,731,741,743,748,757,759,770,782,786,804],readandmaybemodifi:712,readattribut:768,readelf:[615,640,642],reader:[585,597,610,615,640,644,645,658,670,672,710,722,723,731,748,768,785,786,788,789,803],readi:[609,639,659,662,663,667,694,702,709,712,715,719,722,723,727,759,782,803,804,805,806,807,808,809,810,812],readili:[689,757],readjust:607,readm:[667,679,717,745,748,761,773,775],readnon:[597,674,710,724,741,759,777],readobj:[615,640,642,648,743,759],readonli:[590,597,641,674,710,721,741,759,762,784],readtscp:785,readwrit:590,readyset:639,real:[585,586,590,593,595,611,631,639,641,642,653,659,671,683,710,712,724,727,731,738,743,755,759,771,780,802,807,812],realign:[590,684,710],realist:[676,760,803],realiti:585,realiz:[667,670,807],realli:[590,592,594,605,610,659,660,667,670,672,677,679,681,683,710,712,717,721,736,738,741,742,743,759,761,774,775,802,804,805,806,807,808,810,811],realloc:[593,695,743,756],reallyhidden:659,realmag:807,realpr:710,realstart:807,ream:748,reamesconcern:748,reappli:[609,667],rearrang:711,reason:[585,590,591,593,594,599,600,603,605,606,607,609,610,611,659,663,665,667,669,670,673,67
4,676,677,679,686,688,689,693,695,698,701,703,709,710,712,714,717,720,721,722,726,730,733,743,746,747,754,755,756,757,758,759,760,761,764,765,766,768,770,775,776,781,782,788,790,803,804,805,806,808,810],reassoc:710,reassoci:[710,711,805,806,807,808],reassur:712,rebas:[642,679,723,742,745],rebuild:[592,605,614,699,723,762,784],rec1:770,rec20:770,rec2:770,rec:[769,770],recal:[722,769,770,786,804,805,806],recalcul:725,recap:596,receipt:755,receiv:[589,597,606,607,609,662,667,669,674,675,696,710,712,715,725,743,755,757,760,768,770,771,776,790],recent:[585,605,609,621,662,665,667,668,669,676,679,710,712,719,723,738,742,753,756,757,760,786,805],recept:667,recheck:[722,741],recip:[698,700,745,750],recipi:667,reciproc:[639,710],reclaim:[710,711,726],reclist:770,recogn:[595,607,611,654,659,710,713,722,741,743,760,774,802,806,810],recognis:743,recombin:[679,788],recommend:[585,590,603,605,606,607,610,611,616,639,660,667,674,676,677,679,681,688,690,694,695,699,700,708,710,712,719,724,740,742,743,757,760,764,770,775,776,782,803,805,808,809],recommit:[609,667],recommonmark:[717,761],recompil:[593,596,673,726,762,790,806],recomput:[585,593,743,782],reconfigur:696,reconstitut:[688,705,710],reconstruct:[585,621,644,677,733,759,783,785],record:[585,591,600,617,619,640,645,649,657,658,667,671,695,709,710,716,719,731,734,735,736,739,742,750,754,755,757,759,762,771,779,784,804,805,806,807,808,809,810,811],record_kind:785,record_meta_container_info:754,record_meta_external_fil:754,record_meta_remark_vers:754,record_meta_strtab:754,record_remark_arg_with_debugloc:754,record_remark_arg_without_debugloc:754,record_remark_debug_loc:754,record_remark_head:754,record_remark_hot:754,recordbodi:770,recordcallsit:709,recordfield:769,recordhead:730,recordid:597,recordkind:730,recordlen:730,recordmap:769,recoup:597,recov:[611,669,673,695,710,759,762],recover:[585,610],recoveri:[743,803,804,805,806,807,808,809,810],recreat:[710,786],recur:703,recurr:[666,741,750],recurs:[590,610,611,616,630,632,644
,658,676,695,710,723,741,743,759,760,764,768,802,803,804,806,807,808,812],recursive_funct:723,recursiveexpansionlimit:616,recval:769,recycl:[607,756,783],red:[710,723,742,760],redefin:[641,804,807,808],redefinit:[660,788,804,808],redesign:678,redhat:[740,757],redirect:[612,630,631,695,710,712,775,777,810],rediscov:811,redistribut:667,redo:722,reduc:[590,595,601,602,604,605,606,607,610,612,632,659,660,663,664,667,668,677,679,684,688,704,707,711,712,714,722,724,726,743,745,750,751,752,753,756,760,770,771,782,783,788,791,805,810,812],reduce_input:712,reduct:[590,601,610,615,619],redund:[607,614,659,660,674,683,685,705,709,710,711,714,727,728,743,745,759,764,790,805],redwin:607,redwood:590,reentri:726,reexamin:609,reexport:791,reexportedflag:726,reexportflag:726,ref:[585,593,724,730,731,734,745,764,770,773,803,804,805,806,807,808,809,810],refactor:[609,610,667,709,711,745,748,808],refcount:676,refer:[24,127,217,375,487,586,587,589,590,592,593,596,597,598,601,605,606,607,609,610,611,612,616,619,639,641,642,653,658,661,662,663,664,667,669,671,672,677,685,687,689,695,697,698,702,703,705,706,709,711,712,715,718,720,721,722,724,726,728,729,730,731,734,735,738,739,742,753,754,759,760,762,765,766,769,771,773,775,776,778,780,782,785,786,787,788,789,790,803,804,805,806,807,808,809,810],referenc:[585,589,590,597,654,659,660,694,709,710,711,713,722,726,729,731,738,743,754,759,770,777,780,788,789,803,804,805,806,807,808,809,810],referenti:741,refil:722,refin:[593,607,710,743,748,788],reflect:[585,589,590,593,664,667,693,702,710,713,726,748,759,766,780,782,807],reflex:722,reflog:745,reflogexpir:745,reflogexpireunreach:745,refnam:745,reformat:[610,718,748],reformul:593,refrain:[702,710,755],refresh:[685,713],refs_to_clean:745,refspec:745,refus:[605,622,629,632,636,657,741,760],reg1024:607,reg1025:607,reg1026:607,reg1027:607,reg1:[611,710],reg2:[611,710],reg:[20,123,212,370,482,585,590,607,611,631,642,716,718,724,743,762,770,780],reg_bas:611,reg_nam:631,reg_offset:611,reg_sequ:[683,71
6],regalloc:[607,617,619,782],regalloclinearscan:607,regallocregistri:782,regard:[585,594,610,621,638,645,672,678,702,708,710,727,754,756,768],regardless:[585,590,593,605,610,622,629,632,636,639,652,657,676,681,710,723,726,740,741,759,760,766,773,789,808],regbankselect:[689,693,716],regconstraint:607,regcost:769,regener:704,regex:[625,630,641,653,658,659,754,774,775],regex_replac:616,regexp:616,region0:664,region1:664,region:[585,589,590,597,625,639,663,669,710,711,714,719,745,750,756,760,762,770,777],regioninfo:[741,750],regionsforfile0:664,regionsforfile1:664,regionvector:749,regist:[20,55,84,85,98,112,114,117,123,145,175,176,205,212,270,271,288,306,308,310,352,370,404,435,436,449,470,482,517,548,549,562,576,578,580,586,587,588,589,591,593,594,596,604,611,612,617,619,631,639,642,657,658,659,663,665,667,669,676,679,681,684,685,686,687,688,689,690,691,693,696,705,709,710,715,718,723,726,728,742,743,750,762,764,768,770,771,779,781,783,786,804,807,808,809,810],registeralias:768,registeranalysisgroup:593,registerasmprint:780,registerasmstream:607,registerbankinfo:[683,690],registerclass:[607,768,771,780],registerdescriptor:780,registeredarg:676,registerehfram:715,registerehframesect:709,registerinfo:[607,716,770,780],registerlist:780,registermcobjectstream:607,registermypass:782,registerouteranalysisinvalid:725,registerpass:782,registerpassbuildercallback:[725,782],registerpasspars:782,registerpipelinestartepcallback:725,registerregalloc:782,registerschedul:782,registerstandardpass:782,registertarget:780,registerwithsubreg:780,registr:[661,665,672,676,709,726,788],registri:[676,756,809],reglist:780,regmapping_f:607,regmask:716,regno:607,regnum:762,regoffset:718,regress:[601,611,616,665,667,679,681,682,689,712,744,745,753,760,781],regstat:[607,716],regsvr32:681,regtyp:780,regul:782,regular:[585,588,590,594,602,610,611,616,621,625,630,632,634,637,640,641,644,653,658,659,660,679,709,710,712,716,722,726,745,749,754,766,770,774,775,786,811],regularli:766,reid:765,reilli:743
,reinterpret:[596,684,688],reintroduc:760,reject:[597,611,659,679,686,702,710,811],rejoin:714,rel32:590,rel:[24,127,217,375,487,589,590,594,597,598,604,605,607,610,611,616,623,625,642,644,645,648,649,654,659,663,664,671,674,676,678,679,709,712,714,718,721,743,744,750,753,758,759,762,764,789,806,810],rel_path_to_fil:625,rela:[590,652],relai:710,relat:[585,590,593,594,600,606,607,609,610,611,613,616,618,639,641,653,655,656,658,662,663,666,667,669,671,677,678,679,683,698,704,707,710,712,719,721,722,723,724,727,731,741,743,745,746,748,749,751,753,756,757,759,762,764,766,768,775,782,786,804,806,807],relationship:[590,611,630,666,669,676,705,707,709,710,722,759,765,780,782,783],relative_function_pointer_to_async_impl:663,relativenam:654,relax:[594,639,683,684,689,709,710,764],relaxgotedg:709,relbf:710,releas:[590,603,605,607,610,624,663,665,667,668,669,674,678,679,681,694,697,699,700,706,712,713,723,726,740,743,744,745,754,756,757,759,762,766,775,780,782,784,787,804],release_39:748,release_fil:702,release_test:702,release_to_os_interval_m:756,releasei:753,relev:[585,593,594,607,609,611,648,649,660,663,667,674,697,699,701,702,703,707,709,710,714,723,725,727,739,740,742,743,745,746,750,755,757,764,783,806],reli:[585,590,596,598,603,606,607,610,663,667,674,676,677,688,698,709,710,712,713,726,727,736,745,753,759,760,764,775,779,781,783,789,808],reliabl:[605,616,660,673,689,750,759,773],religion:608,relink:[601,624],reload:[607,663,676,685,711,712,760,808,809,810],reloc:[585,589,607,619,641,642,643,648,649,653,654,676,701,704,709,715,716,731,780,809],reloc_absolute_dword:780,reloc_absolute_word:780,reloc_pcrel_word:780,reloc_picrel_word:780,relocat:[24,127,217,375,487,585,590,619,641,709,710,715,726,764,780],relocationtyp:780,reloccrc:731,relpo:621,relr:[648,649],relwithdebinfo:[603,605,679,681,699,723],rem:[607,684],remain:[585,590,594,599,605,610,611,616,639,663,664,666,667,669,671,676,688,689,702,705,709,710,714,716,725,727,728,736,741,743,750,755,756,757,759,760,762,764,76
6,770,775,785,788,807,808],remaind:[585,589,607,611,663,664,670,679,684,710,741,748,760,769,770,775,776,779,780],remainingbuffersizetoskip:785,remap:[607,614,615,645,691],remark:[601,614,617,748,778,779],remarkemitt:748,remarkstream:754,remateri:[594,741],rematerializ:720,rememb:[600,608,609,610,659,662,667,672,677,681,697,701,710,712,743,764,774,782,803,806,808,809,810],remind:[667,702,719,745],remot:[590,715,726,745,788,791],remoteobjectclientlay:726,remoteobjectserverlay:726,remov:[585,593,597,601,602,603,605,607,610,611,614,616,617,621,625,629,632,639,641,653,659,663,667,668,669,672,674,679,683,685,688,698,702,703,706,709,710,711,712,713,714,716,721,722,724,725,726,727,728,740,743,745,748,752,757,759,764,765,775,776,777,780,782,790,804,805,806,807,808,809,810,811],remove_if:743,removeabsolutesymbol:709,removeblock:709,removebranch:780,removedefinedsymbol:709,removeexternalsymbol:709,removefrompar:743,removemodul:[726,790,805],removeobject:726,removeredundantdebugvalu:759,renam:[585,601,602,610,639,641,679,683,710,716,726,743,745,748,753,759,767,791,808],render:[585,590,607,686,710,743,784,807],reopen:[600,681],reorder:[590,594,611,639,666,684,705,710,759],reorgan:[750,770,786],repack:745,repeat:[590,601,611,631,639,669,671,689,700,710,718,722,741,745,748,753,759,770],repeatedli:[590,673,710],repetit:[607,611,631,766],rephras:[677,710],repl:[726,770,788,789,805],replac:[585,588,590,594,602,605,607,616,621,625,626,633,635,637,640,641,642,644,647,650,652,653,654,659,660,663,667,669,674,676,679,683,684,686,688,689,691,694,695,705,710,711,712,713,714,716,721,722,723,724,726,727,728,745,750,751,754,759,760,764,765,766,770,775,776,777,780,788,789,790,791,805,808],replacealldbguseswith:705,replaceallus:688,replacealluseswith:[705,711,714,722,743],replaceinstwithinst:743,replaceinstwithvalu:743,replacementtext:723,replaceoperandwith:776,replaceusesofwith:[711,743],replaceusesofwithonconst:711,replai:[590,704],repli:[667,702,723,748],replic:[585,704,713,728,750,786],repo:
[667,668,679,702,739,745,782],report:[585,590,593,595,601,605,608,611,616,617,623,633,635,637,639,641,642,643,650,652,653,654,658,664,667,675,676,678,694,695,699,705,709,710,711,712,713,714,726,743,745,756,759,760,762,763,764,769,770,774,775,776,782,787,788,804],report_fatal_error:[610,743],reporterror:[788,789,790,791],repositori:[609,667,668,669,678,679,682,701,702,704,723,742,747,748,757,759,766,775],repres:[24,127,217,375,487,585,589,590,593,594,596,597,598,599,604,606,610,611,616,630,639,640,641,645,659,663,664,666,669,670,671,672,673,674,676,677,679,683,684,688,699,706,709,710,714,716,721,723,724,726,727,730,731,733,735,738,739,741,743,745,748,750,754,757,759,760,764,768,769,770,771,773,780,783,785,786,790,791,803,804,805,806,807,808,809,810],represent:[588,590,596,598,606,607,610,644,645,658,659,662,666,669,670,676,679,683,685,689,693,709,711,714,716,718,726,743,751,759,762,768,770,771,778,780,782,783,784,785,786,788,789,803,804,805,807,811],reprimand:755,repro:[682,704],reproduc:[600,601,610,614,662,667,668,682,702,704,712,728,775,808],repurpos:759,reqd_work_group_s:590,reqd_workgroup_s:590,reqdworkgroups:590,request:[30,223,380,493,585,590,597,600,611,621,639,641,653,662,667,668,669,671,679,684,688,709,710,711,712,713,715,721,725,726,743,745,748,754,755,756,757,762,764,766,770,776,785,788,789,790,809],requir:[84,585,587,588,589,590,592,594,595,596,597,601,603,604,605,606,610,611,616,621,625,627,631,633,637,638,639,641,642,646,652,653,661,664,669,670,672,673,674,675,682,683,684,687,688,689,693,694,695,697,698,699,701,702,703,707,708,709,710,712,713,714,715,716,721,722,724,725,726,727,728,733,735,741,743,745,748,753,754,755,756,757,759,760,762,764,765,768,769,770,773,776,780,785,786,788,802,803,804,805,806,807,808,809,810,811,812],requires_rtti:740,rerereresolv:745,rerereunresolv:745,rerun:[679,742],res:710,resbyarg:710,rescan:722,reschedul:759,research:[670,747,757,760,774,787],resembl:[677,760],reserv:[590,595,597,607,673,709,710,712,730,731,738,756,760,762
,764,770,775,780,785],reserved_464:590,reserved_468:590,reserveresourc:607,reset:[590,597,605,669,682,712,720,743,760,770,785],resid:[585,588,590,596,597,604,663,676,710,712,724,726,735,756,759,760,766,782],resign:667,resili:[639,719,766],resist:712,resolut:[595,600,610,660,667,689,709,710,726,755,757,805,807],resolv:[24,127,217,375,487,589,590,603,606,607,608,609,616,631,660,665,667,698,709,710,713,715,726,727,743,748,755,757,760,763,766,770,782,785,789,790,804,805,806],resolvereloc:715,resolverti:710,resort:669,resourc:[34,66,67,96,97,132,157,158,175,186,187,225,254,255,270,286,287,349,384,418,419,435,447,448,497,530,531,548,560,561,588,605,606,631,639,649,659,668,669,693,709,726,743,780,782],resourcekei:709,resourcetrack:[805,806,807,808],resourcetrackersp:[788,789,790,791],resp:[631,710,743],respect:[585,590,593,601,605,607,608,610,612,627,630,639,645,649,650,652,654,660,663,667,669,676,677,679,684,698,703,710,711,714,716,718,726,743,744,745,754,756,757,764,771,773,775,777,780,788,790,808],respond:[593,609,662,667,719,741,742,755,757],respons:[590,607,609,621,626,639,640,641,642,643,648,649,650,652,653,654,663,667,669,676,686,702,709,710,723,726,736,742,743,750,754,755,758,762,764,766,768,776,777,782,786,788],rest:[213,371,483,585,590,597,607,611,632,663,667,676,679,683,688,693,695,698,710,712,713,716,717,722,723,735,743,754,759,761,765,770,783,805,808],restart:[594,688,696,712,743,782],restat:610,restofthecleanup:663,restor:[585,590,597,607,663,669,708,710,758,760,780,806,807,808,809,810],restrict:[585,590,594,605,607,610,639,652,659,667,670,671,672,677,683,684,708,709,710,716,721,730,743,745,757,759,760,762,764,766,770,779,782,805],restructur:776,restructuredtext:761,result:[585,588,589,590,594,596,597,601,603,604,605,606,607,610,611,612,614,619,622,625,630,631,639,644,645,654,657,659,660,663,664,665,666,667,668,669,670,671,672,674,676,677,678,679,681,682,683,684,688,689,694,695,699,700,702,704,705,706,707,709,712,714,715,716,721,722,723,724,725,726,728,729,73
5,736,741,743,745,748,749,753,754,759,760,762,764,765,766,768,770,771,774,775,776,779,780,781,782,783,784,786,788,790,803,804,805,806,807,808,809,810],result_typ:710,results_a:773,results_b:773,resultti:710,resulttyp:710,resultv:710,resum:[590,665,669,762],resume1:663,resume2:663,resume_func_ptr:663,resumpt:663,ret:[594,596,599,607,611,663,669,672,676,677,705,714,716,717,721,724,741,743,758,759,760,761,762,764,770,775,777,781,804,805,806,808],ret_typ:710,retain:[585,590,596,659,667,672,711,743,770,782],retainednod:[705,710,759],retainedtyp:[710,759],retarget:[607,778],retcc_sparc32:780,retcc_x86_32:780,retcc_x86_32_c:780,retcc_x86_32_fast:780,retcc_x86_32_ss:780,retcc_x86common:780,rethrow:[669,764],retir:667,retireooo:639,retpolin:760,retq:[716,760,764],retri:[766,783],retriev:[585,621,658,659,669,671,710,715,721,724,742,743,754,762,768,770,782,786],retroact:667,retti:597,return_addr:760,return_address_regist:[585,590],returnaddress:716,returndoesnotalia:710,returnindex:706,returns_signed_char:710,returns_twic:[597,710],returntyp:[676,710],retval:[669,710,804,805,806,807,808,809,810],reus:[590,606,607,611,666,693,695,710,725,745,748,756,759,760,761,762,770,771,781],reusabl:[607,693],rev128:596,rev64:596,rev:[596,745,748],reveal:[677,760],revector:[741,743],revers:[588,596,605,611,640,677,684,711,741,743,759,760,770],revert:[585,609,667,710,766],revert_polici:667,review:[600,604,610,662,674,678,679,681,698,702,711,712,717,722,745,748,749,750,755,757,761,764,765,766,770],revis:[279,280,291,294,295,296,600,605,609,610,667,668,674,679,682,702,710,742,743,759,762,766],revisit:[610,750,760,764],revisoin:742,revlist:745,revmap:745,revoc:757,revok:[667,757],revolv:759,reword:611,rework:[659,667],rewound:669,rewrit:[605,639,658,667,703,705,709,710,711,741,745,759,764,780,808],rewritestatepointsforgc:[676,710],rewritten:[607,663,743,745],rex:710,rfc:[667,711,749,757,766],rfunc:632,rgb:747,rgba:588,rglob:632,rgm:782,ri_inst:770,ricci:748,ricciacronym:748,rice:666,rich:[683,75
1],richer:762,rid:[722,731],right:[585,588,589,590,594,596,606,607,610,612,631,659,662,666,667,670,672,674,679,684,695,703,704,710,714,716,722,723,736,741,742,745,753,760,761,768,770,775,785,803,804,805,806,807,808,809,810,811,812],right_op:710,rightr:722,rigor:[607,766],riinst:770,rilei:757,ring:590,rip:[642,710,716,771,775],risc:[594,607,642,658,678,710,748,749,780],riscv:[658,679],riscv_vector:658,riscv_vector_builtin:658,riscv_vector_builtin_cg:658,rise:610,risk:[697,710,743,759,760],riski:760,rival:743,rl1:724,rl247405:712,rl247414:712,rl247416:712,rl247417:712,rl247420:712,rl247422:712,rl2:724,rl317337:756,rl3:724,rl4:724,rl5:724,rl6:724,rl7:724,rmrk:754,rmw:594,rnglist:630,ro_signed_pat:771,roadmap:[678,766],rob:[639,660],robert:660,robin:639,robinson:[748,757],robinsonagre:748,robinsondistinguish2:748,robinsondistinguish:748,robinsontransit:748,robot:678,robust:[609,660,693,705,710,718,746,775],rocgdb:585,rocm:[585,590],rodata:[590,760,780],rogu:760,role:[610,644,710,714,722,743,753,759,803],roll:[610,703,719],rom:641,roms_:747,roms_r:747,room:[706,782,803],root:[600,605,607,616,624,667,668,679,695,696,701,709,710,711,714,735,743,744,745,748,756,768,775,782,784,804],rootcount:676,rootmetadata:676,rootnum:676,roots_begin:676,roots_end:676,roots_iter:676,roots_siz:676,rootstackoffset:676,rope:743,rot:[667,766],rotat:[585,588,607,670,684,710],rotl:670,rough:677,roughli:[594,607,660,669,702,703,711,712,748,759],roun:660,round:[585,596,607,639,660,667,684,695,702,723,760,786,807],round_m:684,rounded_32:684,rounded_64:684,roundp:779,roundtointegraltiestoeven:710,roundup:590,routin:[585,594,607,616,667,669,676,710,722,741,760,764,765,782,783,785,803,811,812],rov:590,rovid:695,row:[588,639,707,710],row_bcast:[588,590],row_half_mirror:588,row_mask:[0,1,3,4,7,8,9,590],row_mirror:[588,590],row_newbcast:588,row_ror:588,row_shar:588,row_shl:[588,590],row_shr:588,row_xmask:588,rowfield:707,rpass:779,rpath:[615,642,679],rpc:[709,726,785],rpcc:710,rpmalloc:605,rpo:711,rppas
smanag:782,rrinst:770,rs1:780,rs2:780,rs4gc:764,rs880:590,rsb:760,rsi:[607,760,771],rsp:[607,621,716,758,760,762,764,771],rsrc:[588,649],rss:[712,756],rss_limit_mb:712,rst:[591,607,658,761,768],rsync:773,rtc:776,rtdyldobjectlinkinglay:[726,788,789,790,791],rthroughput:639,rtl:607,rtld_fini:665,rtti:[605,669,740,743,751,777,808,809,810],rttiextend:703,rttiroot:703,rubi:676,rude:669,rui:748,rule:[587,590,594,607,608,610,611,614,622,627,639,644,658,660,663,667,683,685,701,708,709,722,726,743,744,748,753,754,756,759,760,764,765,766,770,771,774,775,782,785,804,805,806],ruleset:688,run:[585,590,592,593,594,595,596,601,602,605,606,607,610,611,612,614,624,625,631,639,641,645,647,657,658,659,660,661,662,663,664,665,668,669,672,673,674,676,678,679,680,681,685,689,694,696,697,698,699,700,702,704,705,709,710,713,715,716,720,721,722,723,725,726,727,728,740,741,742,743,744,745,748,751,753,757,759,760,761,762,764,766,768,769,770,775,779,780,783,784,785,788,789,790,791,803,804,805,806,807,808,809,810,811,812],run_long_test:775,runbeforepass:781,runfunct:[665,743],runfunctionasmain:665,runhelp:610,runnabl:679,runner:773,runonfunct:[593,601,728,743,780,784],runonmachinefunct:[607,687,728,780],runonmodul:784,runtest:[701,753],runtim:[585,590,592,594,597,599,601,603,605,607,610,611,625,639,642,654,659,660,661,665,667,673,676,679,698,700,711,712,714,726,736,741,743,745,756,758,759,762,764,765,773,776,780,782,784,786,789,790,805,807,811],runtimedyld:[715,726],runtimedyldelf:715,runtimedyldimpl:715,runtimedyldmacho:715,runtimehandl:590,runtimevers:[705,710,759],rust:[748,757],rv670:590,rv710:590,rv730:590,rv770:590,rvaluerefer:730,rvaluerefthispoint:730,rwx:709,ryzen:590,rzi:724,s15:710,s16:[683,684,688],s17:683,s31:710,s32:[683,684,685,687,688,705,716,724],s33:684,s64:[683,684,688,724],s7100:590,s7150:590,s9150:590,s9170:590,s9300x2:590,s_abs_i32:[0,2,3,4,9],s_absdiff_i32:[0,2,3,4,9],s_add_i32:[0,2,3,4,9],s_add_u32:[0,2,3,4,9,590,716],s_addc_u32:[0,2,3,4,9,590],s_addk_i32:[0,2,3,4,9],s_a
nd_b32:[0,2,3,4,9],s_and_b64:[0,2,3,4,9,590],s_and_saveexec_b32:0,s_and_saveexec_b64:[0,2,3,4,9],s_andn1_saveexec_b32:0,s_andn1_saveexec_b64:[0,4,9],s_andn1_wrexec_b32:0,s_andn1_wrexec_b64:[0,4,9],s_andn2_b32:[0,2,3,4,9,590],s_andn2_b64:[0,2,3,4,9],s_andn2_saveexec_b32:0,s_andn2_saveexec_b64:[0,2,3,4,9],s_andn2_wrexec_b32:0,s_andn2_wrexec_b64:[0,4,9],s_annot:710,s_arch:660,s_ashr_i32:[0,2,3,4,9,590],s_ashr_i64:[0,2,3,4,9],s_atc_prob:[0,3,4,9],s_atc_probe_buff:[0,3,4,9],s_atomic_add:[0,4,9],s_atomic_add_x2:[0,4,9],s_atomic_and:[0,4,9],s_atomic_and_x2:[0,4,9],s_atomic_cmpswap:[0,4,9],s_atomic_cmpswap_x2:[0,4,9],s_atomic_dec:[0,4,9],s_atomic_dec_x2:[0,4,9],s_atomic_inc:[0,4,9],s_atomic_inc_x2:[0,4,9],s_atomic_or:[0,4,9],s_atomic_or_x2:[0,4,9],s_atomic_smax:[0,4,9],s_atomic_smax_x2:[0,4,9],s_atomic_smin:[0,4,9],s_atomic_smin_x2:[0,4,9],s_atomic_sub:[0,4,9],s_atomic_sub_x2:[0,4,9],s_atomic_swap:[0,4,9],s_atomic_swap_x2:[0,4,9],s_atomic_umax:[0,4,9],s_atomic_umax_x2:[0,4,9],s_atomic_umin:[0,4,9],s_atomic_umin_x2:[0,4,9],s_atomic_xor:[0,4,9],s_atomic_xor_x2:[0,4,9],s_barrier:[0,2,3,4,9,590],s_bcnt0_i32_b32:[0,2,3,4,9],s_bcnt0_i32_b64:[0,2,3,4,9,587,590],s_bcnt1_i32_b32:[0,2,3,4,9],s_bcnt1_i32_b64:[0,2,3,4,9],s_bfe_i32:[0,2,3,4,9],s_bfe_i64:[0,2,3,4,9,589,590],s_bfe_u32:[0,2,3,4,9],s_bfe_u64:[0,2,3,4,9,589],s_bfm_b32:[0,2,3,4,9],s_bfm_b64:[0,2,3,4,9,590],s_bitcmp0_b32:[0,2,3,4,9],s_bitcmp0_b64:[0,2,3,4,9,590],s_bitcmp1_b32:[0,2,3,4,9,590],s_bitcmp1_b64:[0,2,3,4,9],s_bitreplicate_b64_b32:[0,4,9],s_bitset0_b32:[0,2,3,4,9],s_bitset0_b64:[0,2,3,4,9],s_bitset1_b32:[0,2,3,4,9],s_bitset1_b64:[0,2,3,4,9],s_branch:[0,2,3,4,9,24,127,217,375,487,589],s_brev_b32:[0,2,3,4,9],s_brev_b64:[0,2,3,4,9],s_buffer_atomic_add:[0,4,9],s_buffer_atomic_add_x2:[0,4,9],s_buffer_atomic_and:[0,4,9],s_buffer_atomic_and_x2:[0,4,9],s_buffer_atomic_cmpswap:[0,4,9],s_buffer_atomic_cmpswap_x2:[0,4,9],s_buffer_atomic_dec:[0,4,9],s_buffer_atomic_dec_x2:[0,4,9],s_buffer_atomic_inc:[0,4,9],s_buffer_atomic_inc_x2
:[0,4,9],s_buffer_atomic_or:[0,4,9],s_buffer_atomic_or_x2:[0,4,9],s_buffer_atomic_smax:[0,4,9],s_buffer_atomic_smax_x2:[0,4,9],s_buffer_atomic_smin:[0,4,9],s_buffer_atomic_smin_x2:[0,4,9],s_buffer_atomic_sub:[0,4,9],s_buffer_atomic_sub_x2:[0,4,9],s_buffer_atomic_swap:[0,4,9],s_buffer_atomic_swap_x2:[0,4,9],s_buffer_atomic_umax:[0,4,9],s_buffer_atomic_umax_x2:[0,4,9],s_buffer_atomic_umin:[0,4,9],s_buffer_atomic_umin_x2:[0,4,9],s_buffer_atomic_xor:[0,4,9],s_buffer_atomic_xor_x2:[0,4,9],s_buffer_load_dword:[0,2,3,4,9],s_buffer_load_dwordx16:[0,2,3,4,9],s_buffer_load_dwordx2:[0,2,3,4,9],s_buffer_load_dwordx4:[0,2,3,4,9],s_buffer_load_dwordx8:[0,2,3,4,9],s_buffer_store_dword:[0,3,4,9],s_buffer_store_dwordx2:[0,3,4,9],s_buffer_store_dwordx4:[0,3,4,9],s_call_b64:[0,4,9],s_cbranch_cdbgsi:[0,2,3,4,9],s_cbranch_cdbgsys_and_us:[0,2,3,4,9],s_cbranch_cdbgsys_or_us:[0,2,3,4,9],s_cbranch_cdbgus:[0,2,3,4,9],s_cbranch_execnz:[0,2,3,4,9],s_cbranch_execz:[0,2,3,4,9],s_cbranch_g_fork:[2,3,4,9,590],s_cbranch_i_fork:[2,3,4,9],s_cbranch_join:[2,3,4,9,590],s_cbranch_scc0:[0,2,3,4,9],s_cbranch_scc1:[0,2,3,4,9],s_cbranch_vccnz:[0,2,3,4,9],s_cbranch_vccz:[0,2,3,4,9],s_claus:0,s_cmov_b32:[0,2,3,4,9,590],s_cmov_b64:[0,2,3,4,9],s_cmovk_i32:[0,2,3,4,9],s_cmp_eq_i32:[0,2,3,4,9,590],s_cmp_eq_u32:[0,2,3,4,9],s_cmp_eq_u64:[0,3,4,9],s_cmp_ge_i32:[0,2,3,4,9],s_cmp_ge_u32:[0,2,3,4,9],s_cmp_gt_i32:[0,2,3,4,9],s_cmp_gt_u32:[0,2,3,4,9],s_cmp_le_i32:[0,2,3,4,9],s_cmp_le_u32:[0,2,3,4,9],s_cmp_lg_i32:[0,2,3,4,9],s_cmp_lg_u32:[0,2,3,4,9],s_cmp_lg_u64:[0,3,4,9],s_cmp_lt_i32:[0,2,3,4,9],s_cmp_lt_u32:[0,2,3,4,9],s_cmpk_eq_i32:[0,2,3,4,9],s_cmpk_eq_u32:[0,2,3,4,9],s_cmpk_ge_i32:[0,2,3,4,9],s_cmpk_ge_u32:[0,2,3,4,9],s_cmpk_gt_i32:[0,2,3,4,9],s_cmpk_gt_u32:[0,2,3,4,9],s_cmpk_le_i32:[0,2,3,4,9],s_cmpk_le_u32:[0,2,3,4,9],s_cmpk_lg_i32:[0,2,3,4,9],s_cmpk_lg_u32:[0,2,3,4,9],s_cmpk_lt_i32:[0,2,3,4,9],s_cmpk_lt_u32:[0,2,3,4,9],s_code_end:0,s_cselect_b32:[0,2,3,4,9,590],s_cselect_b64:[0,2,3,4,9],s_dcache_discard:[0,4,9],s_
dcache_discard_x2:[0,4,9],s_dcache_inv:[0,2,3,4,9,590],s_dcache_inv_vol:[2,3,4,9,590],s_dcache_wb:[0,3,4,9,590],s_dcache_wb_vol:[3,4,9],s_decperflevel:[0,2,3,4,9],s_denorm_mod:0,s_endpgm:[0,2,3,4,9,590],s_endpgm_ordered_ps_don:[0,4,9],s_endpgm_sav:[0,3,4,9],s_ff0_i32_b32:[0,2,3,4,9],s_ff0_i32_b64:[0,2,3,4,9],s_ff1_i32_b32:[0,2,3,4,9],s_ff1_i32_b64:[0,2,3,4,9],s_flbit_i32:[0,2,3,4,9],s_flbit_i32_b32:[0,2,3,4,9],s_flbit_i32_b64:[0,2,3,4,9],s_flbit_i32_i64:[0,2,3,4,9],s_get_reg:589,s_get_waveid_in_workgroup:0,s_getpc_b64:[0,2,3,4,9,590],s_getreg_b32:[0,2,3,4,9,20,123,212,370,482],s_gl1_inv:0,s_icache_inv:[0,2,3,4,9],s_incperflevel:[0,2,3,4,9],s_inst_prefetch:0,s_load_dword:[0,2,3,4,9,590],s_load_dwordx16:[0,2,3,4,9,590],s_load_dwordx2:[0,2,3,4,9,590],s_load_dwordx4:[0,2,3,4,9],s_load_dwordx8:[0,2,3,4,9,590],s_lshl1_add_u32:[0,4,9],s_lshl2_add_u32:[0,4,9],s_lshl3_add_u32:[0,4,9],s_lshl4_add_u32:[0,4,9],s_lshl_b32:[0,2,3,4,9],s_lshl_b64:[0,2,3,4,9],s_lshr_b32:[0,2,3,4,9],s_lshr_b64:[0,2,3,4,9,590],s_max_i32:[0,2,3,4,9],s_max_u32:[0,2,3,4,9],s_memrealtim:[0,3,4,9],s_memtim:[0,2,3,4,9,590],s_min_i32:[0,2,3,4,9],s_min_u32:[0,2,3,4,9],s_mov_b32:[0,2,3,4,9,590],s_mov_b64:[0,2,3,4,9,590],s_movk_i32:[0,2,3,4,9],s_movreld_b32:[0,2,3,4,9],s_movreld_b64:[0,2,3,4,9],s_movrels_b32:[0,2,3,4,9],s_movrels_b64:[0,2,3,4,9],s_movrelsd_2_b32:0,s_mul_hi_i32:[0,4,9],s_mul_hi_u32:[0,4,9],s_mul_i32:[0,2,3,4,9],s_mulk_i32:[0,2,3,4,9],s_nand_b32:[0,2,3,4,9],s_nand_b64:[0,2,3,4,9],s_nand_saveexec_b32:0,s_nand_saveexec_b64:[0,2,3,4,9],s_nop:[0,2,3,4,9,590],s_nor_b32:[0,2,3,4,9],s_nor_b64:[0,2,3,4,9],s_nor_saveexec_b32:0,s_nor_saveexec_b64:[0,2,3,4,9],s_not_b32:[0,2,3,4,9],s_not_b64:[0,2,3,4,9],s_or_b32:[0,2,3,4,9],s_or_b64:[0,2,3,4,9],s_or_saveexec_b32:0,s_or_saveexec_b64:[0,2,3,4,9],s_orn1_saveexec_b32:0,s_orn1_saveexec_b64:[0,4,9],s_orn2_b32:[0,2,3,4,9],s_orn2_b64:[0,2,3,4,9],s_orn2_saveexec_b32:0,s_orn2_saveexec_b64:[0,2,3,4,9],s_pack_hh_b32_b16:[0,4,9],s_pack_lh_b32_b16:[0,4,9],s_pack_ll_b32_b
16:[0,4,9],s_quadmask_b32:[0,2,3,4,9],s_quadmask_b64:[0,2,3,4,9],s_rfe_b64:[0,2,3,4,9],s_rfe_restore_b64:[3,4,9],s_round_mod:0,s_s:710,s_scratch_load_dword:[0,4,9],s_scratch_load_dwordx2:[0,4,9],s_scratch_load_dwordx4:[0,4,9],s_scratch_store_dword:[0,4,9],s_scratch_store_dwordx2:[0,4,9],s_scratch_store_dwordx4:[0,4,9],s_sendmsg:[0,2,3,4,9,27,129,220,378,490,590],s_sendmsghalt:[0,2,3,4,9],s_set_gpr_idx_idx:[3,4,9],s_set_gpr_idx_mod:[3,4,9,213,371,483],s_set_gpr_idx_off:[3,4,9],s_set_gpr_idx_on:[3,4,9],s_set_reg:589,s_sethalt:[0,2,3,4,9,590],s_setkil:[0,2,3,4,9],s_setpc_b64:[0,2,3,4,9,590],s_setprio:[0,2,3,4,9],s_setreg_b32:[0,2,3,4,9],s_setreg_imm32_b32:[0,2,3,4,9],s_setvskip:[2,3,4,9,590],s_sext_i32_i16:[0,2,3,4,9],s_sext_i32_i8:[0,2,3,4,9],s_sleep:[0,2,3,4,9,590],s_store_dword:[0,3,4,9],s_store_dwordx2:[0,3,4,9],s_store_dwordx4:[0,3,4,9],s_sub_i32:[0,2,3,4,9],s_sub_u32:[0,2,3,4,9],s_subb_u32:[0,2,3,4,9],s_subvector_loop_begin:0,s_subvector_loop_end:0,s_swappc_b64:[0,2,3,4,9,590],s_trap:[0,2,3,4,9,590],s_ttracedata:[0,2,3,4,9],s_ttracedata_imm:0,s_version:0,s_waitcnt:[0,2,3,4,9,120,209,314,479,584,590],s_waitcnt_expcnt:0,s_waitcnt_lgkmcnt:0,s_waitcnt_vmcnt:0,s_waitcnt_vscnt:0,s_wakeup:[0,3,4,9],s_wqm_b32:[0,2,3,4,9],s_wqm_b64:[0,2,3,4,9,590],s_xnor_b32:[0,2,3,4,9],s_xnor_b64:[0,2,3,4,9],s_xnor_saveexec_b32:0,s_xnor_saveexec_b64:[0,2,3,4,9],s_xor_b32:[0,2,3,4,9],s_xor_b64:[0,2,3,4,9],s_xor_saveexec_b32:0,s_xor_saveexec_b64:[0,2,3,4,9],sacrif:746,sacrific:[710,722],saddr:[0,4,8,9,82,83,351,433,434,546,547],sadli:[742,760],safe:[590,593,594,597,601,603,607,612,659,667,671,674,677,679,684,688,702,704,705,709,710,711,726,728,741,745,755,760,762,763,764,776,788,790,811],safepoint:[676,710,751],safepoint_pol:764,safepointaddress:676,safer:743,safestack:[597,710,763],safeti:[673,674,710,720,755,778],saga:806,sahf:[607,760],sai:[590,607,608,611,616,659,667,674,677,679,688,703,705,707,710,714,717,721,723,725,735,736,743,745,748,761,765,769,770,774,781,802,804,811],said:[602,6
09,610,631,639,667,669,695,699,709,710,721,736,743,756,757,759,811],sake:[611,743,782],salient:698,salvag:759,salvagedebuginfo:705,same:[20,24,120,123,127,209,212,213,217,314,370,371,375,479,482,483,487,584,585,588,589,590,591,593,594,596,597,599,600,601,602,605,606,607,609,610,612,614,616,619,621,622,623,625,631,633,639,640,641,644,649,653,654,657,659,660,663,664,667,669,670,671,673,674,675,676,677,679,683,684,687,688,689,694,695,697,698,699,700,702,705,707,708,709,710,711,712,714,716,719,721,722,723,724,725,726,728,729,731,736,738,739,740,741,742,743,744,745,748,750,753,754,756,757,758,759,760,761,762,764,765,766,768,769,770,771,775,776,777,779,780,782,784,785,786,788,789,802,803,804,805,806,807,808,811],same_cont:671,same_s:671,sames:710,sampl:[588,590,592,599,631,638,645,646,659,665,667,676,681,695,705,712,724,744,751,764,773,782,783,784,803,804,810,814],samplepgo:[599,705],sampler:[68,159,256,420,532,695],sampson:662,san:[695,757],sandbox:[701,709,753],sander:692,sandia:747,sandwich:705,sandybridg:779,sandylak:809,sane:[594,605],sanit:[605,606,675,704,710,712,725,741,746,756,763,784],saniti:[674,679,712],sanitize_:710,sanitize_address:[597,710],sanitize_hwaddress:[597,710],sanitize_memori:[597,710],sanitize_memtag:[597,710],sanitize_thread:[597,710],sanitizercoverag:[710,712],sanitizercoveragetracedataflow:712,sanitizerlist:784,sanjoi:748,saptr:710,sar:607,sarq:760,sass:[660,724],sata:697,satisfi:[27,129,220,378,490,589,590,593,594,609,611,667,673,683,688,700,709,710,760,765,775,808],satur:[684,807],save1:663,save2:663,save:[585,590,596,597,605,607,610,621,623,631,659,660,669,670,679,681,708,710,712,715,723,724,726,740,742,743,759,760,762,769,770,773,779,780,805,806,807,808,809,810],savesomewher:610,savethi:611,saw:[639,713,782,806],sbase:[0,2,3,4,9],sbyte:738,scaffold:803,scalabl:[593,678],scalar:[33,34,132,133,224,225,383,384,496,497,585,586,589,590,607,659,660,674,676,683,691,710,711,714,749,750,779,780,782,789,790,791,805,806,807,808,810],scalar_nam:710,sca
larbitsettrait:786,scalarevolut:[593,677,748],scalaropt:624,scalartrait:786,scale2:710,scale:[585,595,607,610,645,677,703,710,748,760,766,768],scaling_governor:595,scan:[607,610,616,617,619,676,709,714,722,726,736,768,773,775,782,790,811],scanf:611,scanner:802,scare:667,scari:811,scatter:[750,759],scc:[10,11,12,13,54,57,58,59,60,61,63,64,65,69,70,75,77,144,146,147,148,149,151,152,153,154,155,156,160,161,162,163,166,167,240,243,244,245,246,248,249,250,251,252,253,257,258,261,265,317,318,322,323,328,329,330,331,332,342,343,344,345,347,348,403,406,407,410,411,412,413,421,422,425,429,516,519,520,521,522,523,524,526,527,528,529,533,534,537,541,666,711,725,728,782],sccp:711,scenario:[592,594,601,605,633,635,641,653,672,679,705,738,743,750],scene:743,scev:[667,710,714],sched:[607,619,631,770,780],sched_class:631,schedcov:780,schedq:639,schedreadwrit:770,schedrw:770,schedul:[611,619,631,639,679,688,697,702,705,714,719,724,743,750,764,766,768,782],schema:[616,759,786,814],scheme:[592,604,610,623,626,627,630,667,675,676,698,706,710,726,743,746,764,780,790,791,808],scienc:[714,747],scientif:711,scipi:773,scm:682,scope:[593,597,601,605,610,611,638,654,658,667,669,674,676,704,705,709,712,714,716,723,725,729,743,748,757,760,763,764,768,770,782,786,804,806,807,808,809,810],scopelin:[705,710,759,810],scott:610,scrape:616,scratch:[32,83,382,434,495,547,588,589,607,710,743,780,782],scratch_en:590,scratch_load:590,scratch_load_dword:[0,4,9],scratch_load_dwordx2:[0,4,9],scratch_load_dwordx3:[0,4,9],scratch_load_dwordx4:[0,4,9],scratch_load_sbyt:[0,4,9],scratch_load_sbyte_d16:[0,4,9],scratch_load_sbyte_d16_hi:[0,4,9],scratch_load_short_d16:[0,4,9],scratch_load_short_d16_hi:[0,4,9],scratch_load_sshort:[0,4,9],scratch_load_ubyt:[0,4,9],scratch_load_ubyte_d16:[0,4,9],scratch_load_ubyte_d16_hi:[0,4,9],scratch_load_ushort:[0,4,9],scratch_memory_s:590,scratch_store_byt:[0,4,9],scratch_store_byte_d16_hi:[0,4,9],scratch_store_dword:[0,4,9],scratch_store_dwordx2:[0,4,9],scratch_store_dwordx3:[0,
4,9],scratch_store_dwordx4:[0,4,9],scratch_store_short:[0,4,9],scratch_store_short_d16_hi:[0,4,9],scratchpad:[585,590],screen:[602,681,753],script:[592,601,602,605,612,616,646,659,662,668,672,676,679,682,695,697,702,704,705,723,743,744,745,748,766,768,771,773,774,775,782],scroll:742,scrub:760,scrutini:[667,760],scudo:[695,720,751],scudo_default_opt:756,scudo_memtag_tun:756,scudo_opt:756,scudo_standalon:756,sdata:[0,3,4,9],sdisel:711,sdiv:[607,683],sdk:[605,644,654,660,679,681,698,773],sdnode:[601,607,743,749,759,771,780,784],sdnodeproperti:769,sdnpmemoperand:769,sdpatternoper:769,sdst:[0,2,3,4,9,586],sdtc:780,sdvalu:[607,780,784],sdwa:[586,587],sea:590,seamless:694,seamlessli:[712,713],sean:743,search:[601,604,605,610,611,616,630,635,642,644,652,659,667,669,671,676,679,707,709,710,711,712,726,738,741,743,744,750,759,763,764,769,770,779,780,787,788,804,805,806,807,808],searchabl:[658,768],searchindex:768,sec:[645,709,710],secidx:671,secnam:671,second:[85,117,176,205,271,310,352,436,549,580,585,587,588,590,597,599,604,605,606,607,610,611,616,639,654,659,663,664,668,669,671,676,677,679,681,684,702,703,705,707,710,712,714,716,717,721,722,725,731,733,735,736,738,741,743,745,758,759,760,761,762,764,765,768,769,770,773,774,780,782,785,786,790,804,805,806,807,808,809,810,811],secondari:[604,667,669,756,762,768],secondcondit:743,secondlastinst:780,secondlastopc:780,secondli:810,secrel32:671,secrel_hi12:671,secrel_lo12:671,secret:[756,760],sectalign:659,section1:642,section2:642,section:[588,589,593,594,595,596,597,603,604,605,606,607,610,611,614,616,617,619,621,630,638,639,640,641,642,643,644,645,648,649,650,652,653,654,657,659,660,663,664,667,668,669,676,677,679,683,693,698,699,702,703,704,705,709,710,715,716,718,721,722,723,726,729,730,738,739,741,742,743,745,752,754,757,758,759,760,764,765,768,769,770,777,779,780,781,782,783,784,788,803,805,810,811,814],section_nam:710,sectioncontr:731,sectioncontribentri:731,sectioncontribentry2:731,sectioncontributions:731,sectioncontrs
ubstreamvers:731,sectiondata:814,sectionlength:731,sectionmapentri:731,sectionmapentryflag:731,sectionmaphead:731,sectionmaps:731,sectionmemorymanag:[715,726,788,789,790,791],sectionnam:[597,731],sectionnumb:814,sectionordin:709,sectionrang:709,sectnam:[643,659],sector:765,sectvalu:659,secur:[604,659,678,704,741,751,756,787,805],sed:[616,679],see:[1,5,6,7,8,27,31,129,220,341,378,381,490,494,585,588,589,590,591,594,595,596,597,598,599,600,601,603,605,606,607,608,609,611,613,618,621,625,627,631,634,639,646,655,656,657,658,659,660,662,663,664,667,668,669,670,672,674,675,677,679,681,682,683,684,685,687,688,689,694,696,698,699,700,701,703,704,705,709,710,711,712,714,716,717,718,720,721,722,723,724,725,726,727,728,731,734,735,738,739,741,742,743,744,745,747,748,750,752,753,757,759,760,761,762,764,765,768,769,770,771,773,774,775,777,778,779,780,781,782,783,784,787,788,789,790,791,803,804,805,806,807,808,809,810,811],seed:[651,712],seek:[585,667,671,677,685,741,751,785],seekabl:785,seem:[585,590,593,596,601,605,607,628,659,660,667,676,677,703,705,721,722,731,745,747,748,757,760],seemingli:[683,780],seen:[607,610,639,645,659,667,683,710,721,722,741,743,764,780,782,790,805,807,811],seg:[709,710],segalign:637,segment:[640,641,642,643,648,649,650,653,695,709,710,730,731,751,759,760,762,773,785],segment_selector_s:[585,590],segmentreg:607,segmentsrequestmap:709,segnam:[643,659],sel:663,seldom:722,select:[585,588,589,590,596,605,610,617,621,625,630,631,633,639,640,643,662,667,668,669,670,671,675,676,677,679,681,682,684,686,688,689,690,691,692,695,700,705,707,709,711,715,716,724,725,741,742,743,745,750,751,760,764,768,769,771,773,776,780,782,783,784,786,808,809],select_isd_stor:780,selectaddrrr:780,selectcod:780,selectcodecommon:784,selectimpl:686,selectinst:610,selectiondag:[594,683,684,685,688,689,693,741,743,759,768],selectiondagbuild:[591,607,689],selectiondagdump:591,selectiondagisel:[768,784],selectiondagleg:591,selectiondagnod:780,selectionkind:710,selector:[585,590,607,639
,658,669,679,683,686,689,693,710,731,759,768],selecttarget:790,self:[585,590,597,607,611,631,664,710,722,730,731,741,742,743,755,757,770,782,805,807],selfexecutorprocesscontrol:[788,789,790,791],selp:724,selti:710,sema:[658,768],semacxx:723,semant:[585,587,590,594,599,607,610,641,660,669,672,676,684,711,713,714,718,727,743,749,750,759,760,768,770,771,776,781,782,786,789,804,806],seme:714,semi:[603,606,676,783,811],semicolon:[605,616,679,723,773,803,804,805,806,807,808,809,810],semispac:676,semver:[585,590],send:[588,594,607,617,622,629,632,639,658,662,667,674,678,696,702,712,717,723,726,742,745,752,753,757,761,766,775,782,804],sendmsg:[27,129,220,378,490,590],sens:[593,594,605,607,610,659,667,672,677,708,710,712,714,723,743,745,748,759,760,764,766,770,784,804,811],sensibl:743,sensit:[593,605,611,621,628,645,710,716,757,760,770,775,782,786],sent:[588,609,617,629,639,645,667,678,702,711,723,726,742,764],sentenc:610,sentinel:597,separ:[120,209,213,314,371,479,483,584,585,586,587,588,589,590,592,594,597,598,603,605,606,607,609,610,611,616,621,625,626,631,635,637,640,641,644,645,650,654,658,659,660,662,663,666,667,675,676,677,679,681,688,689,691,693,695,700,701,708,709,710,712,713,714,716,717,721,723,724,725,726,727,735,738,743,744,745,748,754,755,756,759,760,762,764,765,766,768,769,770,773,775,776,780,782,786,803,805],separate_argu:605,separateremarksfil:754,separateremarksmeta:754,seq:[610,684,695,814],seq_cst:[590,594,710],seq_load:590,seq_stor:590,sequenc:[84,175,270,435,548,585,588,589,594,596,607,610,611,619,621,632,639,652,658,663,664,669,671,674,676,677,685,688,709,710,712,722,728,729,730,738,741,743,745,748,750,754,759,760,762,764,768,769,779,780,782,785,803,807,808,814],sequencetrait:786,sequenti:[589,590,597,607,611,639,669,684,705,710,714,745,776],sequential_fadd:710,sequential_fmul:710,sequentialtyp:743,serg:757,seri:[585,590,592,597,605,607,609,626,654,667,669,678,679,704,710,716,730,735,743,748,750,766,769,770,782,789,805,812],serial:[614,631,639,658,670,6
79,689,716,722,729,730,736,738,739,744,751,760,768,786],seriou:[667,702,755,760],serious:681,serv:[18,36,37,38,93,96,97,100,101,122,186,187,189,190,191,211,281,282,283,286,287,336,354,368,386,387,388,444,447,448,451,452,481,499,500,501,557,560,561,564,565,585,586,596,616,639,644,663,667,668,669,677,678,710,726,736,741,743,750,755,756,759,760,762,770,775,785],server:[644,678,681,702,742,743,745,760,773],servic:[590,593,595,672,745,756,765],sese:750,session:[665,717,726,743,761,789],set:[32,82,83,85,176,271,351,352,382,433,434,436,495,546,547,549,585,588,589,592,594,595,596,597,599,601,603,605,606,607,609,610,611,612,616,617,619,621,625,631,635,639,640,641,642,645,648,649,650,652,653,654,658,660,661,663,664,665,666,667,668,669,670,671,672,675,676,679,681,683,684,689,691,693,694,695,696,697,698,699,700,701,702,705,707,708,709,712,713,714,715,722,723,724,725,728,731,733,734,735,738,739,740,742,744,745,746,748,749,751,753,754,756,759,760,761,762,764,765,766,768,769,771,773,774,775,776,777,783,784,785,786,789,790,802,803,804,805,806,807,808,809,810,812],setargstr:659,setbann:743,setbid:597,setbr:631,setcategori:659,setcc:[607,743],setcondcodeact:780,setconvertact:780,setcurrentdebugloc:810,setdagop:770,setdata:610,setdatalayout:[805,806,807,808,809,810],setdebugloc:705,setdescript:659,setexitcodemapp:743,setfoo:710,setgc:676,setgraphattr:743,setgraphcolor:743,sethi:780,sethiddenflag:659,setindexedloadact:780,setindexedstoreact:780,setinsertfencesforatom:594,setinsertpoint:[804,805,806,807,808,809,810],setinternallinkag:743,setjmp:[710,811],setjmp_buf:669,setkind:709,setlazycompilefailureaddr:726,setloadextact:780,setloadxact:780,setmaxatomicsizeinbitssupport:594,setmcjitmemorymanag:715,setnam:[743,804,805,806,807,808,809,810],setnumcompilethread:726,seto:760,setobjectlinkinglayercr:709,setop:770,setoperationact:[594,607,780],setp:724,setpreservesal:782,setpreservescfg:782,setrecordnam:597,setreg:607,setrequiresstructuredcfg:780,setsubprogram:810,settarget:709,settargettri
pl:809,setter:[669,710,759,768,789],settl:745,settransform:789,settruncstoreact:780,setup:[590,603,671,694,700,709,712,716,743,745,759,773,780,781,788,805,811],setup_token:710,setupinprocesslctmreentryviaepciu:[790,791],setupmachinefunct:780,setvector:610,setxyzzi:610,sevenkind:710,seventeen:716,sever:[585,587,589,590,593,597,601,602,605,606,607,610,611,616,621,623,631,636,639,644,645,659,664,667,669,670,676,678,679,681,691,695,705,706,710,712,713,714,716,719,722,726,729,733,736,741,743,744,750,753,755,756,757,759,760,765,768,770,773,774,775,780,782,783,787,804,806,811,812],sex:608,sexist:608,sext:[26,218,327,339,376,488,705],sextload:[771,780],sexual:608,sge:710,sgi:743,sgn:710,sgpr0:590,sgpr105:590,sgpr1:590,sgpr29:590,sgpr2:716,sgpr30:590,sgpr32:590,sgpr33:590,sgpr34:590,sgpr4:590,sgpr63:590,sgpr64:590,sgpr:[20,123,212,370,482,710],sgpr_limit:590,sgpr_spill_count:590,sgprn:590,sgprs_use:590,sgt:710,sh_addralign:590,sh_entsiz:671,sh_hidden_private_base_vimid:590,sh_link:[641,653,671],sha1:712,sha:710,shader:[20,123,212,370,482,588,607],shader_funct:590,shader_subtyp:590,shadow:[710,720,748,762,764,770,780,806,807,808,809,810],shadowbyt:762,shadowcallstack:[597,710],shadowlist:780,shadowstackgc:676,shadowstackgclow:676,shall:[605,659,671,710,719,757,811],shallow:679,shape:[688,689,702,703,710,743,759,771,787],shapekind:703,shard:616,share:[585,588,589,590,591,594,597,601,603,605,607,610,612,617,619,621,633,641,642,643,660,663,667,669,671,679,683,687,693,701,704,707,709,710,712,713,714,724,726,729,741,743,744,754,756,757,759,765,769,770,771,774,775,782,785,786,788,804,807,810],shared_bas:589,shared_librari:744,shared_limit:589,shared_ptr:790,shared_vgpr_cnt:590,shared_vgpr_count:590,sharir:750,sharp:667,shayn:757,she:696,shelf:[726,788,789],shell:[601,605,616,659,679,681,717,723,761,775,810],shf_alloc:[590,641],shf_exclud:[641,671],shf_execinstr:[590,641],shf_link_ord:[671,710],shf_merg:641,shf_string:641,shf_write:[590,641],shield:[595,607,756,765],shift:[588,589,5
90,594,597,598,607,659,670,674,684,710,711,722,741,760,770,771,785],shiftinst:610,shim:741,shini:[781,782],ship:[592,594,604,665,676,679,695,757,764,766,811],shirt:608,shl:[610,684,705,722,741,770],shlib:679,shlibext:775,shlq:760,shn_amdgpu_ld:590,shoe:786,shoot:805,short_enum:710,short_wchar:710,shortcom:757,shortcut:590,shorter:[645,748],shortest:[722,770],shortli:[759,810],should:[86,585,588,590,591,592,593,594,596,597,598,599,601,602,603,604,605,607,608,610,611,612,616,617,619,621,625,627,631,632,639,644,645,646,654,657,658,659,660,662,663,665,667,669,670,671,674,675,676,677,678,679,681,684,685,686,687,688,689,691,693,694,695,696,697,699,700,701,702,703,704,705,706,707,708,709,710,712,713,714,715,716,717,718,719,721,723,724,725,726,727,728,730,731,733,735,738,740,741,742,743,744,745,746,748,750,751,753,754,755,756,757,759,760,761,762,763,764,765,766,768,769,770,771,773,774,775,776,778,780,781,782,783,784,785,786,790,803,804,805,806,808,810],shouldexecut:743,shouldexpandatomiccmpxchginir:594,shouldexpandatomicloadinir:594,shouldexpandatomicrmwinir:594,shouldexpandatomicstoreinir:594,shouldinsertfencesforatom:594,shouldn:[593,609,610,659,701,710,715,721,725,775],shouldprocess:610,shouldprocessattr:610,shouldsampl:695,show:[590,599,607,611,616,620,630,631,635,638,639,640,642,644,646,654,658,659,663,665,671,677,679,694,704,710,711,714,715,716,717,723,724,725,742,743,745,748,753,759,760,761,762,766,768,770,773,777,779,780,781,782,784,786,788,790,803,804,805,806,807,808,809,811,812,813],showc:645,showdebug:665,shown:[585,588,589,590,596,616,625,630,631,645,648,649,650,654,659,663,664,666,667,681,689,703,710,716,717,721,724,742,743,759,760,762,768,769,770,771,774,777,780,786,807],shr:610,shrink:[607,641,745],shrx:760,shrxq:760,shstrtab:590,sht_dynam:590,sht_hash:590,sht_llvm_linker_opt:671,sht_nobit:[590,641],sht_note:[590,641],sht_progbit:[590,641],sht_rela:590,sht_strtab:590,sht_symtab:590,shtest:616,shuffl:[590,607,610,616,684,710,726,749,750],shuffle_vector:607,shu
fflebyt:712,shufflevector:[596,607,611,684],shut:[712,780],shutdown:712,sibl:682,side:[590,593,594,605,607,609,610,639,660,665,666,676,679,684,705,710,714,716,722,724,741,743,745,754,760,764,770,775,803,804,806,807],sidebar:[702,761],sideeffect:714,sidelength:703,sierra:747,siframelow:590,sift:783,sig:679,sig_atomic_t:710,sig_ign:695,sight:808,sigil:716,sign:[24,55,127,217,375,404,487,517,585,587,588,589,607,608,610,617,631,659,661,671,674,677,678,683,684,709,710,712,713,716,727,738,743,757,760,770,780,811],signal:[588,594,659,661,663,665,669,673,684,695,710,712,714],signatur:[594,596,607,623,660,663,679,710,712,734,736,743,782,804],signature_invalid:679,signed:[585,727],signedcharact:738,signext:[597,710],signextimm:607,signifi:[684,710,727,759],signific:[30,223,380,493,585,590,593,596,597,603,609,610,648,649,667,670,676,684,695,710,712,713,720,722,741,750,757,760,768,773,782,785,806,807,812],significand:710,significantli:[597,603,605,610,623,639,669,673,679,689,700,710,712,713,743,759,760,769,770,782,803,806],sigplan:[607,676],sigsegv:695,sigusr1:712,sil:771,silenc:699,silent:[609,641,652,653,696,710],silli:[593,610,672],simd:[20,123,212,370,482,585,590,617,619,710,741,749,750,776,779],simd_benchmark:747,similar:[585,590,594,597,599,602,606,607,609,610,611,621,624,644,659,660,663,666,667,669,671,676,677,679,682,684,685,688,689,695,697,699,700,705,710,712,716,719,720,722,724,726,729,731,738,741,743,744,745,748,750,756,761,765,766,768,770,775,779,780,782,783,784,786,802,804,805,806,807,810],similarli:[585,590,593,594,596,597,601,610,631,639,659,660,667,669,675,683,686,688,689,703,704,710,725,729,743,760,770,774,777,780,783,785,787,809,810],simm13:780,simm21:[55,404,517],simm32:[0,2,3,4,9],simpl:[585,592,593,594,596,597,598,601,605,606,607,611,616,617,619,639,640,658,659,664,666,667,668,670,671,676,677,679,681,682,686,687,688,700,703,705,709,712,714,718,721,722,723,726,745,759,760,764,765,766,768,769,774,775,779,780,782,786,788,789,790,791,802,803,804,805,806,807,808
,809,810,811,812,813],simplecompil:[726,788,790],simpleloopunrol:776,simpleproject:605,simpler:[585,607,670,710,714,741,743,745,759,760,770,788,807,808],simplest:[585,592,603,607,659,666,681,712,716,745,760,780,786,789,803,804],simpletyp:814,simpletypekind:738,simpletypemod:738,simplevalu:770,simpli:[585,590,593,601,604,607,610,611,626,644,654,659,662,663,667,669,672,675,677,679,684,694,695,699,701,705,710,716,718,721,723,725,726,729,731,741,743,744,745,759,760,761,762,765,770,774,775,780,781,782,803,804,805],simplic:[600,676,683,743,746,762,802,804],simplif:[601,659,683,741,782],simplifi:[585,590,592,602,607,667,676,681,687,700,704,705,710,726,728,742,760,764,770,774,786,788,805,806,807,808,810,814],simplify_typ:703,simplifycfg:[612,808],simplifydemandedbit:784,simt:[585,590],simul:[596,607,639,667,693,709,710,773],simultan:[585,594,596,614,641,653,679,695,712,743,757,775,779],sin:[593,660,779,786,788,802,803,804,805],sinc:[585,590,591,593,597,598,605,607,609,610,614,621,639,644,645,660,663,666,667,669,672,676,677,679,681,685,688,697,699,701,703,704,707,709,710,712,714,715,717,721,722,724,725,726,727,729,731,735,739,743,745,753,756,757,758,759,762,764,765,771,780,781,782,785,786,802,803,804,805,806,807,808,810],sine:[710,780],sinf:660,singl:[585,588,589,592,594,596,597,601,603,605,606,607,609,611,616,621,625,626,630,631,636,637,639,641,644,645,649,653,654,659,660,664,666,667,668,669,670,676,677,679,682,683,684,688,689,695,699,705,706,708,709,711,712,714,716,720,721,722,723,724,726,728,729,730,731,734,735,738,739,743,744,746,750,753,759,763,764,769,770,773,775,776,779,780,782,783,784,786,802,804,805,808,810,811],singleimpl:710,singleimplnam:710,singlesour:773,singlesourc:[747,773],singlethread:[590,710],singleton:[585,769],singli:676,singular:[610,710,743],sink:[593,659,750,760],sint:710,sit:789,site:[585,590,593,606,616,669,671,676,681,708,709,716,721,741,747,752,761,762,764,775,777],sitofp:804,situat:[585,590,594,603,607,609,610,639,659,660,667,669,677,688,700,703
,705,709,710,719,722,741,743,755,756,766,779,780,782,790,805],six:[594,607,663,757,768,788],sixkind:710,sixth:[663,683,780],sizabl:743,size1:710,size2:710,size3:710,size:[10,11,12,13,15,16,19,20,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,123,132,133,134,135,136,137,138,139,140,141,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,212,224,225,226,227,228,229,230,231,232,233,234,235,236,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,315,317,318,319,320,322,323,324,325,328,329,330,331,332,334,335,337,341,342,343,344,345,346,347,348,349,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,369,370,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,482,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,5
82,583,585,588,590,593,594,595,596,597,601,605,608,610,612,614,615,616,617,621,623,625,630,631,639,640,641,644,645,648,649,651,654,659,664,665,670,671,674,676,677,683,684,688,689,695,705,709,710,711,712,713,716,720,722,724,726,727,730,731,733,735,738,740,741,747,754,756,759,760,762,764,766,769,770,771,773,775,776,777,779,780,782,784,785,786,789,803,804,805,806,807,808,809,810,811],size_t:[594,676,709,712,713,726,743,786],sizem1:710,sizem1bitwidth:710,sizeof:[644,663,672,710,724,733,735,738,743,760,764],sizeofimm:780,sjlj:698,sjljehprepar:669,sk_buff:607,sk_circl:703,sk_lastsquar:703,sk_otherspecialsquar:703,sk_somewhatspecialsquar:703,sk_specialsquar:703,sk_squar:703,sketch:663,skip:[585,596,597,606,607,610,611,625,631,642,645,658,660,663,664,676,681,682,686,700,705,710,714,720,743,745,759,760,764,773,776,779,780,781,802,803,804,805,806,807,808,809,810,812],skipbasicblock:728,skipfunct:728,skiploop:728,skipmodul:728,skipscc:728,skylak:631,slab:709,slash:710,slc:[0,2,3,4,8,9,590],sle:710,sleb128:585,sled:783,slh:760,slice:[637,643,743,769,770],slide:[608,660,743],slight:[671,699],slightli:[592,596,649,650,660,666,679,710,714,720,760,764,770,782,805,807,808],sln:681,slot:[585,590,607,623,639,663,669,674,695,708,710,758,762,764,780,808],slow:[602,603,617,679,681,689,710,712,723,740,743,782],slow_path:710,slowdown:[695,712],slower:[607,610,630,660,669,672,674,676,710,722,723,743,756,760],slowest:616,slowli:[667,679,710,726],slp:[711,750],slt:[710,714],sm_20:[607,724],sm_21:607,sm_30:[607,660],sm_35:[607,660],sm_xx:660,small:[589,597,601,604,606,607,609,610,612,619,623,631,639,645,659,662,663,664,667,674,676,677,683,688,689,695,703,709,710,712,722,724,736,743,746,747,756,759,760,762,765,766,775,779,782,787,790,810,812],smallconst:762,smaller:[585,590,594,602,607,609,610,625,631,634,639,645,667,674,676,684,688,710,722,728,741,742,743,756,770,779],smallest:[588,611,704,710,714],smallsetvector:743,smallvector:[610,810],smallvectorhead:743,smallvectorimpl:[743,769,784],smart
:[610,616,710,730],smarter:659,smartphon:719,smash:[710,760],smax:589,smell:743,smith:[610,757],smithwa:747,smloc:769,smooshlab:678,smoothli:606,smovq:607,smp:[594,782],smrang:769,smt:595,smul_lohi:607,sn_map:722,sn_mapl:722,sn_mapr:722,snan:710,snapshot:[593,668,712],sneak:610,snippet:[667,676,705,743,782],snmalloc:605,snork:770,social:[608,787],socket:[709,726],soffset:[0,2,3,4,8,9],soft:[619,639,667,759],softer:667,softfp:700,softwar:[585,590,606,610,619,661,667,668,676,699,700,710,711,723,730,748,760,775,780,782,804,811,812],solari:679,solaris2:782,sole:[667,689,710,721,743],solicit:[609,711],solid:699,solut:[585,594,596,605,607,659,663,668,679,681,710,726,743,745,759,766,782,789,808,811],solution_1:710,solution_2:710,solv:[593,607,667,677,689,693,702,705,743,745,759,807,808,811],solver:[605,607],some:[585,588,590,591,592,593,594,595,596,597,599,600,601,602,603,605,606,607,608,609,610,611,616,617,620,621,625,630,631,633,639,640,642,644,658,659,660,661,662,663,666,667,669,670,672,673,674,675,676,677,679,680,681,683,684,685,686,688,689,691,693,695,697,698,699,700,701,703,704,705,706,707,708,709,710,711,712,713,714,716,717,719,721,722,723,725,726,727,730,731,736,739,740,741,742,743,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,764,765,768,769,770,771,773,774,775,776,778,779,780,781,782,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811],some_bss:652,some_crash_test:745,some_int:770,some_op:716,somebodi:609,someclass:770,somedag:770,somefancyaa:782,somefunc1:743,somefunc2:743,somefunc:610,somehow:[663,710,731,743,782],somellvmdev:723,somelocalpath:712,somemap:610,somemulticlass:770,someon:[600,607,608,609,644,662,667,668,670,674,676,685,717,723,745,755,757,761],someone_els:609,somerec:769,sometest:774,someth:[590,592,593,607,609,610,611,640,659,663,667,670,679,681,682,688,689,699,703,704,707,710,712,714,717,721,722,723,725,726,736,741,742,743,745,756,760,761,764,765,768,780,784,786,803,804,805,806,807,809,810,811,812],somet
i:710,sometim:[597,601,602,607,609,610,611,639,659,660,667,677,679,688,703,710,712,722,725,735,741,742,743,745,759,760,768,769,770,780,786],somewhat:[593,594,610,663,667,668,689,710,743,745,748,760,764,770,774,778,803,807],somewhatspecialsquar:703,somewher:[611,669,703,710,727,745,774,775,782,804,807],soni:757,sonnenberg:745,soon:[639,667,682,709,712,713,726,753,766,782,789,803,806],sooner:696,sophist:[607,676,710,717,761,774,782],sort:[594,605,614,640,659,660,664,667,672,704,709,710,727,738,745,747,759,768,769,770,783,784,803,805,806,808,811],sortord:784,sought:[585,710],sound:[600,712,719],sourc:[117,205,310,580,586,587,588,589,592,593,595,601,602,603,606,607,611,612,614,615,616,617,620,624,625,627,639,642,644,657,658,659,660,662,667,668,669,671,674,676,677,678,681,684,695,700,701,702,704,705,709,711,712,713,714,721,722,723,724,726,727,731,739,742,743,745,746,748,751,753,754,757,760,761,762,764,765,768,769,771,773,774,775,778,780,781,782,783,784,786,787,790,805,809,811],source0:782,source_filenam:710,source_i386:654,source_x86_64:654,sourcebas:[662,680],sourcebasedcodecoverag:664,sourcecontrolbaserevis:742,sourcefil:625,sourcefilecount:731,sourcefilenameindex:731,sourceforg:747,sourceinfos:731,sourceloc:810,sourcewar:[694,712],southern:590,sp3:[588,589],space0:710,space1:710,space2:710,space:[120,209,314,479,584,586,587,595,597,605,608,611,617,625,631,640,644,660,667,669,676,678,679,681,683,684,707,708,709,710,715,719,720,722,727,736,743,744,745,746,755,758,759,760,762,764,765,768,770,780,782,785,786,805,808,811],spadini:741,spaghetti:812,spam:766,span:782,spanish:756,sparc:[594,607,641,679,710,748,770,780,782],sparcasmprint:[607,780],sparccallingconv:780,sparcel:641,sparcgenregisterinfo:780,sparcgensubtarget:780,sparcinstrformat:780,sparcinstrinfo:780,sparciseldagtodag:780,sparcisellow:780,sparclet:780,sparclit:780,sparclite86x:780,sparcreg:780,sparcregisterinfo:780,sparcsubtarget:780,sparctargetasminfo:780,sparctargetlow:780,sparctargetmachin:780,sparcv8:[594,71
0],sparcv9:710,sparingli:[610,674],spars:[607,645,670,679,711,728,743,745,785],sparsecheckout:745,sparser:785,spawn:[712,726],spcc:780,spdx:[610,788,789,790,791],spe:661,speak:[606,608,660,755,764,806,807],speaker:719,spec95:774,spec:[660,704,710,716,717,743,759,760,773,786],speccpu2000:[773,774],speccpu2006:774,special:[84,586,588,589,590,594,596,597,606,607,610,611,616,639,640,641,653,659,660,664,665,666,667,668,669,671,673,676,677,679,686,689,698,701,704,709,712,716,720,721,722,725,726,729,730,731,741,743,745,746,754,757,760,762,764,765,768,770,771,775,779,780,782,786,787,808,809,810,811,812],specialfp:770,specialsquar:703,specialti:743,specif:[1,5,6,7,8,27,129,175,220,270,378,435,490,548,585,587,590,592,594,595,597,600,602,603,609,610,611,616,619,625,630,642,648,659,660,661,664,667,669,670,672,675,676,677,678,679,681,683,686,688,689,690,691,693,698,699,701,702,709,710,711,712,714,715,716,717,718,719,721,723,724,725,726,727,728,731,739,741,743,746,749,751,752,753,755,756,757,760,762,764,766,768,769,770,771,774,779,780,782,783,785,786,789,802,803,805,807,809,812],specifi:[14,18,20,24,27,28,30,31,32,36,37,38,55,66,68,79,83,84,85,93,96,97,98,99,100,101,104,105,106,107,110,111,112,113,114,115,117,120,122,123,127,129,130,145,157,159,172,176,186,187,188,189,190,191,194,195,196,199,200,201,202,203,204,205,209,211,212,213,217,220,221,223,254,256,267,271,281,282,283,286,287,288,289,294,295,296,297,298,299,300,301,304,305,306,307,308,309,310,314,333,336,340,341,350,352,354,355,368,370,371,375,378,379,380,381,382,386,387,388,404,420,430,434,436,444,447,448,449,450,451,452,455,456,457,467,468,469,470,471,472,479,481,482,483,487,490,491,493,494,495,499,500,501,517,532,543,547,549,557,560,561,562,563,564,565,568,569,570,571,574,575,576,577,578,579,580,584,585,586,587,588,589,590,591,594,597,599,600,601,603,605,606,607,610,611,612,614,616,617,619,621,622,624,625,626,627,629,630,631,632,633,635,636,637,639,640,641,642,644,645,646,648,649,650,651,652,653,654,657,658,660,663,664,6
68,669,671,672,675,677,679,681,684,695,698,704,705,709,712,714,716,717,723,724,726,727,728,731,735,736,739,741,743,744,746,750,754,756,759,762,764,765,768,769,770,771,773,774,775,776,779,780,781,783,784,785,786,803,804,806,807,808,809,810,811],spectr:[710,751],spectre11:760,spectreattack:760,specul:[593,594,639,660,669,674,705,710,726,728,751,764],speculat:[597,710],speculative_load_harden:[597,710],speculativejit:726,speed:[603,605,610,631,659,660,679,712,714,721,775,808],speedup:760,spell:[610,658,667,768,775],spencer:765,spend:[616,779,784],spent:[639,784],spflag:759,spflagdefinit:810,spgo:615,sphinx:[605,678,679,717],sphinx_execut:605,sphinx_output_html:605,sphinx_output_man:605,sphinx_warnings_as_error:605,sphinxquickstarttempl:761,spi:590,spi_shader_user_data_ps_0:590,spi_shader_user_data_ps_1:590,spi_shader_user_data_ps_5:590,spiffygrep:659,spiffysh:659,spill:[585,604,607,619,663,685,710,759,760,762,764,780],spill_threshold:590,spillalign:780,spiller:[607,617,619],spillsiz:780,spilltabl:590,spir:658,spirit:[608,743,757],spirv:658,spisd:780,spl:771,splat:[607,711,726],splice:743,split64:710,split:[585,590,594,596,601,607,626,641,644,654,659,660,663,667,670,677,683,689,709,710,712,716,721,722,726,735,741,744,745,758,770,775,780],splitblock:709,splitdebugfilenam:710,sponsor:745,sponsorship:667,spot:[709,710,712,741],spotti:710,spread:[710,741],spreadsheet:784,springer:660,sprinkl:743,spu:780,spuriou:710,spurious:[710,775],sq_rsrc_img_1d:588,sq_rsrc_img_1d_arrai:588,sq_rsrc_img_2d:588,sq_rsrc_img_2d_arrai:588,sq_rsrc_img_2d_msaa:588,sq_rsrc_img_2d_msaa_arrai:588,sq_rsrc_img_3d:588,sq_rsrc_img_cub:588,sqlite:712,sqrt:[779,786],squar:[667,703,710,786],squash:[596,723,745],squeue:639,sra:770,sramecc:590,srand:712,src0:[0,1,2,3,4,5,6,7,8,9,117,205,213,310,371,483,580,588,589,684],src0_sel:[0,3,4,7,8,9,590],src1:[0,1,2,3,4,5,6,7,8,9,117,205,213,310,371,483,580,586,588,607,684,770,771],src1_sel:[0,3,4,7,8,9,590],src2:[0,1,2,3,4,5,6,7,8,9,117,205,213,310,371,483,580,607
,684,770,771],src3:[0,2,3,4,8,9,117,205,310,580],src:[0,2,3,4,8,9,605,607,624,669,684,698,702,710,731,739,743,744,753,774,780,805],src_base:764,src_offset:764,src_private_bas:590,src_reg:607,src_root:679,src_shared_bas:590,srcarglist:710,srckei:709,srcloc:710,srcvalu:764,srd:590,srem:607,sret:[590,597,710],srgp:590,srl:[585,770],sroa:[594,660,674,711,728,808],srsrc:[0,2,3,4,8,9],ssa:[669,674,676,684,711,721,743,759,764,780,782,804,806,807,808,811,812],ssamp:[0,2,3,4,9],ssca2v2:747,sscabmk:747,ssd:681,sse2:[710,775],sse41:611,sse4:779,sse:[607,706,710,749,780,809],ssecal:780,ssh:773,ssize_t:610,ssp:[597,710,759],sspreq:[597,710],sspstrong:[597,710],ssrc0:[0,2,3,4,9],ssrc1:[0,2,3,4,9],ssrc2:[0,2,3,4,9],ssrc:[0,2,3,4,9],sstream:610,st0:[770,771,780],st17basic_string_viewicst11char_traitsice:627,st1:[596,607,771,780],st2:771,st3:771,st3__1:627,st4:771,st5:771,st6:771,st7:771,st7__cxx11:627,st_valu:590,stab:759,stabil:[667,745,746],stabl:[590,610,628,631,667,702,705,710,713,728,743,766,780],stack:[593,604,610,617,640,642,648,649,650,663,669,674,679,684,695,698,708,711,716,718,721,726,743,751,754,759,778,780,782,783,785,786,788,790,808,810,812],stack_end:665,stack_frame_size_in_byt:590,stack_loc:607,stack_siz:617,stackar:676,stackentri:676,stackframes:676,stackgrowsdown:780,stackmap:[648,649,676,710],stackoffset:676,stackprotectorcheck:710,stackrestor:708,stacksafeti:[710,741],stacksav:708,stacksiz:754,stacksizessect:607,staff:755,stage1:[592,668,699],stage2:[592,603,605,668,699],stage3:592,stage:[594,603,605,607,612,625,659,660,667,668,696,699,702,709,710,712,713,722,724,742,746,749,750,753,757,763,780,784,787,803],stageselectioncat:659,stai:[663,678,713,722,735,786],stale:[590,759],stall:[639,760],stamp:736,stanc:610,stand:[607,609,610,660,667,698,710,731,743,775,782,789,790,791,803],standalon:[607,616,675,698,700,745,754,756,810,811],standalone_test:616,standard:[84,589,590,592,593,594,596,605,607,611,612,616,617,619,621,622,623,626,629,630,631,632,636,638,639,640,641,
642,644,645,648,649,650,652,653,654,657,658,659,661,662,667,669,671,672,673,674,676,678,684,686,699,703,709,713,716,724,726,741,749,756,761,764,768,769,770,771,775,779,780,782,785,789,802,803,804,805,806,807,808,809,810,811],standpoint:743,star:747,start:[80,173,268,288,306,308,431,544,585,590,592,593,597,601,603,610,611,612,616,625,627,639,641,642,643,644,645,653,654,662,663,664,665,667,668,670,671,672,674,677,678,682,683,684,688,689,696,700,701,702,705,709,716,717,720,721,722,723,724,726,727,729,741,742,743,745,746,748,753,756,757,759,760,761,764,768,769,770,778,779,780,783,784,785,786,787,789,790,802,803,804,805,806,807,808,809,810,811,813],start_valu:710,starter:[719,748],startexpr:[806,807,808,809,810],startfilenam:654,startlin:654,starttim:769,startup:[616,679,710,724,726,740,789,790],startval:[806,807,808,809,810],stash:703,stat:[593,595,601,617,619,639,657,658,686,765,773,774,782],state:[588,589,593,594,596,600,607,609,610,639,658,659,663,667,668,669,673,676,688,689,693,700,702,705,709,710,712,716,717,721,723,725,726,733,741,742,743,745,748,756,757,759,761,762,764,766,779,782,785,786,789,803,810],stateless:[594,725,782],statement:[604,606,639,658,660,664,666,667,669,670,677,710,711,714,741,743,755,759,766,768,769,779,780,785,804,806,808,810,812],statepoint:764,statepoint_token:710,static_cast:[808,809,810],static_librari:700,statist:[593,595,614,615,617,619,623,625,657,658,664,712,744,754,757,773,774,783],statu:[20,123,212,370,482,588,590,600,602,605,608,610,637,665,667,692,696,710,733,752,762,766,778],statuscheck:745,stb_local:710,std:[627,642,659,669,672,688,695,703,709,722,724,725,726,738,747,756,760,768,769,773,779,780,784,786,788,789,790,791,802,803,804,805,806,807,808,809,810],stdarg:710,stdcall:607,stddef:712,stderr:[610,611,612,712,728,743,775,781,803,804,805,806,807,808,809,810],stdin:[611,625,631,654,759,775,810],stdint:712,stdio:[679,694,698,709,713,765],stdlib:[605,679],stdlibc:605,stdout:[616,625,627,630,631,710,712,775,814],steadi:[673,756],ste
ensgaard:593,steer:667,steinberg:747,stellard:702,stem:[660,743],step:[585,589,592,593,601,603,605,607,609,660,663,665,666,667,668,669,670,676,677,679,682,684,689,693,698,699,700,701,703,705,707,709,712,713,714,722,723,724,741,743,748,750,753,759,769,770,774,777,778,782,786,804,805,806,807,808,809,810,813],stepexpr:[806,807,808,809,810],stepval:[806,807,808,809,810],stepwithcompactencod:669,steve:757,steveklabnik:757,stick:[601,697,719,743,745],sticki:710,still:[585,588,590,596,600,601,602,604,607,610,611,616,625,639,641,653,659,660,663,664,667,669,672,676,679,681,683,685,687,688,689,693,694,695,700,701,703,704,705,708,709,710,712,714,715,721,722,725,726,728,740,741,742,743,745,746,747,755,756,757,759,760,762,764,765,766,770,774,777,779,781,782,786,805,806,807,808,811],still_poison:710,stingi:743,stitch:735,stkmaprecord:[762,764],stksizerecord:762,stl:[610,672,679,712,741,743,786],stlextra:[804,805,806,807,808,809,810],stm:710,stmt:[658,776],stn_undef:590,stonei:590,stop:[605,608,610,616,628,642,644,658,663,665,667,669,670,676,689,702,709,710,712,714,716,721,759,760,782,783,785,803],stopgap:743,stopiter:663,stopper:753,storag:[585,590,595,597,607,663,676,679,709,720,724,759],storageclass:814,store:[98,99,107,188,196,279,280,288,289,290,291,301,306,308,449,450,457,562,563,571,585,588,590,593,594,596,597,598,605,606,611,616,625,659,663,664,666,668,669,671,673,676,677,679,681,683,684,688,691,693,698,705,706,708,711,712,715,716,721,722,723,724,725,726,727,731,735,743,744,745,749,750,754,756,758,759,762,763,764,769,770,774,777,780,788,789,802,803,808,809,810],storemerg:663,storeregtoaddr:780,storeregtostackslot:[607,780],storesdnod:780,stori:[681,757,759],str1:770,str2:770,str:[596,630,659,664,710,724,743,768,769,786,788,789,790,791,803,804,805,806,807,808,809,810,814],str_offset:759,strai:775,straight:[593,607,631,660,706,722,723,743,764,765,766,803],straightforward:[596,607,672,703,709,743,745,746,756,759,770,780,782,789,802,804,805,806,807,808],strang:[596,808],strate
gi:[594,602,607,659,663,720,743,745,764,766,808],strconcat:[770,780],stream:[27,129,220,378,490,585,589,590,597,607,610,611,612,614,621,622,626,629,630,632,636,640,641,642,649,650,652,653,654,657,664,679,709,710,712,730,743,747,754,759,760,779,780,782,785,786,803],streamblock:735,streamclust:747,streamdirectori:735,streamer:607,streamlin:602,streamoutt:590,streamsiz:735,strength:[594,607,608,659,674,710,743,787,812],strengthen:639,stress:[610,615,675,714],strex:594,strict:[27,129,220,378,490,591,594,596,611,667,674,683,710,712,714,722,726,743,759,771,804],strict_:591,strict_fadd:591,strict_fp_round:591,stricter:[594,611,710],strictfp:[597,710],strictli:[611,669,679,710,723,724,743,759,764,765,804,806,809],stride:[34,132,225,384,497,590,710,747,750],strike:[667,756,804],strikingli:807,string1:770,string2:770,string:[585,590,597,599,603,605,606,607,610,615,616,617,619,621,625,630,635,639,641,642,643,644,645,646,648,649,654,658,659,671,679,689,695,706,707,716,718,723,724,731,735,736,739,741,756,759,764,765,768,769,770,771,775,780,782,783,786,788,791,802,803,804,805,806,807,808,809,810],string_view:[627,695],stringlength:759,stringlengthexpress:759,stringli:606,stringmap:659,stringmapentri:743,stringrecti:769,stringref:[610,659,709,759,768,769,784,786,788,789,790,791,808,809,810],stringsort:610,stringswitch:768,stringwithcstr:759,stringwithspecialstr:759,strip:[601,605,615,625,626,641,642,657,659,667,705,709,713,731,743,759,775],strippointercast:610,strive:[608,610,693,757,775],strlen:743,strong:[594,609,611,667,672,674,709,710,720,722,746,756],stronger:[585,590,594,710,743,764],strongest:760,strongli:[603,606,609,610,639,666,667,674,676,695,700,701,711,714,756,766,806,808,811,812],stroustrup:743,strr:780,strtab:[590,597,754],strtab_blob:597,strtod:[659,802,803,804,805,806,807,808,809,810],strtol:659,struct:[590,597,607,611,658,659,660,663,669,671,674,676,708,721,730,731,734,735,736,738,743,759,760,768,777,779,780,782,786,808,810,811],struct_anon:597,struct_nam:597,stru
cttyp:743,structur:[590,593,594,597,601,607,609,610,611,615,630,639,644,648,649,659,664,666,669,674,676,677,679,681,683,686,689,706,709,711,712,713,714,715,717,723,725,731,735,736,739,741,746,750,759,761,768,771,780,781,782,785,786,788,803,804,806,807,808,810,811,812],struggl:679,stryjewski:741,stt_amdgpu_hsa_kernel:590,stt_file:[641,653],stt_func:[590,641],stt_gnu_ifunc:641,stt_notyp:590,stt_object:[590,641],stt_section:641,stub:[607,661,695,709,710,722,726,743,780,790,791],stubsmgr:726,stuck:811,stucki:747,student:662,studi:[674,711,712,744],studio:[592,605,671,679,680,698,723,759],stuf:611,stuff:[606,607,659,717,743,761,774,782,786,805,806,808,809,810],stump:780,stupid:601,stwu:607,sty:771,style:[585,590,592,594,601,611,615,616,619,620,621,625,626,639,641,642,649,654,659,663,667,676,714,716,731,743,748,751,760,766,768,769,770,783,803,808,811],styleguid:748,stylist:[610,770],sub1:611,sub32rr:759,sub:[588,594,597,602,603,605,606,607,611,616,639,663,667,670,671,679,684,702,714,722,743,750,766,768,770,771,775,780,804,809],sub_32:716,sub_8bit:716,sub_ri:770,sub_rr:770,subblock:597,subclass:[593,607,610,659,676,703,709,716,750,759,769,770,771,782,786,803],subclassoptionaldata:722,subcommand:[782,783,784],subcommun:667,subcompon:[585,667],subdir:745,subdirectori:[605,606,616,679,700,773,774,775,780],subdivid:710,subel:[710,711],subexpress:[593,711,760,803,805,806,807,808],subfield:770,subfold:668,subgraph:[666,714,741,750],subgroup:590,subject:[585,590,604,607,610,658,667,685,688,695,702,709,710,725,742,743,776,786],subl:[607,611],sublist:606,subloop:776,submiss:667,submit:[585,600,601,609,678,709,723,742,773],submodul:745,subnorm:[710,724],subobject:777,suboptim:745,subpath:616,subprogram:[585,590,710,759,810],subproject:[605,610,667,678,679,699,723,745,752,775],subproject_branch:745,subrang:710,subreg:[716,780],subreg_to_reg:716,subregclasslist:780,subregindex:716,subregion:710,subregist:[762,780],subroutin:[590,710],subroutine2:759,subscrib:[667,723,742,757],subscrip
t:[593,710,741,770],subsect:[644,649,709,710,726,743,768,769],subsequ:[213,371,483,585,590,591,607,611,621,659,667,669,677,702,709,710,715,718,726,738,741,748,756,759,760,762,766,779,780,790,804,805],subset:[593,594,616,621,639,641,653,667,675,677,679,695,710,712,714,724,726,743,747,759,760,762,769,770,775,783,811],subsidiari:768,subst:[770,771],substanti:[593,603,609,663,667,676,710,760,805],substitut:[594,606,607,627,700,710,726,741,762,770],substr:[639,658,659,664,741,770,775],substream:[644,729,739],subsystem:[607,710],subtarget:[590,607,631,658,686,710],subtargetemitt:780,subtargetfeatur:[771,780],subtl:[603,606,677,712,731,803,806,811],subtleti:684,subtmp5:808,subtmp:[804,805,806,807,808,809,810],subtool:660,subtract:[589,590,607,664,670,684,710,770],subtyp:[590,669,769,770,780],subvalu:[769,770],subvec:710,subvector:[607,710],subvers:[667,702,723,745,775],subvert:604,succ:610,succe:[600,604,610,611,612,617,621,622,623,624,629,632,636,657,658,669,681,696,703,726,743,757,775,788,789,803,805],succeed:[597,598,616,681,710,782],success:[610,612,631,639,667,669,679,684,704,709,710,714,743,745,754,757,775,780,782,786,803,804,805,806,807,808,809,810],success_ord:594,successfulli:[593,610,614,630,638,667,669,686,696,700,701,702,704,714,726,760,764,779,807],successor:[598,607,610,628,669,674,705,710,741,750,759,780],succinct:[610,611,616],sudo:[679,697,723,761,773,775],suffer:[676,757],suffic:[596,681,736],suffici:[590,591,593,594,595,601,611,659,674,676,677,679,685,700,703,704,710,718,738,741,742,743,745,746,748,757,760,764,802],suffix:[590,605,610,611,616,617,622,625,629,641,659,670,700,710,743,748,760,766,768,775,780,782,791,804],sugar:[710,770],suggest:[585,593,603,609,610,667,674,679,701,705,710,723,726,740,742,743,744,748,757,760,764,771,782],suit:[592,606,607,611,660,667,669,670,678,689,693,697,701,702,722,723,743,745,751,754,759,760,766,783,788],suitabl:[585,597,604,606,607,610,611,625,658,659,662,663,664,675,676,686,710,712,714,715,724,726,741,743,748,759,764,
775],sum:[585,587,597,598,599,611,625,630,710,731,743,770,779,784],sum_1:645,sum_2:645,sum_of_all_vari:630,summar:[607,616,630,663,676,710,736,738,741,754,757],summari:[597,605,611,612,616,617,619,621,622,624,625,626,629,631,632,633,636,639,640,641,642,645,648,649,650,652,653,657,659,682,683,739,741,742,756,760,763,764,774,808,810],summaris:[596,650],summat:710,sumo:590,sun:782,sunit:607,sunk:[590,593,686,759],sunwspro:679,superclass:[593,743,768,770,771,780,782],superflu:677,superior:766,superiorli:639,supermajor:757,superpair:769,superpos:743,superreg:780,supersed:749,superset:[594,710],supersparc:780,superword:[711,779],suppl:645,supplement:[616,645,654,661,755],supplementari:585,suppli:[35,55,85,145,176,271,352,385,404,436,498,517,549,585,588,589,597,616,621,625,645,654,663,669,688,697,698,700,702,704,709,710,713,718,726,743,745,782,786,788,789,790,811],support:[27,84,129,175,220,270,279,280,291,294,295,296,378,435,490,548,585,587,588,589,592,593,594,595,596,597,603,604,605,606,608,610,611,616,617,621,624,625,627,630,635,637,639,640,642,644,645,646,658,659,661,663,665,667,670,671,673,674,675,676,677,678,679,681,683,684,688,689,693,694,698,699,700,702,703,706,707,708,709,713,716,717,722,724,725,727,731,733,735,739,740,741,743,744,745,747,748,750,751,752,754,756,757,759,760,761,767,768,770,771,773,775,776,777,778,779,782,783,784,785,786,787,788,789,802,803,804,806,807,808,809,810,811,812,813],supportsvp:749,supporttest:605,suppos:[607,611,645,669,683,703,709,710,722,731,735,745,765,782,786],suppress:[610,614,616,640,663,705,710,776,786],sure:[593,594,596,600,605,607,609,610,612,625,659,662,663,667,670,672,674,679,682,694,695,696,697,700,701,702,703,704,712,719,723,725,727,740,742,743,753,757,759,763,765,766,770,771,773,775,779,782,783,803,804,805,806,807,808,809,810],surfac:[96,97,186,187,286,287,447,448,560,561,588,607,675,709,757,783],surgic:743,surpris:[710,722,743,811],surprisingli:[610,674,679,710,723,803,805],surround:[594,604,611,677,679,695,710,716,741,748
,756,760,762,770,771],survei:661,surviv:[669,760,803],suspect:657,suspend1:663,suspend2:663,suspend:757,suspend_funct:663,suspens:[663,757],suspici:765,sustain:723,sve:[658,678,710,748,749],svelt:756,svg:[605,784],svn:[667,678,748,762,773,779],svptr:710,svr4:621,swap:[585,588,594,596,605,697,710,721,735],sweep:676,swift:[653,709,726,748],swiftasync:[597,710],swiftcc:[597,663,710],swifterror:[597,710],swiftmodul:614,swiftself:[597,710],swifttailcc:[597,607,710],swim:747,switchsect:[607,676],switchtosect:607,swizzl:[585,590],sycl:585,sym:[640,641,642,644,645,648,649,671,709,726,753,790,791],symbol1:[642,671],symbol2:[642,671],symbol:[24,127,217,375,487,588,593,597,603,607,610,615,617,619,620,621,623,625,627,634,635,639,641,642,643,645,648,649,650,653,659,664,676,679,694,695,698,701,709,710,711,712,715,731,739,740,743,746,759,764,773,777,780,784,785,786,788,789,791,804,805,806,807,808,809,810,811,814],symbol_in_partit:671,symbolalias:726,symbolaliasmap:726,symbolflagsmap:791,symbolmap:726,symbolnam:590,symbols:734,symbolstringptr:[726,791],symbolt:743,symbytes:[731,734],symlik:605,symlink:[605,723],symmetr:660,symmetri:775,symnam:642,symposium:660,symptom:679,symptomat:756,symrecordstream:731,symtab:[590,614,743],sync:[594,724,757,786],synch_and_fetch:700,synchron:[588,590,594,710,713,726,745],syncscop:710,synergist:750,synonym:[621,716],synopsi:659,syntact:[611,710,770,805],syntax:[17,20,27,29,78,84,120,121,123,129,131,171,209,210,212,213,220,222,266,314,370,371,378,479,480,482,483,490,492,542,584,586,590,606,607,610,616,617,619,639,641,642,644,646,653,659,672,677,679,695,700,711,712,716,717,718,728,743,751,756,759,761,768,770,775,786,802,804,805,806,807,808,809,810],synthes:[611,670,759,805],synthesizedcd:596,synthet:[641,705,780],sys:[595,679,698,709,790,809,810],sysadmin:745,syscal:607,sysmsg_op_ecc_err_interrupt:[27,129,220,378,490],sysmsg_op_host_trap_ack:[27,129,220,378,490],sysmsg_op_reg_rd:[27,129,220,378,490],sysmsg_op_ttrace_pc:[27,129,220,378,490],sysroot:[
700,701,702,771],system:[585,592,593,594,595,596,600,603,605,606,607,610,614,616,617,619,621,644,659,661,662,668,670,672,675,676,677,678,680,683,694,697,700,701,702,703,704,709,711,712,713,720,723,724,726,735,739,740,742,743,744,745,746,753,756,757,759,760,762,764,766,768,769,770,771,773,774,778,781,782,783,784,787,788,789,790,791,804,805,806,807,808,809,810,811],system_error:809,system_vgpr_workitem_id_undefin:590,system_vgpr_workitem_id_x:590,system_vgpr_workitem_id_x_i:590,system_vgpr_workitem_id_x_y_z:590,systemat:760,systemz:[607,679,710,716,748,762],sysv:[640,650,661],t1item:722,t2bcc:716,t2item:722,tab:[610,611,625,667,681,770],tabl:[27,129,220,378,490,585,587,588,589,591,593,596,597,604,605,614,615,621,623,630,634,639,641,642,643,644,645,648,649,653,658,659,663,679,680,684,693,703,707,709,710,712,713,715,722,723,726,729,731,736,738,739,743,760,764,766,769,771,774,779,780,785,786,788,803,804,805,806,807,808,809,810,811],tabledef:631,tablegen:[605,658,670,679,686,691,701,702,707,748,778,780],tablegen_json_vers:768,tablegenbackend:769,tablegenbackendskeleton:769,tablegenmain:769,tablet:719,tabsiz:625,tackl:[672,805,808],tag0:[586,710],tag1:[586,710],tag2:710,tag3:710,tag:[14,79,172,267,333,350,430,543,587,597,610,658,667,668,676,679,710,718,723,743,745,751,756,764,768,769,770],tag_apple_properti:759,tag_base_typ:759,tag_memb:759,tag_offset:710,tag_pointer_typ:759,tag_structure_typ:759,tahiti:590,tail:[597,611,663,676,705,710,722,724,759,770,779,785,811],tail_exit:785,tailcal:[607,760],tailcalle:607,tailcallopt:[607,710],tailcc:[597,607,710],tailor:[644,697],take:[585,590,592,593,594,596,597,599,601,602,603,604,606,607,608,609,610,611,614,616,617,619,623,626,629,631,632,636,637,639,642,644,645,657,659,660,662,663,667,668,669,670,671,675,676,677,679,682,684,688,699,703,705,706,707,708,709,710,712,713,714,715,716,717,719,722,723,724,725,726,727,741,743,745,746,750,753,754,755,759,760,761,762,764,766,768,769,770,773,777,780,781,782,783,785,786,788,789,790,791,803,8
04,805,806,807,808,809,810,811,812,814],takecallback:743,takeerror:[726,743,788,789,790,791],taken:[590,593,599,603,607,631,659,663,664,669,674,688,695,709,710,714,716,721,722,726,741,743,745,746,750,753,757,759,760,764,765,766,769,770,771,777,780,811],talk:[610,660,670,683,689,710,720,743,755,782,803,804,805,806,808,811,812],talli:774,tape:679,tar:[679,701,753],tarbal:[679,701,702],target:[24,78,127,171,217,266,375,487,542,585,588,589,592,594,595,596,597,603,604,605,606,609,610,613,615,616,617,618,624,627,631,639,641,642,645,656,658,661,663,665,668,670,672,674,675,676,679,681,683,684,686,688,689,691,693,698,699,701,702,704,705,706,707,709,714,715,718,720,722,723,725,726,728,730,746,749,756,759,760,761,762,764,766,768,769,770,771,773,774,775,777,778,779,782,787,788,789,805,806,807,808,810,812],target_compile_definit:606,target_link_librari:605,targetasminfo:[676,780],targetcallingconv:780,targetdescript:780,targetframeinfo:780,targetframelow:669,targetfunc:743,targetinfo:[667,780],targetinstrformat:780,targetinstrinfo:748,targetjitinfo:780,targetlibraryinfo:[710,748],targetlow:[601,669,670,688,748,780,784],targetloweringbas:591,targetloweringobjectfil:607,targetloweringopt:784,targetmachin:[676,715,724,725,748,780,782,788,790,805,806,807,808,809],targetnam:[639,716],targetopt:[607,809],targetpassconfig:[690,725],targetprocesscontrol:[709,726],targetregisterclass:[607,780],targetregisterdesc:[607,780],targetregisterinfo:748,targetregistri:[607,780,809],targets_to_build:701,targetselect:[805,806,807,808,809,810],targetselectiondag:[607,670,780],targetsubtarget:780,targettransforminfo:710,targettripl:809,tarjan:782,task:[610,611,663,667,669,670,689,699,710,722,746,759,761,764,765,766],task_begin:611,task_end:611,taught:[607,684],tba:590,tba_hi:[20,589],tba_lo:[20,589],tbaa:[674,711,721,727],tbb:780,tbcc:780,tbd:[586,590,640,726],tbepler:747,tblgen:[605,607,615,670,699,701,768,770,771,780],tblgensubtarget:780,tbr_jtr:716,tbuffer_load_format_d16_x:[0,3,4,9],tbuffer_load_
format_d16_xi:[0,3,4,9],tbuffer_load_format_d16_xyz:[0,3,4,9],tbuffer_load_format_d16_xyzw:[0,3,4,9],tbuffer_load_format_x:[0,2,3,4,9],tbuffer_load_format_xi:[0,2,3,4,9],tbuffer_load_format_xyz:[0,2,3,4,9],tbuffer_load_format_xyzw:[0,2,3,4,9],tbuffer_store_format_d16_x:[0,3,4,9],tbuffer_store_format_d16_xi:[0,3,4,9],tbuffer_store_format_d16_xyz:[0,3,4,9],tbuffer_store_format_d16_xyzw:[0,3,4,9],tbuffer_store_format_x:[0,2,3,4,9],tbuffer_store_format_xi:[0,2,3,4,9],tbuffer_store_format_xyz:[0,2,3,4,9],tbuffer_store_format_xyzw:[0,2,3,4,9],tcb:758,tcp:709,tdm:696,tdrr:619,tdtag:768,teach:[670,679,788,804,812],team:[660,667,702,744,745,750],tear:669,technic:[606,608,609,627,667,678,712,745,757,784],techniqu:[585,593,601,607,676,681,689,710,722,743,754,779,780,781,782,783,803,805,806,807,808,812],technolog:[666,713,811],ted:768,tediou:[611,786],tee:601,teeth:766,tell:[590,593,601,605,607,609,610,612,659,664,667,670,676,679,682,698,700,709,710,713,722,723,725,726,727,740,743,744,745,759,770,774,775,803,804,808,810,811],templat:[593,610,625,658,659,660,678,703,722,741,757,765,768,769,770,771,780,782,786,804],templateargdecl:770,templatearglist:770,templateparam:710,tempor:[603,710],temporari:[589,607,610,616,631,671,672,688,695,710,711,712,716,741,743,745,755,760,770,774,775,786],temporarili:[585,590,601,611,757],tempt:[610,679,765],ten:[677,735],tend:[628,631,639,667,673,677,679,689,709,710,726,743,748,753,759],tension:808,tensorflow:712,tent:[590,710,727],teor:716,term:[585,590,593,594,596,607,608,609,610,625,639,667,669,672,676,686,695,710,711,714,716,721,722,723,725,726,741,742,750,756,759,762,764,765,766,769,770,775,778,789],termin:[585,590,597,599,601,607,610,616,621,622,628,629,632,636,644,652,657,659,669,671,674,681,695,714,722,724,730,731,736,741,743,750,754,756,759,765,769,775,780,782,806],terminolog:[599,606,688,710,741,748,751,778,810],ternari:607,terribl:592,territori:[667,679],terror:592,tess:590,test1:[717,761,764,775],test2:[650,652],test5:611,test:[585,590
,592,593,601,603,604,607,609,611,615,617,632,639,644,645,650,651,652,654,658,659,660,662,663,664,669,670,672,675,678,688,694,695,697,699,701,703,704,714,722,727,728,741,743,744,745,746,748,756,757,759,760,764,766,768,769,778,782,786,805,806,808,809],test_arg:602,test_argu:602,test_blockaddress:684,test_cod:709,test_code_har:709,test_devic:660,test_exec_root:616,test_format:616,test_fuzz:712,test_hd:660,test_host:660,test_nam:602,test_source_root:616,test_suite_benchmarking_onli:773,test_suite_collect_stat:773,test_suite_fortran:773,test_suite_remote_host:773,test_suite_run_benchmark:773,test_suite_run_typ:773,test_suite_run_und:773,test_suite_spec2000_root:773,test_suite_spec2006_root:773,test_suite_spec2017_root:773,test_suite_subdir:773,test_suite_use_perf:773,testabl:[610,689,742],testament:812,testcas:[611,612,667,709,710,743,804],testcase_dir:712,testcleanup:710,tester:[615,667,678,702,774],testfnptr:710,testfunc:805,testingconfig:616,testl:760,testq:760,testresult:678,testrunn:616,testsuit:[611,616,746],tex:774,text:[585,590,605,607,608,609,611,621,625,627,630,640,642,643,645,646,650,652,658,667,670,671,675,710,712,717,718,742,743,755,759,760,762,770,773,774,775,780,783,786,790,802,811,814],textfileread:610,textual:[585,590,607,614,616,617,625,667,672,675,710,711,718,721,727,729,768,771,775,784,805],textur:588,tfe:[0,2,3,4,93,96,97,100,101,104,105,106,107,112,113,114,115,186,187,189,190,191,194,195,196,201,202,203,204,281,282,283,286,287,294,295,296,297,298,299,300,301,306,307,308,309,354,447,448,455,456,457,469,470,471,472,557,560,561,564,565,568,569,570,571,576,577,578,579,590],tg_split:590,tgid_size_en:590,tgid_x_en:590,tgid_y_en:590,tgid_z_en:590,tgsplit:590,tgt:[0,2,3,4],than:[84,585,589,590,592,593,594,595,596,597,598,599,601,603,605,607,609,610,611,614,616,617,621,623,625,627,631,639,640,644,645,649,652,654,659,660,663,664,666,667,668,669,670,672,674,675,676,678,679,681,684,688,689,693,695,696,697,702,704,705,706,708,709,710,712,713,714,717,718,719,721,
722,723,725,726,729,731,733,735,736,738,741,743,744,745,746,747,750,753,755,756,757,758,759,760,762,764,766,768,769,770,775,776,779,780,781,782,785,786,788,789,790,803,804,805,806,807,808,809,810,811,812],thank:[662,667,704,723,756],the_dag:770,theadsafemodul:726,thecontext:[804,805,806,807,808,809,810],thecu:810,thedoc:786,thedoclist:786,theexecutionengin:810,thefpm:[805,806,807,808],thefunct:[804,805,806,807,808,809,810],thei:[585,586,587,588,589,590,593,594,596,597,600,601,602,605,606,607,608,609,610,611,612,616,619,621,623,627,628,633,635,639,640,641,645,653,654,657,659,660,663,664,666,667,668,669,671,672,673,674,675,676,677,678,679,683,684,686,688,689,695,696,697,698,702,703,705,707,708,709,710,711,712,714,716,718,721,722,723,725,726,729,730,738,741,742,743,744,745,746,747,748,750,751,753,755,756,757,759,760,761,762,763,764,765,766,768,769,770,771,773,774,775,776,777,779,780,781,782,784,785,786,788,789,803,804,805,806,808,811],thejit:[805,806,807,808,810],them:[585,590,593,594,596,597,601,602,604,605,606,607,608,610,611,612,616,621,625,630,634,636,639,641,644,645,648,649,652,653,659,660,662,664,666,667,668,669,672,674,675,676,677,679,681,683,684,685,686,688,689,691,695,698,699,701,702,703,706,707,709,710,712,713,714,716,718,719,721,722,723,724,725,726,738,740,741,742,743,744,745,748,750,752,753,755,758,759,760,761,762,764,765,766,768,769,770,773,774,775,776,780,782,783,784,785,786,788,789,790,802,803,804,805,806,807,808,810,811],theme:[771,788],themodul:[804,805,806,807,808,809,810],themself:667,themselv:[594,597,607,616,659,667,675,710,722,723,727,743,745,757,759,760,770,771,776,784,785,803],then1:760,then2:760,thenbb:[806,807,808,809,810],thenv:[806,807,808,809,810],theoret:[639,743,765],theori:[700,722,731,786],theorist:709,thereaft:762,therebi:[659,684,710,762],therefor:[585,590,594,596,607,639,659,660,663,667,669,677,694,698,710,718,719,724,725,728,741,743,748,750,757,759,760,762,776,782,786],thereof:[608,710],thesi:750,thesimplifiedandinstruct:705,thetarg
etmachin:809,thetripl:748,thi:[0,1,2,3,4,5,6,7,8,9,14,18,19,20,24,25,26,27,28,30,32,35,79,82,83,84,85,86,107,110,111,117,120,122,123,127,128,129,130,172,176,196,199,200,205,209,211,212,213,217,218,219,220,221,223,267,271,279,280,288,291,294,295,296,301,304,305,306,308,310,314,315,316,320,321,325,326,327,333,336,337,338,339,340,350,351,352,355,368,369,370,371,375,376,377,378,379,380,382,385,430,433,434,436,457,467,468,479,481,482,483,487,488,489,490,491,493,495,498,543,546,547,549,571,574,575,580,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,616,617,618,619,621,622,623,624,625,626,627,628,629,630,631,632,635,636,637,639,640,641,642,644,645,648,649,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,673,675,676,678,679,680,681,682,683,684,685,686,687,688,689,690,691,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,713,714,715,716,718,719,720,721,723,724,725,726,727,728,729,730,731,733,734,735,736,738,739,740,741,742,743,744,746,747,748,749,750,751,753,754,755,756,757,758,759,760,762,763,764,765,766,768,769,770,771,773,774,775,776,777,778,779,780,781,782,783,784,785,786,788,789,790,791,802,803,804,805,806,807,809,811,812,813],thin:[603,605,621,625,634,637,710,743],thing:[590,593,596,597,601,603,606,607,608,610,611,616,659,660,664,667,669,675,677,679,681,686,689,699,701,704,710,712,714,717,718,721,722,723,725,726,727,739,740,741,743,744,745,756,757,759,760,765,768,770,774,775,782,784,786,788,789,802,803,804,805,806,807,808,809,810,811,812],think:[585,600,605,607,610,664,667,670,676,677,683,703,707,709,710,711,721,723,724,730,742,743,745,746,760,765,770,806,809,811],thinlto:[599,605,760,763],third:[585,592,597,599,607,610,639,644,654,659,663,664,669,671,676,677,679,683,702,710,716,722,728,743,744,757,759,760,762,768,770,774,780,782,785,786,804,811],thischar:[802,803,804,805,806,807,808,809,810],thoma:760,thoroughli:806,those:[585,588,590,593,59
4,601,603,605,606,607,608,609,610,611,616,620,621,623,625,628,639,640,641,644,645,659,664,666,667,669,675,676,677,678,679,680,682,685,687,688,697,700,701,703,709,710,711,712,714,721,722,725,726,739,741,743,745,746,749,755,756,757,759,760,762,764,765,766,768,769,770,775,777,778,780,782,783,786,787,808,809,810],thost:681,though:[585,594,598,607,611,627,631,639,659,660,667,669,670,671,674,676,677,683,684,688,699,703,705,709,710,713,714,719,721,722,723,729,741,743,745,759,760,774,775,782,788,803,804,806,808,810],thought:[607,683,710,721,736,748,764],thousand:[712,735,741],thread:[585,588,589,593,594,595,597,605,607,611,614,625,645,662,663,665,667,668,669,676,695,705,709,711,712,724,726,745,747,748,756,758,760,764,773,783,784,785,788,789],thread_id:[611,785],thread_loc:[607,710,764],thread_local_quarantine_size_kb:756,thread_siblings_list:595,threadbuff:785,threadgroup:590,threadgroup_dimens:590,threadgroup_size_in_wavefront:590,threadid:724,threadidx:724,threadloc:[597,710],threadpool:726,threadsaf:676,threadsafecontext:[788,789],threadsafemodul:[788,789,790,791,805,806,807,808],threadsanit:710,threat:[608,755,757],threaten:608,three:[585,588,592,597,601,606,607,611,612,639,645,659,663,664,666,669,673,677,679,683,688,693,710,714,716,721,722,723,728,729,741,743,748,750,753,754,757,760,764,766,768,770,775,780,782,783,788,803,806],threshold:[590,625,645,710,741,747,754,783],threshold_float_numb:645,threshold_int_numb:645,throttl:[631,750],through:[585,590,593,594,597,598,601,602,605,606,607,609,617,621,639,659,660,661,662,663,666,667,669,672,673,674,675,676,677,679,680,682,684,686,688,695,696,699,702,703,706,708,709,710,712,713,714,715,716,718,719,720,722,723,725,726,727,728,739,741,742,743,745,748,749,750,754,756,757,759,760,762,763,764,765,768,769,770,777,779,780,781,782,783,786,788,789,790,791,803,805,806,807,808,809,810,812],throughout:[588,589,596,597,606,610,611,664,667,679,710,715,735,738,743,754,759,765,770,785],throughput:[631,639,674,726],throwawai:592,throwinfo:
669,thrown:[603,669,673,710],throwntyp:710,thu:[585,593,594,597,598,601,602,607,610,611,631,659,663,667,669,672,674,676,695,704,705,710,712,720,721,722,729,731,738,741,742,743,745,759,760,764,770,775,776,782,803,804,805,806,807,808,809,810],thumb1:[661,710],thumb2:[607,661,706,710],thumb:[594,607,667,700,765,783],thunderbird:667,thunderx2t99model:780,thunderxt8xmodel:780,thunk:[607,644,710,722,741,777],thursdai:[678,719],thusli:734,tian:750,tick:785,tid:724,tidbit:[593,810,812,813],tidi:[610,723,726,742,748,782],tidig_cmp_cnt:590,tie:[663,710],tiebreak:660,tied:[607,708,709,710,716,759,764],tier:667,ties:[684,710,726],tight:[674,713],tighten:683,tightli:[664,710,725,745,803,804,805,806,807,808,809,810],tii:[607,748],tile:[66,157,254,418,530,710,750],till:696,tim:[692,750,757],timberwolfmc:774,time:[585,589,590,592,593,594,595,597,598,599,600,601,602,603,605,606,607,608,611,614,616,617,619,621,625,630,631,632,633,639,640,641,644,645,653,654,657,658,659,660,661,662,663,664,667,668,669,670,671,674,676,678,679,681,682,684,685,688,689,693,695,696,698,699,700,701,702,703,708,709,710,711,712,714,715,716,717,719,720,722,723,724,725,726,728,731,735,736,739,740,741,743,744,745,747,750,753,756,757,759,761,762,764,766,768,770,771,773,774,775,776,777,778,779,780,781,782,783,784,785,786,788,789,790,802,804,805,806,807,808,812],timeit:773,timelin:[757,783,784],timeout:[616,712],timeout_exitcod:712,timepassesisen:659,timer:[595,710],timescal:785,timestamp:[585,614,621,635,641,653,736,745,785],tini:[619,703,722],tip:[601,662,667,680,745,768,782],tire:[610,771],tirefactori:610,titl:[600,609,667,742],tli:748,tls:[649,710],tlv:709,tma_hi:[20,589],tma_lo:[20,589],tmax:589,tmp1:[610,611,677],tmp2:[677,808],tmp3:[611,677],tmp4:677,tmp5:677,tmp7:611,tmp9:611,tmp:[605,607,611,616,631,654,676,677,695,704,710,725,743,744,775,780,781,805,808],tmpb:[808,809,810],tmpf:595,tobia:750,toc:607,todai:[594,610,675,677,710,739,745,748,762,764],todefin:768,todo:[607,667,670,685,691,701,705,741,753,782],
togeth:[107,196,301,457,571,585,588,590,593,596,598,601,605,607,611,612,616,636,637,640,645,654,657,659,663,664,667,669,676,679,683,688,689,699,705,709,710,712,719,725,726,731,735,741,743,745,754,759,769,775,779,782,783,789,791,802,803,804,806,807,811],toi:[660,726,784,788,789,790,791,802,803,804,805,806,807,808,809,810,811],tojittargetaddress:726,tok:[663,710,810],tok_binari:[807,808,809,810],tok_def:[802,803,804,805,806,807,808,809,810],tok_els:[806,807,808,809,810],tok_eof:[802,803,804,805,806,807,808,809,810],tok_extern:[802,803,804,805,806,807,808,809,810],tok_for:[806,807,808,809,810],tok_identifi:[802,803,804,805,806,807,808,809,810],tok_if:[806,807,808,809,810],tok_in:[806,807,808,809,810],tok_numb:[802,803,804,805,806,807,808,809,810],tok_then:[806,807,808,809,810],tok_unari:[807,808,809,810],tok_var:[808,809,810],tokcod:770,token:[607,639,663,670,688,702,718,731,745,764,770,786,802,803,804,805,806,807,808,809,810],tokidentifi:770,tokinteg:770,tokprec:[803,804,805,806,807,808,809,810],tokstr:770,tokvarnam:770,told:667,toler:[666,676,710,712,763],tolmach94:676,tolmach:676,tom:[702,786],tombston:733,ton:721,tonearest:710,tonearestawai:710,tonga:590,tongapro:590,too:[592,601,607,609,610,611,631,659,664,667,673,679,681,688,689,694,695,697,702,703,710,714,721,722,723,743,747,750,760,765,766,768,770,775,782,785,786,789,803,808,810,811],tool:[590,592,593,597,602,603,605,606,607,610,611,614,616,619,620,623,624,625,626,628,630,631,632,639,644,647,648,649,650,651,652,654,659,660,661,662,664,666,667,671,676,678,680,681,682,694,695,699,700,701,702,704,710,711,713,720,723,724,726,728,736,738,739,740,741,742,744,745,746,748,751,753,754,756,759,761,763,765,766,771,773,775,778,780,782,803,804,805,806],tool_nam:679,toolchain:[603,604,605,607,610,614,681,700,729,730,731,736,739,745,746,757,773],toolingtest:723,toolkit:[660,724,743,751],toolnam:744,toolset:[605,681,783],top:[585,590,593,594,597,605,606,607,610,611,616,619,639,645,667,668,669,675,677,679,684,688,691,694,698,70
8,710,711,712,714,721,722,723,741,742,743,744,745,754,759,768,769,770,773,774,777,782,784,786,788,789,790,803,804,805,806,807,808,809,810,811],topic:[667,678,717,719,761,811,812],toplevel:682,toplevelexpr:[803,804,805,806,807,808,809,810],topmost:654,topn:645,topolog:[595,611,738],torec:743,torerr:743,toret:759,toreturn:785,tos:787,toshio:607,tot:764,total:[590,594,597,623,638,639,645,650,710,712,722,723,731,735,738,741,743,754,756,758,759,760,769,770,774,782,784,804],totalview:585,touch:[595,662,695,703,705,710,741,743,745,755,757,759,760,782],tough:803,tour:803,toward:[590,598,602,603,605,667,675,678,710,743,749,756,757,760],towardzero:710,town:678,tpeng:757,tpi:[644,729,730,736,739],tpistreamhead:738,tpistreamvers:738,tpoff:764,tr1:743,trac:696,trace:[625,646,659,664,695,710,711,759,760,764,782],trace_ev:784,tracer:785,tracevalu:741,track:[585,590,593,600,601,607,610,612,616,639,664,667,668,672,674,675,676,679,683,689,704,709,710,711,712,716,721,722,726,727,728,741,743,745,757,759,760,762,763,764,766,771,782,804,805,806,807,808,810],tracker:[600,608,609,662,667,675,742,745,757],tracksregl:716,tradbigmip:641,trade:[601,603,679,709,721,743,760,789],tradeoff:[594,610,676,710,725,805],tradit:[585,592,607,621,640,678,695,741,743,759,788,802],tradition:[593,607,726,727],tradlittlemip:641,traffic:[719,743,745,808],tragic:745,trail:[588,606,610,611,616,641,659,664,677,684,710,716,743,786],train:[592,645,710,773],trait:[743,768,786],tramp1:710,tramp:710,transcendent:660,transcrib:804,transcript:805,transfer:[596,660,663,669,708,709,710,716,758,760,773,805,806,807,808,809,810],transform:[588,594,601,605,607,610,611,639,666,670,672,673,674,676,679,681,686,687,688,699,707,709,710,714,722,725,728,743,744,750,760,762,764,768,771,775,777,778,780,781,782,787,789,790,791,804,805,806,807,808,810],transformedtsm:789,transformfunct:789,transformlay:789,transit:[590,607,639,667,689,707,721,722,725,745,749,756,759,766,778,782,788,805],translat:[593,594,607,610,613,618,622,655,656,658,
659,663,664,671,672,674,675,677,679,683,689,693,710,714,726,727,731,738,741,743,746,747,749,750,759,760,768,769,777,780,786,810,811],transmit:710,transpar:[610,659,670,677,705,713,743],trap:[20,123,133,134,136,139,140,141,144,145,146,147,148,149,151,152,153,154,155,156,160,161,162,163,164,165,166,167,168,169,170,212,224,226,227,229,231,234,235,236,240,241,243,244,245,246,248,249,250,251,252,253,257,258,259,260,261,262,263,264,265,370,482,604,617,663,674,677,705],trap_pres:590,travers:[607,616,658,671,676,686,691,703,722,741,750,782],trc:607,tre:741,treat:[24,127,217,375,487,585,590,594,596,597,605,607,611,614,616,621,627,630,639,641,653,659,669,671,675,677,684,706,707,709,710,712,713,716,722,726,727,731,757,759,762,764,768,770,775,780,784,804,811],treatment:[590,676,743,780],tree:[590,592,603,605,607,609,610,616,644,658,667,669,672,674,675,676,677,678,679,681,682,684,698,701,703,710,711,714,722,723,724,725,726,727,743,745,750,752,756,759,766,770,774,775,781,782,784,802,804,805,806,807,808,809,810],tri:[601,602,605,607,644,675,710,712,717,722,741,748,754,761,776,780,782,805],triag:[607,678,705],trick:[662,667,680,701,703,710,743,745,760,768,808],tricki:[594,660,676,773,782],trickier:709,trickrevnum:745,trie:642,trig:780,trigger:[589,590,593,601,605,610,632,663,676,695,704,709,710,711,712,715,716,725,726,742,743,762,766,774,775,786,788,790,805],trim:745,triniti:590,trip:[596,610,710,714,741,750,760,776],tripcount:710,tripl:[597,617,619,639,642,674,675,698,701,709,715,716,726,748,759,775,780,809,810],triplet:[659,710],trivial:[590,593,594,596,606,607,609,610,659,670,672,677,679,686,688,695,705,708,709,710,714,741,742,743,745,746,757,759,760,765,766,780,782,785,803,808,811,812],troubl:[659,675,679,681],true_branch_weight:599,truebr:759,truedest:710,truli:[720,743,760,790],trunc:[705,779],truncat:[30,51,142,223,237,380,400,493,513,585,589,621,705,710,780],truncinst:741,trunk:[604,607,668,678,682,702,712,742,745,759,773],trust:[610,667,756,757,759,790],truth:[667,710,721,
806],tsc701:780,tsc:[784,785],tsc_delta:785,tsctx:726,tsd:756,tsflag:[770,780],tslp:750,tsm1:726,tsm2:726,tsm:[726,731,788,789,790,791,805,806,807,808],tstri:716,tsvg:784,tti:[748,749],ttmp0:589,ttmp4:589,ttmp5:589,ttmp6:589,ttmp7:589,ttmp:[10,11,12,13,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,54,55,56,57,58,59,60,61,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,132,137,138,157,158,159,225,228,232,233,254,255,256,317,318,322,323,328,329,330,331,332,341,342,343,344,345,347,348,349,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,403,404,405,406,407,410,411,412,413,418,419,420,421,422,423,424,425,426,427,428,429,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,516,517,518,519,520,521,522,523,524,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,590],tuesdai:678,tunabl:[695,746,756],tune:[607,659,674,743,756,759,760,779,808],tupl:[671,688,710,745],turbo:595,ture:[803,805],turk:590,turn:[585,590,594,596,601,602,605,606,607,608,611,621,659,663,667,670,679,697,699,710,713,714,717,721,726,727,740,741,750,755,759,764,768,770,771,780,782,788,789,790,803,804,811],turner:748,turnercamelback:748,turnerlldb:748,tutori:[659,660,678,679,693,703,712,722,723,726,743,787,788,789,790,791,802,803,804,805,806,807,808,809,810],tval:759,tweak:[659,660,679,682,703,740,783,788,805],twice:[592,663,681,682,684,710,714,722,743,753,773,782,805],twiddl:[710,805,806,807,808],twine:[709,769],twist:788,twitter:719,two:[585,587,588,589,590,592,593,594,596,597,598,599,601,602,603,604,605,606,610,611,616,619,627,628,630,631,638,639,644,645,654,659,660,663,664,666,667,668,669,671,673,674,675,676,679,683,684,688,689,691,693,695,696,699,703,704,707,708,709,710,711,712,714,715,716,721,722,723,724,725,726,728,731,735,738,741,742,743,744,745,748,754,756,757,759,760,762,763,764,765,766,768,769,770,771,773,774,775,777,779,780,782,784,786,788,789,790,803,805,806,807,808,811],twoaddressinstructionpass:607,twoargfp:770,twooperan
daliasconstraint:770,txt:[592,605,610,616,626,652,654,659,662,667,679,695,702,717,742,745,759,761,769,773,775,780,781,782,784,788,789,790,791],ty2:710,ty64:705,tying:607,type0:688,type1:688,type:[27,129,220,378,490,586,590,593,594,596,597,599,601,602,604,605,607,612,614,617,621,623,626,627,630,632,637,639,640,641,642,649,653,658,659,660,663,666,667,669,671,672,675,676,679,681,685,688,689,691,693,697,701,702,703,704,705,706,708,709,711,713,715,718,721,722,723,724,725,729,733,736,739,742,744,748,750,752,754,759,760,762,764,766,769,771,773,774,775,778,782,783,784,785,788,802,803,804,805,806,807,808,809,810,811,814],type_info:669,type_nam:590,typecod:670,typedef:[610,644,658,671,743,786,789],typedescriptor2:669,typeendbegin:738,typeflag:[658,759],typeid1:777,typeid2:777,typeid3:777,typeid:[670,710,722,777],typeidref:710,typeindex:[730,738],typeindexbegin:738,typeindexend:738,typeinto:698,typeless:587,typemap:670,typenam:[590,710,743],typeof_:768,typeof_kind:768,typeof_xxx:768,typeprint:670,typerecordbyt:738,typesaf:710,typeservermaps:731,typeset:688,typetestr:710,typic:[585,590,593,594,605,607,610,611,621,623,625,631,636,640,650,660,666,668,669,673,675,676,677,681,683,684,685,689,696,700,702,703,709,710,711,712,713,715,716,718,723,725,726,727,728,736,741,742,743,744,746,754,756,759,760,762,769,770,773,775,779,780,782,785,811],typo:[610,611,723],u16:[0,3,4,9,589],u16x2:[0,1,2,3,4,7,8,9,587],u16x4:[0,2,3,4,9],u32:[0,1,2,3,4,7,8,9,586,589,607,724,754],u32x4:[0,2,3,4,9],u4x8:[1,7,8,9],u64:[0,2,3,4,9,589,724],u8x4:[0,1,2,3,4,7,8,9],u8x8:[0,2,3,4,9],u999999:742,ualpha:770,uav:590,uavexportt:590,ubsan:[605,712,756],ubuntu:[667,679,697,701,723,761],ubyt:[585,590],uclock_realtim:698,ud2:[604,607],ud2a:607,udiv:683,ueq:[710,808],ueyama:748,ufmt:0,uge:710,ugli:[611,672,807],uglifi:805,ugt:710,uid:[621,635,641,653,710],uimm20:[56,241,242,405,518],uimm32:145,uimm8:145,uint128:738,uint128oct:738,uint16:[673,735,738,762],uint16_t:[590,707,730,731,738,739,759,768,780,786],uint16short:7
38,uint16x4_t:596,uint32:[673,733,738,762],uint32_max:[743,759],uint32_t:[590,663,710,731,734,736,738,759,786],uint32long:738,uint32x2_t:596,uint64:[673,738,762],uint64_max:598,uint64_t:[598,706,709,710,722,739,743,754,786],uint64quad:738,uint8:[673,762],uint8_t:[590,712,730,734,768,786],uint:[617,644],uint_64:590,uint_max:[710,714],uintptr_t:760,uitofp:[804,806,808],uiuc:667,ule:710,uleb128:[585,671],uleb:590,ulimit:712,ulittle32_t:[735,736],ulp:710,ult:[710,759,806,808],ultim:[610,616,667,710,721,753,759,760,805],ultrasparc3:780,ultrasparc:[679,780],um1:745,um2:745,um3:745,um4:745,um5:745,um6:745,um7:745,um8:745,umbrella:[667,745],uml:666,umrella:745,unabbrev:597,unabbrevi:597,unabl:[608,639,665,674,688,710,711,728,743,760],unabridg:808,unaccept:667,unadorn:710,unaffect:721,unalign:[594,674,730],unalloc:720,unambigu:705,unannot:745,unari:[770,808,809,810,811,812],unaryexprast:[807,808,809,810],unassign:[589,600],unassignedbug:600,unattend:601,unavail:[594,639,665,705,759,780],unawar:[660,710],unbias:598,unblock:667,unbound:[607,639,710,780],unbuild:745,uncach:590,uncaptur:710,uncategor:659,uncertain:[662,667,766],uncertainti:[602,609],unchang:[585,593,710,743,745,759,764,776],uncharacter:738,uncheck:743,unclear:[585,610,611,731,736,738,808],unclutt:765,uncomfort:608,uncommon:[606,669,688,710],uncompil:726,uncompress:[605,664,679,743],uncondit:[585,607,625,684,710,714,741,764,780,806],uncondition:[593,607,610,671,696,705,710,790,810],unconnect:677,uncontroversi:676,unconvent:811,unconvinc:712,uncoop:676,uncov:[605,610,712],undecid:745,undeclar:[710,804],undef:[594,611,663,669,685,705,708,710,716,721,743,759,768],undefin:[590,594,596,600,605,610,611,640,641,644,653,663,669,671,672,674,677,684,687,704,709,713,714,721,741,743,756,759,768,770,771,776,779,780],undefinedbehaviorsanit:712,undefinit:710,under:[590,592,595,605,607,610,612,616,633,639,648,649,659,663,664,667,669,672,674,678,679,681,694,696,702,704,708,709,710,712,714,720,723,724,726,742,745,757,760,764,769,7
71,775,777,778,780,781,782,788,789,790,791,803,811,813],underestim:[639,710],underflow:[585,589,590,611],undergo:[609,710,726,741],underli:[596,605,607,610,639,659,667,676,683,684,709,710,726,727,735,736,743,756,760,764,769,777,810],undermin:[610,759],underneath:[679,744,745,775],underscor:[610,626,716,759],understand:[593,597,600,607,608,609,610,611,616,621,639,667,669,670,676,677,700,710,714,717,718,722,723,729,730,734,739,741,742,743,746,755,756,759,760,761,764,765,771,782,784,785,786,790,804,808,810,812],understood:[597,607,609,610,731,734],undesir:[710,757],undetect:[712,756],undisturb:610,undo:596,undocu:[669,731],undu:766,une:710,unequ:[611,710],unexpect:[585,602,604,606,610,667,681,722,743,756,808],unexpectedli:[770,775],unexpos:639,unfamiliar:761,unflatten:784,unfold:760,unforeseen:693,unfortun:[660,698,700,708,710,722,743,745,759,760,782,805,806,811],unfriendli:712,unfus:660,unglamor:667,unguard:710,unhandl:[669,743],unhappi:660,unharden:760,unhid:659,uni:724,unic:659,unicod:642,unif:[585,593],unifi:[585,588,590,597,611,674,700,710,743,745,754],uniform:[610,639,710,728,743],uniformli:[610,639],uniformretv:710,unimagin:764,unimpl:782,unimport:[753,764],unindex:780,uniniti:[585,640,676,677,710,763,769,770],uninstru:731,unintend:741,unintent:667,uninterest:667,unintrus:710,union:[585,593,607,610,639,688,731,759,808],uniprocessor:594,uniqu:[590,597,607,611,614,616,640,641,663,664,667,669,671,696,698,705,706,707,710,713,722,726,728,731,736,738,743,745,759,762,764,766,769,770,771,776,779,784,804,805],unique_ptr:[709,724,726,743,769,788,789,790,791,803,804,805,806,807,808,809,810],uniqueid:736,uniqueretv:710,unistd:765,unit:[20,24,123,127,212,217,370,375,482,487,596,604,605,610,612,625,630,659,662,663,664,669,671,672,677,684,689,705,709,710,711,712,723,725,726,728,731,739,741,742,743,746,754,759,760,766,768,769,777,779,780,781,782,805],unittest:[604,605,723,773,775],univers:[606,615,625,642,643,650,654,664,666,667,710,719,743],unix:[605,610,611,621,659,672,675,67
9,709,710,743,765,775],unknown:[589,590,607,631,637,639,659,665,668,684,687,694,698,700,710,725,731,734,735,739,743,760,776,786,790,802,803,804,805,806,807,808,809,810],unknownptr:710,unknownvalu:710,unless:[585,587,590,592,601,603,605,606,607,609,610,611,614,616,617,625,626,630,632,636,639,641,642,659,660,663,667,669,674,676,677,702,710,712,717,721,726,739,741,743,745,748,755,757,761,762,764,766,769,773,774,775,776,780,782,806,808],unlik:[585,590,594,597,603,606,607,609,610,652,660,669,676,679,684,694,695,697,703,710,721,726,741,743,748,759,760,762,764,770,781,782,790,805,808],unlimit:639,unlink:[679,743],unload:[590,710,782],unlock:726,unmaintain:[667,771],unmanag:[710,764],unmangl:627,unmap:756,unmask:710,unmatch:645,unmitig:760,unmodel:639,unmodifi:[588,710,764,789],unnam:[606,659,710,716,741,770],unnamed_addr:[597,671,710],unnecessari:[585,604,607,674,685,705,710,716,741,743,760,763,764,780,790],unnecessarili:[593,609,782],unneed:[641,653,808],unnorm:[588,710],uno:710,unoffici:[677,747],unop:[589,807,808,809,810],unoptim:[617,679,723,759],unord:[590,605,610,611,717,761,764,780],unordered_map:610,unordered_set:743,unorm:[0,2,3,4,9],unpack:[662,680,681,701],unpars:[616,803],unpatch:[702,783],unpeel:590,unpleas:760,unpoison:710,unpred:710,unpredict:760,unprofit:[741,760,776],unprotect:[604,760],unprototyp:710,unqualifi:[607,609,759],unreach:[602,610,632,663,664,669,673,674,676,702,709,714,722,741,805,806,807,808,810],unread:[594,610,682,745,760],unreason:667,unrecogn:[659,770],unrecogniz:640,unrecurs:[808,809,810],unreferenc:[606,709,710],unregist:782,unrel:[610,641,649,662,667,702,743,759,760,780],unreli:[593,712],unreloc:[710,764],unreserv:590,unresolv:[616,811],unrespons:745,unreview:667,unrol:[631,660,750],unroll_and_jam:776,unsaf:[594,617,619,710,759,762,763,776,811],unsat:710,unseen:597,unset:[605,606,733,769,770,810],unsett:782,unshadow:[806,807,808,809,810],unsign:[54,55,56,144,145,240,241,242,342,403,404,405,516,517,518,585,587,588,590,593,597,599,607,610
,611,617,645,659,664,676,677,683,684,710,713,714,716,722,724,727,738,743,759,760,768,779,780,784,785,786,804,805,806,807,808,809,810],unsignedcharact:738,unsimm:607,unsound:764,unspecifi:[585,602,607,627,641,684,710,724,768,770,774,785],unstabl:[604,631,697,710,753],unstructur:785,unsuccess:726,unsuit:[604,667,698,773],unsuppor:590,unsupport:[590,594,607,616,681,688,689,709,710,712,753,775,780],unsupportedfor:688,unsupportedif:688,unsur:[609,667,755],unswitch:714,unswizzl:[585,590],untest:753,until:[585,590,596,597,601,607,609,610,612,616,631,639,640,659,662,663,667,669,674,679,682,683,689,706,709,710,712,715,721,726,727,729,741,742,743,745,753,755,757,759,760,770,773,775,780,782,783,788,789,790,802,803,804,805,806,807,808,809,810],untrust:757,untrusted_data_from_cal:760,untrusted_offset_from_cal:760,untrusted_size_from_cal:760,untyp:[716,727],unus:[27,129,220,288,306,308,378,490,585,588,589,590,602,607,610,611,632,663,711,724,725,728,730,731,735,738,743,756,759,764,776,777,785],unused1:731,unused2:731,unused_pad:[588,590],unused_preserv:[588,590],unused_sext:588,unusu:[607,610,667,675,676,689,743,758,770,811],unvectoriz:779,unvers:776,unvisit:602,unwelcom:608,unwieldi:[723,783],unwind:[585,590,599,605,607,642,648,649,663,669,671,674,695,698,708,710,711,741,780,790],unwindcursor:669,unwindregistersrestor:669,unwindregisterssav:669,unwis:710,unwound:[585,710,760],unwrap:[726,743,788],unwritten:723,unzip:[679,681,753],uop:[631,639],upa:710,upcast:703,upcom:[665,667,719,752],updat:[585,590,596,604,607,609,611,614,621,641,653,659,662,663,669,670,676,679,681,703,709,710,713,714,715,716,720,722,725,726,727,735,738,741,742,743,748,755,756,757,759,760,762,764,776,782,783,788,789,790,803,804,805,806,807,808,809,810,811],upenn:595,upgrad:[667,678,679,727,745,750,759,779],upheld:764,uphold:[710,760],upl:710,upload:[679,702,742,745],upon:[585,621,657,667,676,710,715,726,743,744,748,757,759,780],upper16:671,upper:[585,588,590,610,612,640,710,725,743,748,768,780],uppercas:[590,60
3,770],uppercasemod:769,ups:678,upset:658,upsid:667,upstream:[585,662,667,668,674,679,702,745,764,766],upstream_branch:745,upward:[616,710,746,759],urem:[607,670],urgent:609,uri_encoded_os_file_path:590,url:[717,723,742,745,761],usabl:[607,617,659,698,743,749,759,780,784],usag:[590,599,607,610,611,612,626,630,631,635,637,639,641,642,644,654,659,663,664,679,683,685,697,710,716,722,724,726,729,730,731,738,741,743,751,754,763,764,768,771,773,775,778,782],usb:697,use:[117,205,310,580,585,587,588,589,590,591,592,593,594,595,596,597,598,600,601,602,603,605,606,607,609,611,612,613,614,615,616,617,618,619,621,623,624,625,631,639,641,642,644,645,653,654,655,656,657,658,659,660,662,663,664,665,666,667,668,669,670,671,674,675,676,678,679,681,682,683,684,685,686,687,688,689,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,713,714,715,716,717,718,720,722,723,724,725,727,728,731,735,742,744,745,746,747,748,750,751,753,754,756,757,758,759,760,762,763,764,765,766,768,769,770,771,773,774,775,776,777,778,779,780,781,782,783,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],use_:743,use_back:743,use_begin:[610,743],use_count:712,use_empti:743,use_end:743,use_iter:743,use_s:743,use_value_profil:712,used:[0,1,2,3,4,5,6,7,8,9,25,26,28,31,32,68,82,83,84,107,110,111,117,120,128,130,159,175,196,199,200,205,209,218,219,221,256,270,301,304,305,310,314,316,321,326,327,338,339,340,341,351,355,376,377,379,381,382,420,433,434,435,457,467,468,479,488,489,491,494,495,532,546,547,548,571,574,575,580,584,585,586,587,588,589,590,592,593,594,597,598,599,601,603,604,606,608,609,610,611,612,613,614,616,617,618,621,623,624,625,626,627,630,631,632,636,639,640,641,642,643,644,645,647,648,649,651,653,654,655,656,658,659,660,661,663,664,666,667,668,669,670,671,672,674,675,676,677,679,681,683,684,686,688,689,693,694,695,698,699,700,703,704,705,707,708,709,711,712,713,714,715,716,721,722,723,724,725,726,727,728,731,733,735,736,738,739,743,744,745,747,748,750,751,754
,756,757,759,760,761,762,763,764,765,766,768,769,770,771,773,775,776,777,778,779,780,781,782,783,784,785,786,788,789,790,791,803,804,805,806,807,808,809,810,811],usedlib:744,useful:[585,592,594,597,599,601,602,603,605,606,607,610,611,612,616,617,621,623,624,630,631,639,641,645,654,657,658,659,660,661,663,664,666,668,669,672,674,676,678,679,681,682,684,686,695,696,699,700,705,709,710,717,721,723,724,725,726,727,728,731,738,741,742,744,748,754,756,759,760,761,762,764,766,768,769,770,771,775,780,782,802,803,804,805,806,807,808,810,813],usefulness:585,useless:[596,699,759,806,811],uselistord:710,uselistorder_bb:710,usenamedoperandt:[770,780],user:[585,592,593,594,598,601,602,605,606,607,608,609,610,611,612,613,616,618,623,631,639,645,655,656,658,659,661,662,663,664,667,669,671,676,678,679,681,685,688,695,696,699,700,702,703,709,710,711,713,714,718,719,721,722,723,726,727,739,740,742,745,746,750,752,754,756,757,759,760,761,763,766,768,769,771,775,776,779,780,782,785,787,788,802,803,804,805,806,809,810,811,812,813],user_data:590,user_data_entri:590,user_data_limit:590,user_sgpr:590,user_sgpr_count:590,usercb:712,userdatamap:590,userdefinederror:743,usernam:[667,745,757],userspac:669,uses:[585,589,590,592,593,594,597,598,601,603,605,606,607,608,610,611,612,616,620,621,624,631,639,649,654,659,660,663,666,667,668,669,670,674,675,676,677,679,681,686,689,694,695,698,699,703,705,707,709,710,712,713,714,715,716,721,722,723,724,726,727,728,731,736,741,744,745,748,750,759,760,761,763,764,765,768,770,774,776,777,779,780,781,782,783,785,786,788,802,803,804,805,807,808,810,811,812],uses_append_consum:590,uses_prim_id:590,uses_rov:590,uses_uav:590,uses_viewport_array_index:590,usescustominsert:[770,771],usesmetadata:676,using:[27,84,129,220,378,490,585,586,588,589,590,591,592,593,594,595,596,597,602,603,605,606,607,608,609,611,612,614,616,619,621,623,625,629,630,631,639,641,644,645,652,659,660,662,663,664,665,666,667,668,670,672,673,674,676,677,678,679,680,684,685,686,687,688,689,691,
695,696,697,698,699,702,703,704,707,708,709,710,711,712,713,716,719,721,722,723,724,726,728,736,738,741,742,743,745,746,748,750,751,754,755,756,757,759,760,762,764,765,768,769,770,771,773,774,775,776,778,779,780,781,782,783,784,786,787,788,790,804,805,806,807,808,809,810,813],usr:[605,635,659,660,668,679,694,695,697,700,701,712,724],usual:[589,593,594,597,601,605,607,610,611,623,639,640,657,659,660,663,667,670,671,676,677,679,682,683,684,688,694,700,703,709,710,711,712,713,714,716,717,722,723,726,727,731,736,738,742,743,745,756,757,758,759,761,763,766,769,770,773,774,775,780,782,788,810,811],utc:678,utf:[585,590,658,671,768],util:[585,590,592,593,605,610,619,621,623,639,640,642,645,646,658,659,667,668,676,681,693,699,702,703,704,710,714,726,743,753,757,768,771,773,778,779,780,781,788,790,803,808,809],uuid:630,uuidcreat:736,uval:710,uvari:605,uword:590,uwtabl:[597,710,759,783],v10:660,v110:731,v128:[710,724],v16:724,v16f32:710,v16i1:710,v16i32:710,v16p0f32:710,v16p1f32:710,v1f64:683,v1size:593,v200:590,v252:589,v253:589,v254:589,v255:[588,589],v256f64:710,v256i64:710,v2f64:710,v2i32:710,v2i64:710,v2i8:710,v2p1f64:710,v2s32:688,v2s64:688,v2size:593,v31:710,v32:[589,724],v36:588,v3i32:683,v40:738,v41:738,v4f128:710,v4f32:[683,710],v4f64:710,v4i100:710,v4i1:710,v4i32:710,v4i8:710,v4p0f64:710,v4p0f_i32f:710,v4p0p0f64:710,v4s32:688,v4t:700,v50:[731,738],v520:590,v60:731,v64:[710,724],v70:[731,738],v80:738,v8deprecatedinst:780,v8f64:710,v8i16:780,v8i1:710,v8i32:710,v8p0f32:710,v8p0f64:710,v8p0f_i32f:710,v8p0i32:710,v8p0p0f32:710,v_accvgpr_mov_b32:9,v_accvgpr_read_b32:[8,9],v_accvgpr_write_b32:[8,9],v_add3_u32:[0,4,9],v_add_co_ci_u32:0,v_add_co_ci_u32_dpp:0,v_add_co_ci_u32_e64:0,v_add_co_ci_u32_sdwa:0,v_add_co_u32:[0,4,9],v_add_co_u32_dpp:[4,9],v_add_co_u32_e32:589,v_add_co_u32_e64:[4,9],v_add_co_u32_sdwa:[4,9],v_add_f16:[0,3,4,9,589],v_add_f16_dpp:[0,3,4,9],v_add_f16_e64:[0,3,4,9],v_add_f16_sdwa:[0,3,4,9],v_add_f32:[0,2,3,4,9,589,590],v_add_f32_dpp:[0,3,4,9],v_add_f32_e32:
590,v_add_f32_e64:[0,2,3,4,9],v_add_f32_sdwa:[0,3,4,9],v_add_f64:[0,2,3,4,9],v_add_i16:[4,9],v_add_i32:[2,4,9],v_add_i32_e64:2,v_add_lshl_u32:[0,4,9],v_add_nc_i16:0,v_add_nc_i32:0,v_add_nc_u16:0,v_add_nc_u32:0,v_add_nc_u32_dpp:0,v_add_nc_u32_e64:0,v_add_nc_u32_sdwa:0,v_add_u16:[3,4,9,589],v_add_u16_dpp:[3,4,9],v_add_u16_e64:[3,4,9],v_add_u16_sdwa:[3,4,9],v_add_u32:[3,4,9,589],v_add_u32_dpp:[3,4,9],v_add_u32_e64:[3,4,9],v_add_u32_sdwa:[3,4,9],v_addc_co_u32:[4,9],v_addc_co_u32_dpp:[4,9],v_addc_co_u32_e64:[4,9],v_addc_co_u32_sdwa:[4,9],v_addc_u32:[2,3,590],v_addc_u32_dpp:3,v_addc_u32_e64:[2,3],v_addc_u32_sdwa:3,v_alignbit_b32:[0,2,3,4,9],v_alignbyte_b32:[0,2,3,4,9],v_and_b32:[0,2,3,4,9],v_and_b32_dpp:[0,3,4,9],v_and_b32_e64:[0,2,3,4,9],v_and_b32_sdwa:[0,3,4,9],v_and_or_b32:[0,4,9],v_ashr_i32:2,v_ashr_i32_e64:2,v_ashr_i64:2,v_ashrrev_i16:[0,3,4,9],v_ashrrev_i16_dpp:[3,4,9],v_ashrrev_i16_e64:[3,4,9],v_ashrrev_i16_sdwa:[3,4,9],v_ashrrev_i32:[0,2,3,4,9],v_ashrrev_i32_dpp:[0,3,4,9],v_ashrrev_i32_e64:[0,2,3,4,9],v_ashrrev_i32_sdwa:[0,3,4,9],v_ashrrev_i64:[0,3,4,9],v_bcnt_u32_b32:[0,2,3,4,9],v_bcnt_u32_b32_e64:2,v_bfe_i32:[0,2,3,4,9],v_bfe_u32:[0,2,3,4,9],v_bfi_b32:[0,2,3,4,9],v_bfm_b32:[0,2,3,4,9],v_bfm_b32_e64:2,v_bfrev_b32:[0,2,3,4,9],v_bfrev_b32_dpp:[0,3,4,9],v_bfrev_b32_e32:590,v_bfrev_b32_e64:[0,2,3,4,9],v_bfrev_b32_sdwa:[0,3,4,9],v_ceil_f16:[0,3,4,9],v_ceil_f16_dpp:[0,3,4,9],v_ceil_f16_e64:[0,3,4,9],v_ceil_f16_sdwa:[0,3,4,9],v_ceil_f32:[0,2,3,4,9],v_ceil_f32_dpp:[0,3,4,9],v_ceil_f32_e64:[0,2,3,4,9],v_ceil_f32_sdwa:[0,3,4,9],v_ceil_f64:[0,2,3,4,9,589],v_ceil_f64_dpp:9,v_ceil_f64_e32:589,v_ceil_f64_e64:[0,2,3,4,9],v_clrexcp:[0,2,3,4,9],v_clrexcp_e64:[0,2,3,4,9],v_cmp:588,v_cmp_class_f16:[0,3,4,9],v_cmp_class_f16_e64:[0,3,4,9],v_cmp_class_f16_sdwa:[0,3,4,9],v_cmp_class_f32:[0,2,3,4,9],v_cmp_class_f32_e64:[0,2,3,4,9],v_cmp_class_f32_sdwa:[0,3,4,9],v_cmp_class_f64:[0,2,3,4,9],v_cmp_class_f64_e64:[0,2,3,4,9],v_cmp_eq_f16:[0,3,4,9],v_cmp_eq_f16_e64:[0,3,4,9],v_cmp_eq_f16_sdwa
:[0,3,4,9],v_cmp_eq_f32:[0,2,3,4,9],v_cmp_eq_f32_e64:[0,2,3,4,9],v_cmp_eq_f32_sdwa:[0,3,4,9],v_cmp_eq_f64:[0,2,3,4,9],v_cmp_eq_f64_e64:[0,2,3,4,9],v_cmp_eq_i16:[0,3,4,9],v_cmp_eq_i16_e64:[0,3,4,9],v_cmp_eq_i16_sdwa:[0,3,4,9],v_cmp_eq_i32:[0,2,3,4,9],v_cmp_eq_i32_e64:[0,2,3,4,9],v_cmp_eq_i32_sdwa:[0,3,4,9],v_cmp_eq_i64:[0,2,3,4,9],v_cmp_eq_i64_e64:[0,2,3,4,9],v_cmp_eq_u16:[0,3,4,9],v_cmp_eq_u16_e64:[0,3,4,9],v_cmp_eq_u16_sdwa:[0,3,4,9],v_cmp_eq_u32:[0,2,3,4,9],v_cmp_eq_u32_e64:[0,2,3,4,9],v_cmp_eq_u32_sdwa:[0,3,4,9],v_cmp_eq_u64:[0,2,3,4,9],v_cmp_eq_u64_e64:[0,2,3,4,9],v_cmp_f_f16:[0,3,4,9],v_cmp_f_f16_e64:[0,3,4,9],v_cmp_f_f16_sdwa:[0,3,4,9],v_cmp_f_f32:[0,2,3,4,9],v_cmp_f_f32_e64:[0,2,3,4,9],v_cmp_f_f32_sdwa:[0,3,4,9],v_cmp_f_f64:[0,2,3,4,9],v_cmp_f_f64_e64:[0,2,3,4,9],v_cmp_f_i16:[3,4,9],v_cmp_f_i16_e64:[3,4,9],v_cmp_f_i16_sdwa:[3,4,9],v_cmp_f_i32:[0,2,3,4,9],v_cmp_f_i32_e64:[0,2,3,4,9],v_cmp_f_i32_sdwa:[0,3,4,9],v_cmp_f_i64:[0,2,3,4,9],v_cmp_f_i64_e64:[0,2,3,4,9],v_cmp_f_u16:[3,4,9],v_cmp_f_u16_e64:[3,4,9],v_cmp_f_u16_sdwa:[3,4,9],v_cmp_f_u32:[0,2,3,4,9],v_cmp_f_u32_e64:[0,2,3,4,9],v_cmp_f_u32_sdwa:[0,3,4,9],v_cmp_f_u64:[0,2,3,4,9],v_cmp_f_u64_e64:[0,2,3,4,9],v_cmp_ge_f16:[0,3,4,9],v_cmp_ge_f16_e64:[0,3,4,9],v_cmp_ge_f16_sdwa:[0,3,4,9],v_cmp_ge_f32:[0,2,3,4,9],v_cmp_ge_f32_e64:[0,2,3,4,9],v_cmp_ge_f32_sdwa:[0,3,4,9],v_cmp_ge_f64:[0,2,3,4,9],v_cmp_ge_f64_e64:[0,2,3,4,9],v_cmp_ge_i16:[0,3,4,9],v_cmp_ge_i16_e64:[0,3,4,9],v_cmp_ge_i16_sdwa:[0,3,4,9],v_cmp_ge_i32:[0,2,3,4,9],v_cmp_ge_i32_e64:[0,2,3,4,9],v_cmp_ge_i32_sdwa:[0,3,4,9],v_cmp_ge_i64:[0,2,3,4,9],v_cmp_ge_i64_e64:[0,2,3,4,9],v_cmp_ge_u16:[0,3,4,9],v_cmp_ge_u16_e64:[0,3,4,9],v_cmp_ge_u16_sdwa:[0,3,4,9],v_cmp_ge_u32:[0,2,3,4,9],v_cmp_ge_u32_e64:[0,2,3,4,9],v_cmp_ge_u32_sdwa:[0,3,4,9],v_cmp_ge_u64:[0,2,3,4,9],v_cmp_ge_u64_e64:[0,2,3,4,9],v_cmp_gt_f16:[0,3,4,9],v_cmp_gt_f16_e64:[0,3,4,9],v_cmp_gt_f16_sdwa:[0,3,4,9],v_cmp_gt_f32:[0,2,3,4,9],v_cmp_gt_f32_e64:[0,2,3,4,9],v_cmp_gt_f32_sdwa:[0,3,4,9],v_cmp_gt_f64:[0,2
,3,4,9],v_cmp_gt_f64_e64:[0,2,3,4,9],v_cmp_gt_i16:[0,3,4,9],v_cmp_gt_i16_e64:[0,3,4,9],v_cmp_gt_i16_sdwa:[0,3,4,9],v_cmp_gt_i32:[0,2,3,4,9],v_cmp_gt_i32_e64:[0,2,3,4,9],v_cmp_gt_i32_sdwa:[0,3,4,9],v_cmp_gt_i64:[0,2,3,4,9],v_cmp_gt_i64_e64:[0,2,3,4,9],v_cmp_gt_u16:[0,3,4,9],v_cmp_gt_u16_e64:[0,3,4,9],v_cmp_gt_u16_sdwa:[0,3,4,9],v_cmp_gt_u32:[0,2,3,4,9],v_cmp_gt_u32_e64:[0,2,3,4,9],v_cmp_gt_u32_sdwa:[0,3,4,9],v_cmp_gt_u64:[0,2,3,4,9],v_cmp_gt_u64_e64:[0,2,3,4,9],v_cmp_le_f16:[0,3,4,9],v_cmp_le_f16_e64:[0,3,4,9],v_cmp_le_f16_sdwa:[0,3,4,9],v_cmp_le_f32:[0,2,3,4,9],v_cmp_le_f32_e64:[0,2,3,4,9],v_cmp_le_f32_sdwa:[0,3,4,9],v_cmp_le_f64:[0,2,3,4,9],v_cmp_le_f64_e64:[0,2,3,4,9],v_cmp_le_i16:[0,3,4,9],v_cmp_le_i16_e64:[0,3,4,9],v_cmp_le_i16_sdwa:[0,3,4,9],v_cmp_le_i32:[0,2,3,4,9],v_cmp_le_i32_e64:[0,2,3,4,9],v_cmp_le_i32_sdwa:[0,3,4,9],v_cmp_le_i64:[0,2,3,4,9],v_cmp_le_i64_e64:[0,2,3,4,9],v_cmp_le_u16:[0,3,4,9],v_cmp_le_u16_e64:[0,3,4,9],v_cmp_le_u16_sdwa:[0,3,4,9],v_cmp_le_u32:[0,2,3,4,9],v_cmp_le_u32_e64:[0,2,3,4,9],v_cmp_le_u32_sdwa:[0,3,4,9],v_cmp_le_u64:[0,2,3,4,9],v_cmp_le_u64_e64:[0,2,3,4,9],v_cmp_lg_f16:[0,3,4,9],v_cmp_lg_f16_e64:[0,3,4,9],v_cmp_lg_f16_sdwa:[0,3,4,9],v_cmp_lg_f32:[0,2,3,4,9],v_cmp_lg_f32_e64:[0,2,3,4,9],v_cmp_lg_f32_sdwa:[0,3,4,9],v_cmp_lg_f64:[0,2,3,4,9],v_cmp_lg_f64_e64:[0,2,3,4,9],v_cmp_lt_f16:[0,3,4,9],v_cmp_lt_f16_e64:[0,3,4,9],v_cmp_lt_f16_sdwa:[0,3,4,9],v_cmp_lt_f32:[0,2,3,4,9],v_cmp_lt_f32_e64:[0,2,3,4,9],v_cmp_lt_f32_sdwa:[0,3,4,9],v_cmp_lt_f64:[0,2,3,4,9],v_cmp_lt_f64_e64:[0,2,3,4,9],v_cmp_lt_i16:[0,3,4,9],v_cmp_lt_i16_e64:[0,3,4,9],v_cmp_lt_i16_sdwa:[0,3,4,9],v_cmp_lt_i32:[0,2,3,4,9],v_cmp_lt_i32_e64:[0,2,3,4,9],v_cmp_lt_i32_sdwa:[0,3,4,9],v_cmp_lt_i64:[0,2,3,4,9],v_cmp_lt_i64_e64:[0,2,3,4,9],v_cmp_lt_u16:[0,3,4,9],v_cmp_lt_u16_e64:[0,3,4,9],v_cmp_lt_u16_sdwa:[0,3,4,9],v_cmp_lt_u32:[0,2,3,4,9],v_cmp_lt_u32_e64:[0,2,3,4,9],v_cmp_lt_u32_sdwa:[0,3,4,9],v_cmp_lt_u64:[0,2,3,4,9],v_cmp_lt_u64_e64:[0,2,3,4,9],v_cmp_ne_i16:[0,3,4,9],v_cmp_ne_i16_e
64:[0,3,4,9],v_cmp_ne_i16_sdwa:[0,3,4,9],v_cmp_ne_i32:[0,2,3,4,9],v_cmp_ne_i32_e64:[0,2,3,4,9],v_cmp_ne_i32_sdwa:[0,3,4,9],v_cmp_ne_i64:[0,2,3,4,9],v_cmp_ne_i64_e64:[0,2,3,4,9],v_cmp_ne_u16:[0,3,4,9],v_cmp_ne_u16_e64:[0,3,4,9],v_cmp_ne_u16_sdwa:[0,3,4,9],v_cmp_ne_u32:[0,2,3,4,9],v_cmp_ne_u32_e64:[0,2,3,4,9],v_cmp_ne_u32_sdwa:[0,3,4,9],v_cmp_ne_u64:[0,2,3,4,9],v_cmp_ne_u64_e64:[0,2,3,4,9],v_cmp_neq_f16:[0,3,4,9],v_cmp_neq_f16_e64:[0,3,4,9],v_cmp_neq_f16_sdwa:[0,3,4,9],v_cmp_neq_f32:[0,2,3,4,9],v_cmp_neq_f32_e64:[0,2,3,4,9],v_cmp_neq_f32_sdwa:[0,3,4,9],v_cmp_neq_f64:[0,2,3,4,9],v_cmp_neq_f64_e64:[0,2,3,4,9],v_cmp_nge_f16:[0,3,4,9],v_cmp_nge_f16_e64:[0,3,4,9],v_cmp_nge_f16_sdwa:[0,3,4,9],v_cmp_nge_f32:[0,2,3,4,9],v_cmp_nge_f32_e64:[0,2,3,4,9],v_cmp_nge_f32_sdwa:[0,3,4,9],v_cmp_nge_f64:[0,2,3,4,9],v_cmp_nge_f64_e64:[0,2,3,4,9],v_cmp_ngt_f16:[0,3,4,9],v_cmp_ngt_f16_e64:[0,3,4,9],v_cmp_ngt_f16_sdwa:[0,3,4,9],v_cmp_ngt_f32:[0,2,3,4,9],v_cmp_ngt_f32_e64:[0,2,3,4,9],v_cmp_ngt_f32_sdwa:[0,3,4,9],v_cmp_ngt_f64:[0,2,3,4,9],v_cmp_ngt_f64_e64:[0,2,3,4,9],v_cmp_nle_f16:[0,3,4,9],v_cmp_nle_f16_e64:[0,3,4,9],v_cmp_nle_f16_sdwa:[0,3,4,9],v_cmp_nle_f32:[0,2,3,4,9],v_cmp_nle_f32_e64:[0,2,3,4,9],v_cmp_nle_f32_sdwa:[0,3,4,9],v_cmp_nle_f64:[0,2,3,4,9],v_cmp_nle_f64_e64:[0,2,3,4,9],v_cmp_nlg_f16:[0,3,4,9],v_cmp_nlg_f16_e64:[0,3,4,9],v_cmp_nlg_f16_sdwa:[0,3,4,9],v_cmp_nlg_f32:[0,2,3,4,9],v_cmp_nlg_f32_e64:[0,2,3,4,9],v_cmp_nlg_f32_sdwa:[0,3,4,9],v_cmp_nlg_f64:[0,2,3,4,9],v_cmp_nlg_f64_e64:[0,2,3,4,9],v_cmp_nlt_f16:[0,3,4,9],v_cmp_nlt_f16_e64:[0,3,4,9],v_cmp_nlt_f16_sdwa:[0,3,4,9],v_cmp_nlt_f32:[0,2,3,4,9],v_cmp_nlt_f32_e64:[0,2,3,4,9],v_cmp_nlt_f32_sdwa:[0,3,4,9],v_cmp_nlt_f64:[0,2,3,4,9],v_cmp_nlt_f64_e64:[0,2,3,4,9],v_cmp_o_f16:[0,3,4,9],v_cmp_o_f16_e64:[0,3,4,9],v_cmp_o_f16_sdwa:[0,3,4,9],v_cmp_o_f32:[0,2,3,4,9],v_cmp_o_f32_e64:[0,2,3,4,9],v_cmp_o_f32_sdwa:[0,3,4,9],v_cmp_o_f64:[0,2,3,4,9],v_cmp_o_f64_e64:[0,2,3,4,9],v_cmp_t_i16:[3,4,9],v_cmp_t_i16_e64:[3,4,9],v_cmp_t_i16_sdwa:[3,4,9],v_
cmp_t_i32:[0,2,3,4,9],v_cmp_t_i32_e64:[0,2,3,4,9],v_cmp_t_i32_sdwa:[0,3,4,9],v_cmp_t_i64:[0,2,3,4,9],v_cmp_t_i64_e64:[0,2,3,4,9],v_cmp_t_u16:[3,4,9],v_cmp_t_u16_e64:[3,4,9],v_cmp_t_u16_sdwa:[3,4,9],v_cmp_t_u32:[0,2,3,4,9],v_cmp_t_u32_e64:[0,2,3,4,9],v_cmp_t_u32_sdwa:[0,3,4,9],v_cmp_t_u64:[0,2,3,4,9],v_cmp_t_u64_e64:[0,2,3,4,9],v_cmp_tru_f16:[0,3,4,9],v_cmp_tru_f16_e64:[0,3,4,9],v_cmp_tru_f16_sdwa:[0,3,4,9],v_cmp_tru_f32:[0,2,3,4,9],v_cmp_tru_f32_e64:[0,2,3,4,9],v_cmp_tru_f32_sdwa:[0,3,4,9],v_cmp_tru_f64:[0,2,3,4,9],v_cmp_tru_f64_e64:[0,2,3,4,9],v_cmp_u_f16:[0,3,4,9],v_cmp_u_f16_e64:[0,3,4,9],v_cmp_u_f16_sdwa:[0,3,4,9],v_cmp_u_f32:[0,2,3,4,9],v_cmp_u_f32_e64:[0,2,3,4,9],v_cmp_u_f32_sdwa:[0,3,4,9],v_cmp_u_f64:[0,2,3,4,9],v_cmp_u_f64_e64:[0,2,3,4,9],v_cmps_eq_f32:2,v_cmps_eq_f32_e64:2,v_cmps_eq_f64:2,v_cmps_eq_f64_e64:2,v_cmps_f_f32:2,v_cmps_f_f32_e64:2,v_cmps_f_f64:2,v_cmps_f_f64_e64:2,v_cmps_ge_f32:2,v_cmps_ge_f32_e64:2,v_cmps_ge_f64:2,v_cmps_ge_f64_e64:2,v_cmps_gt_f32:2,v_cmps_gt_f32_e64:2,v_cmps_gt_f64:2,v_cmps_gt_f64_e64:2,v_cmps_le_f32:2,v_cmps_le_f32_e64:2,v_cmps_le_f64:2,v_cmps_le_f64_e64:2,v_cmps_lg_f32:2,v_cmps_lg_f32_e64:2,v_cmps_lg_f64:2,v_cmps_lg_f64_e64:2,v_cmps_lt_f32:2,v_cmps_lt_f32_e64:2,v_cmps_lt_f64:2,v_cmps_lt_f64_e64:2,v_cmps_neq_f32:2,v_cmps_neq_f32_e64:2,v_cmps_neq_f64:2,v_cmps_neq_f64_e64:2,v_cmps_nge_f32:2,v_cmps_nge_f32_e64:2,v_cmps_nge_f64:2,v_cmps_nge_f64_e64:2,v_cmps_ngt_f32:2,v_cmps_ngt_f32_e64:2,v_cmps_ngt_f64:2,v_cmps_ngt_f64_e64:2,v_cmps_nle_f32:2,v_cmps_nle_f32_e64:2,v_cmps_nle_f64:2,v_cmps_nle_f64_e64:2,v_cmps_nlg_f32:2,v_cmps_nlg_f32_e64:2,v_cmps_nlg_f64:2,v_cmps_nlg_f64_e64:2,v_cmps_nlt_f32:2,v_cmps_nlt_f32_e64:2,v_cmps_nlt_f64:2,v_cmps_nlt_f64_e64:2,v_cmps_o_f32:2,v_cmps_o_f32_e64:2,v_cmps_o_f64:2,v_cmps_o_f64_e64:2,v_cmps_tru_f32:2,v_cmps_tru_f32_e64:2,v_cmps_tru_f64:2,v_cmps_tru_f64_e64:2,v_cmps_u_f32:2,v_cmps_u_f32_e64:2,v_cmps_u_f64:2,v_cmps_u_f64_e64:2,v_cmpsx_eq_f32:2,v_cmpsx_eq_f32_e64:2,v_cmpsx_eq_f64:2,v_cmpsx_eq_f64_e64:2
,v_cmpsx_f_f32:2,v_cmpsx_f_f32_e64:2,v_cmpsx_f_f64:2,v_cmpsx_f_f64_e64:2,v_cmpsx_ge_f32:2,v_cmpsx_ge_f32_e64:2,v_cmpsx_ge_f64:2,v_cmpsx_ge_f64_e64:2,v_cmpsx_gt_f32:2,v_cmpsx_gt_f32_e64:2,v_cmpsx_gt_f64:2,v_cmpsx_gt_f64_e64:2,v_cmpsx_le_f32:2,v_cmpsx_le_f32_e64:2,v_cmpsx_le_f64:2,v_cmpsx_le_f64_e64:2,v_cmpsx_lg_f32:2,v_cmpsx_lg_f32_e64:2,v_cmpsx_lg_f64:2,v_cmpsx_lg_f64_e64:2,v_cmpsx_lt_f32:2,v_cmpsx_lt_f32_e64:2,v_cmpsx_lt_f64:2,v_cmpsx_lt_f64_e64:2,v_cmpsx_neq_f32:2,v_cmpsx_neq_f32_e64:2,v_cmpsx_neq_f64:2,v_cmpsx_neq_f64_e64:2,v_cmpsx_nge_f32:2,v_cmpsx_nge_f32_e64:2,v_cmpsx_nge_f64:2,v_cmpsx_nge_f64_e64:2,v_cmpsx_ngt_f32:2,v_cmpsx_ngt_f32_e64:2,v_cmpsx_ngt_f64:2,v_cmpsx_ngt_f64_e64:2,v_cmpsx_nle_f32:2,v_cmpsx_nle_f32_e64:2,v_cmpsx_nle_f64:2,v_cmpsx_nle_f64_e64:2,v_cmpsx_nlg_f32:2,v_cmpsx_nlg_f32_e64:2,v_cmpsx_nlg_f64:2,v_cmpsx_nlg_f64_e64:2,v_cmpsx_nlt_f32:2,v_cmpsx_nlt_f32_e64:2,v_cmpsx_nlt_f64:2,v_cmpsx_nlt_f64_e64:2,v_cmpsx_o_f32:2,v_cmpsx_o_f32_e64:2,v_cmpsx_o_f64:2,v_cmpsx_o_f64_e64:2,v_cmpsx_tru_f32:2,v_cmpsx_tru_f32_e64:2,v_cmpsx_tru_f64:2,v_cmpsx_tru_f64_e64:2,v_cmpsx_u_f32:2,v_cmpsx_u_f32_e64:2,v_cmpsx_u_f64:2,v_cmpsx_u_f64_e64:2,v_cmpx_class_f16:[0,3,4,9],v_cmpx_class_f16_e64:[0,3,4,9],v_cmpx_class_f16_sdwa:[0,3,4,9],v_cmpx_class_f32:[0,2,3,4,9],v_cmpx_class_f32_e64:[0,2,3,4,9],v_cmpx_class_f32_sdwa:[0,3,4,9],v_cmpx_class_f64:[0,2,3,4,9],v_cmpx_class_f64_e64:[0,2,3,4,9],v_cmpx_eq_f16:[0,3,4,9],v_cmpx_eq_f16_e64:[0,3,4,9],v_cmpx_eq_f16_sdwa:[0,3,4,9],v_cmpx_eq_f32:[0,2,3,4,9],v_cmpx_eq_f32_e64:[0,2,3,4,9],v_cmpx_eq_f32_sdwa:[0,3,4,9],v_cmpx_eq_f64:[0,2,3,4,9],v_cmpx_eq_f64_e64:[0,2,3,4,9],v_cmpx_eq_i16:[0,3,4,9],v_cmpx_eq_i16_e64:[0,3,4,9],v_cmpx_eq_i16_sdwa:[0,3,4,9],v_cmpx_eq_i32:[0,2,3,4,9],v_cmpx_eq_i32_e64:[0,2,3,4,9],v_cmpx_eq_i32_sdwa:[0,3,4,9],v_cmpx_eq_i64:[0,2,3,4,9],v_cmpx_eq_i64_e64:[0,2,3,4,9],v_cmpx_eq_u16:[0,3,4,9],v_cmpx_eq_u16_e64:[0,3,4,9],v_cmpx_eq_u16_sdwa:[0,3,4,9],v_cmpx_eq_u32:[0,2,3,4,9],v_cmpx_eq_u32_e64:[0,2,3,4,9],v_cmpx_eq_u32_sd
wa:[0,3,4,9],v_cmpx_eq_u64:[0,2,3,4,9],v_cmpx_eq_u64_e64:[0,2,3,4,9],v_cmpx_f_f16:[0,3,4,9],v_cmpx_f_f16_e64:[0,3,4,9],v_cmpx_f_f16_sdwa:[0,3,4,9],v_cmpx_f_f32:[0,2,3,4,9],v_cmpx_f_f32_e64:[0,2,3,4,9],v_cmpx_f_f32_sdwa:[0,3,4,9],v_cmpx_f_f64:[0,2,3,4,9],v_cmpx_f_f64_e64:[0,2,3,4,9],v_cmpx_f_i16:[3,4,9],v_cmpx_f_i16_e64:[3,4,9],v_cmpx_f_i16_sdwa:[3,4,9],v_cmpx_f_i32:[0,2,3,4,9],v_cmpx_f_i32_e64:[0,2,3,4,9],v_cmpx_f_i32_sdwa:[0,3,4,9],v_cmpx_f_i64:[0,2,3,4,9],v_cmpx_f_i64_e64:[0,2,3,4,9],v_cmpx_f_u16:[3,4,9],v_cmpx_f_u16_e64:[3,4,9],v_cmpx_f_u16_sdwa:[3,4,9],v_cmpx_f_u32:[0,2,3,4,9],v_cmpx_f_u32_e64:[0,2,3,4,9],v_cmpx_f_u32_sdwa:[0,3,4,9],v_cmpx_f_u64:[0,2,3,4,9],v_cmpx_f_u64_e64:[0,2,3,4,9],v_cmpx_ge_f16:[0,3,4,9],v_cmpx_ge_f16_e64:[0,3,4,9],v_cmpx_ge_f16_sdwa:[0,3,4,9],v_cmpx_ge_f32:[0,2,3,4,9],v_cmpx_ge_f32_e64:[0,2,3,4,9],v_cmpx_ge_f32_sdwa:[0,3,4,9],v_cmpx_ge_f64:[0,2,3,4,9],v_cmpx_ge_f64_e64:[0,2,3,4,9],v_cmpx_ge_i16:[0,3,4,9],v_cmpx_ge_i16_e64:[0,3,4,9],v_cmpx_ge_i16_sdwa:[0,3,4,9],v_cmpx_ge_i32:[0,2,3,4,9],v_cmpx_ge_i32_e64:[0,2,3,4,9],v_cmpx_ge_i32_sdwa:[0,3,4,9],v_cmpx_ge_i64:[0,2,3,4,9],v_cmpx_ge_i64_e64:[0,2,3,4,9],v_cmpx_ge_u16:[0,3,4,9],v_cmpx_ge_u16_e64:[0,3,4,9],v_cmpx_ge_u16_sdwa:[0,3,4,9],v_cmpx_ge_u32:[0,2,3,4,9],v_cmpx_ge_u32_e64:[0,2,3,4,9],v_cmpx_ge_u32_sdwa:[0,3,4,9],v_cmpx_ge_u64:[0,2,3,4,9],v_cmpx_ge_u64_e64:[0,2,3,4,9],v_cmpx_gt_f16:[0,3,4,9],v_cmpx_gt_f16_e64:[0,3,4,9],v_cmpx_gt_f16_sdwa:[0,3,4,9],v_cmpx_gt_f32:[0,2,3,4,9],v_cmpx_gt_f32_e64:[0,2,3,4,9],v_cmpx_gt_f32_sdwa:[0,3,4,9],v_cmpx_gt_f64:[0,2,3,4,9],v_cmpx_gt_f64_e64:[0,2,3,4,9],v_cmpx_gt_i16:[0,3,4,9],v_cmpx_gt_i16_e64:[0,3,4,9],v_cmpx_gt_i16_sdwa:[0,3,4,9],v_cmpx_gt_i32:[0,2,3,4,9],v_cmpx_gt_i32_e64:[0,2,3,4,9],v_cmpx_gt_i32_sdwa:[0,3,4,9],v_cmpx_gt_i64:[0,2,3,4,9],v_cmpx_gt_i64_e64:[0,2,3,4,9],v_cmpx_gt_u16:[0,3,4,9],v_cmpx_gt_u16_e64:[0,3,4,9],v_cmpx_gt_u16_sdwa:[0,3,4,9],v_cmpx_gt_u32:[0,2,3,4,9],v_cmpx_gt_u32_e64:[0,2,3,4,9],v_cmpx_gt_u32_sdwa:[0,3,4,9],v_cmpx_gt_u64:[0,2,3,4,9]
,v_cmpx_gt_u64_e64:[0,2,3,4,9],v_cmpx_le_f16:[0,3,4,9],v_cmpx_le_f16_e64:[0,3,4,9],v_cmpx_le_f16_sdwa:[0,3,4,9],v_cmpx_le_f32:[0,2,3,4,9],v_cmpx_le_f32_e64:[0,2,3,4,9],v_cmpx_le_f32_sdwa:[0,3,4,9],v_cmpx_le_f64:[0,2,3,4,9],v_cmpx_le_f64_e64:[0,2,3,4,9],v_cmpx_le_i16:[0,3,4,9],v_cmpx_le_i16_e64:[0,3,4,9],v_cmpx_le_i16_sdwa:[0,3,4,9],v_cmpx_le_i32:[0,2,3,4,9],v_cmpx_le_i32_e64:[0,2,3,4,9],v_cmpx_le_i32_sdwa:[0,3,4,9],v_cmpx_le_i64:[0,2,3,4,9],v_cmpx_le_i64_e64:[0,2,3,4,9],v_cmpx_le_u16:[0,3,4,9],v_cmpx_le_u16_e64:[0,3,4,9],v_cmpx_le_u16_sdwa:[0,3,4,9],v_cmpx_le_u32:[0,2,3,4,9,590],v_cmpx_le_u32_e64:[0,2,3,4,9],v_cmpx_le_u32_sdwa:[0,3,4,9],v_cmpx_le_u64:[0,2,3,4,9],v_cmpx_le_u64_e64:[0,2,3,4,9],v_cmpx_lg_f16:[0,3,4,9],v_cmpx_lg_f16_e64:[0,3,4,9],v_cmpx_lg_f16_sdwa:[0,3,4,9],v_cmpx_lg_f32:[0,2,3,4,9],v_cmpx_lg_f32_e64:[0,2,3,4,9],v_cmpx_lg_f32_sdwa:[0,3,4,9],v_cmpx_lg_f64:[0,2,3,4,9],v_cmpx_lg_f64_e64:[0,2,3,4,9],v_cmpx_lt_f16:[0,3,4,9],v_cmpx_lt_f16_e64:[0,3,4,9],v_cmpx_lt_f16_sdwa:[0,3,4,9],v_cmpx_lt_f32:[0,2,3,4,9],v_cmpx_lt_f32_e64:[0,2,3,4,9],v_cmpx_lt_f32_sdwa:[0,3,4,9],v_cmpx_lt_f64:[0,2,3,4,9],v_cmpx_lt_f64_e64:[0,2,3,4,9],v_cmpx_lt_i16:[0,3,4,9],v_cmpx_lt_i16_e64:[0,3,4,9],v_cmpx_lt_i16_sdwa:[0,3,4,9],v_cmpx_lt_i32:[0,2,3,4,9],v_cmpx_lt_i32_e64:[0,2,3,4,9],v_cmpx_lt_i32_sdwa:[0,3,4,9],v_cmpx_lt_i64:[0,2,3,4,9],v_cmpx_lt_i64_e64:[0,2,3,4,9],v_cmpx_lt_u16:[0,3,4,9],v_cmpx_lt_u16_e64:[0,3,4,9],v_cmpx_lt_u16_sdwa:[0,3,4,9],v_cmpx_lt_u32:[0,2,3,4,9],v_cmpx_lt_u32_e64:[0,2,3,4,9],v_cmpx_lt_u32_sdwa:[0,3,4,9],v_cmpx_lt_u64:[0,2,3,4,9],v_cmpx_lt_u64_e64:[0,2,3,4,9],v_cmpx_ne_i16:[0,3,4,9],v_cmpx_ne_i16_e64:[0,3,4,9],v_cmpx_ne_i16_sdwa:[0,3,4,9],v_cmpx_ne_i32:[0,2,3,4,9],v_cmpx_ne_i32_e64:[0,2,3,4,9],v_cmpx_ne_i32_sdwa:[0,3,4,9],v_cmpx_ne_i64:[0,2,3,4,9],v_cmpx_ne_i64_e64:[0,2,3,4,9],v_cmpx_ne_u16:[0,3,4,9],v_cmpx_ne_u16_e64:[0,3,4,9],v_cmpx_ne_u16_sdwa:[0,3,4,9],v_cmpx_ne_u32:[0,2,3,4,9],v_cmpx_ne_u32_e64:[0,2,3,4,9],v_cmpx_ne_u32_sdwa:[0,3,4,9],v_cmpx_ne_u64:[0,2,3,4,
9],v_cmpx_ne_u64_e64:[0,2,3,4,9],v_cmpx_neq_f16:[0,3,4,9],v_cmpx_neq_f16_e64:[0,3,4,9],v_cmpx_neq_f16_sdwa:[0,3,4,9],v_cmpx_neq_f32:[0,2,3,4,9],v_cmpx_neq_f32_e64:[0,2,3,4,9],v_cmpx_neq_f32_sdwa:[0,3,4,9],v_cmpx_neq_f64:[0,2,3,4,9],v_cmpx_neq_f64_e64:[0,2,3,4,9],v_cmpx_nge_f16:[0,3,4,9],v_cmpx_nge_f16_e64:[0,3,4,9],v_cmpx_nge_f16_sdwa:[0,3,4,9],v_cmpx_nge_f32:[0,2,3,4,9],v_cmpx_nge_f32_e64:[0,2,3,4,9],v_cmpx_nge_f32_sdwa:[0,3,4,9],v_cmpx_nge_f64:[0,2,3,4,9],v_cmpx_nge_f64_e64:[0,2,3,4,9],v_cmpx_ngt_f16:[0,3,4,9],v_cmpx_ngt_f16_e64:[0,3,4,9],v_cmpx_ngt_f16_sdwa:[0,3,4,9],v_cmpx_ngt_f32:[0,2,3,4,9],v_cmpx_ngt_f32_e64:[0,2,3,4,9],v_cmpx_ngt_f32_sdwa:[0,3,4,9],v_cmpx_ngt_f64:[0,2,3,4,9],v_cmpx_ngt_f64_e64:[0,2,3,4,9],v_cmpx_nle_f16:[0,3,4,9],v_cmpx_nle_f16_e64:[0,3,4,9],v_cmpx_nle_f16_sdwa:[0,3,4,9],v_cmpx_nle_f32:[0,2,3,4,9],v_cmpx_nle_f32_e64:[0,2,3,4,9],v_cmpx_nle_f32_sdwa:[0,3,4,9],v_cmpx_nle_f64:[0,2,3,4,9],v_cmpx_nle_f64_e64:[0,2,3,4,9],v_cmpx_nlg_f16:[0,3,4,9],v_cmpx_nlg_f16_e64:[0,3,4,9],v_cmpx_nlg_f16_sdwa:[0,3,4,9],v_cmpx_nlg_f32:[0,2,3,4,9],v_cmpx_nlg_f32_e64:[0,2,3,4,9],v_cmpx_nlg_f32_sdwa:[0,3,4,9],v_cmpx_nlg_f64:[0,2,3,4,9],v_cmpx_nlg_f64_e64:[0,2,3,4,9],v_cmpx_nlt_f16:[0,3,4,9],v_cmpx_nlt_f16_e64:[0,3,4,9],v_cmpx_nlt_f16_sdwa:[0,3,4,9],v_cmpx_nlt_f32:[0,2,3,4,9],v_cmpx_nlt_f32_e64:[0,2,3,4,9],v_cmpx_nlt_f32_sdwa:[0,3,4,9],v_cmpx_nlt_f64:[0,2,3,4,9],v_cmpx_nlt_f64_e64:[0,2,3,4,9],v_cmpx_o_f16:[0,3,4,9],v_cmpx_o_f16_e64:[0,3,4,9],v_cmpx_o_f16_sdwa:[0,3,4,9],v_cmpx_o_f32:[0,2,3,4,9],v_cmpx_o_f32_e64:[0,2,3,4,9],v_cmpx_o_f32_sdwa:[0,3,4,9],v_cmpx_o_f64:[0,2,3,4,9],v_cmpx_o_f64_e64:[0,2,3,4,9],v_cmpx_t_i16:[3,4,9],v_cmpx_t_i16_e64:[3,4,9],v_cmpx_t_i16_sdwa:[3,4,9],v_cmpx_t_i32:[0,2,3,4,9],v_cmpx_t_i32_e64:[0,2,3,4,9],v_cmpx_t_i32_sdwa:[0,3,4,9],v_cmpx_t_i64:[0,2,3,4,9],v_cmpx_t_i64_e64:[0,2,3,4,9],v_cmpx_t_u16:[3,4,9],v_cmpx_t_u16_e64:[3,4,9],v_cmpx_t_u16_sdwa:[3,4,9],v_cmpx_t_u32:[0,2,3,4,9],v_cmpx_t_u32_e64:[0,2,3,4,9],v_cmpx_t_u32_sdwa:[0,3,4,9],v_cmpx_t_u6
4:[0,2,3,4,9],v_cmpx_t_u64_e64:[0,2,3,4,9],v_cmpx_tru_f16:[0,3,4,9],v_cmpx_tru_f16_e64:[0,3,4,9],v_cmpx_tru_f16_sdwa:[0,3,4,9],v_cmpx_tru_f32:[0,2,3,4,9],v_cmpx_tru_f32_e64:[0,2,3,4,9],v_cmpx_tru_f32_sdwa:[0,3,4,9],v_cmpx_tru_f64:[0,2,3,4,9],v_cmpx_tru_f64_e64:[0,2,3,4,9],v_cmpx_u_f16:[0,3,4,9],v_cmpx_u_f16_e64:[0,3,4,9],v_cmpx_u_f16_sdwa:[0,3,4,9],v_cmpx_u_f32:[0,2,3,4,9],v_cmpx_u_f32_e64:[0,2,3,4,9],v_cmpx_u_f32_sdwa:[0,3,4,9],v_cmpx_u_f64:[0,2,3,4,9],v_cmpx_u_f64_e64:[0,2,3,4,9],v_cndmask_b32:[0,2,3,4,9],v_cndmask_b32_dpp:[0,3,4,9],v_cndmask_b32_e64:[0,2,3,4,9],v_cndmask_b32_sdwa:[0,3,4,9],v_cos_f16:[0,3,4,9],v_cos_f16_dpp:[0,3,4,9],v_cos_f16_e64:[0,3,4,9],v_cos_f16_sdwa:[0,3,4,9],v_cos_f32:[0,2,3,4,9],v_cos_f32_dpp:[0,3,4,9],v_cos_f32_e64:[0,2,3,4,9],v_cos_f32_sdwa:[0,3,4,9],v_cubeid_f32:[0,2,3,4,9],v_cubema_f32:[0,2,3,4,9],v_cubesc_f32:[0,2,3,4,9],v_cubetc_f32:[0,2,3,4,9],v_cvt_f16_f32:[0,2,3,4,9,588],v_cvt_f16_f32_dpp:[0,3,4,9],v_cvt_f16_f32_e64:[0,2,3,4,9],v_cvt_f16_f32_sdwa:[0,3,4,9],v_cvt_f16_i16:[0,3,4,9],v_cvt_f16_i16_dpp:[0,3,4,9],v_cvt_f16_i16_e64:[0,3,4,9],v_cvt_f16_i16_sdwa:[0,3,4,9],v_cvt_f16_u16:[0,3,4,9],v_cvt_f16_u16_dpp:[0,3,4,9],v_cvt_f16_u16_e64:[0,3,4,9],v_cvt_f16_u16_sdwa:[0,3,4,9],v_cvt_f32_f16:[0,2,3,4,9],v_cvt_f32_f16_dpp:[0,3,4,9],v_cvt_f32_f16_e64:[0,2,3,4,9],v_cvt_f32_f16_sdwa:[0,3,4,9],v_cvt_f32_f64:[0,2,3,4,9],v_cvt_f32_f64_dpp:9,v_cvt_f32_f64_e64:[0,2,3,4,9],v_cvt_f32_i32:[0,2,3,4,9],v_cvt_f32_i32_dpp:[0,3,4,9],v_cvt_f32_i32_e64:[0,2,3,4,9],v_cvt_f32_i32_sdwa:[0,3,4,9],v_cvt_f32_u32:[0,2,3,4,9,587],v_cvt_f32_u32_dpp:[0,3,4,9],v_cvt_f32_u32_e64:[0,2,3,4,9],v_cvt_f32_u32_sdwa:[0,3,4,9],v_cvt_f32_ubyte0:[0,2,3,4,9],v_cvt_f32_ubyte0_dpp:[0,3,4,9],v_cvt_f32_ubyte0_e64:[0,2,3,4,9],v_cvt_f32_ubyte0_sdwa:[0,3,4,9],v_cvt_f32_ubyte1:[0,2,3,4,9],v_cvt_f32_ubyte1_dpp:[0,3,4,9],v_cvt_f32_ubyte1_e64:[0,2,3,4,9],v_cvt_f32_ubyte1_sdwa:[0,3,4,9],v_cvt_f32_ubyte2:[0,2,3,4,9],v_cvt_f32_ubyte2_dpp:[0,3,4,9],v_cvt_f32_ubyte2_e64:[0,2,3,4,9],v_cvt_f32_uby
te2_sdwa:[0,3,4,9],v_cvt_f32_ubyte3:[0,2,3,4,9],v_cvt_f32_ubyte3_dpp:[0,3,4,9],v_cvt_f32_ubyte3_e64:[0,2,3,4,9],v_cvt_f32_ubyte3_sdwa:[0,3,4,9],v_cvt_f64_f32:[0,2,3,4,9],v_cvt_f64_f32_e64:[0,2,3,4,9],v_cvt_f64_i32:[0,2,3,4,9],v_cvt_f64_i32_e32:590,v_cvt_f64_i32_e64:[0,2,3,4,9],v_cvt_f64_u32:[0,2,3,4,9],v_cvt_f64_u32_e64:[0,2,3,4,9],v_cvt_flr_i32_f32:[0,2,3,4,9],v_cvt_flr_i32_f32_dpp:[0,3,4,9],v_cvt_flr_i32_f32_e64:[0,2,3,4,9],v_cvt_flr_i32_f32_sdwa:[0,3,4,9],v_cvt_i16_f16:[0,3,4,9],v_cvt_i16_f16_dpp:[0,3,4,9],v_cvt_i16_f16_e64:[0,3,4,9],v_cvt_i16_f16_sdwa:[0,3,4,9],v_cvt_i32_f32:[0,2,3,4,9],v_cvt_i32_f32_dpp:[0,3,4,9],v_cvt_i32_f32_e64:[0,2,3,4,9],v_cvt_i32_f32_sdwa:[0,3,4,9],v_cvt_i32_f64:[0,2,3,4,9],v_cvt_i32_f64_dpp:9,v_cvt_i32_f64_e64:[0,2,3,4,9],v_cvt_norm_i16_f16:[0,4,9],v_cvt_norm_i16_f16_dpp:[0,4,9],v_cvt_norm_i16_f16_e64:[0,4,9],v_cvt_norm_i16_f16_sdwa:[0,4,9],v_cvt_norm_u16_f16:[0,4,9],v_cvt_norm_u16_f16_dpp:[0,4,9],v_cvt_norm_u16_f16_e64:[0,4,9],v_cvt_norm_u16_f16_sdwa:[0,4,9],v_cvt_off_f32_i4:[0,2,3,4,9],v_cvt_off_f32_i4_dpp:[0,3,4,9],v_cvt_off_f32_i4_e64:[0,2,3,4,9],v_cvt_off_f32_i4_sdwa:[0,3,4,9],v_cvt_pk_i16_i32:[0,2,3,4,9],v_cvt_pk_i16_i32_e64:2,v_cvt_pk_u16_u32:[0,2,3,4,9],v_cvt_pk_u16_u32_e64:2,v_cvt_pk_u8_f32:[0,2,3,4,9],v_cvt_pkaccum_u8_f32:[2,3,4,9],v_cvt_pkaccum_u8_f32_e64:2,v_cvt_pknorm_i16_f16:[0,4,9],v_cvt_pknorm_i16_f32:[0,2,3,4,9],v_cvt_pknorm_i16_f32_e64:2,v_cvt_pknorm_u16_f16:[0,4,9],v_cvt_pknorm_u16_f32:[0,2,3,4,9],v_cvt_pknorm_u16_f32_e64:2,v_cvt_pkrtz_f16_f32:[0,2,3,4,9],v_cvt_pkrtz_f16_f32_e64:[0,2],v_cvt_rpi_i32_f32:[0,2,3,4,9],v_cvt_rpi_i32_f32_dpp:[0,3,4,9],v_cvt_rpi_i32_f32_e64:[0,2,3,4,9],v_cvt_rpi_i32_f32_sdwa:[0,3,4,9],v_cvt_u16_f16:[0,3,4,9],v_cvt_u16_f16_dpp:[0,3,4,9],v_cvt_u16_f16_e64:[0,3,4,9],v_cvt_u16_f16_sdwa:[0,3,4,9],v_cvt_u32_f32:[0,2,3,4,9],v_cvt_u32_f32_dpp:[0,3,4,9],v_cvt_u32_f32_e64:[0,2,3,4,9],v_cvt_u32_f32_sdwa:[0,3,4,9],v_cvt_u32_f64:[0,2,3,4,9],v_cvt_u32_f64_dpp:9,v_cvt_u32_f64_e64:[0,2,3,4,9],v_div_fixup_f16
:[0,3,4,9],v_div_fixup_f32:[0,2,3,4,9],v_div_fixup_f64:[0,2,3,4,9],v_div_fixup_legacy_f16:[4,9],v_div_fmas_f32:[0,2,3,4,9],v_div_fmas_f64:[0,2,3,4,9],v_div_scale_f32:[0,2,3,4,9],v_div_scale_f64:[0,2,3,4,9],v_dot2_f32_f16:[1,7,8,9],v_dot2_i32_i16:[1,7,8,9],v_dot2_u32_u16:[1,7,8,9],v_dot2c_f32_f16:[1,8,9],v_dot2c_f32_f16_dpp:[1,8,9],v_dot2c_i32_i16:[8,9],v_dot2c_i32_i16_dpp:[8,9],v_dot4_i32_i8:[1,7,8,9],v_dot4_u32_u8:[1,7,8,9],v_dot4c_i32_i8:[1,8,9],v_dot4c_i32_i8_dpp:[1,8,9],v_dot8_i32_i4:[1,7,8,9],v_dot8_u32_u4:[1,7,8,9],v_dot8c_i32_i4:[8,9],v_dot8c_i32_i4_dpp:[8,9],v_exp_f16:[0,3,4,9],v_exp_f16_dpp:[0,3,4,9],v_exp_f16_e64:[0,3,4,9],v_exp_f16_sdwa:[0,3,4,9],v_exp_f32:[0,2,3,4,9],v_exp_f32_dpp:[0,3,4,9],v_exp_f32_e64:[0,2,3,4,9],v_exp_f32_sdwa:[0,3,4,9],v_exp_legacy_f32:[2,3,4,9],v_exp_legacy_f32_dpp:[3,4,9],v_exp_legacy_f32_e64:[2,3,4,9],v_exp_legacy_f32_sdwa:[3,4,9],v_ffbh_i32:[0,2,3,4,9],v_ffbh_i32_dpp:[0,3,4,9],v_ffbh_i32_e64:[0,2,3,4,9],v_ffbh_i32_sdwa:[0,3,4,9],v_ffbh_u32:[0,2,3,4,9],v_ffbh_u32_dpp:[0,3,4,9],v_ffbh_u32_e64:[0,2,3,4,9],v_ffbh_u32_sdwa:[0,3,4,9],v_ffbl_b32:[0,2,3,4,9],v_ffbl_b32_dpp:[0,3,4,9],v_ffbl_b32_e64:[0,2,3,4,9],v_ffbl_b32_sdwa:[0,3,4,9],v_floor_f16:[0,3,4,9],v_floor_f16_dpp:[0,3,4,9],v_floor_f16_e64:[0,3,4,9],v_floor_f16_sdwa:[0,3,4,9],v_floor_f32:[0,2,3,4,9],v_floor_f32_dpp:[0,3,4,9],v_floor_f32_e32:590,v_floor_f32_e64:[0,2,3,4,9],v_floor_f32_sdwa:[0,3,4,9],v_floor_f64:[0,2,3,4,9],v_floor_f64_dpp:9,v_floor_f64_e64:[0,2,3,4,9],v_fma_f16:[0,3,4,9],v_fma_f32:[0,2,3,4,9],v_fma_f64:[0,2,3,4,9],v_fma_legacy_f16:[4,9],v_fma_mix:588,v_fma_mix_f32:[0,6,7,8,9],v_fma_mixhi_f16:[0,6,7,8,9],v_fma_mixlo_f16:[0,6,7,8,9],v_fmaak_f16:0,v_fmaak_f32:0,v_fmac_f16:0,v_fmac_f16_dpp:0,v_fmac_f16_e64:0,v_fmac_f32:[0,7,8,9],v_fmac_f32_dpp:[0,7,8,9],v_fmac_f32_e64:[0,7,8,9],v_fmac_f64:9,v_fmac_f64_dpp:9,v_fmac_f64_e64:9,v_fmamk_f16:0,v_fmamk_f32:0,v_fract_f16:[0,3,4,9],v_fract_f16_dpp:[0,3,4,9],v_fract_f16_e64:[0,3,4,9],v_fract_f16_sdwa:[0,3,4,9],v_fract_f32:[0,2
,3,4,9,590],v_fract_f32_dpp:[0,3,4,9],v_fract_f32_e64:[0,2,3,4,9],v_fract_f32_sdwa:[0,3,4,9],v_fract_f64:[0,2,3,4,9],v_fract_f64_dpp:9,v_fract_f64_e64:[0,2,3,4,9],v_frexp_exp_i16_f16:[0,3,4,9],v_frexp_exp_i16_f16_dpp:[0,3,4,9],v_frexp_exp_i16_f16_e64:[0,3,4,9],v_frexp_exp_i16_f16_sdwa:[0,3,4,9],v_frexp_exp_i32_f32:[0,2,3,4,9],v_frexp_exp_i32_f32_dpp:[0,3,4,9],v_frexp_exp_i32_f32_e64:[0,2,3,4,9],v_frexp_exp_i32_f32_sdwa:[0,3,4,9],v_frexp_exp_i32_f64:[0,2,3,4,9],v_frexp_exp_i32_f64_dpp:9,v_frexp_exp_i32_f64_e64:[0,2,3,4,9],v_frexp_mant_f16:[0,3,4,9],v_frexp_mant_f16_dpp:[0,3,4,9],v_frexp_mant_f16_e64:[0,3,4,9],v_frexp_mant_f16_sdwa:[0,3,4,9],v_frexp_mant_f32:[0,2,3,4,9],v_frexp_mant_f32_dpp:[0,3,4,9],v_frexp_mant_f32_e64:[0,2,3,4,9],v_frexp_mant_f32_sdwa:[0,3,4,9],v_frexp_mant_f64:[0,2,3,4,9],v_frexp_mant_f64_dpp:9,v_frexp_mant_f64_e64:[0,2,3,4,9],v_interp_mov_f32:[0,2,3,4],v_interp_mov_f32_e64:[0,3,4],v_interp_p1_f32:[0,2,3,4,17,121,210,480],v_interp_p1_f32_e64:[0,3,4],v_interp_p1ll_f16:[0,3,4],v_interp_p1lv_f16:[0,3,4],v_interp_p2_f16:[0,3,4],v_interp_p2_f32:[0,2,3,4],v_interp_p2_f32_e64:[0,3,4],v_interp_p2_legacy_f16:4,v_ldexp_f16:[0,3,4,9],v_ldexp_f16_dpp:[0,3,4,9],v_ldexp_f16_e64:[0,3,4,9],v_ldexp_f16_sdwa:[0,3,4,9],v_ldexp_f32:[0,2,3,4,9],v_ldexp_f32_e64:2,v_ldexp_f64:[0,2,3,4,9],v_lerp_u8:[0,2,3,4,9],v_log_clamp_f32:2,v_log_clamp_f32_e64:2,v_log_f16:[0,3,4,9],v_log_f16_dpp:[0,3,4,9],v_log_f16_e64:[0,3,4,9],v_log_f16_sdwa:[0,3,4,9],v_log_f32:[0,2,3,4,9],v_log_f32_dpp:[0,3,4,9],v_log_f32_e64:[0,2,3,4,9],v_log_f32_sdwa:[0,3,4,9],v_log_legacy_f32:[2,3,4,9],v_log_legacy_f32_dpp:[3,4,9],v_log_legacy_f32_e64:[2,3,4,9],v_log_legacy_f32_sdwa:[3,4,9],v_lshl_add_u32:[0,4,9],v_lshl_b32:2,v_lshl_b32_e64:2,v_lshl_b64:2,v_lshl_or_b32:[0,4,9],v_lshlrev_b16:[0,3,4,9],v_lshlrev_b16_dpp:[3,4,9],v_lshlrev_b16_e64:[3,4,9],v_lshlrev_b16_sdwa:[3,4,9],v_lshlrev_b32:[0,2,3,4,9],v_lshlrev_b32_dpp:[0,3,4,9],v_lshlrev_b32_e64:[0,2,3,4,9],v_lshlrev_b32_sdwa:[0,3,4,9],v_lshlrev_b64:[0,3,4,9
],v_lshr_b32:2,v_lshr_b32_e64:2,v_lshr_b64:2,v_lshrrev_b16:[0,3,4,9],v_lshrrev_b16_dpp:[3,4,9],v_lshrrev_b16_e64:[3,4,9],v_lshrrev_b16_sdwa:[3,4,9],v_lshrrev_b32:[0,2,3,4,9],v_lshrrev_b32_dpp:[0,3,4,9],v_lshrrev_b32_e64:[0,2,3,4,9],v_lshrrev_b32_sdwa:[0,3,4,9],v_lshrrev_b64:[0,3,4,9],v_mac_f16:[3,4,9],v_mac_f16_dpp:[3,4,9],v_mac_f16_e64:[3,4,9],v_mac_f16_sdwa:3,v_mac_f32:[0,2,3,4,9],v_mac_f32_dpp:[0,3,4,9],v_mac_f32_e64:[0,2,3,4,9],v_mac_f32_sdwa:3,v_mac_legacy_f32:[0,2],v_mac_legacy_f32_e64:[0,2],v_mad_f16:[3,4,9],v_mad_f32:[0,2,3,4,9],v_mad_i16:[0,3,4,9],v_mad_i32_i16:[0,4,9],v_mad_i32_i24:[0,2,3,4,9],v_mad_i64_i32:[0,2,3,4,9],v_mad_legacy_f16:[4,9],v_mad_legacy_f32:[0,2,3,4,9],v_mad_legacy_i16:[4,9],v_mad_legacy_u16:[4,9],v_mad_mix:588,v_mad_mix_f32:5,v_mad_mixhi_f16:5,v_mad_mixlo_f16:5,v_mad_u16:[0,3,4,9],v_mad_u32_u16:[0,4,9],v_mad_u32_u24:[0,2,3,4,9],v_mad_u64_u32:[0,2,3,4,9],v_madak_f16:[3,4,9],v_madak_f32:[0,2,3,4,9],v_madmk_f16:[3,4,9],v_madmk_f32:[0,2,3,4,9],v_max3_f16:[0,4,9],v_max3_f32:[0,2,3,4,9,587],v_max3_i16:[0,4,9,587],v_max3_i32:[0,2,3,4,9],v_max3_u16:[0,4,9],v_max3_u32:[0,2,3,4,9],v_max_f16:[0,3,4,9,590],v_max_f16_dpp:[0,3,4,9],v_max_f16_e32:590,v_max_f16_e64:[0,3,4,9],v_max_f16_sdwa:[0,3,4,9],v_max_f32:[0,2,3,4,9],v_max_f32_dpp:[0,3,4,9],v_max_f32_e64:[0,2,3,4,9],v_max_f32_sdwa:[0,3,4,9],v_max_f64:[0,2,3,4,9],v_max_i16:[0,3,4,9],v_max_i16_dpp:[3,4,9],v_max_i16_e64:[3,4,9],v_max_i16_sdwa:[3,4,9],v_max_i32:[0,2,3,4,9],v_max_i32_dpp:[0,3,4,9],v_max_i32_e64:[0,2,3,4,9],v_max_i32_sdwa:[0,3,4,9],v_max_legacy_f32:2,v_max_legacy_f32_e64:2,v_max_u16:[0,3,4,9],v_max_u16_dpp:[3,4,9],v_max_u16_e64:[3,4,9],v_max_u16_sdwa:[3,4,9],v_max_u32:[0,2,3,4,9],v_max_u32_dpp:[0,3,4,9],v_max_u32_e64:[0,2,3,4,9],v_max_u32_sdwa:[0,3,4,9],v_mbcnt_hi_u32_b32:[0,2,3,4,9],v_mbcnt_hi_u32_b32_e64:2,v_mbcnt_lo_u32_b32:[0,2,3,4,9],v_mbcnt_lo_u32_b32_e64:2,v_med3_f16:[0,4,9],v_med3_f32:[0,2,3,4,9],v_med3_i16:[0,4,9],v_med3_i32:[0,2,3,4,9],v_med3_u16:[0,4,9],v_med3_u32:[0,2,3,4,9],v
_mfma_f32_16x16x16bf16_1k:9,v_mfma_f32_16x16x16f16:[8,9],v_mfma_f32_16x16x1f32:[8,9],v_mfma_f32_16x16x2bf16:[8,9],v_mfma_f32_16x16x4bf16_1k:9,v_mfma_f32_16x16x4f16:[8,9],v_mfma_f32_16x16x4f32:[8,9],v_mfma_f32_16x16x8bf16:[8,9],v_mfma_f32_32x32x1f32:[8,9],v_mfma_f32_32x32x2bf16:[8,9],v_mfma_f32_32x32x2f32:[8,9],v_mfma_f32_32x32x4bf16:[8,9],v_mfma_f32_32x32x4bf16_1k:9,v_mfma_f32_32x32x4f16:[8,9],v_mfma_f32_32x32x8bf16_1k:9,v_mfma_f32_32x32x8f16:[8,9],v_mfma_f32_4x4x1f32:[8,9],v_mfma_f32_4x4x2bf16:[8,9],v_mfma_f32_4x4x4bf16_1k:9,v_mfma_f32_4x4x4f16:[8,9],v_mfma_f64_16x16x4f64:9,v_mfma_f64_4x4x4f64:9,v_mfma_i32_16x16x16i8:[8,9],v_mfma_i32_16x16x4i8:[8,9],v_mfma_i32_32x32x4i8:[8,9],v_mfma_i32_32x32x8i8:[8,9],v_mfma_i32_4x4x4i8:[8,9],v_min3_f16:[0,4,9],v_min3_f32:[0,2,3,4,9],v_min3_i16:[0,4,9],v_min3_i32:[0,2,3,4,9],v_min3_u16:[0,4,9],v_min3_u32:[0,2,3,4,9],v_min_f16:[0,3,4,9],v_min_f16_dpp:[0,3,4,9],v_min_f16_e64:[0,3,4,9],v_min_f16_sdwa:[0,3,4,9],v_min_f32:[0,2,3,4,9],v_min_f32_dpp:[0,3,4,9],v_min_f32_e64:[0,2,3,4,9],v_min_f32_sdwa:[0,3,4,9],v_min_f64:[0,2,3,4,9],v_min_i16:[0,3,4,9],v_min_i16_dpp:[3,4,9],v_min_i16_e64:[3,4,9],v_min_i16_sdwa:[3,4,9],v_min_i32:[0,2,3,4,9],v_min_i32_dpp:[0,3,4,9],v_min_i32_e64:[0,2,3,4,9],v_min_i32_sdwa:[0,3,4,9],v_min_legacy_f32:2,v_min_legacy_f32_e64:2,v_min_u16:[0,3,4,9],v_min_u16_dpp:[3,4,9],v_min_u16_e64:[3,4,9],v_min_u16_sdwa:[3,4,9],v_min_u32:[0,2,3,4,9,590],v_min_u32_dpp:[0,3,4,9],v_min_u32_e64:[0,2,3,4,9],v_min_u32_sdwa:[0,3,4,9],v_mov_b32:[0,2,3,4,9,590],v_mov_b32_dpp:[0,3,4,9],v_mov_b32_e32:590,v_mov_b32_e64:[0,2,3,4,9],v_mov_b32_sdwa:[0,3,4,9],v_movreld_b32:[0,2],v_movreld_b32_dpp:0,v_movreld_b32_e64:[0,2],v_movreld_b32_sdwa:0,v_movrels_b32:[0,2],v_movrels_b32_dpp:0,v_movrels_b32_e64:[0,2],v_movrels_b32_sdwa:0,v_movrelsd_2_b32:0,v_movrelsd_2_b32_dpp:0,v_movrelsd_2_b32_e64:0,v_movrelsd_2_b32_sdwa:0,v_movrelsd_b32:[0,2],v_movrelsd_b32_dpp:0,v_movrelsd_b32_e64:[0,2],v_movrelsd_b32_sdwa:0,v_mqsad_pk_u16_u8:[0,2,3,4,9],v_mqsad_u32_u
8:[0,2,3,4,9],v_msad_u8:[0,2,3,4,9],v_mul_f16:[0,3,4,9],v_mul_f16_dpp:[0,3,4,9],v_mul_f16_e64:[0,3,4,9],v_mul_f16_sdwa:[0,3,4,9],v_mul_f32:[0,2,3,4,9],v_mul_f32_dpp:[0,3,4,9],v_mul_f32_e64:[0,2,3,4,9],v_mul_f32_sdwa:[0,3,4,9],v_mul_f64:[0,2,3,4,9],v_mul_hi_i32:[0,2,3,4,9],v_mul_hi_i32_i24:[0,2,3,4,9],v_mul_hi_i32_i24_dpp:[0,3,4,9],v_mul_hi_i32_i24_e64:[0,2,3,4,9],v_mul_hi_i32_i24_sdwa:[0,3,4,9],v_mul_hi_u32:[0,2,3,4,9],v_mul_hi_u32_u24:[0,2,3,4,9],v_mul_hi_u32_u24_dpp:[0,3,4,9],v_mul_hi_u32_u24_e64:[0,2,3,4,9],v_mul_hi_u32_u24_sdwa:[0,3,4,9],v_mul_i32_i24:[0,2,3,4,9],v_mul_i32_i24_dpp:[0,3,4,9],v_mul_i32_i24_e32:590,v_mul_i32_i24_e64:[0,2,3,4,9,590],v_mul_i32_i24_sdwa:[0,3,4,9],v_mul_legacy_f32:[0,2,3,4,9],v_mul_legacy_f32_dpp:[0,3,4],v_mul_legacy_f32_e64:[0,2,3,4],v_mul_legacy_f32_sdwa:[0,3,4],v_mul_lo_i32:2,v_mul_lo_u16:[0,3,4,9],v_mul_lo_u16_dpp:[3,4,9],v_mul_lo_u16_e64:[3,4,9],v_mul_lo_u16_sdwa:[3,4,9],v_mul_lo_u32:[0,2,3,4,9],v_mul_u32_u24:[0,2,3,4,9],v_mul_u32_u24_dpp:[0,3,4,9],v_mul_u32_u24_e64:[0,2,3,4,9],v_mul_u32_u24_sdwa:[0,3,4,9],v_mullit_f32:[0,2],v_nop:[0,2,3,4,9,590],v_nop_e64:[0,2,3,4,9],v_not_b32:[0,2,3,4,9],v_not_b32_dpp:[0,3,4,9],v_not_b32_e64:[0,2,3,4,9],v_not_b32_sdwa:[0,3,4,9],v_or3_b32:[0,4,9],v_or_b32:[0,2,3,4,9],v_or_b32_dpp:[0,3,4,9],v_or_b32_e64:[0,2,3,4,9],v_or_b32_sdwa:[0,3,4,9],v_pack_b32_f16:[0,4,9],v_perm_b32:[0,3,4,9],v_permlane16_b32:0,v_permlanex16_b32:0,v_pipeflush:0,v_pipeflush_e64:0,v_pk_add_f16:[0,4,9,587],v_pk_add_f32:9,v_pk_add_i16:[0,4,9,587],v_pk_add_u16:[0,4,9,587],v_pk_ashrrev_i16:[0,4,9],v_pk_fma_f16:[0,4,9],v_pk_fma_f32:9,v_pk_fmac_f16:[0,8,9],v_pk_lshlrev_b16:[0,4,9],v_pk_lshrrev_b16:[0,4,9],v_pk_mad_i16:[0,4,9],v_pk_mad_u16:[0,4,9],v_pk_max_f16:[0,4,9],v_pk_max_i16:[0,4,9],v_pk_max_u16:[0,4,9],v_pk_min_f16:[0,4,9],v_pk_min_i16:[0,4,9],v_pk_min_u16:[0,4,9],v_pk_mov_b32:9,v_pk_mul_f16:[0,4,9],v_pk_mul_f32:9,v_pk_mul_lo_u16:[0,4,9],v_pk_sub_i16:[0,4,9],v_pk_sub_u16:[0,4,9],v_qsad_pk_u16_u8:[0,2,3,4,9],v_rcp_clamp_f32:2,v
_rcp_clamp_f32_e64:2,v_rcp_clamp_f64:2,v_rcp_clamp_f64_e64:2,v_rcp_f16:[0,3,4,9],v_rcp_f16_dpp:[0,3,4,9],v_rcp_f16_e64:[0,3,4,9],v_rcp_f16_sdwa:[0,3,4,9],v_rcp_f32:[0,2,3,4,9],v_rcp_f32_dpp:[0,3,4,9],v_rcp_f32_e64:[0,2,3,4,9],v_rcp_f32_sdwa:[0,3,4,9],v_rcp_f64:[0,2,3,4,9],v_rcp_f64_e64:[0,2,3,4,9],v_rcp_iflag_f32:[0,2,3,4,9],v_rcp_iflag_f32_dpp:[0,3,4,9],v_rcp_iflag_f32_e64:[0,2,3,4,9],v_rcp_iflag_f32_sdwa:[0,3,4,9],v_rcp_legacy_f32:2,v_rcp_legacy_f32_e64:2,v_readfirstlane_b32:[0,2,3,4,9],v_readlane_b32:[0,2,3,4,9],v_reg:607,v_rndne_f16:[0,3,4,9],v_rndne_f16_dpp:[0,3,4,9],v_rndne_f16_e64:[0,3,4,9],v_rndne_f16_sdwa:[0,3,4,9],v_rndne_f32:[0,2,3,4,9],v_rndne_f32_dpp:[0,3,4,9],v_rndne_f32_e64:[0,2,3,4,9],v_rndne_f32_sdwa:[0,3,4,9],v_rndne_f64:[0,2,3,4,9],v_rndne_f64_e64:[0,2,3,4,9],v_rsq_clamp_f32:2,v_rsq_clamp_f32_e64:2,v_rsq_clamp_f64:2,v_rsq_clamp_f64_e64:2,v_rsq_f16:[0,3,4,9],v_rsq_f16_dpp:[0,3,4,9],v_rsq_f16_e64:[0,3,4,9],v_rsq_f16_sdwa:[0,3,4,9],v_rsq_f32:[0,2,3,4,9],v_rsq_f32_dpp:[0,3,4,9],v_rsq_f32_e64:[0,2,3,4,9],v_rsq_f32_sdwa:[0,3,4,9],v_rsq_f64:[0,2,3,4,9],v_rsq_f64_e64:[0,2,3,4,9],v_rsq_legacy_f32:2,v_rsq_legacy_f32_e64:2,v_sad_hi_u8:[0,2,3,4,9],v_sad_u16:[0,2,3,4,9],v_sad_u32:[0,2,3,4,9],v_sad_u8:[0,2,3,4,9],v_sat_pk_u8_i16:[0,4,9],v_sat_pk_u8_i16_dpp:[0,4,9],v_sat_pk_u8_i16_e64:[0,4,9],v_sat_pk_u8_i16_sdwa:[0,4,9],v_screen_partition_4se_b32:[4,9],v_screen_partition_4se_b32_dpp:[4,9],v_screen_partition_4se_b32_e64:[4,9],v_screen_partition_4se_b32_sdwa:[4,9],v_sin_f16:[0,3,4,9],v_sin_f16_dpp:[0,3,4,9],v_sin_f16_e64:[0,3,4,9],v_sin_f16_sdwa:[0,3,4,9],v_sin_f32:[0,2,3,4,9,590],v_sin_f32_dpp:[0,3,4,9],v_sin_f32_e64:[0,2,3,4,9],v_sin_f32_sdwa:[0,3,4,9],v_sqrt_f16:[0,3,4,9],v_sqrt_f16_dpp:[0,3,4,9],v_sqrt_f16_e64:[0,3,4,9],v_sqrt_f16_sdwa:[0,3,4,9],v_sqrt_f32:[0,2,3,4,9],v_sqrt_f32_dpp:[0,3,4,9],v_sqrt_f32_e64:[0,2,3,4,9],v_sqrt_f32_sdwa:[0,3,4,9],v_sqrt_f64:[0,2,3,4,9],v_sqrt_f64_e64:[0,2,3,4,9],v_sub_co_ci_u32:0,v_sub_co_ci_u32_dpp:0,v_sub_co_ci_u32_e64:0,v_su
b_co_ci_u32_sdwa:0,v_sub_co_u32:[0,4,9],v_sub_co_u32_dpp:[4,9],v_sub_co_u32_e64:[4,9],v_sub_co_u32_sdwa:[4,9],v_sub_f16:[0,3,4,9],v_sub_f16_dpp:[0,3,4,9],v_sub_f16_e64:[0,3,4,9],v_sub_f16_sdwa:[0,3,4,9],v_sub_f32:[0,2,3,4,9],v_sub_f32_dpp:[0,3,4,9],v_sub_f32_e64:[0,2,3,4,9],v_sub_f32_sdwa:[0,3,4,9],v_sub_i16:[4,9],v_sub_i32:[2,4,9],v_sub_i32_e64:2,v_sub_nc_i16:0,v_sub_nc_i32:0,v_sub_nc_u16:0,v_sub_nc_u32:0,v_sub_nc_u32_dpp:0,v_sub_nc_u32_e64:0,v_sub_nc_u32_sdwa:0,v_sub_u16:[3,4,9],v_sub_u16_dpp:[3,4,9],v_sub_u16_e64:[3,4,9],v_sub_u16_sdwa:[3,4,9],v_sub_u32:[3,4,9],v_sub_u32_dpp:[3,4,9],v_sub_u32_e64:[3,4,9],v_sub_u32_sdwa:[3,4,9],v_subb_co_u32:[4,9],v_subb_co_u32_dpp:[4,9],v_subb_co_u32_e64:[4,9],v_subb_co_u32_sdwa:[4,9],v_subb_u32:[2,3],v_subb_u32_dpp:3,v_subb_u32_e64:[2,3],v_subb_u32_sdwa:3,v_subbrev_co_u32:[4,9],v_subbrev_co_u32_dpp:[4,9],v_subbrev_co_u32_e64:[4,9],v_subbrev_co_u32_sdwa:[4,9],v_subbrev_u32:[2,3],v_subbrev_u32_dpp:3,v_subbrev_u32_e64:[2,3],v_subbrev_u32_sdwa:3,v_subrev_co_ci_u32:0,v_subrev_co_ci_u32_dpp:0,v_subrev_co_ci_u32_e64:0,v_subrev_co_ci_u32_sdwa:0,v_subrev_co_u32:[0,4,9],v_subrev_co_u32_dpp:[4,9],v_subrev_co_u32_e64:[4,9],v_subrev_co_u32_sdwa:[4,9],v_subrev_f16:[0,3,4,9],v_subrev_f16_dpp:[0,3,4,9],v_subrev_f16_e64:[0,3,4,9],v_subrev_f16_sdwa:[0,3,4,9],v_subrev_f32:[0,2,3,4,9],v_subrev_f32_dpp:[0,3,4,9],v_subrev_f32_e64:[0,2,3,4,9],v_subrev_f32_sdwa:[0,3,4,9],v_subrev_i32:2,v_subrev_i32_e64:2,v_subrev_nc_u32:0,v_subrev_nc_u32_dpp:0,v_subrev_nc_u32_e64:0,v_subrev_nc_u32_sdwa:0,v_subrev_u16:[3,4,9],v_subrev_u16_dpp:[3,4,9],v_subrev_u16_e64:[3,4,9],v_subrev_u16_sdwa:[3,4,9],v_subrev_u32:[3,4,9],v_subrev_u32_dpp:[3,4,9],v_subrev_u32_e64:[3,4,9],v_subrev_u32_sdwa:[3,4,9],v_swap_b32:[0,4,9],v_swaprel_b32:0,v_trig_preop_f64:[0,2,3,4,9],v_trunc_f16:[0,3,4,9],v_trunc_f16_dpp:[0,3,4,9],v_trunc_f16_e64:[0,3,4,9],v_trunc_f16_sdwa:[0,3,4,9],v_trunc_f32:[0,2,3,4,9],v_trunc_f32_dpp:[0,3,4,9],v_trunc_f32_e64:[0,2,3,4,9],v_trunc_f32_sdwa:[0,3,4,9],v_trunc_f
64:[0,2,3,4,9],v_trunc_f64_dpp:9,v_trunc_f64_e64:[0,2,3,4,9],v_writelane_b32:[0,2,3,4,9],v_xad_u32:[0,4,9],v_xnor_b32:[0,7,8,9],v_xnor_b32_dpp:[0,7,8,9],v_xnor_b32_e64:[0,7,8,9],v_xnor_b32_sdwa:[0,7,8,9],v_xor3_b32:0,v_xor_b32:[0,2,3,4,9],v_xor_b32_dpp:[0,3,4,9],v_xor_b32_e64:[0,2,3,4,9],v_xor_b32_sdwa:[0,3,4,9],va_list:710,vacat:755,vaddr:[0,2,3,4,8,9,31,32,341,381,382,494,495],vadv:774,vagu:667,val0:[663,710],val1:[663,710,712,768,769,770],val29:663,val2:[663,710,712,768,769,770],val3:[710,769],val7:710,val:[594,597,610,659,663,706,710,724,743,762,770,780,803,804,805,806,807,808,809,810],val_express:585,val_offset:585,val_success:710,vala:724,valb:724,valc:724,valgrind:[612,616,704,775],valid:[27,129,220,378,490,585,588,589,590,597,598,601,604,605,607,610,611,614,617,619,624,626,631,639,641,644,648,649,652,658,659,667,669,672,675,677,678,679,684,687,688,702,709,710,711,712,714,715,722,724,725,733,735,738,741,743,745,754,759,760,762,763,764,766,770,775,777,781,782,784,785,803,804,805,806,807,808,809,810],validli:710,valids:770,valn:770,valu:[20,21,22,23,24,27,30,36,37,38,51,52,53,93,96,97,98,100,101,112,114,120,123,124,125,126,127,129,142,143,186,187,189,190,191,209,212,213,214,215,216,217,220,223,237,238,239,281,282,283,286,287,288,306,308,314,354,370,371,372,373,374,375,378,380,386,387,388,400,401,402,444,447,448,449,451,452,470,479,482,483,484,485,486,487,490,493,499,500,501,513,514,515,557,560,561,562,564,565,576,578,584,586,587,588,591,593,594,596,597,599,602,603,605,606,607,610,611,612,616,617,619,621,622,623,624,625,626,628,629,630,631,632,633,636,638,639,640,641,642,643,644,645,648,649,650,652,653,654,657,658,663,664,665,667,668,669,670,671,674,676,679,681,683,684,685,686,687,688,700,702,703,706,707,708,709,711,714,720,721,724,726,727,730,731,733,734,735,736,738,739,744,750,751,754,756,758,762,764,768,769,771,774,775,777,779,780,781,782,783,785,788,789,802,803,804,805,806,807,808,809,810,811,814],valuabl:[609,616,662,667,689,695,748,774],value1:759,value2:[
759,760],value_align:710,value_desc:659,value_kind:[590,710],value_load:710,value_typ:[590,786],valuecol:707,valuedisallow:659,valuefield:768,valuekind:[590,610],valuelist:770,valuelistn:770,valueopt:659,valuerequir:659,valuesuffix:770,valuet:743,valuetrack:[710,764],valuetyp:[590,607,670,771,780],vanilla:660,var1:606,var_loc:684,var_nam:606,vararg:[597,606,607,710,743,804],varbit:768,vardecl:610,varexpr:[808,809,810],varexprast:[808,809,810],vari:[175,270,435,548,585,590,594,597,601,607,608,663,672,679,684,710,712,719,726,735,743,757,765,770,811],variabl:[585,590,592,593,594,595,602,603,614,616,620,621,623,625,630,632,638,642,644,654,658,659,661,663,664,669,672,674,676,677,678,679,681,694,695,704,705,709,711,714,715,720,721,722,723,724,728,729,730,731,734,735,738,743,756,760,761,762,763,764,768,769,773,774,775,777,778,780,782,783,784,786,788,802,803,804,805,806,807,809,811,812,813],variable_nam:[605,748],variable_op:770,variableexprast:[803,804,805,806,807,808,809,810],variad:[710,743],variadicopsaredef:770,variant:[587,594,611,639,659,669,671,683,684,686,691,705,710,714,735,741,743,762,765,769,770,775,780,782],variat:[593,595,607,616,639,669,712,748,761,765,779,780,788],varieti:[593,594,597,603,606,607,621,661,667,669,676,679,697,711,712,723,743,746,752,757,759,762,768,770,804,805,806,807,811],variou:[589,593,594,597,601,602,605,607,610,612,634,639,644,657,661,662,664,666,669,674,675,679,684,688,704,705,707,710,713,715,716,721,724,725,726,727,729,730,731,738,739,743,745,748,751,752,754,756,759,760,768,769,770,771,774,775,780,781,782,783,784,786,802,803,805,806,810],varnam:[806,807,808,809,810],vasileio:750,vbr4:597,vbr5:597,vbr6:[597,754],vbr7:754,vbr8:[597,754],vbr:623,vc110:736,vc140:736,vc2:736,vc41:[731,736],vc4:736,vc50:736,vc70:736,vc70dep:736,vc80:736,vc98:736,vcall:604,vcc:[0,2,3,4,9,10,11,12,13,31,32,33,35,36,37,39,40,42,43,45,48,49,50,54,55,56,57,58,59,60,61,63,64,65,69,70,71,72,73,74,75,76,77,133,134,136,139,140,144,145,146,147,148,149,151,152,153,154,1
55,156,160,161,162,163,164,165,166,167,168,169,170,224,226,227,229,231,234,235,240,241,243,244,245,246,248,249,250,251,252,253,257,258,259,260,261,262,263,264,265,317,318,322,323,328,329,330,331,332,341,342,343,344,345,347,348,381,382,383,385,386,387,389,390,392,394,397,398,403,404,405,406,407,410,411,412,413,421,422,423,424,425,426,427,428,429,494,495,496,498,499,500,502,503,505,507,510,511,516,517,518,519,520,521,522,523,524,526,527,528,529,533,534,535,536,537,538,539,540,541,590],vcc_32:590,vcc_64:590,vcc_hi:589,vcc_lo:[86,589],vccz:[10,11,12,13,54,57,58,59,60,61,63,64,65,69,70,75,77,144,146,147,148,149,151,152,153,154,155,156,160,161,162,163,166,167,240,243,244,245,246,248,249,250,251,252,253,257,258,261,265,317,318,322,323,328,329,330,331,332,342,343,344,345,347,348,403,406,407,410,411,412,413,421,422,425,429,516,519,520,521,522,523,524,526,527,528,529,533,534,537,541],vcrt:698,vcs:712,vcsrevis:605,vdata0:[0,2,3,4,9],vdata1:[0,2,3,4,9],vdata:[0,2,3,4,8,9,586],vdst:[0,1,2,3,4,5,6,7,8,9,586],vec0123:710,vec012:710,vec01:710,vec0:710,vec1:[710,743],vec2:[710,743],vec:[710,743],vec_splat:711,vec_type_hint:590,vector:[86,120,177,209,272,314,437,479,550,584,585,586,589,590,596,597,607,610,639,658,659,674,678,683,686,709,711,722,724,769,773,777,778,780,786,803,804,805,806,807,808,809,810,811],vector_length:710,vector_redirect:710,vectoriz:776,vectorize_width:779,vectorti:710,vectortyp:743,vectsiz:770,vectypehint:590,vega7nm:590,vega:590,vehicl:[606,610],vehiclemak:610,vehicletyp:610,veli:607,vend:708,vendor:[585,610,710,746,757,760,809],vendor_and_architecture_nam:590,vendor_name_s:590,venu:755,venv:773,ver60:731,verb:610,verbatim:[659,742,770,780],verbos:[611,612,614,616,630,636,642,643,650,654,704,705,709,716,743,748,780,783,784,786],verd:590,veri:[585,593,594,595,597,601,603,606,607,609,610,611,628,639,644,659,666,667,668,669,672,674,675,676,677,678,679,682,689,693,695,697,699,702,703,704,709,710,712,716,718,719,721,722,723,725,726,730,738,741,743,745,748,751,754,7
59,760,764,768,771,773,774,775,779,782,783,786,789,790,802,803,804,805,806,807,808,811],verif:[590,659,689,710,741,756,773,777,778],verifi:[591,592,601,604,607,614,615,623,637,639,657,667,679,681,689,702,705,709,710,712,717,723,736,744,745,759,760,761,762,764,775,782,784,804,805,806,807,808,809,810],verify_arch:637,verifyfunct:[804,805,806,807,808,809,810],verifypreservedanalysi:784,versa:[594,663,705,710,735,750,764,775],version:[588,590,591,592,593,594,597,602,605,606,607,611,614,616,617,619,621,623,624,625,626,630,633,635,636,637,640,641,642,643,648,649,650,652,653,654,658,659,660,661,662,664,667,668,670,673,674,676,679,681,684,686,688,694,696,700,701,705,710,718,721,722,723,724,726,727,731,736,738,740,742,743,745,747,748,750,752,754,756,757,759,761,762,764,766,768,773,779,780,782,783,784,785,787,788,804,805,806,808,809,810],version_less:605,versionhead:731,versioninglicm:776,versionsignatur:731,vertex:[78,171,266,542,590],vertexbuffert:590,vertic:[745,783],vex:[658,768],vfabi:710,vfp3:701,vfp:[596,710],vfprintf:710,vfuncid:710,vg_leak:616,vgpr0:590,vgpr104:590,vgpr120:590,vgpr136:590,vgpr152:590,vgpr168:590,vgpr184:590,vgpr1:590,vgpr200:590,vgpr216:590,vgpr232:590,vgpr248:590,vgpr255:590,vgpr31:590,vgpr40:590,vgpr56:590,vgpr72:590,vgpr88:590,vgpr:[20,84,123,212,370,482,588,710],vgpr_limit:590,vgpr_spill_count:590,vgprs_use:590,vgrp:590,vhaddp:[631,639],via:[590,594,602,605,607,611,639,644,645,654,657,660,662,663,665,666,667,668,669,671,673,675,676,679,685,693,694,700,701,710,712,713,715,719,725,726,727,728,729,741,743,750,752,756,760,762,765,766,768,770,771,775,781,782,783,785,789,804,805,810],viabl:[639,695,705,709,760],vibrant:667,vic:639,vice:[594,663,705,710,735,750,764,775],video:[608,743],view:[585,590,594,607,608,625,644,712,713,714,721,723,733,745,746,748,761,774,802,806],viewcfg:[743,806],viewcfgonli:[743,806],viewer:[610,783],viewgraph:743,viewid:590,viewpoint:608,viewport:590,viewvc:745,vii:590,vim:[679,761,771],violat:[590,604,608,667,676,677,710,714
,743,755,806],violent:608,virginia:747,virt:743,virtreg2indexfunctor:607,virtreg:607,virtregmap:607,virtregrewrit:759,virtual:[585,590,593,604,611,616,624,644,663,666,676,684,685,686,689,691,693,703,706,709,710,716,721,726,741,743,750,759,760,768,769,771,773,775,780,782,803,804,805,806,807,808,809,810],virtualconstprop:710,virtualenv:773,virtualindex:710,vis:780,visibl:[590,594,597,607,610,621,641,659,695,698,709,711,713,719,724,726,743,756,759,764,777,782],visit:[601,633,635,637,641,642,643,650,652,653,670,676,743,769,770,789],visitbasicblock:743,visitconstrainedfpintrins:591,visitfab:670,visitfunct:784,visitgcroot:676,visiticmpinst:601,visitinstruct:784,visitor:[601,658,670,676,768,804,812],visitsrl:670,visitxor:601,visual:[592,607,610,611,669,671,679,680,698,710,712,723,730,735,741,743,745,754,759,760,780,783,806],viv:710,vk_argument:610,vk_basicblock:610,vla:[671,710],vle:661,vlen:710,vliw:[658,768],vlk:760,vm_cnt:[120,209,314,479,584],vma:[642,654,784],vmaddr:709,vmcnt:[120,209,314,479,584,590],vmcnt_sat:[120,209,314,479,584],vmodulekei:789,vmov:611,vmulp:[631,639],vocabulari:710,voidtyp:743,volatil:[588,590,593,594,607,639,654,672,676,721,730,741,780],volcan:590,volkan:692,volum:[678,743],volumin:743,volunt:[696,702,745,753,757],voluntari:745,von:710,vop1:[587,590],vop2:[587,590],vop3:[586,587,590],vop_dpp:590,vop_sdwa:590,vopc:[587,590],vote:[719,757],vp_fdiv:749,vpaddbyrr_vpadddyrr_vpaddqyrr_vpaddwyrr_vpsubbyrr_vpsubdyrr_vpsubqyrr_vpsubwyrr:631,vpaddqyrr:631,vpbasicblock:750,vpblock:750,vpblockbas:750,vpdef:750,vpic:747,vpinstruct:750,vplan:749,vprecip:750,vprecipebas:750,vpregionblock:750,vprfc:749,vpsubqyrr:631,vpsubusw:775,vptr:[710,760],vptransformst:750,vpuser:750,vpvalu:750,vreg:[607,685,686,688],vscale_rang:[597,710],vscnt:590,vset_lan:596,vsetq_lane_s32:596,vsinstalldir:681,vsp:590,vsplth:711,vsrc0:[0,1,2,3,4,7,8,9],vsrc1:[0,1,2,3,4,7,8,9],vsrc2:[0,2,3,4,8,9],vsrc3:[0,2,3,4],vsrc:[0,2,3,4,8,9],vsx:710,vtabl:[610,611,698,709,710,760,777],vulner:[710,7
51,756,757,760],vzeroupp:631,w30:[710,716],w31:710,w64:605,w7100:590,w8100:590,w9100:590,wai:[585,590,592,593,594,597,601,602,603,605,606,607,609,610,611,612,630,638,639,641,644,645,654,659,660,663,664,665,666,667,668,669,672,673,674,675,676,679,681,682,683,685,686,688,689,693,695,700,701,702,703,704,705,706,708,710,712,714,716,720,721,722,723,724,725,726,727,728,735,736,738,741,742,743,744,745,746,748,750,753,754,756,757,759,760,762,764,766,768,770,771,773,774,775,777,779,780,781,782,783,784,786,788,789,802,803,804,805,806,807,808,809,810,811,812],wait:[120,209,314,479,584,590,609,610,639,696,709,726,753,765,789,803,805],waitcnt:[0,2,3,4,9],waitset:639,waiv:667,wake:745,walk:[590,686,699,709,711,714,721,722,741,743,745,782,786],walkarch:743,wall:[616,723,769,782,811],walltimemark:785,wambigu:627,want:[585,590,593,595,597,603,605,607,608,609,610,611,616,625,639,659,660,662,667,668,670,672,675,677,678,679,681,682,683,686,695,696,698,699,700,701,702,703,704,705,707,708,709,710,712,714,717,721,722,723,724,725,726,727,740,741,742,743,744,745,746,756,757,759,760,761,763,764,765,769,770,771,773,774,780,781,782,783,784,786,788,802,803,804,805,806,808,809,810,811,812],wari:674,warn:[588,590,603,605,611,614,621,627,635,641,653,658,660,665,667,671,679,681,686,699,704,705,710,714,723,726,764,768,769,771,773,776,788,789,790,810,812],warnmissedtransformationspass:776,warpsiz:724,warrant:[609,667,670,766],warranti:[667,782],warren:666,wasincrementallylink:731,wasn:[593,722,807,808,809,810],wast:[715,717,743,759,761,762,808],watch:[590,667,690],waterfal:[696,723],wave32:[588,589],wave:[20,123,212,370,482,590],wave_rol:588,wave_ror:588,wave_shl:[588,590],wave_shr:588,wavefront32:590,wavefront64:590,wavefront:[42,74,86,585,588,590],wavefront_s:590,wavefronts:590,wavefrontsize64:590,waypoint:601,wchar_t:710,wcover:610,weak:[594,597,640,641,642,671,674,709,710,713,722,726,743,808],weak_odr:[597,710],weakanylinkag:743,weaken:[639,641],weaker:710,weakest:[594,607],weakodrlinkag:743,weak
vh:743,wealth:662,web:[609,667,681,723,747,752],webassembl:[607,679,710,748],webkit:[710,748,762],webkit_jscc:[597,710],websit:[606,745],wednesdai:[678,719],week:[609,662,678,702,719,723,748,755,757,766],weekend:[609,719],weekli:[678,719],weight:[671,710,716,751],weird:607,welcom:[608,609,661,667,678,679,681,700,719,726,764,782,787,788,789,790,791,803,804,805,806,807,808,809,810,811,812],well:[585,590,591,593,594,597,601,602,603,605,606,607,608,609,610,612,625,626,639,640,644,657,659,662,663,666,667,670,672,673,674,679,683,686,693,694,695,699,703,705,709,712,713,714,716,719,722,723,724,726,730,731,733,734,739,741,742,743,745,747,748,749,754,755,756,757,759,760,761,764,766,770,771,775,776,778,779,782,783,784,788,803,804,805,807,808,809,810,812],weng:660,wennborg:702,went:[610,805,806],were:[585,590,594,596,607,610,611,616,625,630,638,639,648,649,654,657,659,660,663,664,667,669,674,681,684,686,702,704,705,708,709,710,711,712,721,722,725,726,731,741,743,745,755,759,760,764,766,768,769,771,779,784,789,811],weren:[667,686],werror:694,wg21:760,wget:679,wgp:590,wgp_mode:590,what:[585,588,589,590,593,594,596,597,598,600,602,605,607,610,611,615,616,617,625,636,639,644,657,659,660,661,663,666,667,669,670,678,679,681,683,684,686,687,689,690,695,696,701,702,704,705,709,710,711,714,716,717,719,721,723,724,725,726,727,729,731,735,738,739,741,742,743,744,746,748,750,753,754,759,760,761,763,764,765,768,770,771,774,780,783,785,786,790,802,803,804,805,806,807,808,809,810,811],whatev:[593,605,611,631,659,660,663,669,677,679,699,701,709,710,712,719,721,729,741,742,743,765,781,786],whatsoev:[667,710],when:[82,351,433,546,585,587,588,589,590,591,592,593,594,596,597,600,602,603,605,606,607,608,611,612,614,616,621,624,625,626,630,631,639,640,641,642,643,644,645,648,649,650,653,654,657,658,659,660,662,663,664,665,667,668,669,670,671,675,676,677,678,679,681,683,684,687,688,689,691,695,696,697,698,699,700,701,702,703,704,709,710,711,713,714,715,716,717,719,720,721,722,723,724,725,726,728,730,
731,733,735,738,740,741,742,743,745,746,750,753,754,755,756,757,758,759,760,761,762,763,764,765,766,768,769,770,771,773,774,775,776,777,778,779,780,782,783,784,785,786,788,789,790,802,803,804,805,806,807,808,809,810,811],whenev:[585,592,593,606,610,611,612,659,665,695,707,710,721,743,759,762,766,775,802,810],where:[24,127,217,375,487,585,586,588,589,590,591,594,596,597,598,599,601,603,604,605,607,608,609,610,611,614,616,621,625,627,630,631,638,639,640,641,644,645,657,658,659,660,663,664,665,667,669,670,673,674,675,676,677,679,681,682,683,684,688,689,694,695,698,701,702,703,704,705,706,709,710,712,714,715,716,720,721,722,723,724,725,726,727,728,729,730,731,733,738,739,741,742,743,744,745,750,753,754,755,756,757,759,760,764,766,768,770,771,774,775,776,780,781,782,783,784,786,788,790,802,804,805,806,808,809,810,812],wherea:[590,596,607,620,649,663,669,674,683,685,710,721,725,726,743,750,759],wherebi:[735,745],wherev:[660,710,743,810],whether:[585,588,590,591,593,594,597,598,601,604,605,607,610,617,619,621,627,646,657,660,663,669,670,671,676,677,679,684,685,693,694,700,702,703,704,709,710,712,713,714,722,725,728,735,738,741,742,743,744,745,748,750,754,755,756,757,759,760,762,764,768,769,770,776,777,780,783,785,789,803,805,806,807,808,809,810,811],which:[24,30,34,66,67,84,85,99,127,132,157,158,175,176,188,213,217,223,225,254,255,270,271,289,349,352,371,375,380,384,418,419,435,436,450,483,487,493,497,530,531,548,549,563,585,586,587,588,589,590,594,595,596,597,599,601,602,603,605,606,607,608,609,610,611,612,616,617,619,621,623,624,625,626,627,628,631,635,637,639,640,641,644,645,648,649,652,657,658,659,660,662,663,664,665,667,669,670,671,672,674,675,676,679,680,681,682,683,684,688,689,690,691,693,694,695,696,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,721,722,723,724,725,726,728,731,733,734,735,736,738,739,741,742,743,744,745,746,747,748,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,768,769,770,771,773,774,775,77
6,777,778,779,780,781,782,783,784,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],whichev:[742,759],whilst:[640,648,649,654],whip:807,whirlwind:803,whisker:760,white:661,whitelist:625,whitelist_fun:625,whitepap:[720,783],whitespac:[611,621,635,637,641,667,770,802,803,804,805,806,807,808,809,810],whitespaceoranycom:770,whitespaceorccom:770,whizbang:610,who:[600,603,605,606,664,667,668,678,683,685,704,709,710,719,722,723,726,740,742,745,755,757,765,766,770,778,780,788,789,808,811,812],whoa:805,whole:[585,588,590,592,607,609,621,645,667,669,670,674,676,693,701,702,706,710,712,714,716,722,726,741,743,754,756,763,766,773,774,775,777,782,788,789,803,805,806],whom:[722,739],whose:[585,588,590,593,597,601,607,610,616,621,630,639,640,641,644,645,664,666,709,710,736,738,743,750,754,757,759,768,769,770,780,785,789,803,807,808],why:[585,590,593,599,600,608,609,610,639,658,660,667,675,679,694,703,710,717,721,722,723,736,742,743,747,757,759,760,761,764,768,782,786,803,804,806,812],wibbl:652,wide:[589,590,593,594,597,603,606,607,608,642,667,676,677,679,697,710,724,729,738,739,743,750,752,757,759,770,777,787,805,806,811],wideaddr:710,widecharact:738,widen:[607,688,750,779,782],widenable_cond:710,widenable_condit:710,widenscalar:688,widenscalarfor:688,widenscalarif:688,widenscalartonextpow2:688,wider:[590,594,670,677,684,688],widest:667,width:[589,594,596,631,639,642,660,676,684,731,741,743,748,776,779,811],width_fiv:684,width_thre:684,wiki:[605,679,701,712,747,748],wikipedia:[710,714,806],wild:[659,807],wildcard:[625,641,653],wili:677,willing:[609,723,811],willreturn:[597,710],win32:[607,679,698,736,765,775],win:[593,610,658,743,760],wincomplet:627,windbg:759,window:[590,603,605,607,610,616,621,627,644,659,660,663,664,672,678,679,680,681,696,708,710,726,736,739,743,745,759,765,775,778,782,804,805,806,807],wineh:663,winfinit:723,winrt:730,winrtsmartpoint:730,wip:590,wire:672,wireshark:712,wise:[671,711,770],wish:[605,609,616,621,639,659,660,670,674,677,679,689,7
00,704,710,722,726,739,743,755,760,764,780,812],wit:[590,755],with_assert:605,withcolor:610,withentryv:638,withhold:759,within:[585,588,590,593,596,597,601,606,607,608,610,611,616,630,639,641,642,652,653,657,660,663,664,667,669,671,676,677,679,688,693,703,705,709,710,711,714,716,722,723,724,725,726,728,729,731,735,738,739,741,743,744,745,748,750,754,755,756,757,759,760,762,764,766,768,770,773,774,775,777,782,785,786,788,805,806,807,808,809,810],withmoduledo:[726,789,790,791],without:[585,588,589,590,594,596,597,598,601,603,605,606,607,609,610,611,612,616,619,621,624,630,635,639,641,642,645,653,654,659,660,663,664,665,667,669,671,674,676,677,679,683,684,688,689,694,705,706,709,710,711,712,713,714,717,718,721,722,726,728,731,736,738,739,740,741,742,743,744,745,746,748,754,755,759,760,761,762,764,765,766,768,769,770,771,775,776,779,780,782,803,805,806,807,808,809,810],wlandri:747,wno:699,woff2:712,wojciech:741,wolf:666,won:[605,607,610,660,667,677,679,686,700,701,706,710,723,727,745,753,756,759,760,766,774,775,804,805,810],wonder:[610,667,672,677,703,760,805],wontfix:600,word16:590,word32:590,word64:590,word:[588,590,594,598,607,608,609,610,664,666,667,669,671,672,676,687,689,703,710,712,721,723,727,733,743,748,755,756,759,760,770,780,785,788,804],word_0:[588,590,733],word_1:[588,590,733],word_n:733,wordsiz:676,work:[585,591,592,593,594,598,601,602,603,605,606,607,608,610,611,612,616,617,619,623,625,631,633,635,638,641,642,644,645,653,659,660,661,663,664,668,669,670,672,675,676,678,679,681,682,685,689,694,695,696,698,699,700,701,702,703,704,705,708,709,710,711,713,714,715,716,719,720,721,723,725,727,736,741,742,743,744,745,746,748,749,755,756,757,758,764,765,766,770,771,773,774,775,777,778,780,782,784,786,789,790,802,803,804,805,806,807,808,809,810,811,812],work_group_size_hint:590,workabl:743,workaround:[590,639,682,746],worker:[616,667,696,712,726],workflow:[592,603,679,696,709,742,757,783],workgroup:590,workgroup_size_hint:590,workgroupsizehint:590,workitem:590,work
list:[722,741,743],workload:[590,667,681],worksform:600,workshop:719,workspac:[665,679],world:[606,608,664,667,676,679,695,709,710,717,719,723,742,757,759,761,811],worri:[659,710,723,742,803,810],wors:[596,607,705,710],worsen:667,worst:[693,743,758,759,764,811],worth:[585,601,602,603,610,674,682,688,697,703,721,743,760,783],worthwhil:743,would:[585,589,590,592,593,594,596,597,598,600,601,602,604,605,606,607,609,610,611,614,616,617,623,641,644,653,659,660,663,664,666,667,668,669,671,672,673,674,675,676,677,681,683,688,693,696,699,702,703,704,705,709,710,711,712,714,716,717,721,722,723,725,726,728,729,730,731,735,738,739,741,742,743,744,745,747,748,755,756,757,759,760,761,762,764,765,768,769,770,771,775,776,777,779,781,782,783,784,785,786,788,790,803,804,805,806,810,811],wouldn:[610,705,714,808,810],wpdre:710,wpdresolut:710,wrap:[585,597,606,610,631,659,667,674,677,679,683,708,710,715,725,726,743,759,760,764,765,786,788,789,803,804,812],wrapcolumn:786,wrapper:[610,638,660,709,715,722,726,728,741,743,765,780],wrf_:747,wrf_r:747,wrinkl:708,writabl:[589,640,668,671,709,731,759],write:[30,223,380,493,585,590,594,601,611,613,618,621,622,623,625,627,629,631,632,636,641,644,645,652,653,654,655,656,657,658,660,662,668,669,674,678,679,681,682,684,685,689,708,709,710,712,716,717,722,724,725,726,731,735,736,741,743,748,750,751,757,759,760,761,762,765,768,769,770,771,778,783,784,786,802,805,806,809,810,811,813],write_escap:782,write_onli:590,writeabl:[20,123,212,370,482],writealia:722,writealu:[631,770],writeattribut:768,writeback:590,writeonli:[590,597,710],writer:[594,597,607,628,658,659,670,672,676,680,710,722,780,782],writes_depth:590,writes_uav:590,writesetcc:631,writethunk:722,writethunkoralia:722,writetypet:670,writev:780,written:[585,590,592,593,606,607,610,623,625,627,629,631,632,639,641,645,662,667,668,671,673,679,685,704,710,712,714,716,722,723,724,731,735,736,741,743,745,746,747,748,757,759,760,761,764,765,768,770,773,775,779,780,785,786,806,807,810,811,812],wrong:[59
5,600,608,610,660,667,672,704,756,765,805,810],wrote:[670,677,710,809,810],wsl:681,wswitch:610,www:[595,605,702,710,745,747,748],wzr:710,x00:[754,814],x00argpath:754,x00function:754,x00kei:754,x00path:754,x00remark:754,x00valu:754,x01:814,x03:814,x04:814,x08:814,x0abar:712,x0c:814,x11:743,x15:[671,710],x16:[587,671],x20:716,x21:716,x24:814,x30:710,x32:611,x44:814,x64:[611,679,681,696],x83:814,x86:[592,594,604,605,611,617,619,631,639,641,642,646,658,663,669,670,674,679,681,683,697,709,710,711,716,731,748,758,764,768,770,771,775,777,779,780,784,786,809],x86_64:[604,607,611,616,631,639,654,665,668,674,675,695,697,698,699,700,709,710,716,753,759,762,764,775,783,786,809],x86_amx:597,x86_fastcal:607,x86_fastcallcc:597,x86_fp80:[597,710,722],x86_mmx:597,x86_ssecal:780,x86_stdcall:607,x86_stdcallcc:597,x86_thiscal:607,x86add_flag:770,x86call:770,x86callingconv:780,x86codeemitt:780,x86dagtodagisel:784,x86framelow:607,x86genregisterinfo:[607,780],x86ii:780,x86inst:[770,771],x86instrinfo:780,x86instrmmx:780,x86instrss:780,x86iseldagtodag:784,x86isellow:784,x86registerinfo:[607,780],x86reloc:780,x86retflag:770,x86subtarget:784,x86targetasminfo:780,x86targetlow:784,x86targetmachin:[607,784],x87:[683,710],x8b:814,x_was_zext:684,xab:712,xadd:594,xarch:679,xarg:745,xbase:769,xc3:814,xc4:814,xc7:814,xchg:[594,710],xclang:[698,704,705],xcode:[605,665,679],xcodebuild:605,xcoff:710,xcore:[607,679,710,748],xctoolchain:605,xdata:731,xdemangl:625,xdigit:611,xe8:814,xec:814,xemac:679,xentri:769,xf7:712,xf8:712,xfail:[616,775],xgmi:590,xinmin:750,xlc:679,xlen:710,xmax:807,xme:644,xmi:644,xmin:807,xml:597,xmm0:[611,639,762,770,771,775],xmm10:771,xmm11:771,xmm12:771,xmm13:771,xmm14:771,xmm15:771,xmm1:[631,639,770,771],xmm2:[631,639,710,770,771],xmm3:[631,639,710,770,771],xmm4:[639,770,771],xmm5:[770,771],xmm6:[770,771],xmm7:[770,771],xmm8:771,xmm9:771,xmm:[611,710,780],xmo:661,xnack:[589,590],xnack_mask:[20,224,226,227,229,231,234,235,236,240,241,243,244,245,246,248,249,250,251,252,253,257,25
8,259,260,261,262,263,264,265,317,318,322,323,328,329,330,331,332,341,342,343,344,345,347,348,381,382,383,385,386,387,389,390,392,394,397,398,399,403,404,405,406,407,410,411,412,413,421,422,423,424,425,426,427,428,429,494,495,496,498,499,500,502,503,505,507,510,511,512,516,517,518,519,520,521,522,523,524,526,527,528,529,533,534,535,536,537,538,539,540,541,590],xnack_mask_hi:589,xnack_mask_lo:589,xnor:780,xnorrr:780,xor16rr:607,xor32rr:[607,716],xor64rr:607,xor8rr:607,xor:[588,589,607,663,724,741,743,770,779,780],xorl:760,xorri:780,xorrr:780,xpass:616,xplus1:770,xrai:773,xray_always_instru:[783,784],xray_basic_opt:783,xray_fdr_opt:783,xray_instr_map:[783,784],xray_log_fin:783,xray_log_flush:783,xray_log_interfac:783,xray_logfile_bas:783,xray_mod:[783,784],xray_never_instru:783,xray_opt:[783,784],xraylogimpl:783,xs1:661,xs2:661,xstep:807,xtemp:594,xue:750,xuetian:660,xvjf:679,xword:780,xxx:[610,611,768,769,773,774,780],xxxasmprint:780,xxxbegin:743,xxxbranchselector:780,xxxcallingconv:780,xxxcodeemitt:780,xxxend:743,xxxgenasmwrit:780,xxxgencallingconv:780,xxxgencodeemitt:780,xxxgendagisel:780,xxxgeninstrinfo:780,xxxgenregisterinfo:780,xxxinstrdescriptor:780,xxxinstrformat:780,xxxinstrinfo:[707,780],xxxiseldagtodag:780,xxxisellow:780,xxxiter:743,xxxjitinfo:780,xxxkind:703,xxxlayer:726,xxxregisterinfo:780,xxxreloc:780,xxxschedul:780,xxxsubtarget:780,xxxtargetasminfo:780,xxxtargetlow:780,xxxtargetmachin:780,xxxtrait:786,xxxxxx:[704,783],xyz:[587,617,619,679,707],xyzw:587,xyzzi:610,xzr:710,y_was_zext:684,yaml:[590,614,631,644,716,751,783,784,814],yaxxz:669,year:[602,667,746,757],yes:[717,722,758,761],yet:[585,594,607,609,610,611,624,645,659,663,667,669,676,684,696,699,708,709,710,715,722,723,726,741,743,747,757,760,764,766,780,782,783,790,804,806,807],yield:[597,616,659,663,677,710,741,743,764,774,784,789],yin:786,ymax:807,ymin:807,ymm0:762,ymm:710,yosefk:610,you:[592,593,594,595,597,600,601,603,605,606,607,608,609,610,611,612,615,616,619,621,625,629,630,631,639,644,654,65
7,659,660,662,664,665,667,668,669,670,672,674,675,676,677,678,679,680,681,682,683,686,687,688,689,694,695,696,697,698,699,700,701,702,703,704,705,706,709,710,712,714,716,719,721,722,723,724,725,726,727,728,731,735,739,740,741,742,743,744,745,748,752,753,756,757,759,760,761,764,765,768,769,770,771,773,774,775,778,780,781,782,783,784,786,787,788,789,790,791,802,803,804,805,806,807,808,809,810,811,812],young:743,younger:639,your:[592,593,594,595,601,603,607,608,609,610,611,614,621,625,631,639,644,659,660,662,667,668,669,670,672,674,675,676,677,678,679,681,682,687,694,695,697,699,700,701,702,704,705,709,710,711,712,714,717,720,721,724,726,727,735,740,742,744,745,751,753,754,755,756,757,761,764,767,769,770,771,772,773,774,775,778,780,781,784,786,787,788,804,805,806,807,808,809,810,811,812],youralloc:695,yourregex:611,yourself:[600,605,609,662,667,670,679,710,723,742,743],yout:786,youtu:609,yplus1:770,ystep:807,yypvr:761,z_was_sext:684,zachari:748,zak:750,zchf:701,zero:[585,586,588,589,590,596,597,607,610,611,612,616,617,619,621,622,623,624,625,626,628,629,632,633,635,636,639,640,641,642,643,645,648,649,650,652,653,654,657,658,659,660,663,664,671,674,676,677,684,687,689,709,710,711,712,715,716,724,726,731,741,743,750,756,759,760,762,764,770,775,777,779,780,785,803,806,811],zero_cont:756,zeroargfp:770,zerodirect:780,zeroext:[597,710],zeroiniti:710,zeroormor:659,zeroth:770,zext:[705,706],zhou:750,zip:[679,681],zipp:745,zlib1g:701,zlib:[605,641,664,679],zone:[710,760],zoo:721,zorg:696},titles:["Syntax of Core GFX10 Instructions","Syntax of gfx1011 and gfx1012 Instructions","Syntax of GFX7 Instructions","Syntax of GFX8 Instructions","Syntax of Core GFX9 Instructions","Syntax of gfx900, gfx902 and gfx909 Instructions","Syntax of gfx904 Instructions","Syntax of gfx906 Instructions","Syntax of gfx908 Instructions","Syntax of gfx90a Instructions","src","src","src","src","Type Deviation","vdst","vsrc","attr","dst","FX 
Operand","hwreg","imm16","imm16","imm16","label","m","m","msg","opt","param","probe","saddr","saddr","sbase","sbase","sbase","sdata","sdata","sdata","sdata","sdata","sdata","sdst","sdst","sdst","sdst","sdst","sdst","sdst","sdst","sdst","simm32","simm32","simm32","soffset","soffset","soffset","src","src","src","src","src","src","src","src","src","srsrc","srsrc","ssamp","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","tgt","Type Deviation","vaddr","vaddr","vaddr","vaddr","vaddr","vaddr","vcc","vdata","vdata0","vdata0","vdata1","vdata1","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vsrc","vsrc","vsrc","vsrc","waitcnt","attr","dst","hwreg","imm16","imm16","imm16","label","m","msg","opt","param","sbase","sbase","sdst","sdst","sdst","sdst","sdst","sdst","sdst","sdst","simm32","simm32","soffset","soffset","src","src","src","src","src","src","src","src","src","src","src","srsrc","srsrc","ssamp","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","tgt","Type Deviation","vaddr","vaddr","vaddr","vaddr","vcc","vdata","vdata0","vdata0","vdata1","vdata1","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vsrc","vsrc","vsrc","vsrc","waitcnt","attr","dst","hwreg","imask","imm16","imm16","imm16","label","m","m","msg","opt","param","probe","sbase","sbase","sdata","sdata","sdata","sdst","sdst","sdst","sdst","sdst","sdst","sdst","sdst","simm32","simm32","simm32","soffset","soffset","soffset","src","src","src","src","src","src","src","src","src","src","src","srsrc","srsrc","ssamp","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","tgt","Type 
Deviation","vaddr","vaddr","vaddr","vaddr","vcc","vdata","vdata0","vdata0","vdata1","vdata1","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vsrc","vsrc","vsrc","vsrc","waitcnt","FX Operand","m","src","src","vdst","FX Operand","m","src","src","vdst","FX Operand","m","m","src","src","src","src","src","Type Deviation","vdst","vsrc","dst","FX Operand","m","m","opt","saddr","soffset","src","src","src","src","src","src","srsrc","Type Deviation","vaddr","vaddr","vdata","vdata","vdst","vdst","vdst","vdst","vdst","vdst","vsrc","vsrc","vsrc","vsrc","vsrc","vsrc","vsrc","dst","FX Operand","hwreg","imask","imm16","imm16","imm16","label","m","m","msg","opt","probe","saddr","saddr","sbase","sbase","sbase","sdata","sdata","sdata","sdata","sdata","sdata","sdst","sdst","sdst","sdst","sdst","sdst","sdst","sdst","simm32","simm32","simm32","soffset","soffset","soffset","src","src","src","src","src","src","src","src","src","src","src","src","srsrc","srsrc","ssamp","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","Type 
Deviation","vaddr","vaddr","vaddr","vaddr","vaddr","vaddr","vcc","vdata","vdata0","vdata0","vdata1","vdata1","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vsrc","vsrc","vsrc","vsrc","vsrc","vsrc","waitcnt","attr","dst","hwreg","imask","imm16","imm16","imm16","label","m","m","msg","opt","param","probe","saddr","saddr","sbase","sbase","sbase","sdata","sdata","sdata","sdata","sdata","sdata","sdst","sdst","sdst","sdst","sdst","sdst","sdst","sdst","simm32","simm32","simm32","soffset","soffset","soffset","src","src","src","src","src","src","src","src","src","src","src","srsrc","srsrc","ssamp","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","ssrc","tgt","Type Deviation","vaddr","vaddr","vaddr","vaddr","vaddr","vaddr","vcc","vdata","vdata0","vdata0","vdata1","vdata1","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdata","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vdst","vsrc","vsrc","vsrc","vsrc","waitcnt","DWARF Extensions For Heterogeneous Debugging","AMDGPU Instructions Notation","AMDGPU Instruction Syntax","Syntax of AMDGPU Instruction Modifiers","Syntax of AMDGPU Instruction Operands","User Guide for AMDGPU Backend","How To Add A Constrained Floating-Point Intrinsic","Advanced Build Configurations","LLVM Alias Analysis Infrastructure","LLVM Atomic Instructions and Concurrency Guide","Benchmarking tips","Using ARM NEON instructions in big endian mode","LLVM Bitcode File Format","LLVM Block Frequency Terminology","LLVM Branch Weight Metadata","LLVM Bug Life Cycle","LLVM bugpoint tool: design and usage","Bugpoint Redesign","Building a Distribution of LLVM","Control Flow Verification Tool Design Document","Building LLVM with CMake","CMake Primer","The LLVM Target-Independent Code Generator","LLVM Community Code of 
Conduct","LLVM Code-Review Policy and Practices","LLVM Coding Standards","FileCheck - Flexible pattern matching file verifier","bugpoint - automatic test case reduction tool","clang-tblgen - Description to C++ Code for Clang","dsymutil - manipulate archived DWARF debug symbol files","LLVM Command Guide","lit - LLVM Integrated Tester","llc - LLVM static compiler","lldb-tblgen - Description to C++ Code for LLDB","lli - directly execute programs from LLVM bitcode","llvm-addr2line - a drop-in replacement for addr2line","llvm-ar - LLVM archiver","llvm-as - LLVM assembler","llvm-bcanalyzer - LLVM bitcode analyzer","llvm-config - Print LLVM compilation options","llvm-cov - emit coverage information","llvm-cxxfilt - LLVM symbol name demangler","llvm-cxxmap - Mangled name remapping tool","llvm-diff - LLVM structural \u2018diff\u2019","llvm-dis - LLVM disassembler","llvm-dwarfdump - dump and verify DWARF debug information","llvm-exegesis - LLVM Machine Instruction Benchmark","llvm-extract - extract a function from an LLVM module","llvm-install-name-tool - LLVM tool for manipulating install-names and rpaths","llvm-lib - LLVM lib.exe compatible library tool","llvm-libtool-darwin - LLVM tool for creating libraries for Darwin","llvm-link - LLVM bitcode linker","llvm-lipo - LLVM tool for manipulating universal binaries","llvm-locstats - calculate statistics on DWARF debug location","llvm-mca - LLVM Machine Code Analyzer","llvm-nm - list LLVM bitcode and object file\u2019s symbol table","llvm-objcopy - object copying and editing tool","llvm-objdump - LLVM\u2019s object file dumper","llvm-otool - Mach-O dumping tool","llvm-pdbutil - PDB File forensics and diagnostics","llvm-profdata - Profile data tool","llvm-profgen - LLVM SPGO profile generation tool","llvm-ranlib - generates an archive index","llvm-readelf - GNU-style LLVM Object Reader","llvm-readobj - LLVM Object Reader","llvm-size - print size information","llvm-stress - generate random .ll files","llvm-strings - print 
strings","llvm-strip - object stripping tool","llvm-symbolizer - convert addresses into source code locations","llvm-tblgen - Target Description to C++ Code for LLVM","mlir-tblgen - Description to C++ Code for MLIR","opt - LLVM optimizer","tblgen - Description to C++ Code","CommandLine 2.0 Library Manual","Compiling CUDA with clang","Architecture & Platform Information for Compiler Writers","Contributing to LLVM","Coroutines in LLVM","LLVM Code Coverage Mapping Format","Debugging JIT-ed Code","Dependence Graphs in LLVM","LLVM Developer Policy","A guide to Dockerfiles for building LLVM","Exception Handling in LLVM","Extending LLVM: Adding instructions, intrinsics, types, etc.","LLVM Extensions","Frequently Asked Questions (FAQ)","FaultMaps and implicit checks","Performance Tips for Frontend Authors","Fuzzing LLVM libraries and tools","Garbage Collection with LLVM","The Often Misunderstood GEP Instruction","Getting Involved","Getting Started with the LLVM System","Getting Started/Tutorials","Getting Started with the LLVM System using Microsoft Visual Studio","Bisecting LLVM code","Generic Machine IR","Generic Opcodes","IRTranslator","InstructionSelect","Known Bits Analysis","Legalizer","Core Pipeline","Porting GlobalISel to A New Target","RegBankSelect","Resources","Global Instruction Selection","The LLVM gold plugin","GWP-ASan","How To Add Your Build Configuration To LLVM Buildbot Infrastructure","How To Build On ARM","How to build Windows Itanium applications.","How To Build Clang and LLVM with Profile-Guided Optimizations","How to Cross Compile Compiler-rt Builtins For Arm","How To Cross-Compile Clang/LLVM using Clang/LLVM","How To Release LLVM To The Public","How to set up LLVM-style RTTI for your class hierarchy","How to submit an LLVM bug report","How to Update Debug Info: A Guide for LLVM Pass Authors","How To Use Attributes","How To Use Instruction Mappings","Design and Usage of the InAlloca Attribute","JITLink and ORC\u2019s ObjectLinkingLayer","LLVM 
Language Reference Manual","The LLVM Lexicon","libFuzzer \u2013 a library for coverage-guided fuzz testing.","LLVM Link Time Optimization: Design and Implementation","LLVM Loop Terminology (and Canonical Forms)","MCJIT Design and Implementation","Machine IR (MIR) Format Reference Manual","Markdown Quickstart Template","LLVM\u2019s Optional Rich Disassembly Output","How to start LLVM Social in your town","MemTagSanitizer","MemorySSA","MergeFunctions pass, how it works","MyFirstTypoFix","User Guide for NVPTX Back-end","Using the New Pass Manager","ORC Design and Implementation","Opaque Pointers","Using -opt-bisect-limit to debug optimization errors","CodeView Symbol Records","CodeView Type Records","The PDB DBI (Debug Info) Stream","The PDB Global Symbol Stream","The PDB Serialized Hash Table Format","The Module Information Stream","The MSF File Format","The PDB Info Stream (aka the PDB Stream)","The PDB Public Symbol Stream","The PDB TPI and IPI Streams","The PDB File Format","Advice on Packaging LLVM","LLVM\u2019s Analysis and Transform Passes","Code Reviews with Phabricator","LLVM Programmer\u2019s Manual","Creating an LLVM Project","Moving LLVM Projects to GitHub","\u201cllvm-libc\u201d C Standard Library","Test-Suite Extensions","Variable Names Plan","Vector Predication Roadmap","Vectorization Plan","Reference","LLVM 14.0.0 Release Notes","How To Validate a New Release","Remarks","Reporting Guide","Scudo Hardened Allocator","LLVM Security Group","Segmented Stacks in LLVM","Source Level Debugging with LLVM","Speculative Load Hardening","Sphinx Quickstart Template","Stack maps and patch points in LLVM","Stack Safety Analysis","Garbage Collection Safepoints in LLVM","Support Library","LLVM Community Support Policy","System Library","TableGen BackEnds","1 TableGen Backend Developer\u2019s Guide","1 TableGen Programmer\u2019s Reference","TableGen Overview","TableGen Fundamentals","test-suite Guide","test-suite Makefile Guide (deprecated)","LLVM Testing Infrastructure 
Guide","Code Transformation Metadata","Type Metadata","User Guides","Auto-Vectorization in LLVM","Writing an LLVM Backend","Writing an LLVM Pass","Writing an LLVM Pass","XRay Instrumentation","Debugging with XRay","XRay Flight Data Recorder Trace Format","YAML I/O","About","1. Building a JIT: Starting out with KaleidoscopeJIT","2. Building a JIT: Adding Optimizations \u2013 An introduction to ORC Layers","3. Building a JIT: Per-function Lazy Compilation","4. Building a JIT: Extreme Laziness - Using LazyReexports to JIT from ASTs","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","Kaleidoscope Tutorial","1. Kaleidoscope: Kaleidoscope Introduction and the Lexer","2. Kaleidoscope: Implementing a Parser and AST","3. Kaleidoscope: Code generation to LLVM IR","4. Kaleidoscope: Adding JIT and Optimizer Support","5. Kaleidoscope: Extending the Language: Control Flow","6. Kaleidoscope: Extending the Language: User-defined Operators","7. Kaleidoscope: Extending the Language: Mutable Variables","8. Kaleidoscope: Compiling to Object Code","9. Kaleidoscope: Adding Debug Information","10. 
Kaleidoscope: Conclusion and other useful LLVM tidbits","My First Language Frontend with LLVM Tutorial","LLVM Tutorial: Table of Contents","yaml2obj"],titleterms:{"000":606,"0x0006":729,"0x000a":730,"0x000e":730,"0x0014":730,"0x1001":730,"0x1002":730,"0x1008":730,"0x1009":730,"0x1012":729,"0x1101":729,"0x1102":729,"0x1103":729,"0x1105":729,"0x1106":729,"0x1107":729,"0x1108":729,"0x110b":729,"0x110c":729,"0x110e":729,"0x110f":729,"0x1110":729,"0x1111":729,"0x1112":729,"0x1113":729,"0x1116":729,"0x111c":729,"0x111d":729,"0x1124":729,"0x1125":729,"0x1127":729,"0x112c":729,"0x112d":729,"0x1136":729,"0x1137":729,"0x1138":729,"0x1139":729,"0x113a":729,"0x113c":729,"0x113d":729,"0x113e":729,"0x113f":729,"0x1140":729,"0x1141":729,"0x1142":729,"0x1143":729,"0x1144":729,"0x1145":729,"0x1146":729,"0x1147":729,"0x114c":729,"0x114d":729,"0x114e":729,"0x114f":729,"0x1153":729,"0x1155":729,"0x1156":729,"0x115a":729,"0x115b":729,"0x115e":729,"0x1167":729,"0x1168":729,"0x1201":730,"0x1203":730,"0x1205":730,"0x1206":730,"0x1400":730,"0x1401":730,"0x1402":730,"0x1404":730,"0x1409":730,"0x1502":730,"0x1503":730,"0x1504":730,"0x1505":730,"0x1506":730,"0x1507":730,"0x1509":730,"0x150d":730,"0x150e":730,"0x150f":730,"0x1510":730,"0x1511":730,"0x1515":730,"0x1519":730,"0x151a":730,"0x151d":730,"0x1601":730,"0x1602":730,"0x1603":730,"0x1604":730,"0x1605":730,"0x1606":730,"0x1607":730,"0xf0":730,"2017":747,"abstract":[597,674,676,710,747,750,764,765,770,803],"boolean":659,"break":[741,743,760],"byte":644,"case":[602,612,667,726,783],"catch":669,"class":[585,590,593,607,610,659,660,703,707,710,743,769,770,780,782],"const":722,"default":[600,610,721,725,786],"export":625,"final":[663,702,715],"float":[589,590,591,619,671,684,700,710],"function":[590,599,606,607,610,632,659,663,664,670,672,685,710,716,722,724,741,743,759,760,765,779,783,785,790,804,810],"import":[686,714,743,745],"long":748,"new":[593,602,667,669,670,690,725,741,743,746,753,757,759,760,761,769,775,782,808],"null":[589,676,677],
"public":[660,702,729,737,743],"return":[610,663,743,760],"static":[610,617,741],"switch":[599,610,663,710],"throw":[669,765],"true":770,"try":[669,700,710],"var":710,"void":[710,722],"while":[743,760],AND:[641,642,649,653],Adding:[659,667,670,674,725,728,743,789,805,810],And:776,Are:609,Being:609,For:[585,674,679,688,700,777],GCs:[676,764],IDs:597,NAS:747,NOT:[611,741],Near:726,Not:[610,745],TLS:760,The:[585,593,607,610,611,659,673,674,676,677,679,694,700,702,703,705,709,710,711,721,723,724,727,731,732,733,734,735,736,737,738,739,743,745,750,754,760,769,770,771,779,780,782,784,802,803],Then:806,USE:741,USING:639,Ups:678,Use:[609,610,674,695,706,707,710,721,726,743,760,765],Used:[605,609,741],Useful:606,Using:[593,596,607,660,672,676,679,699,701,705,712,725,728,743,752,770,782,783,791],With:723,__atomic_:594,__device__:660,__host__:660,__nvvm_reflect:724,__sync_:594,_global__i_a:672,a16:588,aapc:596,aarch64:[661,700,752],abandon:742,abbrevi:597,abi:[590,607,661,669,698,743,746,760],abid:588,abl:722,about:[662,672,745,787],abs:[588,710],absolut:[589,659,726],absolute_symbol:710,acceler:[585,590,759],accept:[609,723,757],access:[585,590,607,667,710,722,723,742,745,757],accumul:760,accur:710,achiev:[600,743],acknowledg:609,acquir:594,acquirereleas:594,acronym:748,across:682,action:688,activ:[600,710],acycl:770,adc:[593,741],add:[591,677,696,710,726],addescapingus:593,adding:659,addit:[590,616,751,752,770,778],addpreserv:782,addr2lin:620,addr64:588,addr:[710,759],addrequir:782,addrequiredtransit:782,address:[585,590,607,654,671,677,710,715,724,760,777],addressofreturnaddress:710,addrspacecast:710,adjust:[710,808],administr:702,adt:743,advanc:[592,664,712,743,813],advantag:[666,748],advertis:719,advic:[601,740],advisori:755,afl:712,after:[609,610,695,713,745,755,760],again:[723,760],aggreg:[674,685,710,741],aggress:741,ahead:810,aid:743,aim:609,aka:736,algorithm:[607,660,709,747],alia:[593,607,659,677,710,741,779],alias:[607,659,674,710,722,726],aliasanalysi:593,aliasset
track:593,align:[596,597,674,735],all:[609,672,741,745,809],alloc:[607,663,695,743,756,758,759],alloca:[674,710,758],allow:659,along:[722,764],also:[610,612,614,616,617,619,620,622,623,626,629,630,632,633,635,638,640,641,642,643,647,648,649,653,654],altern:[659,681,700,713,760,764],alwai:[677,741],always_inlin:741,ambigu:776,amd_kernel_code_t:590,amdgcn:590,amdgcn_target:590,amdgpu:[586,587,588,589,590,607,661,752],amdgpu_hsa_kernel:590,amdgpu_metadata:590,amdhsa:590,amdhsa_kernel:590,amdpal:590,analys:725,analysi:[593,602,607,631,639,677,687,725,741,763,770,775,779,782,783],analysisusag:782,analyz:[623,639,773],anchor:610,ani:712,annot:[710,718],announc:702,anonym:[610,741],anoth:[743,746],api:[590,607,667,672,686,688,691,718,721,726,743,745,751,752,754,760,788],appeal:755,append:590,appendix:770,appl:592,applic:[698,747,783],approach:[713,760],aql:590,arc:710,arch:590,architectur:[590,661,700,762,764],archiv:[614,621,647,679],area:[663,710],arg:710,argpromot:[593,741],argument:[590,606,659,663,684,710,741,743],arithmet:[585,607,677,710],arm64:671,arm:[596,661,671,697,700,701,752],armneon:768,armneonsema:768,armneontest:768,armv6:700,armv7:700,arrai:[664,671,677,710,743],arrayref:743,articl:761,asan:695,ashr:710,ask:672,asm:[607,669,710],asmmatch:768,asmwrit:768,assembl:[590,596,607,622,671,675,676,698,710,780],assert:[610,770],assign:[741,808],associ:[710,722],assum:710,ast:[791,803,806],async:[663,710],atom:[594,710,741],atomicrmw:710,attach:710,attack:760,attent:663,attr:[17,121,210,480,741],attrbuild:706,attrdoc:768,attribut:[585,590,659,660,667,706,708,710,716,741,759,783,784],attributelist:706,audienc:780,author:[674,705,761],auto:[610,779],automat:[601,612,705,710,760],autoreleas:710,autoreleasepoolpop:710,autoreleasepoolpush:710,autoreleasereturnvalu:710,autotool:694,avail:[593,675,676,709],avoid:[610,663,674],avr:752,awar:703,back:[639,672,724],backend:[590,607,677,704,709,752,768,769,771,780],background:[602,604,665,669,703,759],backward:667,bake:764,bang
:[748,770],bank:683,bank_mask:588,barrier0:724,barrier:[676,724],base:[607,610,660,672,677,703,710,741,743,764,770],basic:[593,597,605,615,645,668,671,674,682,703,709,710,716,722,741,743,745,771,780,781,782,783,788,803],basicblock:[722,743],basiccg:741,bcanalyz:623,befor:[610,745],begin:[663,710],begincatch:669,behavior:593,behaviour:[639,671],behind:759,benchmark:[595,631,699,747,773],berkelei:607,best:674,between:[603,660,677,713,743,782],bewar:610,bia:598,big:[596,748],bigger:662,binari:[589,637,641,672,702,710,783,803,807],bind:[679,752],binutil:615,bisect:[679,682,728,745],bit:[585,590,597,659,687,710,733,743,760],bitcast:710,bitcod:[597,619,623,636,640,672,713],bitconvert:596,bitinit:769,bitrevers:710,bitsinit:769,bitsrecti:769,bitstream:[597,754],bitvalu:786,bitvector:743,bitwis:[684,710],blame:748,blgp:588,blitz:747,block:[597,598,609,610,611,639,671,710,716,735,741,743,760,761,786],blockinfo:597,bodi:[610,741,770],bootstrap:592,bot:746,both:[679,729],bottleneck:639,bound:[677,760],bound_ctrl:588,boundari:735,bpf_ab:607,bpf_ind:607,brace:610,branch:[598,599,702,741,745,760,780],branch_weight:710,branchinst:599,breakpoint:782,bridg:745,bswap:710,buffer:[590,695],bug:[600,628,633,635,637,640,641,642,643,650,652,653,662,675,703,704,753,764],bugpoint:[601,602,612,741],build:[591,592,603,605,668,672,675,679,681,694,696,697,698,699,700,701,702,712,721,723,742,743,744,745,746,752,778,781,782,784,788,789,790,791,813],buildbot:696,builder:696,buildmast:696,built:[599,606,607,676,770,786],builtin:[659,700],bundl:[590,607,710,716],bypass:760,c99:671,cach:[605,699,700],calcul:638,call:[585,590,593,607,610,671,672,678,685,708,710,741,743,760,779,780],callabl:743,callargu:785,callback:710,callbr:710,calle:[708,710],callgraph:[741,782],callgraphsccpass:782,callingconv:768,callinst:599,callsit:[669,741,743],can:[609,672,674,677,729,760],candid:702,canon:714,canonic:[710,741],cantfail:743,captur:710,cast:[677,743],catchpad:710,catchret:710,catchswitch:710,categori:[659,729,7
30],caus:602,cbsz:588,ceil:710,cfa:585,cfg:[599,741],cfi:590,cfiindex:716,chain:[593,679,743],chang:[585,590,667,679,723,742,743,745,752,757,759,804],chapter:[788,789,790,791,803,804,805,806,807,808,809,810],charact:[597,611],check:[611,673,710,741,743,760,770,779],checkout:[679,723,745],choos:[668,757,809],chrome:784,clamp:588,clang:[590,592,613,658,660,675,679,697,699,700,701,756,768,778],clangattrclass:768,clangattrdump:768,clangattrimpl:768,clangattrlist:768,clangattrparsedattrimpl:768,clangattrparsedattrkind:768,clangattrparsedattrlist:768,clangattrparserstringswitch:768,clangattrpchread:768,clangattrpchwrit:768,clangattrspel:768,clangattrspellinglistindex:768,clangattrtemplateinstanti:768,clangattrvisitor:768,clangcommentcommandinfo:768,clangcommentcommandlist:768,clangcommenthtmlnamedcharacterrefer:768,clangcommenthtmltag:768,clangcommenthtmltagsproperti:768,clangcommentnod:768,clangdeclnod:768,clangdiaggroup:768,clangdiagsdef:768,clangdiagsindexnam:768,clangsacheck:768,clangstmtnod:768,classof:703,cleanup:[669,708],cleanuppad:710,cleanupret:710,clear_cach:710,client:593,clobber:[607,710,721],clone:745,close:[600,714,741,804],cmake:[603,605,606,679,698,699,700,701,773],cmath:660,cmp:712,cmpconstant:722,cmpgep:722,cmpoper:722,cmptype:722,cmpvalu:722,cmpxchg:710,coalescingbitvector:743,code:[590,601,607,608,609,610,613,618,619,631,639,654,655,656,658,660,664,665,667,669,672,676,679,682,704,710,715,716,723,736,741,742,743,744,748,755,759,761,765,776,778,780,781,782,788,789,790,791,803,804,805,806,807,808,809,810],codeemitt:768,codegen:[594,749,759],codegenprepar:741,codeview:[671,710,729,730,734,738,739,759],coff:[649,653,671,814],collabor:757,collect:[659,661,676,710,751,764,811],collector:[676,710],color:747,column:710,com:611,combin:[607,741],comdat:710,command:[602,606,615,621,625,637,642,645,646,659,681,742,751,779],commandlin:659,comment:[610,716,723],commit:[609,667,679,723,742,745],committe:755,common:[679,698,724,743,773],commun:[608,678,702,713,766,787
],compar:[677,722],comparison:722,compat:[634,667,712,746],compil:[605,610,617,624,660,661,672,679,700,701,710,726,740,760,773,790,805,809,810],complain:724,complex:[592,660,710,743],complexpattern:686,complic:712,compon:[591,600,607,624,667,750],composit:[585,688,757],compr:588,comprehens:752,compress:710,compressstor:710,comput:[676,677,724],compute_pgm_rsrc1:590,compute_pgm_rsrc2:590,compute_pgm_rsrc3:590,concaten:743,concept:[664,764,770,771,782],conclus:[723,803,811],concret:[703,770],concurr:[594,710],condit:[609,710,741,760,770],conduct:[608,755],confidenti:757,config:624,configur:[592,603,616,617,675,679,696,698,701,723,773,774,809],connect:709,consid:[674,757],consider:[596,708],consist:[600,639,765],constant:[589,671,684,685,710,716,741,743,759,760,805],constants_block:597,constmerg:741,constrain:[591,710,775],constraint:[590,710],construct:[607,666,672,709,716,741],constructor:[610,743],consum:[743,759],consumeaft:659,contain:[610,644,739,743,754],content:[585,597,703,746,749,759,813],context:[585,710,786],contextu:718,continu:[610,663,675,773],contract:703,contribut:[662,731],control:[585,604,606,659,660,684,760,784,806],convent:[588,589,590,607,672,708,710,724,760,780],convers:[585,589,684,710,724,747,779,780],convert:[654,705,710,741],copi:[610,641,708,710],copyright:667,copysign:710,copyvalu:593,copyweak:710,coral:747,core:[0,4,689,743,764,766],coreclr:676,coro:663,corocleanup:663,coroearli:663,coroelid:663,corosplit:663,coroutin:663,corpu:712,cos:710,cost:[669,748],could:722,count:[599,611,710,741,779],counter:[664,743],cov:625,cover:[610,766],coverag:[625,664,712],cpu:747,crash:[601,704,712],creat:[635,702,726,743,744,761,769,770,782],creation:[585,715],crit:741,criteria:[702,757],critic:741,cross:[605,641,653,679,700,701,773],ctag:768,ctlz:710,ctpop:710,cttz:710,cuda:660,current:[726,745,746,748,757],custom:[631,639,659,663,676,677,686,726,743,773,774,780,786],customeventmark:785,cv_def_rang:671,cv_file:671,cv_filechecksum:671,cv_filechecksumoffset
:671,cv_fpo_data:671,cv_func_id:671,cv_inline_linet:671,cv_inline_site_id:671,cv_linet:671,cv_loc:671,cv_stringtabl:671,cxxfilt:626,cxxmap:627,cycl:600,d16:588,dag:[598,607,611,752,770],dagcombin:749,daginit:769,dagisel:768,darpa:747,darwin:635,data:[585,590,597,607,645,664,666,710,724,741,743,760,765,769,783,785,786],datalayout:607,date:719,dbg:[741,759],dbi:[644,731],dce:741,dead:741,deadargelim:741,deadarghax0r:741,deadtypeelim:741,deal:600,dealloc:663,debug:[585,590,593,614,615,630,638,665,689,705,710,716,728,731,741,743,751,752,759,769,775,784,810],debug_info:585,debug_typ:743,debugg:[585,590,601,710,741,759],debugifi:705,debuginfo:741,debugtrap:710,decl:710,declar:[610,688,741,759],decod:741,deconstruct:607,decrement:710,deduc:741,deduct:610,deep:717,deeper:703,def:[743,770],defer:760,defici:771,defin:[610,710,760,770,780,807,808],define_abbrev:597,definit:[585,610,623,711,714,750,762,769,770],defm:770,defset:770,defvar:770,delet:[705,733,741,743],deletevalu:593,demangl:[626,675],demo:672,demot:741,denorm:590,densemap:743,denseset:743,deoptim:710,depend:[593,666,671,710,716,723,740,741,760],deprec:[749,766,773,774],dequ:743,dereferenc:[606,677,710],dereferenceable_or_nul:710,deriv:[670,743,764],describ:674,descript:[585,601,607,611,612,613,614,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,713,745],descriptor:590,design:[601,602,604,607,666,677,693,695,708,713,715,721,726,743,750,756,787],destroi:663,destroyweak:710,detail:[610,623,666,718,758,759,760,768,770,779],detect:[660,695,741,747],determin:[592,610,659],determinist:610,develop:[605,615,667,678,679,709,712,721,745,769,779],deviat:[14,79,172,267,333,350,430,543],devic:[660,773],dfapacket:768,diagnos:639,diagnost:[644,754,779],dialect:660,diarglist:710,dibasictyp:710,dicompileunit:710,dicompositetyp:710,dictionari:712,did:672,diderivedtyp:710,didn:748,die:741,dienumer:710,diexpress:
710,diff:[628,754],differ:[593,603,649,660,677,774],differenti:748,difil:710,diflag:710,diglobalvari:710,diglobalvariableexpress:710,diimportedent:710,dilabel:710,dilexicalblock:710,dilexicalblockfil:710,diloc:710,dilocalvari:710,dim:588,dimacro:710,dimacrofil:710,dinamespac:710,diobjcproperti:710,direct:[590,607,611,671,710,762,770,779],directli:[593,619,760],directori:[679,735],dis:629,disabl:710,disable_nonforc:710,disadvantag:[666,748],disassembl:[607,629,675,718,768],disclosur:757,discoveri:616,discuss:757,disk:733,dispatch:[590,639],displai:773,dissect:[664,724],distanc:677,distinct:663,distribut:[603,702,710,776,778],disubprogram:710,disubrang:710,disubroutinetyp:710,ditemplatetypeparamet:710,ditemplatevalueparamet:710,dither:747,dlc:588,dll:710,dmask:588,doc:661,docker:668,dockerfil:668,document:[590,591,604,610,661,674,702,717,722,761,786,787],doe:[672,677,712,782],doesn:712,doesnotaccessmemori:593,dofin:782,doiniti:782,dom:741,domfronti:741,domin:741,domtre:741,don:[609,610,672,677,760,765],done:[588,663],donoth:710,dot:741,down:760,downstream:748,doxygen:610,dpp16:[0,1],dpp16_ctrl:588,dpp32_ctrl:588,dpp64_ctrl:588,dpp8:[0,1,588],dpp8_sel:588,dpp:588,dpp_ctrl:588,dpp_op_sel:588,drawback:745,driven:593,driver:[803,804],drop:[620,705],dse:[593,741],dso:710,dst:[18,122,211,336,368,481],dst_sel:588,dst_unus:588,dsymutil:614,due:610,dump:[630,643,644],dumper:642,duplic:[741,765],dure:[759,779],dw_at_llvm_active_lan:590,dw_at_llvm_augment:590,dw_at_llvm_lane_pc:590,dwarf:[585,590,614,630,638,759,810],dwarfdump:[630,675],dyn_cast:743,dynam:[607,659,710,782],e_flag:590,each:741,earli:610,easier:760,ebpf:607,edg:[741,760,764],edit:[641,723],ef_amdgpu_mach:590,effect:[674,677],effici:[593,609,708],either:729,element:[610,677,710],elf:[590,641,649,653,671],elimin:741,elis:708,els:[606,610,806],elseif:606,email:745,emb:760,embed:[590,605,661,716],emiss:[607,671,810],emit:[607,625,676,754,769,809],emitt:780,empti:611,enabl:[710,754],encod:[585,587,597,607,664],end:[610
,617,663,672,677,704,710,724,743,759],end_block:597,endcatch:669,endian:596,endl:610,endofbuff:785,engin:715,enhanc:764,enough:601,enter_subblock:597,entiti:585,entri:[585,590,599,710,741,762],enumer:[590,610,768],environ:[590,710,782],epilog:607,epilogu:779,equal:610,equival:710,erlang:676,error:[610,709,723,728,743,756,765,769,786],error_cod:743,erroror:743,establish:667,etc:[670,743],eval:[593,741],evalu:[585,593,610,741],even:760,event:678,everi:610,everyon:609,everyth:760,evolut:741,exactli:712,exampl:[585,590,596,616,624,626,631,638,645,652,654,663,665,676,679,687,694,695,707,710,712,713,717,728,743,761,770,771,782],except:[610,669,708,710,741,764],exceptionpoint:669,exe:634,exec:589,execut:[590,605,616,619,639,709,724,736,743,775],executor:709,execz:589,exegesi:631,exhaust:741,exist:[593,782,808],exit:[610,611,612,614,616,617,619,621,622,623,624,625,626,628,629,630,631,632,633,635,636,638,639,640,641,643,645,648,649,650,651,652,653,654,657,658,741],exitonerror:743,exp2:710,exp:[0,2,3,4,588,710],expand:[710,780],expandload:710,expans:759,expect:[599,710,723],experi:745,experiment:[710,749,762],expert:609,explicit:[727,764],exploit:659,explor:784,expos:765,express:[585,589,590,664,669,710,741,760,770,803,804,806],extend:[607,670,806,807,808],extens:[585,659,671,684,747,759,782,806],extern:[659,716,741,752,773,774,813],externalfnconst:741,extra:[639,677,775],extract:[632,710,741],extractel:710,extractvalu:710,extrahelp:659,extrem:791,fab:710,facil:770,fadd:710,fail:700,fair:748,fallibl:743,faq:[672,712,781],fast:710,fastisel:[693,752,768],fault:673,faultmap:673,fcmp:710,fconst:589,fdiv:710,featur:[590,607,676,709,712,726,736,740,746,747,775,779],feedback:609,fenc:710,ffmpeg:747,fidel:784,field:[722,769,770],file:[597,607,610,611,614,616,627,640,642,644,651,659,664,679,700,713,716,731,735,739,741,754,755,759,770,775,783],filecheck:611,filenam:710,filter:[607,644,669],find:[674,675,723,741,742,743],findregress:753,fine:[712,743,760],finer:728,firehos:747,first:[67
7,710,812],fix:[590,597,600,607,662,710,716,722,743,759],flag:[659,660,710,716,740,760,773,779],flame:784,flat:[0,2,3,4,8,9,588,590],flat_scratch:589,flexibl:611,flight:[783,785],floor:710,flow:[585,604,606,639,684,786,806],flt:710,fma:710,fma_mix:588,fmax:710,fmin:710,fmt:588,fmul:710,fmuladd:710,fneg:710,focu:602,fold:[607,760,780,805],foldingset:743,follow:[677,722],followup:710,followup_al:710,followup_coincid:710,followup_epilogu:710,followup_fallback:710,followup_inn:710,followup_out:710,followup_remaind:710,followup_remainder_inn:710,followup_remainder_out:710,followup_sequenti:710,followup_vector:710,forbidden:610,forc:741,foreach:770,forens:644,fork:712,form:[585,607,714,741],format:[585,590,597,607,610,616,630,641,649,659,664,669,675,716,733,735,739,743,759,761,762,764,785],formatv:743,formed:710,fortran:759,fp16:710,fpext:710,fpmath:710,fptosi:710,fptoui:710,fptrunc:710,fragil:775,frame:[585,590,607,663,669,811],frameaddress:710,framework:667,free:[663,695,735],freeform:659,freepooma:747,freez:710,frem:710,frequenc:598,frequent:[605,672],friendli:712,from:[590,598,607,619,632,659,660,677,679,701,710,726,741,742,743,760,791],front:[672,677,704,759],frontend:[674,727,812],frontier:741,fshl:710,fshr:710,fsub:710,ftensor:747,full:[710,788,789,790,791,803,804,805,806,807,808,809,810],fulli:610,funclet:[669,710],function_block:597,function_entry_count:710,function_ref:743,functioncompar:722,functionpass:782,fundament:[670,772],further:[639,726,744,784],futur:[607,693,726,760,782,783],fuzz:[675,712],fuzzer:[675,712],g_ab:684,g_add:684,g_addrspace_cast:684,g_and:684,g_anyext:684,g_ashr:684,g_assert_sext:684,g_assert_zext:684,g_atomic_cmpxchg:684,g_atomic_cmpxchg_with_success:684,g_atomicrmw_add:684,g_atomicrmw_and:684,g_atomicrmw_fadd:684,g_atomicrmw_fsub:684,g_atomicrmw_max:684,g_atomicrmw_min:684,g_atomicrmw_nand:684,g_atomicrmw_or:684,g_atomicrmw_sub:684,g_atomicrmw_umax:684,g_atomicrmw_umin:684,g_atomicrmw_xchg:684,g_atomicrmw_xor:684,g_bitcast:684,g_bitrever
s:684,g_block_addr:684,g_br:684,g_brcond:684,g_brindirect:684,g_brjt:684,g_bswap:684,g_build_vector:684,g_build_vector_trunc:684,g_bzero:684,g_concat_vector:684,g_constant:684,g_ctlz:684,g_ctlz_zero_undef:684,g_ctpop:684,g_cttz:684,g_cttz_zero_undef:684,g_dyn_stackalloc:684,g_extract:684,g_extract_vector_elt:684,g_fab:684,g_fadd:684,g_fcanonic:684,g_fceil:684,g_fcmp:684,g_fco:684,g_fconstant:684,g_fcopysign:684,g_fdiv:684,g_fenc:684,g_fexp2:684,g_fexp:684,g_ffloor:684,g_flog10:684,g_flog2:684,g_flog:684,g_fma:684,g_fmad:684,g_fmaximum:684,g_fmaxnum:684,g_fmaxnum_iee:684,g_fminimum:684,g_fminnum:684,g_fminnum_iee:684,g_fmul:684,g_fnearbyint:684,g_fneg:684,g_fpext:684,g_fpow:684,g_fptosi:684,g_fptoui:684,g_fptrunc:684,g_frame_index:684,g_frem:684,g_frint:684,g_fsin:684,g_fsqrt:684,g_fsub:684,g_global_valu:684,g_icmp:684,g_implicit_def:684,g_indexed_load:684,g_indexed_sextload:684,g_indexed_stor:684,g_indexed_zextload:684,g_insert:684,g_insert_vector_elt:684,g_intrins:684,g_intrinsic_round:684,g_intrinsic_trunc:684,g_intrinsic_w_side_effect:684,g_inttoptr:684,g_jump_tabl:684,g_llround:684,g_load:684,g_lround:684,g_lshr:684,g_memcpi:684,g_memcpy_inlin:684,g_memmov:684,g_memset:684,g_merge_valu:684,g_mul:684,g_or:684,g_phi:684,g_ptr_add:684,g_ptrmask:684,g_ptrtoint:684,g_rotl:684,g_rotr:684,g_sadd:684,g_saddo:684,g_saddsat:684,g_sbfx:684,g_sdiv:684,g_sdivrem:684,g_select:684,g_sext:684,g_sext_inreg:684,g_sextload:684,g_shl:684,g_shuffle_vector:684,g_sitofp:684,g_smax:684,g_smin:684,g_smulh:684,g_smulo:684,g_srem:684,g_sshlsat:684,g_ssube:684,g_ssubo:684,g_ssubsat:684,g_store:684,g_sub:684,g_trunc:684,g_uadd:684,g_uaddo:684,g_uaddsat:684,g_ubfx:684,g_udiv:684,g_udivrem:684,g_uitofp:684,g_umax:684,g_umin:684,g_umulh:684,g_umulo:684,g_unmerge_valu:684,g_urem:684,g_ushlsat:684,g_usub:684,g_usubo:684,g_usubsat:684,g_vaarg:684,g_vastart:684,g_vecreduce_fadd:684,g_vecreduce_fmax:684,g_vecreduce_fmin:684,g_vecreduce_fmul:684,g_vecreduce_seq_fadd:684,g_vecreduce_seq_fmul:684,g_xo
r:684,g_zext:684,g_zextload:684,garbag:[676,710,751,764,811],gather:[710,779],gcmetadataprint:676,gcov:625,gcread:[676,710],gcroot:710,gcwrite:[676,710],gdb:[665,782],gds:588,gen:724,gener:[585,595,601,603,607,609,616,619,621,641,646,647,649,651,653,658,669,671,672,675,676,683,684,704,709,710,715,716,717,719,741,743,747,761,768,774,778,784,785,804,806],geometr:747,gep:[674,677],get:[598,678,679,680,681,710,712,723,728,769,784],getanalysi:782,getanalysisifavail:782,getanalysisusag:782,getelementptr:[672,710],getmodrefinfo:593,getposit:659,getregisteredopt:659,gfx1011:1,gfx1012:1,gfx10:[0,590],gfx6:590,gfx7:2,gfx8:3,gfx900:5,gfx902:5,gfx904:6,gfx906:7,gfx908:8,gfx909:5,gfx90a:[9,590],gfx9:[4,590],gfx_generation_minor:590,gfx_generation_numb:590,gfx_generation_step:590,git:[679,682,745,748],github:745,given:700,glc:588,global:[693,710,716,722,729,732,741,759,779],global_ctor:[672,710],global_dtor:710,globaldc:741,globalisel:690,globalopt:741,globalsmodref:[593,741],globalvalu:743,globalvari:743,gnu:[615,648],gnupg:747,goal:[676,721],gold:694,good:712,got:704,grain:[743,760],grammar:785,granular:728,graph500:747,graph:[666,671,741,743,747,760,770,784],great:723,group:[659,710,757,782],guarante:811,guard:[610,695,710,760],guid:[590,594,615,659,668,699,705,712,716,724,741,755,769,773,774,775,778],guidanc:603,guidelin:[610,717,750,761],gvn:[593,741],gwp:695,hack:[701,741],half:[710,764],handl:[606,607,669,709,710,741,743,786],handler:[590,743],happen:[672,677,755],har:[709,747],hard:[760,808,810],harden:[756,760],hardwar:[590,661,679,681,710],hasglobalalias:722,hash:[733,759],haswel:760,have:677,hcc:590,header:[585,590,610,664,700,731,736,738,756,759,765,785],heap:[663,676,720,743],heavi:760,hello:[781,782],help:[659,660,662,672,743,744],heterogen:585,hex:786,hexadecim:671,hexagon:[661,752],hide:659,hierarchi:[703,743],high:[588,607,610,664,666,710,716,747,750,760,765],higher:672,hint:[684,710,743,779],histor:[602,683],histori:745,hook:679,hopefulli:717,host:[660,672,679,7
00,719],how:[591,600,607,609,639,662,672,677,694,696,697,698,699,700,701,702,703,704,705,706,707,712,719,722,726,753,757,759,761,763,770],hpc:747,hsa:590,hsa_code_object_isa:590,hsa_code_object_vers:590,hwreg:[20,123,212,370,482],hydrobench:747,hyphen:659,i32:677,ia64:661,ibm:661,icmp:710,iconst:589,idea:[604,764,807],identifi:[590,676,710,770],idxen:588,ifunc:710,ilist:743,ilist_nod:743,ilist_trait:743,imag:[590,668,747],imask:[213,371,483],imm16:[21,22,23,124,125,126,214,215,216,372,373,374,484,485,486],immedi:716,immutablemap:743,immutablepass:782,immutableset:743,impl:741,implement:[593,596,598,602,607,610,666,676,693,710,713,715,720,722,725,726,728,747,758,760,765,780,782,803,811,813],implicit:[585,590,673],implicitnullcheck:673,inalloca:708,includ:[610,672,679,765,770],inclus:766,incorrect:704,increment:[667,710],incub:667,indent:610,independ:[607,672,811],index:[585,647,677,710,716,728,768],indexedmap:743,indic:[674,677,716,738,787],indirect:[710,760],indirectbr:710,indirectbrinst:599,individu:[716,743],induct:[741,779],indvar:741,info:[705,716,729,731,736,741,743,752,759,780],inform:[585,590,607,610,625,630,650,661,662,667,710,720,734,741,743,752,759,775,777,810],infrastructur:[593,616,696,752,775],init:[710,769],initi:[590,593,607,610,676,712,743],initweak:710,inlin:[589,607,610,710,741],input:[641,645,675,710,786],insert:[607,710,725,743,764],insertel:710,insertvalu:710,inspect:743,instal:[603,633,681,701],instcombin:[672,741,749],instcount:741,instnam:741,instrinfo:768,instrmap:707,instrprof:710,instruct:[0,1,2,3,4,5,6,7,8,9,585,586,587,588,589,590,594,596,599,607,631,639,669,670,672,677,683,693,705,707,710,712,716,718,741,743,749,759,760,780,784],instructionselect:686,instructionselector:686,instrument:[720,783,784],instsimplifi:749,integ:[589,597,677,684,710],integr:[616,710,741,760],intel:[595,617],intend:708,inteqclass:743,interact:[743,782],interchang:776,interfac:[593,602,672,742,743,765],interleav:[710,776],intern:[610,659,741,760],interoper:743,in
terprocedur:[741,760],interv:[607,741],intervalmap:743,intinit:769,intrins:[590,591,596,663,669,670,676,685,710,724,741,749,759,762,764,768],intrinsicid:716,introduc:667,introduct:[0,1,2,3,4,5,6,7,8,9,585,586,590,592,593,594,595,596,598,599,600,602,603,605,606,607,610,659,660,663,664,666,667,668,669,670,671,675,676,677,682,693,694,695,696,697,698,699,700,701,702,704,705,706,707,708,709,710,712,715,716,717,718,720,721,722,723,724,726,728,729,730,731,733,734,738,739,741,743,745,746,748,752,753,754,756,758,759,763,768,769,770,771,780,781,782,783,785,786,788,789,790,791,802,803,804,805,806,807,808,809,810],inttoptr:[677,710],invalid:[721,725],invari:[710,741,760,785],invok:[660,710,725,741,770],invokeinst:599,involv:678,iostream:[610,672],ipi:738,iplist:743,ipsccp:741,irc:678,irr_loop:710,irtransformlay:789,irtransl:685,isa:743,isel:675,isn:601,isol:743,issu:[593,602,610,639,662,698,724,727,757],itanium:[661,669,698],item:[590,679],iter:[710,743,770,779],its:736,ival:589,jam:[741,776],jit:[607,665,709,743,778,780,788,789,790,791,805,813],jitdylib:726,jitlink:709,jitlinkmemorymanag:709,joinerror:743,json:768,jump:[607,716,741],kaleidoscop:[792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,813],kaleidoscopejit:788,keep:[610,748,757,765],kei:786,kernel:[590,724],keyword:610,kick:807,kind:[586,748],know:722,known:[687,698],kokko:747,label:[24,127,217,375,487,610,611,710],laissez:748,lambda:610,lanai:661,landingpad:710,lane:[590,596,710],languag:[585,590,610,672,674,710,759,802,806,807,808,811,812,813],larg:[674,760],late:607,launder:710,layer:[607,610,746,789],layout:[590,607,679,710,724,734,735,739,741,743,744,759,777],lazi:[726,741,743,790,791],lazyreexport:791,lcssa:[714,741],ld1:596,ldr:596,lds:588,lds_direct:589,leaf:730,leak:712,leb128:664,leftov:776,legaci:[667,725],legal:[607,674,688,780],legalizerinfo:688,legalizetyp:607,length:671,less:760,let:770,level:[596,607,610,659,664,666,672,683,705,710,716,726,741,749,750,759,760,765],lexer:[8
02,806],lexic:770,lexicon:711,lf_arglist:730,lf_arrai:730,lf_bclass:730,lf_binterfac:730,lf_bitfield:730,lf_buildinfo:730,lf_class:730,lf_endprecomp:730,lf_enum:730,lf_enumer:730,lf_fieldlist:730,lf_func_id:730,lf_index:730,lf_interfac:730,lf_ivbclass:730,lf_label:730,lf_member:730,lf_method:730,lf_methodlist:730,lf_mfunc_id:730,lf_mfunction:730,lf_modifi:730,lf_nesttyp:730,lf_onemethod:730,lf_padn:730,lf_pointer:730,lf_precomp:730,lf_procedur:730,lf_stmember:730,lf_string_id:730,lf_structur:730,lf_substr_list:730,lf_typeserver2:730,lf_udt_mod_src_lin:730,lf_udt_src_lin:730,lf_union:730,lf_vbclass:730,lf_vftabl:730,lf_vfunctab:730,lf_vtshape:730,lfenc:760,lgtm:609,lib:[634,679,710],libc:[698,746],libcal:[594,741],libdevic:724,liber:610,libfuzz:[712,751],liblto:713,libm:710,libprotobuf:675,librari:[603,610,634,635,659,660,671,675,679,710,712,726,740,743,744,746,747,756,765,767,783],libtool:635,libunwind:698,licens:[667,672,694],licm:[593,710,741,776],licm_vers:710,life:600,lifetim:[708,710,759],lift:749,like:[610,672,743],limit:[593,716,728,764,777],line:[585,590,602,659,681,742,751,779],link:[636,679,681,694,709,710,713,724,761],linkag:[710,726],linker:[636,671,710,713],linkgraph:709,linkonc:671,lint:741,linux:[595,661],lipo:637,list:[585,606,610,640,659,661,678,710,738,743,752,761,786,788,789,790,791,803,804,805,806,807,808,809,810],listinit:769,listrecti:769,lit:616,liter:[585,589,770],littl:610,live:[607,710,716],livedebugvalu:759,llc:617,lldb:[618,658,665,752],lli:619,lljit:726,lllazyjit:726,llrint:710,llround:710,llvm:[590,593,594,596,597,598,599,600,601,603,605,606,607,608,609,610,615,616,617,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,657,658,662,663,664,666,667,668,669,670,671,672,675,676,677,678,679,681,682,694,696,697,699,701,702,703,704,705,709,710,711,712,713,714,718,719,721,723,724,740,741,743,744,745,746,749,750,751,752,754,757,758,759,762,764,765,76
6,768,775,778,779,780,781,782,783,784,787,804,805,806,808,811,812,813],llvm_debug:743,llvm_shutdown:743,llvmcontext:743,lnt:773,load:[590,607,639,674,710,715,760,782],loadweak:710,loadweakretain:710,local:[616,679,710,723,745,808],localescap:710,localrecov:710,locat:[585,604,638,654,679,705,716,721,759,760,810],locstat:638,log10:710,log2:710,log:[710,722],logarithm:722,logic:585,longjmp:669,look:674,lookup:[585,590,741,759],loop:[598,606,610,710,714,741,776,779,782,806],loopinfo:714,looppass:782,lostdebuglocobserv:705,low:[610,683],lower:[663,676,677,710,741,764],loweratom:741,lowerinvok:741,lowerswitch:741,lppassmanag:782,lrint:710,lround:710,lsda:669,lshr:710,lto:[694,710],lto_code_gen_t:713,lto_module_t:713,lwe:588,m_op_sel:588,m_op_sel_hi:588,mach:[640,641,642,643,649,654,759],machin:[607,631,639,671,674,683,716,764,780,809],machine_version_major:590,machine_version_minor:590,machine_version_step:590,machinebasicblock:607,machinefunct:[607,782],machinefunctionpass:782,machineinstr:[607,705],machineinstrbuild:607,machineverifi:689,machsuit:747,maco:661,macro:[606,743,786],mad_mix:588,magic:597,mai:[593,722],mail:678,main:722,mainten:600,major:[590,667,710],make:[609,610,667,672,673,723,743,760],makefil:[744,773,774],man:745,manag:[709,725,726,745],managedstat:743,mangl:627,manipul:[614,633,637,663,710],manual:[659,661,681,710,716,743],map:[590,591,607,664,671,673,676,707,710,731,735,736,743,762,764,780,786],mapvector:743,mark:724,markdown:717,marker:[639,710],markup:718,mask:[710,749],mass:598,match:[607,611,736],math:[660,710],matrix:[607,710],maximum:710,maxnum:710,mca:639,mccontext:607,mcinst:607,mcjit:[665,715],mcsection:607,mcstreamer:607,mcsymbol:[607,716],mechan:610,medium:757,meet:721,meetup:[678,719],mem2reg:741,member:[677,730,743,755,757,759],membership:[757,777],memcpi:[710,741],memcpyopt:[593,741],memdep:741,memmov:710,memori:[585,590,593,639,674,684,709,710,741,743,756,808],memorydependenceanalysi:593,memoryssa:721,memset:710,memtagsanit:720,merg:[6
44,645,702,705,712,722,741,742,745,748],mergefunc:741,mergefunct:722,mergereturn:741,mergetwofunct:722,messag:[610,667,769],meta_block:754,metadata:[590,599,600,673,710,724,754,759,776,777,785],metadata_attach:597,metadata_block:597,method:[590,593,610,743,765,782],mfma:588,microscop:610,microsoft:[605,681],middl:[672,704],migrat:[727,745],mimg:[0,2,3,4,9,588],minim:[668,765,783],minimis:748,minimum:[688,710],minnum:710,minor:590,mip:[661,752],mir:[705,716,759],mirror:745,miscellan:[588,644,659,661,716,744,782],miscompil:[601,704],mismatch:672,miss:741,misunderstood:677,mitig:760,mix:[764,779],mlir:[656,658],mnemon:[587,607],mod:741,mode:[590,596,607,691,709,712,754,783,810],model:[590,639,660,674,710,760,764],modern:679,modif:[599,700],modifi:[586,587,588,590,611,621,659,672,710],modul:[606,632,644,710,716,729,731,734,741,743,782,789,809],module_block:597,module_code_alia:597,module_code_asm:597,module_code_datalayout:597,module_code_deplib:597,module_code_funct:597,module_code_gcnam:597,module_code_globalvar:597,module_code_sectionnam:597,module_code_tripl:597,module_code_vers:597,modulepass:782,monorepo:[667,745],monoton:594,more:[592,610,682,712,714,720,743,783],most:741,motion:741,motiv:[585,673,749,762],move:[745,767,772],moveweak:710,mri:621,msf:[644,735,739],msg:[27,129,220,378,490],mtbuf:[0,2,3,4,9,588],mubuf:[0,2,3,4,8,9,588,590],mul:710,multi:[592,603,649,713],multiclass:770,multigrid:747,multipl:[663,682,743,745,770],multipli:710,multithread:782,must:[593,609,659],mustprogress:710,mutabl:808,mutat:[675,705,712,808],myfirsttypofix:723,name:[585,590,607,610,626,627,633,659,710,736,741,748,759,769,780],namespac:610,narr:722,narrow:602,nativ:[597,607],natur:741,nearbyint:710,need:[677,717,727],neg:[588,677],neg_hi:588,neg_lo:588,neon:596,nest:[708,761],newbuff:785,newcpuid:785,newer:760,newlin:611,next:[611,681,784],next_free_sgpr:590,next_free_vgpr:590,nightli:753,noalia:710,node:[591,670,710,741],nomin:757,non:[590,592,609,610,676,710,721,741,752,764],nond
ebug:741,noop:663,normal:[663,786],notat:[0,1,2,3,4,5,6,7,8,9,586,679],notatom:594,note:[590,603,604,607,661,683,697,698,714,752],number:[585,589,590,597,659,741,745,748],numer:[611,660],nvcc:660,nvptx:[607,661,724],nvvm:724,nwchem:747,objc:710,objcopi:641,objdump:642,object:[590,597,604,640,641,642,648,649,653,677,679,710,715,743,744,754,759,764,783,809],objectlinkinglay:709,objects:710,obtain:[660,667],ocaml:[676,752],occurr:659,off:[589,666],offen:588,offici:[661,702],offset0:588,offset11:588,offset12:588,offset13:588,offset1:588,offset:[588,710,760,777],offsetof:811,often:677,omod:588,omp2012:747,onc:663,one:[672,677,741],ongo:779,onli:[603,642,741,743,745],onlin:678,onlyreadsmemori:593,op_sel:588,op_sel_hi:588,opaqu:[710,727],opcod:[586,587,683,684],open:[703,752],openbenchmark:747,opencl:590,opencv:747,openmp:747,oper:[585,589,590,621,674,682,684,688,710,743,760,770,807,808],operand:[19,315,320,325,337,369,586,587,588,589,590,710,716,762,780],opt:[28,130,221,340,379,491,657,659,675,725,728,754,781,782],optim:[590,594,603,607,657,660,672,684,694,699,704,705,710,713,721,728,741,754,759,760,773,778,789,805,811],option:[590,602,603,605,611,612,614,616,617,619,621,622,623,624,625,626,627,629,630,631,632,633,635,636,638,639,640,641,642,643,644,645,646,648,649,650,651,652,653,654,657,658,659,671,679,695,698,712,718,731,743,748,756,773,775],optioncategori:659,optparserdef:768,orc:[709,726,789],orcv1:726,orcv2:726,order:[590,594,596,610,639,674,710,776,785,786],org:747,organ:[765,775],origin:705,other:[585,593,599,604,606,621,644,659,661,672,674,677,684,700,710,724,743,775,811],otool:643,out:[605,677,788],output:[616,623,630,641,659,710,712,718,769,774,786],outsid:594,outstand:745,over:[610,674,743,746,770],overflow:[677,695,710],overhead:[760,764],overlap:645,overload:660,overrid:[593,770],overridden:722,overview:[0,1,2,3,4,5,6,7,8,9,593,597,606,610,663,664,668,669,676,679,681,707,710,716,724,725,726,740,744,753,761,762,764,771,774,775,776,787],own:743,owner:667,pack:
[587,590],packag:740,packedvector:743,packet:607,pad:[585,730],page:672,parallel:[712,747],parallel_access:710,param:[29,131,222,492,663,710],paramattr_block:597,paramattr_code_entri:597,paramattr_code_entry_old:597,paramattr_group_block:597,paramattr_grp_code_entri:597,paramet:[710,724],parboil:747,parent:669,parenthes:610,parr:747,pars:[607,659,803],parsec:747,parsecommandlineopt:659,parser:[607,659,803,806],partial:[741,779],partit:[671,741],pass:[593,605,663,673,674,700,705,709,716,722,725,728,741,743,760,764,776,781,782,805],passmanag:782,past:770,patch:[609,662,667,679,702,762],patchpoint:762,patent:667,path:[590,710],patleaf:686,pattern:[588,611,741],pcmarker:710,pdb2yaml:644,pdb:[644,654,731,732,733,736,737,738,739],pdbutil:644,per:[590,790],perform:[639,674,747,760,779],peripher:766,person:710,pgo:[592,699],phabric:742,phase:[607,713,769,780],phi:[710,721],philosophi:[601,713,759],physic:607,pick:[719,743],piec:662,pipelin:[590,689,725],placement:[721,741,744],placesafepoint:764,plan:[727,745,748,750,782],platform:[605,641,653,661,672,746,756,775,783],plugin:[676,694,709,782],point:[589,590,591,619,663,671,676,684,710,762],pointe:727,pointer:[590,593,610,677,688,710,727,743,760,764,779],pointstoconstantmemori:593,poison:710,polici:[609,667,757,766],polybench:747,polymag:747,polymorph:743,pool:[695,716],port:690,portabl:[610,672,765,811],posit:659,possibl:[610,659,722],post:[710,723,741,745],postdom:741,postdomfronti:741,postdomin:741,postdomtre:741,potenti:742,pow:710,power:723,powerpc:[607,661,752],powi:710,practic:[609,674],pragma:779,pre:[679,742,753],prealloc:710,preassign:607,precis:[710,721,741],preconfigur:699,predecessor:743,predefin:590,predic:[610,686,688,710,716,749,760],preemption:710,prefer:[610,674],prefetch:710,prefix:[611,710],preincrement:610,preliminari:780,prepar:[663,715],preprocess:770,prerequisit:[660,698,700,780],present:[721,722,733],preserv:[705,710,760],pretti:644,previous:610,primer:606,primit:597,print:[593,624,650,652,741,768,76
9,782],printdetailedrecord:769,printer:[741,780],printrecord:769,prior:609,privat:[590,610],privileg:757,probabl:[598,599,710],probe:[30,223,380,493,671],problem:[596,679,712,760,782,808,810],process:[607,678,688,702,722,723,726,747,750,753,757],processor:[590,661,747,760],product:600,prof:710,profdata:645,profgen:646,profil:[645,646,671,699,710,712,741,773],program:[585,607,619,666,744,771],programm:[743,770],programmat:743,progress:693,project:[605,606,667,679,694,744,745,752],prolog:[590,607],prologu:710,promis:663,promot:[741,780],propag:[741,743,760],properli:610,properti:[590,674,759,811],propos:[602,678,745,759],protect:760,proto:675,prototyp:741,provid:610,provision:748,proxi:723,prune:741,pseudo:[611,664],pseudolow:768,ptr:[710,724],ptrmask:710,ptrtoint:[677,710],ptx:724,ptxa:724,push:679,put:809,pwm:747,qemu:700,qualif:702,qualifi:610,qualiti:[667,764],queri:741,question:[608,672,745],queue:590,quick:[605,659,676,775,781,782],quickstart:[694,717,761,773],quirki:602,r128:588,raja:747,random:[651,675,722,756],rang:[610,664,710,743],ranlib:647,rare:605,rational:[677,710],raw:764,raw_ostream:610,rawspe:747,read:[676,713,724,745,748,780],read_regist:710,read_volatile_regist:710,readabl:610,readcyclecount:710,readelf:[648,649],reader:[648,649],readobj:649,reassoci:741,reciproc:609,reclaim:756,record:[590,597,644,664,729,730,738,764,768,769,770,783,785],recordkeep:769,recordrecti:769,recordv:769,recover:743,recti:769,redesign:602,redistribut:672,reduc:[603,710,741,748,749],reduct:[602,612,684,710,741,779],redund:741,reexport:726,ref:[710,741],refer:[585,659,666,676,683,693,710,716,741,743,745,747,748,749,750,751,764,768,770],referenc:716,reflect:724,reg2mem:741,reg:710,regbankselect:691,regex:611,region:[664,741,764,782],regionpass:782,regist:[585,590,607,672,674,683,716,724,741,759,760,780,782],registeranalysisgroup:782,registerbankinfo:691,registerinfo:768,registr:[780,782],registri:782,regress:[702,705,709,775],regular:678,reinstat:766,reject:672,rel:[585,710,
741,760],relat:[605,750,780],relationship:[726,743],releas:[594,702,710,752,753],releasememori:782,relev:[603,661],reliabl:607,relicens:667,reloc:[590,671,710,764],relocat:589,remap:[627,715],remark:754,remark_block:754,remot:709,remov:[741,766],removeus:722,replac:[615,620,693,741,743],replacedirectcal:722,replacewithnewvalu:593,report:[600,625,662,702,704,741,753,755,757],repositori:745,repres:[607,777],represent:[585,593,663,664,672,710,764],request:[609,702,742],requir:[593,607,609,659,663,667,676,677,679,681,742,744,766,775,781,782],resampl:747,resolut:713,resolv:600,resourc:[590,661,682,692],respons:[593,659,741,757],rest:803,restrict:[669,674],result:[593,616,710,742,773],resum:[663,710,712],ret:710,retain:710,retainautoreleas:710,retainautoreleasedreturnvalu:710,retainautoreleasereturnvalu:710,retainblock:710,retcon:663,retir:639,returnaddress:710,revers:[667,710,779],revert:[679,723],review:[609,667,723,742],revis:[723,745],rewrit:760,rewritestatepointsforgc:764,rfc:609,rgpassmanag:782,rich:718,right:743,rint:710,rip:760,risc:661,roadmap:[709,726,749,750],rodinia:747,role:585,root:[602,676,682],rotat:[714,741],round:[590,710],roundeven:710,routin:743,row:585,row_mask:588,rpath:633,rtti:[610,703],rule:[585,677,686,688,702,703,705,710],run:[616,675,682,701,712,724,771,773,774,781,782],runonfunct:782,runonloop:782,runonmachinefunct:782,runonmodul:[722,782],runonregion:782,runonscc:782,runtim:[669,709,710,779,783],runtimedyld:709,s_block32:729,s_bprel32:729,s_buildinfo:729,s_calle:729,s_caller:729,s_callsiteinfo:729,s_coffgroup:729,s_compile2:729,s_compile3:729,s_constant:729,s_defrang:729,s_defrange_framepointer_rel:729,s_defrange_framepointer_rel_full_scop:729,s_defrange_regist:729,s_defrange_register_rel:729,s_defrange_subfield:729,s_defrange_subfield_regist:729,s_end:729,s_envblock:729,s_export:729,s_fastlink:729,s_filestat:729,s_framecooki:729,s_frameproc:729,s_gdata32:729,s_gmandata:729,s_gproc32:729,s_gproc32_id:729,s_gthread32:729,s_heapallocsit:729,s_i
nline:729,s_inlinesit:729,s_inlinesite_end:729,s_label32:729,s_ldata32:729,s_lmandata:729,s_local:729,s_lproc32:729,s_lproc32_dpc:729,s_lproc32_dpc_id:729,s_lproc32_id:729,s_lprocref:729,s_lthread32:729,s_manconst:729,s_objnam:729,s_proc_id_end:729,s_procref:729,s_pub32:729,s_regist:729,s_regrel32:729,s_section:729,s_thunk32:729,s_trampolin:729,s_udt:729,s_unamespac:729,sadd:710,saddr:[31,32,341,381,382,494,495],safe:[676,743],safepoint:764,safeti:[741,763,811],same:611,sampl:[664,707,770],sampler:590,sanit:752,sat:710,satur:710,save:663,sbase:[33,34,35,132,133,224,225,383,384,385,496,497,498],scalabl:710,scalar:[684,688,741,786],scalarenumerationtrait:786,scalarevolut:741,scale:598,scatter:[710,779],scc:[589,741],sccp:741,scev:[593,741],schedul:[607,759,780],scope:[585,590,606,710,759],scratch:590,script:[606,621,699,753],scrub:745,scudo:756,sdata:[36,37,38,39,40,41,226,227,228,386,387,388,389,390,391,499,500,501,502,503,504],sdiv:710,sdnode:686,sdst:[42,43,44,45,46,47,48,49,50,134,135,136,137,138,139,140,141,229,230,231,232,233,234,235,236,392,393,394,395,396,397,398,399,505,506,507,508,509,510,511,512],sdwa:[0,588],search:[722,747,768],searchablet:768,section:[585,590,671,673,717,731,761,762,785],secur:[662,709,757,760],see:[610,612,614,616,617,619,620,622,623,626,629,630,632,633,635,638,640,641,642,643,647,648,649,653,654],segment:[585,590,607,758],seh:[669,710],select:[601,607,616,659,693,699,710,759,770],selectiondag:[591,607,670,686,780],selector:[591,780],self:[610,672],semant:[663,674,710,724,762,764,765],send:679,sentinel:743,sequenc:[590,770,786],sequenti:743,sequentiallyconsist:594,seri:598,serial:[733,754],server:731,set:[590,593,600,659,688,703,710,726,741,743,770,780,781,782],setjmp:669,setup:[703,710,804,810],setvector:743,setversionprint:659,sext:[588,674,710],sgpr:590,sgpr_count:590,shader:590,shadow:676,share:740,shl:710,should:[609,668,672,722],show:[625,645,672],sht_llvm_addrsig:671,sht_llvm_bb_addr_map:671,sht_llvm_call_graph_profil:671,sht_llv
m_dependent_librari:671,sht_llvm_sympart:671,shufflevector:710,sibl:607,sideeffect:710,sign:[597,742],signal:590,signific:671,simd:747,simm21:589,simm32:[51,52,53,142,143,237,238,239,400,401,402,513,514,515],simpl:[610,710,724,741,743,770],simplif:764,simplifi:[610,714,716,741,743],simplifycfg:[672,741],sin:710,singl:[590,610,710,741,745,760],sink:741,site:[710,743],sitofp:710,size:[587,589,603,607,650,663,668,743,758,783],sizeof:811,sjlj:669,skeleton:769,skip:728,sky:661,slambench:747,slc:588,slightli:743,slot:741,slp:779,smallbitvector:743,smaller:712,smallptrset:743,smallset:743,smallstr:743,smallvector:743,smax:710,smem:[0,3,4,9,588,590],smin:710,smrd:[2,588,590],smul:710,snippet:631,social:[678,719],soffset:[54,55,56,144,145,240,241,242,342,403,404,405,516,517,518],soft:[700,765],softwar:[679,681],solut:[712,722],someon:742,sop1:[0,2,3,4,9,590],sop2:[0,2,3,4,9,590],sopc:[0,2,3,4,9,590],sopk:[0,2,3,4,9],sopp:[0,2,3,4,9,590],sort:[610,644,743],sourc:[585,590,605,610,654,664,672,679,710,716,744,752,759,770,810],space:[585,590,607,610,677,724],span:747,sparc:661,spars:741,sparsebitvector:743,sparsemultiset:743,sparseset:743,spec:747,special:[585,603,710,724,759,783],specialis:710,specif:[605,607,617,621,639,640,641,649,653,654,671,674,684,759,765,775,776,811],specifi:[593,659,674,676,710,782],spectr:760,specul:760,speed:609,spgo:646,sphinx:761,spill:590,splice:710,split:609,sponentri:710,sqrt:710,src0_sel:588,src1_sel:588,src:[10,11,12,13,57,58,59,60,61,62,63,64,65,146,147,148,149,150,151,152,153,154,155,156,243,244,245,246,247,248,249,250,251,252,253,317,318,322,323,328,329,330,331,332,343,344,345,346,347,348,406,407,408,409,410,411,412,413,414,415,416,417,519,520,521,522,523,524,525,526,527,528,529],sreg:724,srem:710,sroa:741,srsrc:[66,67,157,158,254,255,349,418,419,530,531],ssa:[607,672,710,714,741],ssamp:[68,159,256,420,532],ssca:747,sshl:710,ssrc:[69,70,71,72,73,74,75,76,77,160,161,162,163,164,165,166,167,168,169,170,257,258,259,260,261,262,263,264,265,421,422
,423,424,425,426,427,428,429,533,534,535,536,537,538,539,540,541],ssub:710,stabl:668,stack:[585,590,607,671,676,710,720,741,758,760,762,763,764,784,811],stackguard:710,stacklet:758,stackmap:[762,764],stackprotector:710,stackrestor:710,stacksav:710,stage:[590,592,639,700],stai:667,standard:[585,597,610,660,663,679,710,743,746,747,748,759,765],start:[605,607,659,676,679,680,681,710,712,719,728,775,781,782,788],startup:712,stash:745,stat:[743,754],state:[585,590,722,760],stateless:741,statement:[599,610,760,770],statepoint:[676,710],statist:[630,638,639,743,782],statu:[611,612,614,616,617,619,621,622,623,624,625,626,628,629,630,631,632,633,635,636,638,639,640,641,643,645,648,649,650,651,652,653,654,657,658,709,725,742,745,746,764],std:[610,660,743],stderr:741,steen:593,step:[590,681,696,710,727,745,766,780,784],stepvector:710,still:[674,727],storag:[659,710,743],store:[607,639,672,674,710,741,760],storestrong:710,storeweak:710,strategi:[676,710,760],straw:745,stream:[644,729,731,732,734,735,736,737,738,739,769],streamer:754,strength:741,stress:651,string:[611,652,664,710,726,743,754],stringerror:743,stringinit:769,stringmap:743,stringref:743,stringset:743,strip:[653,710,741],strtab_block:597,struct:[610,677,710],structur:[585,628,663,667,675,710,716,721,722,743,769,773,775,776,779],studio:681,stuff:672,style:[593,610,648,671,703,710,744],sub:[664,710,745],subclass:[743,780],subcommand:644,subdirectori:744,submit:[662,667,704],subregist:716,subroutin:770,subsect:[717,761],substitut:[611,616,775],substream:[731,734],subsubsect:717,subtarget:[768,780],successor:[716,743],suffic:587,suffix:[587,770],suggest:719,suit:[616,679,681,747,753,773,774,775],summari:[596,623,644,677,710],superblock:735,superclass:769,suppli:712,support:[590,599,607,641,660,669,672,695,710,712,726,746,749,762,764,765,766,780,805],suspend:663,svn:745,swift:710,switchinst:[599,741],swizzl:588,symbol:[589,590,614,626,640,644,654,671,713,716,726,729,732,734,737,741],sync:[590,678],synopsi:[611,612,613,6
14,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658],syntax:[0,1,2,3,4,5,6,7,8,9,587,588,589,611,663,671,710,724,762,771,803,814],synthes:710,system:[590,667,679,681,710,765,767],systemz:661,tabl:[590,607,640,669,671,716,733,741,746,749,754,759,768,777,787,813],tablegen:[607,752,768,769,770,771,772],tag:[585,586,664,702,720,759,786],tail:[607,741],tailcallelim:741,talk:719,target:[590,607,619,655,667,669,671,677,685,690,700,710,712,716,724,741,752,780,809,811],targetdata:741,targetframelow:607,targetinstrinfo:[607,780],targetjitinfo:607,targetlow:607,targetmachin:607,targetregisterinfo:[607,780],targetsubtarget:607,task:[702,743],tba:589,tbaa:710,tblgen:[613,618,655,656,658],teardown:710,tech:719,techniqu:760,tell:677,templat:[710,717,743,761],tempt:760,termin:710,terminolog:[598,679,714],test:[591,602,605,612,616,667,679,681,689,700,702,705,709,710,712,716,723,742,747,751,753,763,770,773,774,775,777,781],tester:616,text:[659,761,769],tfe:588,tgt:[78,171,266,542],than:677,thi:[672,674,677,712,717,722,745,752,761,808,810],thing:[672,674],thinlto:710,thought:804,thread:[590,710,741,743],threadsafecontext:726,threadsafemodul:726,threshold:784,through:610,thumb:703,tidbit:811,tier:766,time:[609,610,694,713,721,760,769,810],timelin:[639,702],tinyptrvector:743,tip:[595,674,709,811],tire:807,tma:589,todo:[590,605,659,703,804],togeth:809,toi:712,token:[669,710],too:748,tool:[601,604,609,612,615,627,633,634,635,637,641,643,645,646,653,672,675,679,709,743,752,769,783,784],toolchain:[667,679,698],top:[659,726],topic:[605,743,751,778,813],tos:726,town:719,tpi:738,trace:[712,783,784,785],track:773,trade:666,tradeoff:[721,760],trampolin:710,transform:[593,663,705,741,759,776],transit:[669,710,726,727,748,760,764],translat:685,transpar:757,transpos:710,trap:[589,590,710],travers:743,treat:610,tree:[741,744,747,803],triag:600,trick:811,trip:779,tripl:[590,607,7
10,724],trivial:805,trophi:712,troubleshoot:700,trunc:710,truncat:684,tscwrap:785,ttmp:589,tune:617,turn:[610,672,743],tutori:[611,680,690,724,792,793,794,795,796,797,798,799,800,801,811,812,813],tvm:747,twine:743,two:[607,677],type:[14,79,172,267,333,350,430,543,585,587,589,591,606,610,644,664,670,674,677,683,684,710,727,730,731,738,741,743,756,768,770,777,779,780,786],type_block:597,type_code_arrai:597,type_code_bfloat:597,type_code_doubl:597,type_code_float:597,type_code_fp128:597,type_code_funct:597,type_code_function_old:597,type_code_half:597,type_code_integ:597,type_code_label:597,type_code_metadata:597,type_code_numentri:597,type_code_opaqu:597,type_code_point:597,type_code_ppc_fp128:597,type_code_struct_anon:597,type_code_struct_nam:597,type_code_vector:597,type_code_void:597,type_code_x86_amx:597,type_code_x86_fp80:597,type_code_x86_mmx:597,typecheckedloadconstvcal:710,typecheckedloadvcal:710,typedinit:769,typeid:669,typeidinfo:710,typetest:710,typetestassumeconstvcal:710,typetestassumevcal:710,uadd:710,ubsantrap:710,udiv:710,ufmt:588,uglygep:677,uimm20:589,uimm32:589,uimm8:589,uitofp:710,umax:710,umin:710,umul:710,unabbrev_record:597,unari:[589,710,807],undef:672,undefin:[585,710,724],underflow:695,underli:677,understand:672,unifi:741,uniform:590,unintention:609,union:710,uniqu:786,uniquevector:743,unit:[585,590,607,639,775,810],univers:637,unknown:779,unnecessari:610,unord:[594,710],unorm:588,unpack:[590,679],unpredict:710,unreach:[672,710],unrol:[710,741,776,779],unroll_and_jam:710,unsetinit:769,unspecifi:590,unswitch:741,unus:[741,765],unwindless:741,updat:[591,593,667,702,705,721,723,745],upload:723,urem:710,uri:590,usabl:748,usag:[585,601,605,645,668,687,694,695,708,709,712,720,728,756,762,779],use:[610,672,677,712,721,726,741,743,761],used:[605,607,710,741],useful:[593,743,811],user:[590,617,712,724,741,743,778,786,807,808],uses:[700,743],ushl:710,using:[601,610,669,675,681,694,700,701,777,789],usub:710,util:[675,679,705,709,741,751,764,786],va_arg:
710,va_copi:710,va_end:710,va_start:710,vaddr:[80,81,82,83,84,85,173,174,175,176,268,269,270,271,351,352,431,432,433,434,435,436,544,545,546,547,548,549],valid:[753,786],valu:[585,589,590,659,672,677,705,710,712,716,722,728,741,743,759,760,770,786],value_symtab_block:597,valuemap:743,valuesymbolt:743,vari:716,variabl:[597,605,606,607,610,611,671,710,716,741,744,748,758,759,770,779,808,810],variad:684,variant:[745,760],variou:741,vbr:597,vcall_vis:777,vcc:[86,177,272,437,550,589],vccz:589,vdata0:[88,89,179,180,274,275,439,440,552,553],vdata1:[90,91,181,182,276,277,441,442,554,555],vdata:[87,92,93,94,95,96,97,98,99,100,101,178,183,184,185,186,187,188,189,190,191,273,278,279,280,281,282,283,284,285,286,287,288,289,290,291,353,354,438,443,444,445,446,447,448,449,450,451,452,551,556,557,558,559,560,561,562,563,564,565],vdst:[15,102,103,104,105,106,107,108,109,110,111,112,113,114,115,192,193,194,195,196,197,198,199,200,201,202,203,204,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,319,324,334,355,356,357,358,359,360,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,566,567,568,569,570,571,572,573,574,575,576,577,578,579],vecmathlib:747,vector:[677,684,688,710,733,741,743,749,750,776,779],vendor:590,verif:[604,764],verifi:[611,630,672,741],versa:743,version:[585,610,665,702,712,776],vgpr:590,vgpr_count:590,via:[709,742,773],vice:743,view:[606,639,741,743],viewer:[754,784],vintrp:[0,2,3,4,588],virtual:[607,610,672,683,765,777],visibl:710,visual:[605,681,784],vla:677,vliw:607,volatil:710,vop1:[0,2,3,4,9],vop2:[0,1,2,3,4,7,8,9],vop3:[0,2,3,4,7,8,9,588],vop3p:[0,1,4,5,6,7,8,9,588],vopc:[0,2,3,4,9],vplan:750,vscale:710,vsrc:[16,116,117,118,119,205,206,207,208,310,311,312,313,335,361,362,363,364,365,366,367,473,474,475,476,477,478,580,581,582,583],wai:[662,677],waitcnt:[120,209,314,479,584],walker:721,wallclocktim:785,warn:[610,670,741,766],web:742,webassembl:752,websit:702,weight:[598,599,645],well:710,what:[601,609,672,674
,676,677,712,722,745,755,757,766,781,782],when:[601,609,610,672,674,705,712],where:[672,719],wherev:610,whether:659,which:[593,668,677,729],whitespac:610,who:609,why:[668,672,677,712,745,746,808,810],wide:678,widen:710,width:[597,610,674,710],window:[654,661,669,671,698,712],wise:710,without:672,won:717,word:597,work:[590,600,639,662,667,677,693,712,722,726,759,760,763,783],workflow:[609,745,750],workspac:723,world:[781,782],wrapper:[597,606],write:[593,607,610,639,659,672,675,676,677,744,745,774,775,780,781,782],write_regist:710,writer:661,written:672,x86:[607,661,671,752,760],x86_64:701,x86_amx:710,x86_mmx:710,x86evex2vex:768,xcoff:642,xcore:661,xnack_mask:589,xor:710,xrai:[751,783,784,785],yaml2obj:814,yaml2pdb:644,yaml:[754,786],you:[717,755],your:[605,696,703,719,723,743,782,783],yourself:721,zen:760,zero:669,zext:[674,710],zip:745}}) \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Security.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Security.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Security.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Security.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,340 @@ + + + + + + + + + LLVM Security Group — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Security Group

+

The LLVM Security Group has the following goals:

+
    +
  1. Allow LLVM contributors and security researchers to disclose security-related issues affecting the LLVM project to members of the LLVM community.

+
  2. Organize fixes, code reviews, and release management for said issues.

+
  3. Allow distributors time to investigate and deploy fixes before wide dissemination of vulnerabilities or mitigation shortcomings.

+
  4. Ensure timely notification and release to vendors who package and distribute LLVM-based toolchains and projects.

+
  5. Ensure timely notification to users of LLVM-based toolchains whose compiled code is security-sensitive, through the CVE process.

+
  6. Strive to improve security over time, for example by adding additional testing, fuzzing, and hardening after fixing issues.

+
+

Note: these goals ensure timely action, provide disclosure timing when issues are reported, and respect vendors’ / packagers’ / users’ constraints.

+

The LLVM Security Group is private. It is composed of trusted LLVM contributors. Its discussions remain within the Security Group (plus issue reporter and key experts) while an issue is being investigated. After an issue becomes public, the entirety of the group’s discussions pertaining to that issue also become public.

+
+

How to report a security issue?

+

To report a security issue in the LLVM Project, please open a new issue in the LLVM project page, on the chromium issue tracker. Be sure to use the “Security bug report” template.

+

We aim to acknowledge your report within two business days since you first reach out. If you do not receive any response by then, you can escalate by sending a message to the llvm-dev mailing list asking to get in touch with someone from the LLVM Security Group. The escalation mailing list is public: avoid discussing or mentioning the specific issue when posting on it.

+
+
+

Group Composition

+
+

Security Group Members

+

The members of the group represent a wide cross-section of the community, and meet the criteria for inclusion below. The list is in the format * ${full_name} (${affiliation}) [${phabricator_username}]. If a phabricator username for an individual isn’t available, the brackets will be empty.

+
    +
  • Ahmed Bougacha (Apple) [ab]

  • +
  • Artur Pilipenko (Azul Systems Inc) [apilipenko]

  • +
  • Dimitry Andric (individual; FreeBSD) [dim]

  • +
  • Ed Maste (individual; FreeBSD) [emaste]

  • +
  • George Burgess IV (Google) [george.burgess.iv]

  • +
  • Kate McInnes (Apple) []

  • +
  • Kristof Beyls (ARM) [kristof.beyls]

  • +
  • Matthew Riley (Google) [mattdr]

  • +
  • Nikhil Gupta (Nvidia) [nikhgupt]

  • +
  • Oliver Hunt (Apple) [ojhunt]

  • +
  • Paul Robinson (Sony) [probinson]

  • +
  • Peter Smith (ARM) [peter.smith]

  • +
  • Pietro Albini (individual; Rust) [pietroalbini]

  • +
  • Serge Guelton (RedHat) [serge-sans-paille]

  • +
  • Shayne Hiet-Block (Microsoft) [Shayne]

  • +
  • Steve Klabnik (Oxide Computer Company; Rust) [steveklabnik]

  • +
  • Tim Penge (Sony) [tpenge]

  • +
+
+
+

Criteria

+
    +
  • Nominees for LLVM Security Group membership should fall in one of these groups:

    +
      +
    • Individual contributors:

      +
        +
      • Specializes in fixing compiler-based security related issues or often participates in their exploration and resolution.

      • +
      • Has a track record of finding security vulnerabilities and responsible disclosure of those vulnerabilities.

      • +
      • Is a compiler expert who has specific interests in knowing about, resolving, and preventing future security vulnerabilities.

      • +
      • Has actively contributed non-trivial code to the LLVM project in the last year.

      • +
      +
    • +
    • Researchers:

      +
        +
      • Has a track record of finding security vulnerabilities and responsible disclosure of those vulnerabilities.

      • +
      • Is a compiler expert who has specific interests in knowing about, resolving, and preventing future security vulnerabilities.

      • +
      +
    • +
    • Vendor contacts:

      +
        +
      • Represents an organization or company which ships products that include their own copy of LLVM. Due to their position in the organization, the nominee has a reasonable need to know about security issues and disclosure embargoes.

      • +
      +
    • +
    +
  • +
  • Additionally, the following are necessary but not sufficient criteria for membership in the LLVM Security Group:

    +
      +
    • If already in the LLVM Security Group, has actively participated in one (if any) security issue in the last year.

    • +
    • If already in the LLVM Security Group, has actively participated in most membership discussions in the last year.

    • +
    • If already in the LLVM Security Group, has actively participated in writing or reviewing a transparency report in the last year.

    • +
    • When employed by a company or other entity, the parent entity has no more than three members already in the LLVM Security Group.

    • +
    • When nominated as a vendor contact, their position with that vendor remains the same as when originally nominated.

    • +
    • Nominees are trusted by existing Security Group members to keep communications embargoed while still active.

    • +
    +
  • +
+
+
+

Nomination process

+

Anyone who feels they meet these criteria can nominate themselves, or may be nominated by a third party such as an existing LLVM Security Group member. The nomination should state whether the nominee is nominated as an individual, researcher, or as a vendor contact. It should clearly describe the grounds for nomination.

+

For the moment, nominations are generally proposed, discussed, and voted on using Phabricator. An example nomination is available here. The use of Phabricator helps keep membership discussions open, transparent, and easily accessible to LLVM developers in many ways. If, for any reason, a fully-world-readable nomination seems inappropriate, you may open a new issue, and a discussion can be had about the best way to approach nomination, given the constraints that individuals are under.

+

Our recommended method of nomination may change as our Discussion Medium story evolves over time.

+
+
+

Choosing new members

+

If a nomination for LLVM Security Group membership is supported by a majority of existing LLVM Security Group members, then it carries within five business days unless an existing member of the Security Group objects. If an objection is raised, the LLVM Security Group members should discuss the matter and try to come to consensus; failing this, the nomination will succeed only by a two-thirds supermajority vote of the LLVM Security Group.

+
+
+

Accepting membership

+

Before new LLVM Security Group membership is finalized, the successful nominee should accept membership and agree to abide by this security policy, particularly Privileges and Responsibilities of LLVM Security Group Members below.

+
+
+

Keeping Membership Current

+
    +
  • At least every six months, the LLVM Security Group applies the above criteria. The membership list is pruned accordingly.

  • +
  • Any Security Group member can ask that the criteria be applied within the next five business days.

  • +
  • If a member of the LLVM Security Group does not act in accordance with the letter and spirit of this policy, then their LLVM Security Group membership can be revoked by a majority vote of the members, not including the person under consideration for revocation. After a member calls for a revocation vote, voting will be open for five business days.

  • +
  • Emergency suspension: an LLVM Security Group member who blatantly disregards the LLVM Security Policy may have their membership temporarily suspended on the request of any two members. In such a case, the requesting members should notify the Security Group with a description of the offense. At this point, membership will be temporarily suspended for five business days, pending outcome of the vote for permanent revocation.

  • +
  • The LLVM Board may remove any member from the LLVM Security Group.

  • +
+
+
+

Transparency Report

+

Every year, the LLVM Security Group must publish a transparency report. The intent of this report is to keep the community informed by summarizing the disclosures that have been made public in the last year. It shall contain a list of all public disclosures, as well as statistics on time to fix issues, length of embargo periods, and so on.

+
+
+
+

Privileges and Responsibilities of LLVM Security Group Members

+
+

Access

+

LLVM Security Group members will be subscribed to a private Discussion Medium (FUTURE: see section below). It will be used for technical discussions of security issues, as well as process discussions about matters such as disclosure timelines and group membership. Members have access to all security issues.

+
+
+

Confidentiality

+

Members of the LLVM Security Group will be expected to treat LLVM security issue information shared with the group as confidential until publicly disclosed:

+
    +
  • Members should not disclose security issue information to non-members unless both members are employed by the same vendor of an LLVM-based product, in which case information can be shared within that organization on a need-to-know basis and handled as confidential information normally is within that organization.

  • +
  • If the LLVM Security Group agrees, designated members may share issues with vendors of non-LLVM based products if their product suffers from the same issue. The non-LLVM vendor should be asked to respect the issue’s embargo date, and to not share the information beyond the need-to-know people within their organization.

  • +
  • If the LLVM Security Group agrees, key experts can be brought in to help address particular issues. The key expert should be asked to respect the issue’s embargo date, and to not share the information.

  • +
+
+
+

Disclosure

+

Following the process below, the LLVM Security Group decides on an embargo date for public disclosure for each security issue. An embargo may be lifted before the agreed-upon date if all vendors planning to ship a fix have already done so, and if the reporter does not object.

+
+
+

Collaboration

+

Members of the LLVM Security Group are expected to:

+
    +
  • Promptly share any LLVM vulnerabilities they become aware of.

  • +
  • Volunteer to drive issues forward.

  • +
  • Help evaluate the severity of incoming issues.

  • +
  • Help write and review patches to address security issues.

  • +
  • Participate in the member nomination and removal processes.

  • +
+
+
+
+

Discussion Medium

+

FUTURE: this section needs more work! Where discussions occur is influenced by other factors that are still open in this document. We can finalize it later. +It seems like bugzilla and email don’t meet security requirements.

+

The medium used to host LLVM Security Group discussions is security-sensitive. It should therefore run on infrastructure which can meet our security expectations.

+

We are currently using the chromium issue tracker (as the llvm project) to have security discussions:

+
    +
  • File security issues.

  • +
  • Discuss security improvements to LLVM.

  • +
+

When a new issue is filed, a template is provided to help issue reporters provide all relevant information.

+

FUTURE: The Github security workflow allows publicly disclosing resolved security issues on the github project page, and we would be interested in adopting it for that purpose. However, it does not easily allow confidential reporting of security issues, as creating Github Security Advisories is currently restricted to Github project admins. That is why we have started with the chromium issue tracker instead.

+

We also occasionally need to discuss logistics of the LLVM Security Group itself:

+
    +
  • Nominate new members.

  • +
  • Propose member removal.

  • +
  • Suggest policy changes.

  • +
+

We often have these discussions publicly, in our monthly public sync-up call and on public LLVM mailing lists. For internal or confidential discussions, we also use a private mailing list.

+
+
+

Process

+

The following process occurs on the discussion medium for each reported issue:

+
    +
  • A security issue reporter (not necessarily an LLVM contributor) reports an issue.

  • +
  • Within two business days, a member of the Security Group is put in charge of driving the issue to an acceptable resolution. This champion doesn’t need to be the same person for each issue. This person can self-nominate.

  • +
  • Members of the Security Group discuss in which circumstances (if any) an issue is relevant to security, and determine if it is a security issue.

  • +
  • Negotiate an embargo date for public disclosure, with a default minimum time limit of ninety days.

  • +
  • Security Group members can recommend that key experts be pulled in to specific issue discussions. The key expert can be pulled in unless there are objections from other Security Group members.

  • +
  • Patches are written and reviewed.

  • +
  • Backporting security patches from recent versions to old versions cannot always work. It is up to the Security Group to decide if such backporting should be done, and how far back.

  • +
  • The Security Group figures out how the LLVM project’s own releases, as well as individual vendors’ releases, can be timed to patch the issue simultaneously.

  • +
  • Embargo date can be delayed or pulled forward at the Security Group’s discretion.

  • +
  • The issue champion obtains a CVE entry from MITRE.

  • +
  • Once the embargo expires, the patch is posted publicly according to LLVM’s usual code review process.

  • +
  • All security issues (as well as nomination / removal discussions) become public within approximately fourteen weeks of the fix landing in the LLVM repository. Precautions should be taken to avoid disclosing particularly sensitive data included in the report (e.g. username and password pairs).

  • +
+
+
+

Changes to the Policy

+

The LLVM Security Policy may be changed by majority vote of the LLVM Security Group. Such changes also need to be approved by the LLVM Board.

+
+
+

What is considered a security issue?

+

FUTURE: this section will be expanded once the Security Group is formed, and it agrees on an initial security surface area.

+

The LLVM Project has a significant amount of code, and not all of it is considered security-sensitive. This is particularly true because LLVM is used in a wide variety of circumstances: there are different threat models, untrusted inputs differ, and the environment LLVM runs in is varied. Therefore, what the LLVM Project considers a security issue is what its members have signed up to maintain securely.

+

As this security process matures, members of the LLVM community can propose that a part of the codebase be designated as security-sensitive (or no longer security-sensitive). This requires a rationale, and buy-in from the LLVM community as for any RFC. In some cases, parts of the codebase could be handled as security-sensitive but need significant work to get to the stage where that’s manageable. The LLVM community will need to decide whether it wants to invest in making these parts of the code secure-able, and maintain these security properties over time. In all cases the LLVM Security Group should be consulted, since they’ll be responding to security issues filed against these parts of the codebase.

+

If you’re not sure whether an issue is in-scope for this security process or not, err towards assuming that it is. The Security Group might agree or disagree and will explain its rationale in the report, as well as update this document through the above process.

+

The security-sensitive parts of the LLVM Project currently are:

+
    +
  • None (this process is new, the list hasn’t been populated yet)

  • +
  • FUTURE: this section will be expanded.

  • +
+

The parts of the LLVM Project which are currently treated as non-security sensitive are:

+
    +
  • Language front-ends, such as clang, for which a malicious input file can cause undesirable behavior. For example, a maliciously crafted C or Rust source file can cause arbitrary code to execute in LLVM. These parts of LLVM haven’t been hardened, and compiling untrusted code usually also includes running utilities such as make which can more readily perform malicious things.

  • +
  • FUTURE: this section will be expanded.

  • +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SegmentedStacks.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SegmentedStacks.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SegmentedStacks.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SegmentedStacks.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,214 @@ + + + + + + + + + Segmented Stacks in LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Segmented Stacks in LLVM

+ +
+

Introduction

+

Segmented stacks allow stack space to be allocated incrementally rather than as a +monolithic chunk (of some worst case size) at thread initialization. This is +done by allocating stack blocks (henceforth called stacklets) and linking them +into a doubly linked list. The function prologue is responsible for checking if +the current stacklet has enough space for the function to execute; and if not, +call into the libgcc runtime to allocate more stack space. Segmented stacks are +enabled with the "split-stack" attribute on LLVM functions.

+

The runtime functionality is already there in libgcc.

+
+
+

Implementation Details

+
+

Allocating Stacklets

+

As mentioned above, the function prologue checks if the current stacklet has +enough space. The current approach is to use a slot in the TCB to store the +current stack limit (minus the amount of space needed to allocate a new block) - +this slot’s offset is again dictated by libgcc. The generated +assembly looks like this on x86-64:

+
  leaq     -8(%rsp), %r10
+  cmpq     %fs:112,  %r10
+  jg       .LBB0_2
+
+  # More stack space needs to be allocated
+  movabsq  $8, %r10   # The amount of space needed
+  movabsq  $0, %r11   # The total size of arguments passed on stack
+  callq    __morestack
+  ret                 # The reason for this extra return is explained below
+.LBB0_2:
+  # Usual prologue continues here
+
+
+

The size of function arguments on the stack needs to be passed to +__morestack (this function is implemented in libgcc) since that number +of bytes has to be copied from the previous stacklet to the current one. This is +so that SP (and FP) relative addressing of function arguments works as expected.

+

The unusual ret is needed to have the function which made a call to +__morestack return correctly. __morestack, instead of returning, calls +into .LBB0_2. This is possible since both the size of the ret +instruction and the PC of the call to __morestack are known. When the function +body returns, control is transferred back to __morestack. __morestack +then de-allocates the new stacklet, restores the correct SP value, and does a +second return, which returns control to the correct caller.

+
+
+

Variable Sized Allocas

+

The section on allocating stacklets automatically assumes that every stack +frame will be of fixed size. However, LLVM allows the use of the llvm.alloca +intrinsic to allocate dynamically sized blocks of memory on the stack. When +faced with such a variable-sized alloca, code is generated to:

+
    +
  • Check if the current stacklet has enough space. If yes, just bump the SP, like +in the normal case.

  • +
  • If not, generate a call to libgcc, which allocates the memory from the +heap.

  • +
+

The memory allocated from the heap is linked into a list in the current +stacklet, and freed along with the same. This prevents a memory leak.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SourceLevelDebugging.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SourceLevelDebugging.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SourceLevelDebugging.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SourceLevelDebugging.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,2090 @@ + + + + + + + + + Source Level Debugging with LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Source Level Debugging with LLVM

+ +
+

Introduction

+

This document is the central repository for all information pertaining to debug +information in LLVM. It describes the actual format that the LLVM debug +information takes, which is useful for those interested in creating +front-ends or dealing directly with the information. Further, this document +provides specific examples of what debug information for C/C++ looks like.

+
+

Philosophy behind LLVM debugging information

+

The idea of the LLVM debugging information is to capture how the important +pieces of the source-language’s Abstract Syntax Tree map onto LLVM code. +Several design aspects have shaped the solution that appears here. The +important ones are:

+
    +
  • Debugging information should have very little impact on the rest of the +compiler. No transformations, analyses, or code generators should need to +be modified because of debugging information.

  • +
  • LLVM optimizations should interact in well-defined and easily described +ways with the debugging information.

  • +
  • Because LLVM is designed to support arbitrary programming languages, +LLVM-to-LLVM tools should not need to know anything about the semantics of +the source-level-language.

  • +
  • Source-level languages are often widely different from one another. +LLVM should not put any restrictions on the flavor of the source-language, +and the debugging information should work with any language.

  • +
  • With code generator support, it should be possible to use an LLVM compiler +to compile a program to native machine code and standard debugging +formats. This allows compatibility with traditional machine-code level +debuggers, like GDB or DBX.

  • +
+

The approach used by the LLVM implementation is to use a small set of +intrinsic functions to define a mapping +between LLVM program objects and the source-level objects. The description of +the source-level program is maintained in LLVM metadata in an +implementation-defined format (the C/C++ front-end +currently uses working draft 7 of the DWARF 3 standard).

+

When a program is being debugged, a debugger interacts with the user and turns +the stored debug information into source-language specific information. As +such, a debugger must be aware of the source-language, and is thus tied to a +specific language or family of languages.

+
+
+

Debug information consumers

+

The role of debug information is to provide meta information normally stripped +away during the compilation process. This meta information provides an LLVM +user a relationship between generated code and the original program source +code.

+

Currently, there are two backend consumers of debug info: DwarfDebug and +CodeViewDebug. DwarfDebug produces DWARF suitable for use with GDB, LLDB, and +other DWARF-based debuggers. CodeViewDebug produces CodeView, +the Microsoft debug info format, which is usable with Microsoft debuggers such +as Visual Studio and WinDBG. LLVM’s debug information format is mostly derived +from and inspired by DWARF, but it is feasible to translate into other target +debug info formats such as STABS.

+

It would also be reasonable to use debug information to feed profiling tools +for analysis of generated code, or, tools for reconstructing the original +source from generated code.

+
+
+

Debug information and optimizations

+

An extremely high priority of LLVM debugging information is to make it interact +well with optimizations and analysis. In particular, the LLVM debug +information provides the following guarantees:

+
    +
  • LLVM debug information always provides information to accurately read +the source-level state of the program, regardless of which LLVM +optimizations have been run. How to Update Debug Info: A Guide for LLVM Pass Authors specifies how debug +info should be updated in various kinds of code transformations to avoid +breaking this guarantee, and how to preserve as much useful debug info as +possible. Note that some optimizations may impact the ability to modify the +current state of the program with a debugger, such as setting program +variables, or calling functions that have been deleted.

  • +
  • As desired, LLVM optimizations can be upgraded to be aware of debugging +information, allowing them to update the debugging information as they +perform aggressive optimizations. This means that, with effort, the LLVM +optimizers could optimize debug code just as well as non-debug code.

  • +
  • LLVM debug information does not prevent optimizations from +happening (for example inlining, basic block reordering/merging/cleanup, +tail duplication, etc).

  • +
  • LLVM debug information is automatically optimized along with the rest of +the program, using existing facilities. For example, duplicate +information is automatically merged by the linker, and unused information +is automatically removed.

  • +
+

Basically, the debug information allows you to compile a program with +“-O0 -g” and get full debug information, allowing you to arbitrarily modify +the program as it executes from a debugger. Compiling a program with +“-O3 -g” gives you full debug information that is always available and +accurate for reading (e.g., you get accurate stack traces despite tail call +elimination and inlining), but you might lose the ability to modify the program +and call functions which were optimized out of the program, or inlined away +completely.

+

The LLVM test-suite provides a framework to +test the optimizer’s handling of debugging information. It can be run like +this:

+
% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
+% make TEST=dbgopt
+
+
+

This will test the impact of debugging information on optimization passes. If +debugging information influences optimization passes then it will be reported +as a failure. See LLVM Testing Infrastructure Guide for more information on LLVM test +infrastructure and how to run various tests.

+
+
+
+

Debugging information format

+

LLVM debugging information has been carefully designed to make it possible for +the optimizer to optimize the program and debugging information without +necessarily having to know anything about debugging information. In +particular, the use of metadata avoids duplicated debugging information from +the beginning, and the global dead code elimination pass automatically deletes +debugging information for a function if it decides to delete the function.

+

To do this, most of the debugging information (descriptors for types, +variables, functions, source files, etc) is inserted by the language front-end +in the form of LLVM metadata.

+

Debug information is designed to be agnostic about the target debugger and +debugging information representation (e.g. DWARF/Stabs/etc). It uses a generic +pass to decode the information that represents variables, types, functions, +namespaces, etc: this allows for arbitrary source-language semantics and +type-systems to be used, as long as there is a module written for the target +debugger to interpret the information.

+

To provide basic functionality, the LLVM debugger does have to make some +assumptions about the source-level language being debugged, though it keeps +these to a minimum. The only common features that the LLVM debugger assumes +exist are source files, and program objects. These abstract objects are used by a +debugger to form stack traces, show information about local variables, etc.

+

This section of the documentation first describes the representation aspects +common to any source-language. C/C++ front-end specific debug information describes the data layout +conventions used by the C and C++ front-ends.

+

Debug information descriptors are specialized metadata nodes, first-class subclasses of Metadata.

+
+

Debugger intrinsic functions

+

LLVM uses several intrinsic functions (name prefixed with “llvm.dbg”) to +track source local variables through optimization and code generation.

+
+

llvm.dbg.addr

+
void @llvm.dbg.addr(metadata, metadata, metadata)
+
+
+

This intrinsic provides information about a local element (e.g., variable). +The first argument is metadata holding the address of variable, typically a +static alloca in the function entry block. The second argument is a +local variable containing a description of +the variable. The third argument is a complex expression. An llvm.dbg.addr intrinsic describes the +address of a source variable.

+
%i.addr = alloca i32, align 4
+call void @llvm.dbg.addr(metadata i32* %i.addr, metadata !1,
+                         metadata !DIExpression()), !dbg !2
+!1 = !DILocalVariable(name: "i", ...) ; int i
+!2 = !DILocation(...)
+...
+%buffer = alloca [256 x i8], align 8
+; The address of i is buffer+64.
+call void @llvm.dbg.addr(metadata [256 x i8]* %buffer, metadata !3,
+                         metadata !DIExpression(DW_OP_plus, 64)), !dbg !4
+!3 = !DILocalVariable(name: "i", ...) ; int i
+!4 = !DILocation(...)
+
+
+

A frontend should generate exactly one call to llvm.dbg.addr at the point +of declaration of a source variable. Optimization passes that fully promote the +variable from memory to SSA values will replace this call with possibly +multiple calls to llvm.dbg.value. Passes that delete stores are effectively +partial promotion, and they will insert a mix of calls to llvm.dbg.value +and llvm.dbg.addr to track the source variable value when it is available. +After optimization, there may be multiple calls to llvm.dbg.addr describing +the program points where the variable lives in memory. All calls for the same +concrete source variable must agree on the memory location.

+
+
+

llvm.dbg.declare

+
void @llvm.dbg.declare(metadata, metadata, metadata)
+
+
+

This intrinsic is identical to llvm.dbg.addr, except that there can only be +one call to llvm.dbg.declare for a given concrete local variable. It is not control-dependent, meaning that if +a call to llvm.dbg.declare exists and has a valid location argument, that +address is considered to be the true home of the variable across its entire +lifetime. This makes it hard for optimizations to preserve accurate debug info +in the presence of llvm.dbg.declare, so we are transitioning away from it, +and we plan to deprecate it in future LLVM releases.

+
+
+

llvm.dbg.value

+
void @llvm.dbg.value(metadata, metadata, metadata)
+
+
+

This intrinsic provides information when a user source variable is set to a new +value. The first argument is the new value (wrapped as metadata). The second +argument is a local variable containing a +description of the variable. The third argument is a complex expression.

+

An llvm.dbg.value intrinsic describes the value of a source variable +directly, not its address. Note that the value operand of this intrinsic may +be indirect (i.e., a pointer to the source variable), provided that interpreting +the complex expression derives the direct value.

+
+
+
+
+

Object lifetimes and scoping

+

In many languages, the local variables in functions can have their lifetimes or +scopes limited to a subset of a function. In the C family of languages, for +example, variables are only live (readable and writable) within the source +block that they are defined in. In functional languages, values are only +readable after they have been defined. Though this is a very obvious concept, +it is non-trivial to model in LLVM, because it has no notion of scoping in this +sense, and does not want to be tied to a language’s scoping rules.

+

In order to handle this, the LLVM debug format uses the metadata attached to +llvm instructions to encode line number and scoping information. Consider the +following C fragment, for example:

+
1.  void foo() {
+2.    int X = 21;
+3.    int Y = 22;
+4.    {
+5.      int Z = 23;
+6.      Z = X;
+7.    }
+8.    X = Y;
+9.  }
+
+
+

Compiled to LLVM, this function would be represented like this:

+
; Function Attrs: nounwind ssp uwtable
+define void @foo() #0 !dbg !4 {
+entry:
+  %X = alloca i32, align 4
+  %Y = alloca i32, align 4
+  %Z = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !13), !dbg !14
+  store i32 21, i32* %X, align 4, !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %Y, metadata !15, metadata !13), !dbg !16
+  store i32 22, i32* %Y, align 4, !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %Z, metadata !17, metadata !13), !dbg !19
+  store i32 23, i32* %Z, align 4, !dbg !19
+  %0 = load i32, i32* %X, align 4, !dbg !20
+  store i32 %0, i32* %Z, align 4, !dbg !21
+  %1 = load i32, i32* %Y, align 4, !dbg !22
+  store i32 %1, i32* %X, align 4, !dbg !23
+  ret void, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!1 = !DIFile(filename: "/dev/stdin", directory: "/Users/dexonsmith/data/llvm/debug-info")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"PIC Level", i32 2}
+!10 = !{!"clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)"}
+!11 = !DILocalVariable(name: "X", scope: !4, file: !1, line: 2, type: !12)
+!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!13 = !DIExpression()
+!14 = !DILocation(line: 2, column: 9, scope: !4)
+!15 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12)
+!16 = !DILocation(line: 3, column: 9, scope: !4)
+!17 = !DILocalVariable(name: "Z", scope: !18, file: !1, line: 5, type: !12)
+!18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5)
+!19 = !DILocation(line: 5, column: 11, scope: !18)
+!20 = !DILocation(line: 6, column: 11, scope: !18)
+!21 = !DILocation(line: 6, column: 9, scope: !18)
+!22 = !DILocation(line: 8, column: 9, scope: !4)
+!23 = !DILocation(line: 8, column: 7, scope: !4)
+!24 = !DILocation(line: 9, column: 3, scope: !4)
+
+
+

This example illustrates a few important details about LLVM debugging +information. In particular, it shows how the llvm.dbg.declare intrinsic and +location information, which are attached to an instruction, are applied +together to allow a debugger to analyze the relationship between statements, +variable definitions, and the code used to implement the function.

+
call void @llvm.dbg.declare(metadata i32* %X, metadata !11, metadata !13), !dbg !14
+  ; [debug line = 2:9] [debug variable = X]
+
+
+

The first intrinsic %llvm.dbg.declare encodes debugging information for the +variable X. The metadata !dbg !14 attached to the intrinsic provides +scope information for the variable X.

+
!14 = !DILocation(line: 2, column: 9, scope: !4)
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5,
+                            isLocal: false, isDefinition: true, scopeLine: 1,
+                            isOptimized: false, retainedNodes: !2)
+
+
+

Here !14 is metadata providing location information. In this example, scope is encoded by !4, a +subprogram descriptor. This way the location +information attached to the intrinsics indicates that the variable X is +declared at line number 2 at a function level scope in function foo.

+

Now let’s take another example.

+
call void @llvm.dbg.declare(metadata i32* %Z, metadata !17, metadata !13), !dbg !19
+  ; [debug line = 5:11] [debug variable = Z]
+
+
+

The third intrinsic %llvm.dbg.declare encodes debugging information for +variable Z. The metadata !dbg !19 attached to the intrinsic provides +scope information for the variable Z.

+
!18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5)
+!19 = !DILocation(line: 5, column: 11, scope: !18)
+
+
+

Here !19 indicates that Z is declared at line number 5 and column +number 11 inside of lexical scope !18. The lexical scope itself resides +inside of subprogram !4 described above.

+

The scope information attached to each instruction provides a straightforward +way to find instructions covered by a scope.

+
+
+

Object lifetime in optimized code

+

In the example above, every variable assignment uniquely corresponds to a +memory store to the variable’s position on the stack. However in heavily +optimized code LLVM promotes most variables into SSA values, which can +eventually be placed in physical registers or memory locations. To track SSA +values through compilation, when objects are promoted to SSA values an +llvm.dbg.value intrinsic is created for each assignment, recording the +variable’s new location. Compared with the llvm.dbg.declare intrinsic:

+
    +
  • A dbg.value terminates the effect of any preceding dbg.values for (any +overlapping fragments of) the specified variable.

  • +
  • The dbg.value’s position in the IR defines where in the instruction stream +the variable’s value changes.

  • +
  • Operands can be constants, indicating the variable is assigned a +constant value.

  • +
+

Care must be taken to update llvm.dbg.value intrinsics when optimization +passes alter or move instructions and blocks – the developer could observe such +changes reflected in the value of variables when debugging the program. For any +execution of the optimized program, the set of variable values presented to the +developer by the debugger should not show a state that would never have existed +in the execution of the unoptimized program, given the same input. Doing so +risks misleading the developer by reporting a state that does not exist, +damaging their understanding of the optimized program and undermining their +trust in the debugger.

+

Sometimes perfectly preserving variable locations is not possible, often when a +redundant calculation is optimized out. In such cases, a llvm.dbg.value +with operand undef should be used, to terminate earlier variable locations +and let the debugger present optimized out to the developer. Withholding +these potentially stale variable values from the developer diminishes the +amount of available debug information, but increases the reliability of the +remaining information.

+

To illustrate some potential issues, consider the following example:

+
define i32 @foo(i32 %bar, i1 %cond) {
+entry:
+  call @llvm.dbg.value(metadata i32 0, metadata !1, metadata !2)
+  br i1 %cond, label %truebr, label %falsebr
+truebr:
+  %tval = add i32 %bar, 1
+  call @llvm.dbg.value(metadata i32 %tval, metadata !1, metadata !2)
+  %g1 = call i32 @gazonk()
+  br label %exit
+falsebr:
+  %fval = add i32 %bar, 2
+  call @llvm.dbg.value(metadata i32 %fval, metadata !1, metadata !2)
+  %g2 = call i32 @gazonk()
+  br label %exit
+exit:
+  %merge = phi [ %tval, %truebr ], [ %fval, %falsebr ]
+  %g = phi [ %g1, %truebr ], [ %g2, %falsebr ]
+  call @llvm.dbg.value(metadata i32 %merge, metadata !1, metadata !2)
+  call @llvm.dbg.value(metadata i32 %g, metadata !3, metadata !2)
+  %plusten = add i32 %merge, 10
+  %toret = add i32 %plusten, %g
+  call @llvm.dbg.value(metadata i32 %toret, metadata !1, metadata !2)
+  ret i32 %toret
+}
+
+
+

This function contains two source-level variables, !1 and !3. The function could, +perhaps, be optimized into the following code:

+
define i32 @foo(i32 %bar, i1 %cond) {
+entry:
+  %g = call i32 @gazonk()
+  %addoper = select i1 %cond, i32 11, i32 12
+  %plusten = add i32 %bar, %addoper
+  %toret = add i32 %plusten, %g
+  ret i32 %toret
+}
+
+
+

What llvm.dbg.value intrinsics should be placed to represent the original variable +locations in this code? Unfortunately the second, third and fourth +dbg.values for !1 in the source function have had their operands +(%tval, %fval, %merge) optimized out. Assuming we cannot recover them, we +might consider this placement of dbg.values:

+
define i32 @foo(i32 %bar, i1 %cond) {
+entry:
+  call @llvm.dbg.value(metadata i32 0, metadata !1, metadata !2)
+  %g = call i32 @gazonk()
+  call @llvm.dbg.value(metadata i32 %g, metadata !3, metadata !2)
+  %addoper = select i1 %cond, i32 11, i32 12
+  %plusten = add i32 %bar, %addoper
+  %toret = add i32 %plusten, %g
+  call @llvm.dbg.value(metadata i32 %toret, metadata !1, metadata !2)
+  ret i32 %toret
+}
+
+
+

However, this will cause !3 to have the return value of @gazonk() at +the same time as !1 has the constant value zero – a pair of assignments +that never occurred in the unoptimized program. To avoid this, we must terminate +the range that !1 has the constant value assignment by inserting an undef +dbg.value before the dbg.value for !3:

+
define i32 @foo(i32 %bar, i1 %cond) {
+entry:
+  call @llvm.dbg.value(metadata i32 0, metadata !1, metadata !2)
+  %g = call i32 @gazonk()
+  call @llvm.dbg.value(metadata i32 undef, metadata !1, metadata !2)
+  call @llvm.dbg.value(metadata i32 %g, metadata !3, metadata !2)
+  %addoper = select i1 %cond, i32 11, i32 12
+  %plusten = add i32 %bar, %addoper
+  %toret = add i32 %plusten, %g
+  call @llvm.dbg.value(metadata i32 %toret, metadata !1, metadata !2)
+  ret i32 %toret
+}
+
+
+

In general, if any dbg.value has its operand optimized out and cannot be +recovered, then an undef dbg.value is necessary to terminate earlier variable +locations. Additional undef dbg.values may be necessary when the debugger can +observe re-ordering of assignments.

+
+
+

How variable location metadata is transformed during CodeGen

+

LLVM preserves debug information throughout mid-level and backend passes, +ultimately producing a mapping between source-level information and +instruction ranges. This +is relatively straightforward for line number information, as mapping +instructions to line numbers is a simple association. For variable locations, +however, the story is more complex. As each llvm.dbg.value intrinsic +represents a source-level assignment of a value to a source variable, the +variable location intrinsics effectively embed a small imperative program +within the LLVM IR. By the end of CodeGen, this becomes a mapping from each +variable to its machine locations over ranges of instructions. +From IR to object emission, the major transformations which affect variable +location fidelity are:

+
    +
  1. Instruction Selection

  2. +
  3. Register allocation

  4. +
  5. Block layout

  6. +
+

each of which is discussed below. In addition, instruction scheduling can +significantly change the ordering of the program, and occurs in a number of +different passes.

+

Some variable locations are not transformed during CodeGen. Stack locations +specified by llvm.dbg.declare are valid and unchanging for the entire +duration of the function, and are recorded in a simple MachineFunction table. +Location changes in the prologue and epilogue of a function are also ignored: +frame setup and destruction may take several instructions, require a +disproportionate amount of debugging information in the output binary to +describe, and should be stepped over by debuggers anyway.

+
+

Variable locations in Instruction Selection and MIR

+

Instruction selection creates a MIR function from an IR function, and just as +it transforms intermediate instructions into machine instructions, so must +intermediate variable locations become machine variable locations. +Within IR, variable locations are always identified by a Value, but in MIR +there can be different types of variable locations. In addition, some IR +locations become unavailable, for example if the operation of multiple IR +instructions are combined into one machine instruction (such as +multiply-and-accumulate) then intermediate Values are lost. To track variable +locations through instruction selection, they are first separated into +locations that do not depend on code generation (constants, stack locations, +allocated virtual registers) and those that do. For those that do, debug +metadata is attached to SDNodes in SelectionDAGs. After instruction selection +has occurred and a MIR function is created, if the SDNode associated with debug +metadata is allocated a virtual register, that virtual register is used as the +variable location. If the SDNode is folded into a machine instruction or +otherwise transformed into a non-register, the variable location becomes +unavailable.

+

Locations that are unavailable are treated as if they have been optimized out: +in IR the location would be assigned undef by a debug intrinsic, and in MIR +the equivalent location is used.

+

After MIR locations are assigned to each variable, machine pseudo-instructions +corresponding to each llvm.dbg.value and llvm.dbg.addr intrinsic are +inserted. There are two forms of this type of instruction.

+

The first form, DBG_VALUE, appears thus:

+
DBG_VALUE %1, $noreg, !123, !DIExpression()
+
+
+
+
And has the following operands:
    +
  • The first operand can record the variable location as a register, +a frame index, an immediate, or the base address register if the original +debug intrinsic referred to memory. $noreg indicates the variable +location is undefined, equivalent to an undef dbg.value operand.

  • +
  • The type of the second operand indicates whether the variable location is +directly referred to by the DBG_VALUE, or whether it is indirect. The +$noreg register signifies the former, an immediate operand (0) the +latter.

  • +
  • Operand 3 is the Variable field of the original debug intrinsic.

  • +
  • Operand 4 is the Expression field of the original debug intrinsic.

  • +
+
+
+

The second form, DBG_VALUE_LIST, appears thus:

+
DBG_VALUE_LIST !123, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus), %1, %2
+
+
+
+
And has the following operands:
    +
  • The first operand is the Variable field of the original debug intrinsic.

  • +
  • The second operand is the Expression field of the original debug intrinsic.

  • +
  • Any number of operands, from the 3rd onwards, record a sequence of variable +location operands, which may take any of the same values as the first +operand of the DBG_VALUE instruction above. These variable location +operands are inserted into the final DWARF Expression in positions indicated +by the DW_OP_LLVM_arg operator in the DIExpression +(see LangRef.html#diexpression).

  • +
+
+
+

The position at which the DBG_VALUEs are inserted should correspond to the +positions of their matching llvm.dbg.value intrinsics in the IR block. As +with optimization, LLVM aims to preserve the order in which variable +assignments occurred in the source program. However SelectionDAG performs some +instruction scheduling, which can reorder assignments (discussed below). +Function parameter locations are moved to the beginning of the function if +they’re not already, to ensure they’re immediately available on function entry.

+

To demonstrate variable locations during instruction selection, consider +the following example:

+
define i32 @foo(i32* %addr) {
+entry:
+  call void @llvm.dbg.value(metadata i32 0, metadata !3, metadata !DIExpression()), !dbg !5
+  br label %bb1, !dbg !5
+
+bb1:                                              ; preds = %bb1, %entry
+  %bar.0 = phi i32 [ 0, %entry ], [ %add, %bb1 ]
+  call void @llvm.dbg.value(metadata i32 %bar.0, metadata !3, metadata !DIExpression()), !dbg !5
+  %addr1 = getelementptr i32, i32 *%addr, i32 1, !dbg !5
+  call void @llvm.dbg.value(metadata i32 *%addr1, metadata !3, metadata !DIExpression()), !dbg !5
+  %loaded1 = load i32, i32* %addr1, !dbg !5
+  %addr2 = getelementptr i32, i32 *%addr, i32 %bar.0, !dbg !5
+  call void @llvm.dbg.value(metadata i32 *%addr2, metadata !3, metadata !DIExpression()), !dbg !5
+  %loaded2 = load i32, i32* %addr2, !dbg !5
+  %add = add i32 %bar.0, 1, !dbg !5
+  call void @llvm.dbg.value(metadata i32 %add, metadata !3, metadata !DIExpression()), !dbg !5
+  %added = add i32 %loaded1, %loaded2
+  %cond = icmp ult i32 %added, %bar.0, !dbg !5
+  br i1 %cond, label %bb1, label %bb2, !dbg !5
+
+bb2:                                              ; preds = %bb1
+  ret i32 0, !dbg !5
+}
+
+
+

If one compiles this IR with llc -o - -start-after=codegen-prepare -stop-after=expand-isel-pseudos -mtriple=x86_64--, the following MIR is produced:

+
bb.0.entry:
+  successors: %bb.1(0x80000000)
+  liveins: $rdi
+
+  %2:gr64 = COPY $rdi
+  %3:gr32 = MOV32r0 implicit-def dead $eflags
+  DBG_VALUE 0, $noreg, !3, !DIExpression(), debug-location !5
+
+bb.1.bb1:
+  successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+
+  %0:gr32 = PHI %3, %bb.0, %1, %bb.1
+  DBG_VALUE %0, $noreg, !3, !DIExpression(), debug-location !5
+  DBG_VALUE %2, $noreg, !3, !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), debug-location !5
+  %4:gr32 = MOV32rm %2, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1)
+  %5:gr64_nosp = MOVSX64rr32 %0, debug-location !5
+  DBG_VALUE $noreg, $noreg, !3, !DIExpression(), debug-location !5
+  %1:gr32 = INC32r %0, implicit-def dead $eflags, debug-location !5
+  DBG_VALUE %1, $noreg, !3, !DIExpression(), debug-location !5
+  %6:gr32 = ADD32rm %4, %2, 4, killed %5, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.addr2)
+  %7:gr32 = SUB32rr %6, %0, implicit-def $eflags, debug-location !5
+  JB_1 %bb.1, implicit $eflags, debug-location !5
+  JMP_1 %bb.2, debug-location !5
+
+bb.2.bb2:
+  %8:gr32 = MOV32r0 implicit-def dead $eflags
+  $eax = COPY %8, debug-location !5
+  RET 0, $eax, debug-location !5
+
+
+

Observe first that there is a DBG_VALUE instruction for every llvm.dbg.value +intrinsic in the source IR, ensuring no source level assignments go missing. +Then consider the different ways in which variable locations have been recorded:

+
    +
  • For the first dbg.value an immediate operand is used to record a zero value.

  • +
  • The dbg.value of the PHI instruction leads to a DBG_VALUE of virtual register +%0.

  • +
  • The first GEP has its effect folded into the first load instruction +(as a 4-byte offset), but the variable location is salvaged by folding +the GEP’s effect into the DIExpression.

  • +
  • The second GEP is also folded into the corresponding load. However, it is +insufficiently simple to be salvaged, and is emitted as a $noreg +DBG_VALUE, indicating that the variable takes on an undefined location.

  • +
  • The final dbg.value has its Value placed in virtual register %1.

  • +
+
+
+

Instruction Scheduling

+

A number of passes can reschedule instructions, notably instruction selection +and the pre- and post-RA machine schedulers. Instruction scheduling can +significantly change the nature of the program – in the (very unlikely) worst +case the instruction sequence could be completely reversed. In such +circumstances LLVM follows the principle applied to optimizations, that it is +better for the debugger not to display any state than a misleading state. +Thus, whenever instructions are advanced in order of execution, any +corresponding DBG_VALUE is kept in its original position, and if an instruction +is delayed then the variable is given an undefined location for the duration +of the delay. To illustrate, consider this pseudo-MIR:

+
%1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1)
+DBG_VALUE %1, $noreg, !1, !2
+%4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags
+DBG_VALUE %4, $noreg, !3, !4
+%7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags
+DBG_VALUE %7, $noreg, !5, !6
+
+
+

Imagine that the SUB32rr were moved forward to give us the following MIR:

+
%7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags
+%1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1)
+DBG_VALUE %1, $noreg, !1, !2
+%4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags
+DBG_VALUE %4, $noreg, !3, !4
+DBG_VALUE %7, $noreg, !5, !6
+
+
+

In this circumstance LLVM would leave the MIR as shown above. Were we to move +the DBG_VALUE of virtual register %7 upwards with the SUB32rr, we would re-order +assignments and introduce a new state of the program. Whereas with the solution +above, the debugger will see one fewer combination of variable values, because +!3 and !5 will change value at the same time. This is preferred over +misrepresenting the original program.

+

In comparison, if one sunk the MOV32rm, LLVM would produce the following:

+
DBG_VALUE $noreg, $noreg, !1, !2
+%4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags
+DBG_VALUE %4, $noreg, !3, !4
+%7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags
+DBG_VALUE %7, $noreg, !5, !6
+%1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1)
+DBG_VALUE %1, $noreg, !1, !2
+
+
+

Here, to avoid presenting a state in which the first assignment to !1 +disappears, the DBG_VALUE at the top of the block assigns the variable the +undefined location, until its value is available at the end of the block where +an additional DBG_VALUE is added. Were any other DBG_VALUE for !1 to occur +in the instructions that the MOV32rm was sunk past, the DBG_VALUE for %1 +would be dropped and the debugger would never observe it in the variable. This +accurately reflects that the value is not available during the corresponding +portion of the original program.

+
+
+

Variable locations during Register Allocation

+

To avoid debug instructions interfering with the register allocator, the +LiveDebugVariables pass extracts variable locations from a MIR function and +deletes the corresponding DBG_VALUE instructions. Some localized copy +propagation is performed within blocks. After register allocation, the +VirtRegRewriter pass re-inserts DBG_VALUE instructions in their original +positions, translating virtual register references into their physical +machine locations. To avoid encoding incorrect variable locations, in this +pass any DBG_VALUE of a virtual register that is not live is replaced by +the undefined location. The LiveDebugVariables pass may insert redundant DBG_VALUEs +because of virtual register rewriting. These will be subsequently removed by +the RemoveRedundantDebugValues pass.

+
+
+

LiveDebugValues expansion of variable locations

+

After all optimizations have run and shortly before emission, the +LiveDebugValues pass runs to achieve two aims:

+
    +
  • To propagate the location of variables through copies and register spills,

  • +
  • For every block, to record every valid variable location in that block.

  • +
+

After this pass the DBG_VALUE instruction changes meaning: rather than +corresponding to a source-level assignment where the variable may change value, +it asserts the location of a variable in a block, and loses effect outside the +block. Propagating variable locations through copies and spills is +straightforward; determining the variable location in every basic block, +however, requires the consideration of control flow. Consider the following IR, which +presents several difficulties:

+
define dso_local i32 @foo(i1 %cond, i32 %input) !dbg !12 {
+entry:
+  br i1 %cond, label %truebr, label %falsebr
+
+bb1:
+  %value = phi i32 [ %value1, %truebr ], [ %value2, %falsebr ]
+  br label %exit, !dbg !26
+
+truebr:
+  call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !24
+  call void @llvm.dbg.value(metadata i32 1, metadata !23, metadata !DIExpression()), !dbg !24
+  %value1 = add i32 %input, 1
+  br label %bb1
+
+falsebr:
+  call void @llvm.dbg.value(metadata i32 %input, metadata !30, metadata !DIExpression()), !dbg !24
+  call void @llvm.dbg.value(metadata i32 2, metadata !23, metadata !DIExpression()), !dbg !24
+  %value2 = add i32 %input, 2
+  br label %bb1
+
+exit:
+  ret i32 %value, !dbg !30
+}
+
+
+

Here the difficulties are:

+
    +
  • The control flow is roughly the opposite of basic block order

  • +
  • The value of the !23 variable merges into %bb1, but there is no PHI +node

  • +
+

As mentioned above, the llvm.dbg.value intrinsics essentially form an +imperative program embedded in the IR, with each intrinsic defining a variable +location. This could be converted to an SSA form by mem2reg, in the same way +that it uses use-def chains to identify control flow merges and insert phi +nodes for IR Values. However, because debug variable locations are defined for +every machine instruction, in effect every IR instruction uses every variable +location, which would lead to a large number of debugging intrinsics being +generated.

+

Examining the example above, variable !30 is assigned %input on both +conditional paths through the function, while !23 is assigned differing +constant values on either path. Where control flow merges in %bb1 we would +want !30 to keep its location (%input), but !23 to become undefined +as we cannot determine at runtime what value it should have in %bb1 without +inserting a PHI node. mem2reg does not insert the PHI node to avoid changing +codegen when debugging is enabled, and does not insert the other dbg.values +to avoid adding very large numbers of intrinsics.

+

Instead, LiveDebugValues determines variable locations when control +flow merges. A dataflow analysis is used to propagate locations between blocks: +when control flow merges, if a variable has the same location in all +predecessors then that location is propagated into the successor. If the +predecessor locations disagree, the location becomes undefined.

+

Once LiveDebugValues has run, every block should have all valid variable +locations described by DBG_VALUE instructions within the block. Very little +effort is then required by supporting classes (such as +DbgEntityHistoryCalculator) to build a map of each instruction to every +valid variable location, without the need to consider control flow. From +the example above, it is otherwise difficult to determine that the location +of variable !30 should flow “up” into block %bb1, but that the location +of variable !23 should not flow “down” into the %exit block.

+
+
+
+

C/C++ front-end specific debug information

+

The C and C++ front-ends represent information about the program in a +format that is effectively identical to DWARF +in terms of information content. This allows code generators to +trivially support native debuggers by generating standard dwarf +information, and contains enough information for non-dwarf targets to +translate it as needed.

+

This section describes the forms used to represent C and C++ programs. Other +languages could pattern themselves after this (which itself is tuned to +representing programs in the same way that DWARF does), or they could choose +to provide completely different forms if they don’t fit into the DWARF model. +As support for debugging information gets added to the various LLVM +source-language front-ends, the information used should be documented here.

+

The following sections provide examples of a few C/C++ constructs and +the debug information that would best describe those constructs. The +canonical references are the DINode classes defined in +include/llvm/IR/DebugInfoMetadata.h and the implementations of the +helper functions in lib/IR/DIBuilder.cpp.

+
+

C/C++ source file information

+

llvm::Instruction provides easy access to metadata attached to an +instruction. One can extract line number information encoded in LLVM IR using +Instruction::getDebugLoc() and DILocation::getLine().

+
if (DILocation *Loc = I->getDebugLoc()) { // Here I is an LLVM instruction
+  unsigned Line = Loc->getLine();
+  StringRef File = Loc->getFilename();
+  StringRef Dir = Loc->getDirectory();
+  bool ImplicitCode = Loc->isImplicitCode();
+}
+
+
+

When the ImplicitCode flag is true, it means that the instruction has been +added by the front-end but doesn’t correspond to source code written by the user. For example:

+
if (MyBoolean) {
+  MyObject MO;
+  ...
+}
+
+
+

At the end of the scope, MyObject’s destructor is called even though the call isn’t written +explicitly. This information is useful for avoiding counters on brackets when +computing code coverage.

+
+
+

C/C++ global variable information

+

Given an integer global variable declared as follows:

+
_Alignas(8) int MyGlobal = 100;
+
+
+

a C/C++ front-end would generate the following descriptors:

+
;;
+;; Define the global itself.
+;;
+@MyGlobal = global i32 100, align 8, !dbg !0
+
+;;
+;; List of debug info of globals
+;;
+!llvm.dbg.cu = !{!1}
+
+;; Some unrelated metadata.
+!llvm.module.flags = !{!6, !7}
+!llvm.ident = !{!8}
+
+;; Define the global variable itself
+!0 = distinct !DIGlobalVariable(name: "MyGlobal", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true, align: 64)
+
+;; Define the compile unit.
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2,
+                             producer: "clang version 4.0.0",
+                             isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug,
+                             enums: !3, globals: !4)
+
+;;
+;; Define the file
+;;
+!2 = !DIFile(filename: "/dev/stdin",
+             directory: "/Users/dexonsmith/data/llvm/debug-info")
+
+;; An empty array.
+!3 = !{}
+
+;; The Array of Global Variables
+!4 = !{!0}
+
+;;
+;; Define the type
+;;
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+
+;; Dwarf version to output.
+!6 = !{i32 2, !"Dwarf Version", i32 4}
+
+;; Debug info schema version.
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+
+;; Compiler identification
+!8 = !{!"clang version 4.0.0"}
+
+
+

The align value in the DIGlobalVariable description specifies the variable’s alignment in +case it was forced by the C11 _Alignas() or C++11 alignas() keywords or by the compiler +attribute __attribute__((aligned ())). Otherwise (when this field is missing), +the alignment is considered default. This is used when producing DWARF output +for the DW_AT_alignment value.

+
+
+

C/C++ function information

+

Given a function declared as follows:

+
int main(int argc, char *argv[]) {
+  return 0;
+}
+
+
+

a C/C++ front-end would generate the following descriptors:

+
;;
+;; Define the anchor for subprograms.
+;;
+!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5,
+                   isLocal: false, isDefinition: true, scopeLine: 1,
+                   flags: DIFlagPrototyped, isOptimized: false,
+                   retainedNodes: !2)
+
+;;
+;; Define the subprogram itself.
+;;
+define i32 @main(i32 %argc, i8** %argv) !dbg !4 {
+...
+}
+
+
+
+
+
+

C++ specific debug information

+
+

C++ special member functions information

+

DWARF v5 introduces attributes defined to enhance debugging information of C++ programs. LLVM can generate (or omit) these DWARF attributes as appropriate. In C++, special member functions (constructors, destructors, copy/move constructors, and assignment operators) can be declared as deleted using the C++11 “= delete” syntax. This is represented in LLVM using the spFlags value DISPFlagDeleted.

+

Given a class declaration with copy constructor declared as deleted:

+
class foo {
+ public:
+   foo(const foo&) = delete;
+};
+
+
+

A C++ frontend would generate the following:

+
!17 = !DISubprogram(name: "foo", scope: !11, file: !1, line: 5, type: !18, scopeLine: 5, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagDeleted)
+
+
+

and this will produce an additional DWARF attribute as:

+
DW_TAG_subprogram [7] *
+  DW_AT_name [DW_FORM_strx1]    (indexed (00000006) string = "foo")
+  DW_AT_decl_line [DW_FORM_data1]       (5)
+  ...
+  DW_AT_deleted [DW_FORM_flag_present]  (true)
+
+
+
+
+
+

Fortran specific debug information

+
+

Fortran function information

+

There are a few DWARF attributes defined to support client debugging of Fortran programs. LLVM can generate (or omit) the appropriate DWARF attributes for the prefix-specs of ELEMENTAL, PURE, IMPURE, RECURSIVE, and NON_RECURSIVE. This is done by using the spFlags values: DISPFlagElemental, DISPFlagPure, and DISPFlagRecursive.

+
elemental function elem_func(a)
+
+
+

a Fortran front-end would generate the following descriptors:

+
!11 = distinct !DISubprogram(name: "subroutine2", scope: !1, file: !1,
+        line: 5, type: !8, scopeLine: 6,
+        spFlags: DISPFlagDefinition | DISPFlagElemental, unit: !0,
+        retainedNodes: !2)
+
+
+

and this will materialize an additional DWARF attribute as:

+
DW_TAG_subprogram [3]
+   DW_AT_low_pc [DW_FORM_addr]     (0x0000000000000010 ".text")
+   DW_AT_high_pc [DW_FORM_data4]   (0x00000001)
+   ...
+   DW_AT_elemental [DW_FORM_flag_present]  (true)
+
+
+

There are a few DWARF tags defined to represent Fortran-specific constructs, e.g. DW_TAG_string_type for representing Fortran character(n). In LLVM this is represented as DIStringType.

+
character(len=*), intent(in) :: string
+
+
+

a Fortran front-end would generate the following descriptors:

+
!DILocalVariable(name: "string", arg: 1, scope: !10, file: !3, line: 4, type: !15)
+!DIStringType(name: "character(*)!2", stringLength: !16, stringLengthExpression: !DIExpression(), size: 32)
+
+
+

and this will materialize in DWARF tags as:

+
DW_TAG_string_type
+             DW_AT_name      ("character(*)!2")
+             DW_AT_string_length     (0x00000064)
+0x00000064:    DW_TAG_variable
+               DW_AT_location      (DW_OP_fbreg +16)
+               DW_AT_type  (0x00000083 "integer*8")
+               ...
+               DW_AT_artificial    (true)
+
+
+
+
+
+

Debugging information format

+
+

Debugging Information Extension for Objective C Properties

+
+

Introduction

+

Objective C provides a simpler way to declare and define accessor methods using +declared properties. The language provides features to declare a property and +to let the compiler synthesize accessor methods.

+

The debugger lets developers inspect Objective C interfaces and their instance +variables and class variables. However, the debugger does not know anything +about the properties defined in Objective C interfaces. The debugger consumes +information generated by the compiler in DWARF format. The format does not support +encoding of Objective C properties. This proposal describes DWARF extensions to +encode Objective C properties, which the debugger can use to let developers +inspect Objective C properties.

+
+
+

Proposal

+

Objective C properties exist separately from class members. A property can be +defined only by “setter” and “getter” selectors, and be calculated anew on each +access. Or a property can just be a direct access to some declared ivar. +Finally it can have an ivar “automatically synthesized” for it by the compiler, +in which case the property can be referred to in user code directly using the +standard C dereference syntax as well as through the property “dot” syntax, but +there is no entry in the @interface declaration corresponding to this ivar.

+

To facilitate debugging these properties, we will add a new DWARF TAG into the +DW_TAG_structure_type definition for the class to hold the description of a +given property, and a set of DWARF attributes that provide said description. +The property tag will also contain the name and declared type of the property.

+

If there is a related ivar, there will also be a DWARF property attribute placed +in the DW_TAG_member DIE for that ivar referring back to the property TAG +for that property. And in the case where the compiler synthesizes the ivar +directly, the compiler is expected to generate a DW_TAG_member for that +ivar (with the DW_AT_artificial set to 1), whose name will be the name used +to access this ivar directly in code, and with the property attribute pointing +back to the property it is backing.

+

The following examples will serve as illustration for our discussion:

+
@interface I1 {
+  int n2;
+}
+
+@property int p1;
+@property int p2;
+@end
+
+@implementation I1
+@synthesize p1;
+@synthesize p2 = n2;
+@end
+
+
+

This produces the following DWARF (this is a “pseudo dwarfdump” output):

+
0x00000100:  TAG_structure_type [7] *
+               AT_APPLE_runtime_class( 0x10 )
+               AT_name( "I1" )
+               AT_decl_file( "Objc_Property.m" )
+               AT_decl_line( 3 )
+
+0x00000110    TAG_APPLE_property
+                AT_name ( "p1" )
+                AT_type ( {0x00000150} ( int ) )
+
+0x00000120:   TAG_APPLE_property
+                AT_name ( "p2" )
+                AT_type ( {0x00000150} ( int ) )
+
+0x00000130:   TAG_member [8]
+                AT_name( "_p1" )
+                AT_APPLE_property ( {0x00000110} "p1" )
+                AT_type( {0x00000150} ( int ) )
+                AT_artificial ( 0x1 )
+
+0x00000140:    TAG_member [8]
+                 AT_name( "n2" )
+                 AT_APPLE_property ( {0x00000120} "p2" )
+                 AT_type( {0x00000150} ( int ) )
+
+0x00000150:  AT_type( ( int ) )
+
+
+

Note, the current convention is that the name of the ivar for an +auto-synthesized property is the name of the property from which it derives +with an underscore prepended, as is shown in the example. But we actually +don’t need to know this convention, since we are given the name of the ivar +directly.

+

Also, it is common practice in ObjC to have different property declarations in +the @interface and @implementation - e.g. to provide a read-only property in +the interface, and a read-write interface in the implementation. In that case, +the compiler should emit whichever property declaration will be in force in the +current translation unit.

+

Developers can decorate a property with attributes which are encoded using +DW_AT_APPLE_property_attribute.

+
@property (readonly, nonatomic) int pr;
+
+
+
TAG_APPLE_property [8]
+  AT_name( "pr" )
+  AT_type ( {0x00000147} (int) )
+  AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
+
+
+

The setter and getter method names are attached to the property using +DW_AT_APPLE_property_setter and DW_AT_APPLE_property_getter attributes.

+
@interface I1
+@property (setter=myOwnP3Setter:) int p3;
+-(void)myOwnP3Setter:(int)a;
+@end
+
+@implementation I1
+@synthesize p3;
+-(void)myOwnP3Setter:(int)a{ }
+@end
+
+
+

The DWARF for this would be:

+
0x000003bd: TAG_structure_type [7] *
+              AT_APPLE_runtime_class( 0x10 )
+              AT_name( "I1" )
+              AT_decl_file( "Objc_Property.m" )
+              AT_decl_line( 3 )
+
+0x000003cd      TAG_APPLE_property
+                  AT_name ( "p3" )
+                  AT_APPLE_property_setter ( "myOwnP3Setter:" )
+                  AT_type( {0x00000147} ( int ) )
+
+0x000003f3:     TAG_member [8]
+                  AT_name( "_p3" )
+                  AT_type ( {0x00000147} ( int ) )
+                  AT_APPLE_property ( {0x000003cd} )
+                  AT_artificial ( 0x1 )
+
+
+
+
+

New DWARF Tags

+ ++++ + + + + + + + + + + +

TAG

Value

DW_TAG_APPLE_property

0x4200

+
+
+

New DWARF Attributes

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +

Attribute

Value

Classes

DW_AT_APPLE_property

0x3fed

Reference

DW_AT_APPLE_property_getter

0x3fe9

String

DW_AT_APPLE_property_setter

0x3fea

String

DW_AT_APPLE_property_attribute

0x3feb

Constant

+
+
+

New DWARF Constants

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Name

Value

DW_APPLE_PROPERTY_readonly

0x01

DW_APPLE_PROPERTY_getter

0x02

DW_APPLE_PROPERTY_assign

0x04

DW_APPLE_PROPERTY_readwrite

0x08

DW_APPLE_PROPERTY_retain

0x10

DW_APPLE_PROPERTY_copy

0x20

DW_APPLE_PROPERTY_nonatomic

0x40

DW_APPLE_PROPERTY_setter

0x80

DW_APPLE_PROPERTY_atomic

0x100

DW_APPLE_PROPERTY_weak

0x200

DW_APPLE_PROPERTY_strong

0x400

DW_APPLE_PROPERTY_unsafe_unretained

0x800

DW_APPLE_PROPERTY_nullability

0x1000

DW_APPLE_PROPERTY_null_resettable

0x2000

DW_APPLE_PROPERTY_class

0x4000

+
+
+
+

Name Accelerator Tables

+
+

Introduction

+

The “.debug_pubnames” and “.debug_pubtypes” formats are not what a +debugger needs. The “pub” in the section name indicates that the entries +in the table are publicly visible names only. This means no static or hidden +functions show up in the “.debug_pubnames”. No static variables or private +class variables are in the “.debug_pubtypes”. Many compilers add different +things to these tables, so we can’t rely upon the contents between gcc, icc, or +clang.

+

The typical query given by users tends not to match up with the contents of +these tables. For example, the DWARF spec states that “In the case of the name +of a function member or static data member of a C++ structure, class or union, +the name presented in the “.debug_pubnames” section is not the simple name +given by the DW_AT_name attribute of the referenced debugging information +entry, but rather the fully qualified name of the data or function member.” +So the only name in these tables for a complex C++ entry is its fully +qualified name. Debugger users tend not to enter their search strings as +“a::b::c(int,const Foo&) const”, but rather as “c”, “b::c”, or +“a::b::c”. So the name entered in the name table must be demangled in +order to chop it up appropriately and additional names must be manually entered +into the table to make it effective as a name lookup table for debuggers to +use.

+

All debuggers currently ignore the “.debug_pubnames” table as a result of +its inconsistent and useless public-only name content making it a waste of +space in the object file. These tables, when they are written to disk, are not +sorted in any way, leaving every debugger to do its own parsing and sorting. +These tables also include an inlined copy of the string values in the table +itself making the tables much larger than they need to be on disk, especially +for large C++ programs.

+

Can’t we just fix the sections by adding all of the names we need to this +table? No, because that is not what the tables are defined to contain and we +won’t know the difference between the old bad tables and the new good tables. +At best we could make our own renamed sections that contain all of the data we +need.

+

These tables are also insufficient for what a debugger like LLDB needs. LLDB +uses clang for its expression parsing where LLDB acts as a PCH. LLDB is then +often asked to look for type “foo” or namespace “bar”, or list items in +namespace “baz”. Namespaces are not included in the pubnames or pubtypes +tables. Since clang asks a lot of questions when it is parsing an expression, +we need to be very fast when looking up names, as it happens a lot. Having new +accelerator tables that are optimized for very quick lookups will benefit this +type of debugging experience greatly.

+

We would like to generate name lookup tables that can be mapped into memory +from disk, and used as is, with little or no up-front parsing. We would also +be able to control the exact content of these different tables so they contain +exactly what we need. The Name Accelerator Tables were designed to fix these +issues. In order to solve these issues we need to:

+
    +
  • Have a format that can be mapped into memory from disk and used as is

  • +
  • Lookups should be very fast

  • +
  • Extensible table format so these tables can be made by many producers

  • +
  • Contain all of the names needed for typical lookups out of the box

  • +
  • Strict rules for the contents of tables

  • +
+

Table size is important and the accelerator table format should allow the reuse +of strings from common string tables so the strings for the names are not +duplicated. We also want to make sure the table is ready to be used as-is by +simply mapping the table into memory with minimal header parsing.

+

The name lookups need to be fast and optimized for the kinds of lookups that +debuggers tend to do. Optimally we would like to touch as few parts of the +mapped table as possible when doing a name lookup and be able to quickly find +the name entry we are looking for, or discover there are no matches. In the +case of debuggers we optimized for lookups that fail most of the time.

+

Each table that is defined should have strict rules on exactly what is in the +accelerator tables and documented so clients can rely on the content.

+
+
+

Hash Tables

+
+
Standard Hash Tables
+

Typical hash tables have a header, buckets, and each bucket points to the +bucket contents:

+
.------------.
+|  HEADER    |
+|------------|
+|  BUCKETS   |
+|------------|
+|  DATA      |
+`------------'
+
+
+

The BUCKETS are an array of offsets to DATA for each hash:

+
.------------.
+| 0x00001000 | BUCKETS[0]
+| 0x00002000 | BUCKETS[1]
+| 0x00002200 | BUCKETS[2]
+| 0x000034f0 | BUCKETS[3]
+|            | ...
+| 0xXXXXXXXX | BUCKETS[n_buckets]
+'------------'
+
+
+

So for bucket[3] in the example above, we have an offset into the table +0x000034f0 which points to a chain of entries for the bucket. Each bucket must +contain a next pointer, full 32 bit hash value, the string itself, and the data +for the current string value.

+
            .------------.
+0x000034f0: | 0x00003500 | next pointer
+            | 0x12345678 | 32 bit hash
+            | "erase"    | string value
+            | data[n]    | HashData for this bucket
+            |------------|
+0x00003500: | 0x00003550 | next pointer
+            | 0x29273623 | 32 bit hash
+            | "dump"     | string value
+            | data[n]    | HashData for this bucket
+            |------------|
+0x00003550: | 0x00000000 | next pointer
+            | 0x82638293 | 32 bit hash
+            | "main"     | string value
+            | data[n]    | HashData for this bucket
+            `------------'
+
+
+

The problem with this layout for debuggers is that we need to optimize for the +negative lookup case where the symbol we’re searching for is not present. So +if we were to lookup “printf” in the table above, we would make a 32-bit +hash for “printf”, it might match bucket[3]. We would need to go to +the offset 0x000034f0 and start looking to see if our 32 bit hash matches. To +do so, we need to read the next pointer, then read the hash, compare it, and +skip to the next bucket. Each time we are skipping many bytes in memory and +touching new pages just to do the compare on the full 32 bit hash. All of +these accesses then tell us that we didn’t have a match.

+
+
+
Name Hash Tables
+

To solve the issues mentioned above we have structured the hash tables a bit +differently: a header, buckets, an array of all unique 32 bit hash values, +followed by an array of hash value data offsets, one for each hash value, then +the data for all hash values:

+
.-------------.
+|  HEADER     |
+|-------------|
+|  BUCKETS    |
+|-------------|
+|  HASHES     |
+|-------------|
+|  OFFSETS    |
+|-------------|
+|  DATA       |
+`-------------'
+
+
+

The BUCKETS in the name tables are an index into the HASHES array. By +making all of the full 32 bit hash values contiguous in memory, we allow +ourselves to efficiently check for a match while touching as little memory as +possible. Most often checking the 32 bit hash values is as far as the lookup +goes. If it does match, it usually is a match with no collisions. So for a +table with “n_buckets” buckets, and “n_hashes” unique 32 bit hash +values, we can clarify the contents of the BUCKETS, HASHES and +OFFSETS as:

+
.-------------------------.
+|  HEADER.magic           | uint32_t
+|  HEADER.version         | uint16_t
+|  HEADER.hash_function   | uint16_t
+|  HEADER.bucket_count    | uint32_t
+|  HEADER.hashes_count    | uint32_t
+|  HEADER.header_data_len | uint32_t
+|  HEADER_DATA            | HeaderData
+|-------------------------|
+|  BUCKETS                | uint32_t[n_buckets] // 32 bit hash indexes
+|-------------------------|
+|  HASHES                 | uint32_t[n_hashes] // 32 bit hash values
+|-------------------------|
+|  OFFSETS                | uint32_t[n_hashes] // 32 bit offsets to hash value data
+|-------------------------|
+|  ALL HASH DATA          |
+`-------------------------'
+
+
+

So taking the exact same data from the standard hash example above we end up +with:

+
            .------------.
+            | HEADER     |
+            |------------|
+            |          0 | BUCKETS[0]
+            |          2 | BUCKETS[1]
+            |          5 | BUCKETS[2]
+            |          6 | BUCKETS[3]
+            |            | ...
+            |        ... | BUCKETS[n_buckets]
+            |------------|
+            | 0x........ | HASHES[0]
+            | 0x........ | HASHES[1]
+            | 0x........ | HASHES[2]
+            | 0x........ | HASHES[3]
+            | 0x........ | HASHES[4]
+            | 0x........ | HASHES[5]
+            | 0x12345678 | HASHES[6]    hash for BUCKETS[3]
+            | 0x29273623 | HASHES[7]    hash for BUCKETS[3]
+            | 0x82638293 | HASHES[8]    hash for BUCKETS[3]
+            | 0x........ | HASHES[9]
+            | 0x........ | HASHES[10]
+            | 0x........ | HASHES[11]
+            | 0x........ | HASHES[12]
+            | 0x........ | HASHES[13]
+            | 0x........ | HASHES[n_hashes]
+            |------------|
+            | 0x........ | OFFSETS[0]
+            | 0x........ | OFFSETS[1]
+            | 0x........ | OFFSETS[2]
+            | 0x........ | OFFSETS[3]
+            | 0x........ | OFFSETS[4]
+            | 0x........ | OFFSETS[5]
+            | 0x000034f0 | OFFSETS[6]   offset for BUCKETS[3]
+            | 0x00003500 | OFFSETS[7]   offset for BUCKETS[3]
+            | 0x00003550 | OFFSETS[8]   offset for BUCKETS[3]
+            | 0x........ | OFFSETS[9]
+            | 0x........ | OFFSETS[10]
+            | 0x........ | OFFSETS[11]
+            | 0x........ | OFFSETS[12]
+            | 0x........ | OFFSETS[13]
+            | 0x........ | OFFSETS[n_hashes]
+            |------------|
+            |            |
+            |            |
+            |            |
+            |            |
+            |            |
+            |------------|
+0x000034f0: | 0x00001203 | String offset into .debug_str ("erase")
+            | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x........ | HashData[2]
+            | 0x........ | HashData[3]
+            | 0x00000000 | String offset into .debug_str (terminate data for hash)
+            |------------|
+0x00003500: | 0x00001203 | String offset into .debug_str ("collision")
+            | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x00001203 | String offset into .debug_str ("dump")
+            | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x........ | HashData[2]
+            | 0x00000000 | String offset into .debug_str (terminate data for hash)
+            |------------|
+0x00003550: | 0x00001203 | String offset into .debug_str ("main")
+            | 0x00000009 | A 32 bit array count - number of HashData with name "main"
+            | 0x........ | HashData[0]
+            | 0x........ | HashData[1]
+            | 0x........ | HashData[2]
+            | 0x........ | HashData[3]
+            | 0x........ | HashData[4]
+            | 0x........ | HashData[5]
+            | 0x........ | HashData[6]
+            | 0x........ | HashData[7]
+            | 0x........ | HashData[8]
+            | 0x00000000 | String offset into .debug_str (terminate data for hash)
+            `------------'
+
+
+

So we still have all of the same data, we just organize it more efficiently for +debugger lookup. If we repeat the same “printf” lookup from above, we +would hash “printf” and find it matches BUCKETS[3] by taking the 32 bit +hash value modulo n_buckets. BUCKETS[3] contains “6” which +is the index into the HASHES table. We would then compare any consecutive +32 bit hash values in the HASHES array as long as the hashes would be in +BUCKETS[3]. We do this by verifying that each subsequent hash value modulo +n_buckets is still 3. In the case of a failed lookup we would access the +memory for BUCKETS[3], and then compare a few consecutive 32 bit hashes +before we know that we have no match. We don’t end up marching through +multiple words of memory and we really keep the number of processor data cache +lines being accessed as small as possible.

+

The string hash that is used for these lookup tables is the Daniel J. +Bernstein hash which is also used in the ELF GNU_HASH sections. It is a +very good hash for all kinds of names in programs with very few hash +collisions.

+

Empty buckets are designated by using an invalid hash index of UINT32_MAX.

+
+
+
+

Details

+

These name hash tables are designed to be generic where specializations of the +table get to define additional data that goes into the header (“HeaderData”), +how the string value is stored (“KeyType”) and the content of the data for each +hash value.

+
+
Header Layout
+

The header has a fixed part, and the specialized part. The exact format of the +header is:

+
struct Header
+{
+  uint32_t   magic;           // 'HASH' magic value to allow endian detection
+  uint16_t   version;         // Version number
+  uint16_t   hash_function;   // The hash function enumeration that was used
+  uint32_t   bucket_count;    // The number of buckets in this hash table
+  uint32_t   hashes_count;    // The total number of unique hash values and hash data offsets in this table
+  uint32_t   header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
+                              // Specifically the length of the following HeaderData field - this does not
+                              // include the size of the preceding fields
+  HeaderData header_data;     // Implementation specific header data
+};
+
+
+

The header starts with a 32 bit “magic” value which must be 'HASH' +encoded as an ASCII integer. This allows the detection of the start of the +hash table and also allows the table’s byte order to be determined so the table +can be correctly extracted. The “magic” value is followed by a 16 bit +version number which allows the table to be revised and modified in the +future. The current version number is 1. hash_function is a uint16_t +enumeration that specifies which hash function was used to produce this table. +The current values for the hash function enumerations include:

+
enum HashFunctionType
+{
+  eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
+};
+
+
+

bucket_count is a 32 bit unsigned integer that represents how many buckets +are in the BUCKETS array. hashes_count is the number of unique 32 bit +hash values that are in the HASHES array, and is also the number of offsets +contained in the OFFSETS array. header_data_len specifies the size +in bytes of the HeaderData that is filled in by specialized versions of +this table.

+
+
+
Fixed Lookup
+

The header is followed by the buckets, hashes, offsets, and hash value data.

+
struct FixedTable
+{
+  uint32_t buckets[Header.bucket_count];  // An array of hash indexes into the "hashes[]" array below
+  uint32_t hashes [Header.hashes_count];  // Every unique 32 bit hash for the entire table is in this table
+  uint32_t offsets[Header.hashes_count];  // An offset that corresponds to each item in the "hashes[]" array above
+};
+
+
+

buckets is an array of 32 bit indexes into the hashes array. The +hashes array contains all of the 32 bit hash values for all names in the +hash table. Each hash in the hashes table has an offset in the offsets +array that points to the data for the hash value.

+

This table setup makes it very easy to repurpose these tables to contain +different data, while keeping the lookup mechanism the same for all tables. +This layout also makes it possible to save the table to disk and map it in +later and do very efficient name lookups with little or no parsing.

+

DWARF lookup tables can be implemented in a variety of ways and can store a lot +of information for each name. We want to make the DWARF tables extensible and +able to store the data efficiently so we have used some of the DWARF features +that enable efficient data storage to define exactly what kind of data we store +for each name.

+

The HeaderData contains a definition of the contents of each HashData chunk. +We might want to store an offset to all of the debug information entries (DIEs) +for each name. To keep things extensible, we create a list of items, or +Atoms, that are contained in the data for each name. First comes the type of +the data in each atom:

+
enum AtomType
+{
+  eAtomTypeNULL       = 0u,
+  eAtomTypeDIEOffset  = 1u,   // DIE offset, check form for encoding
+  eAtomTypeCUOffset   = 2u,   // DIE offset of the compiler unit header that contains the item in question
+  eAtomTypeTag        = 3u,   // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
+  eAtomTypeNameFlags  = 4u,   // Flags from enum NameFlags
+  eAtomTypeTypeFlags  = 5u,   // Flags from enum TypeFlags
+};
+
+
+

The enumeration values and their meanings are:

+
eAtomTypeNULL       - a termination atom that specifies the end of the atom list
+eAtomTypeDIEOffset  - an offset into the .debug_info section for the DWARF DIE for this name
+eAtomTypeCUOffset   - an offset into the .debug_info section for the CU that contains the DIE
+eAtomTypeTag        - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
+eAtomTypeNameFlags  - Flags for functions and global variables (isFunction, isInlined, isExternal...)
+eAtomTypeTypeFlags  - Flags for types (isCXXClass, isObjCClass, ...)
+
+
+

Then we allow each atom to define its atom type and how the data for that +atom type is encoded:

+
struct Atom
+{
+  uint16_t type;  // AtomType enum value
+  uint16_t form;  // DWARF DW_FORM_XXX defines
+};
+
+
+

The form type above is from the DWARF specification and defines the exact +encoding of the data for the Atom type. See the DWARF specification for the +DW_FORM_ definitions.

+
struct HeaderData
+{
+  uint32_t die_offset_base;
+  uint32_t atom_count;
+  Atom     atoms[atom_count];
+};
+
+
+

HeaderData defines the base DIE offset that should be added to any atoms +that are encoded using the DW_FORM_ref1, DW_FORM_ref2, +DW_FORM_ref4, DW_FORM_ref8 or DW_FORM_ref_udata. It also defines +what is contained in each HashData object – Atom.form tells us how large +each field will be in the HashData and the Atom.type tells us how this data +should be interpreted.

+

For the current implementations of the “.apple_names” (all functions + +globals), the “.apple_types” (names of all types that are defined), and +the “.apple_namespaces” (all namespaces), we currently set the Atom +array to be:

+
HeaderData.atom_count = 1;
+HeaderData.atoms[0].type = eAtomTypeDIEOffset;
+HeaderData.atoms[0].form = DW_FORM_data4;
+
+
+

This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is +encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have +multiple matching DIEs in a single file, which could come up with an inlined +function for instance. Future tables could include more information about the +DIE such as flags indicating if the DIE is a function, method, block, +or inlined.

+

The KeyType for the DWARF table is a 32 bit string table offset into the +“.debug_str” table. The “.debug_str” is the string table for the DWARF which +may already contain copies of all of the strings. This helps make sure, with +help from the compiler, that we reuse the strings between all of the DWARF +sections and keeps the hash table size down. Another benefit to having the +compiler generate all strings as DW_FORM_strp in the debug info, is that +DWARF parsing can be made much faster.

+

After a lookup is made, we get an offset into the hash data. The hash data +needs to be able to deal with 32 bit hash collisions, so the chunk of data +at the offset in the hash data consists of a triple:

+
uint32_t str_offset
+uint32_t hash_data_count
+HashData[hash_data_count]
+
+
+

If “str_offset” is zero, then the bucket contents are done. 99.9% of the +hash data chunks contain a single item (no 32 bit hash collision):

+
.------------.
+| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+| 0x00000004 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x........ | uint32_t HashData[2] DIE offset
+| 0x........ | uint32_t HashData[3] DIE offset
+| 0x00000000 | uint32_t KeyType (end of hash chain)
+`------------'
+
+
+

If there are collisions, you will have multiple valid string offsets:

+
.------------.
+| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+| 0x00000004 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x........ | uint32_t HashData[2] DIE offset
+| 0x........ | uint32_t HashData[3] DIE offset
+| 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print")
+| 0x00000002 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x00000000 | uint32_t KeyType (end of hash chain)
+`------------'
+
+
+

Current testing with real world C++ binaries has shown that there is around 1 +32 bit hash collision per 100,000 name entries.

+
+
+
+

Contents

+

As we said, we want to strictly define exactly what is included in the +different tables. For DWARF, we have 3 tables: “.apple_names”, +“.apple_types”, and “.apple_namespaces”.

+

“.apple_names” sections should contain an entry for each DWARF DIE whose +DW_TAG is a DW_TAG_label, DW_TAG_inlined_subroutine, or +DW_TAG_subprogram that has address attributes: DW_AT_low_pc, +DW_AT_high_pc, DW_AT_ranges or DW_AT_entry_pc. It also contains +DW_TAG_variable DIEs that have a DW_OP_addr in the location (global and +static variables). All global and static variables should be included, +including those scoped within functions and classes. For example using the +following code:

+
static int var = 0;
+
+void f ()
+{
+  static int var = 0;
+}
+
+
+

Both of the static var variables would be included in the table. All +functions should emit both their full names and their basenames. For C or C++, +the full name is the mangled name (if available) which is usually in the +DW_AT_MIPS_linkage_name attribute, and the DW_AT_name contains the +function basename. If global or static variables have a mangled name in a +DW_AT_MIPS_linkage_name attribute, this should be emitted along with the +simple name found in the DW_AT_name attribute.

+

“.apple_types” sections should contain an entry for each DWARF DIE whose +tag is one of:

+
    +
  • DW_TAG_array_type

  • +
  • DW_TAG_class_type

  • +
  • DW_TAG_enumeration_type

  • +
  • DW_TAG_pointer_type

  • +
  • DW_TAG_reference_type

  • +
  • DW_TAG_string_type

  • +
  • DW_TAG_structure_type

  • +
  • DW_TAG_subroutine_type

  • +
  • DW_TAG_typedef

  • +
  • DW_TAG_union_type

  • +
  • DW_TAG_ptr_to_member_type

  • +
  • DW_TAG_set_type

  • +
  • DW_TAG_subrange_type

  • +
  • DW_TAG_base_type

  • +
  • DW_TAG_const_type

  • +
  • DW_TAG_file_type

  • +
  • DW_TAG_namelist

  • +
  • DW_TAG_packed_type

  • +
  • DW_TAG_volatile_type

  • +
  • DW_TAG_restrict_type

  • +
  • DW_TAG_atomic_type

  • +
  • DW_TAG_interface_type

  • +
  • DW_TAG_unspecified_type

  • +
  • DW_TAG_shared_type

  • +
+

Only entries with a DW_AT_name attribute are included, and the entry must +not be a forward declaration (DW_AT_declaration attribute with a non-zero +value). For example, using the following code:

+
int main ()
+{
+  int *b = 0;
+  return *b;
+}
+
+
+

We get a few type DIEs:

+
0x00000067:     TAG_base_type [5]
+                AT_encoding( DW_ATE_signed )
+                AT_name( "int" )
+                AT_byte_size( 0x04 )
+
+0x0000006e:     TAG_pointer_type [6]
+                AT_type( {0x00000067} ( int ) )
+                AT_byte_size( 0x08 )
+
+
+

The DW_TAG_pointer_type is not included because it does not have a DW_AT_name.

+

“.apple_namespaces” section should contain all DW_TAG_namespace DIEs. +If we run into a namespace that has no name this is an anonymous namespace, and +the name should be output as “(anonymous namespace)” (without the quotes). +Why? This matches the output of the abi::__cxa_demangle() that is in the +standard C++ library that demangles mangled names.

+
+
+

Language Extensions and File Format Changes

+
+
Objective-C Extensions
+

“.apple_objc” section should contain all DW_TAG_subprogram DIEs for an +Objective-C class. The name used in the hash table is the name of the +Objective-C class itself. If the Objective-C class has a category, then an +entry is made for both the class name without the category, and for the class +name with the category. So if we have a DIE at offset 0x1234 with a name of +method “-[NSString(my_additions) stringWithSpecialString:]”, we would add +an entry for “NSString” that points to DIE 0x1234, and an entry for +“NSString(my_additions)” that points to 0x1234. This allows us to quickly +track down all Objective-C methods for an Objective-C class when doing +expressions. It is needed because of the dynamic nature of Objective-C where +anyone can add methods to a class. The DWARF for Objective-C methods is also +emitted differently from C++ classes where the methods are not usually +contained in the class definition, they are scattered about across one or more +compile units. Categories can also be defined in different shared libraries. +So we need to be able to quickly find all of the methods and class functions +given the Objective-C class name, or quickly find all methods and class +functions for a class + category name. This table does not contain any +selector names, it just maps Objective-C class names (or class names + +category) to all of the methods and class functions. The selectors are added +as function basenames in the “.debug_names” section.

+

In the “.apple_names” section for Objective-C functions, the full name is +the entire function name with the brackets (“-[NSString +stringWithCString:]”) and the basename is the selector only +(“stringWithCString:”).

+
+
+
Mach-O Changes
+

The section names for the apple hash tables are for non-mach-o files. For +mach-o files, the sections should be contained in the __DWARF segment with +names as follows:

+
    +
  • “.apple_names” -> “__apple_names”

  • +
  • “.apple_types” -> “__apple_types”

  • +
  • “.apple_namespaces” -> “__apple_namespac” (16 character limit)

  • +
  • “.apple_objc” -> “__apple_objc”

  • +
+
+
+
+
+
+

CodeView Debug Info Format

+

LLVM supports emitting CodeView, the Microsoft debug info format, and this +section describes the design and implementation of that support.

+
+

Format Background

+

CodeView as a format is clearly oriented around C++ debugging, and in C++, the +majority of debug information tends to be type information. Therefore, the +overriding design constraint of CodeView is the separation of type information +from other “symbol” information so that type information can be efficiently +merged across translation units. Both type information and symbol information are +generally stored as a sequence of records, where each record begins with a +16-bit record size and a 16-bit record kind.

+

Type information is usually stored in the .debug$T section of the object +file. All other debug info, such as line info, string table, symbol info, and +inlinee info, is stored in one or more .debug$S sections. There may only be +one .debug$T section per object file, since all other debug info refers to +it. If a PDB (enabled by the /Zi MSVC option) was used during compilation, +the .debug$T section will contain only an LF_TYPESERVER2 record pointing +to the PDB. When using PDBs, symbol information appears to remain in the object +file .debug$S sections.

+

Type records are referred to by their index, which is the number of records in +the stream before a given record plus 0x1000. Many common basic types, such +as the basic integral types and unqualified pointers to them, are represented +using type indices less than 0x1000. Such basic types are built in to +CodeView consumers and do not require type records.

+

Each type record may only contain type indices that are less than its own type +index. This ensures that the graph of type stream references is acyclic. While +the source-level type graph may contain cycles through pointer types (consider a +linked list struct), these cycles are removed from the type stream by always +referring to the forward declaration record of user-defined record types. Only +“symbol” records in the .debug$S streams may refer to complete, +non-forward-declaration type records.

+
+
+

Working with CodeView

+

These are instructions for some common tasks for developers working to improve +LLVM’s CodeView support. Most of them revolve around using the CodeView dumper +embedded in llvm-readobj.

+
    +
  • Testing MSVC’s output:

    +
    $ cl -c -Z7 foo.cpp # Use /Z7 to keep types in the object file
    +$ llvm-readobj --codeview foo.obj
    +
    +
    +
  • +
  • Getting LLVM IR debug info out of Clang:

    +
    $ clang -g -gcodeview --target=x86_64-windows-msvc foo.cpp -S -emit-llvm
    +
    +
    +

    Use this to generate LLVM IR for LLVM test cases.

    +
  • +
  • Generate and dump CodeView from LLVM IR metadata:

    +
    $ llc foo.ll -filetype=obj -o foo.obj
    +$ llvm-readobj --codeview foo.obj > foo.txt
    +
    +
    +

    Use this pattern in lit test cases and FileCheck the output of llvm-readobj.

    +
  • +
+

Improving LLVM’s CodeView support is a process of finding interesting type +records, constructing a C++ test case that makes MSVC emit those records, +dumping the records, understanding them, and then generating equivalent records +in LLVM’s backend.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AddingConstrainedIntrinsics.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AddingConstrainedIntrinsics.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AddingConstrainedIntrinsics.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AddingConstrainedIntrinsics.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,101 @@ +================================================== +How To Add A Constrained Floating-Point Intrinsic +================================================== + +.. contents:: + :local: + +.. warning:: + This is a work in progress. + +Add the intrinsic +================= + +Multiple files need to be updated when adding a new constrained intrinsic. + +Add the new intrinsic to the table of intrinsics:: + + include/llvm/IR/Intrinsics.td + +Add SelectionDAG node types +=========================== + +Add the new STRICT version of the node type to the ISD::NodeType enum:: + + include/llvm/CodeGen/ISDOpcodes.h + +Strict version name must be a concatenation of prefix ``STRICT_`` and the name +of corresponding non-strict node name. For instance, strict version of the +node FADD must be STRICT_FADD. + +Update mappings +=============== + +Add new record to the mapping of instructions to constrained intrinsic and +DAG nodes:: + + include/llvm/IR/ConstrainedOps.def + +Follow instructions provided in this file. + +Update IR components +==================== + +Update the IR verifier:: + + lib/IR/Verifier.cpp + +Update Selector components +========================== + +Building the SelectionDAG +------------------------- + +The function SelectionDAGBuilder::visitConstrainedFPIntrinsic builds DAG nodes +using mappings specified in ConstrainedOps.def. If however this default build is +not sufficient, the build can be modified, see how it is implemented for +STRICT_FP_ROUND. 
The new STRICT node will eventually be converted +to the matching non-STRICT node. For this reason it should have the same +operands and values as the non-STRICT version but should also use the chain. +This makes subsequent sharing of code for STRICT and non-STRICT code paths +easier:: + + lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp + +Most of the STRICT nodes get legalized the same as their matching non-STRICT +counterparts. A new STRICT node with this property must get added to the +switch in SelectionDAGLegalize::LegalizeOp().:: + + lib/CodeGen/SelectionDAG/LegalizeDAG.cpp + +Other parts of the legalizer may need to be updated as well. Look for +places where the non-STRICT counterpart is legalized and update as needed. +Be careful of the chain since STRICT nodes use it but their counterparts +often don't. + +The code to do the conversion or mutation of the STRICT node to a non-STRICT +version of the node happens in SelectionDAG::mutateStrictFPToFP(). In most cases +the function can do the conversion using information from ConstrainedOps.def. Be +careful updating this function since some nodes have the same return type +as their input operand, but some are different. Both of these cases must +be properly handled:: + + lib/CodeGen/SelectionDAG/SelectionDAG.cpp + +Whether the mutation may happen or not depends on how the new node has been +registered in TargetLoweringBase::initActions().
By default all strict nodes are +registered with Expand action:: + + lib/CodeGen/TargetLoweringBase.cpp + +To make debug logs readable it is helpful to update the SelectionDAG's +debug logger::: + + lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp + +Add documentation and tests +=========================== + +:: + + docs/LangRef.rst diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AdvancedBuilds.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AdvancedBuilds.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AdvancedBuilds.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AdvancedBuilds.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,191 @@ +============================= +Advanced Build Configurations +============================= + +.. contents:: + :local: + +Introduction +============ + +`CMake `_ is a cross-platform build-generator tool. CMake +does not build the project, it generates the files needed by your build tool +(GNU make, Visual Studio, etc.) for building LLVM. + +If **you are a new contributor**, please start with the :doc:`GettingStarted` or +:doc:`CMake` pages. This page is intended for users doing more complex builds. + +Many of the examples below are written assuming specific CMake Generators. +Unless otherwise explicitly called out these commands should work with any CMake +generator. + +Bootstrap Builds +================ + +The Clang CMake build system supports bootstrap (aka multi-stage) builds. At a +high level a multi-stage build is a chain of builds that pass data from one +stage into the next. The most common and simple version of this is a traditional +bootstrap build. + +In a simple two-stage bootstrap build, we build clang using the system compiler, +then use that just-built clang to build clang again. In CMake this simplest form +of a bootstrap build can be configured with a single option, +CLANG_ENABLE_BOOTSTRAP. 
+ +.. code-block:: console + + $ cmake -G Ninja -DCLANG_ENABLE_BOOTSTRAP=On + $ ninja stage2 + +This command itself isn't terribly useful because it assumes default +configurations for each stage. The next series of examples utilize CMake cache +scripts to provide more complex options. + +By default, only a few CMake options will be passed between stages. +The list, called _BOOTSTRAP_DEFAULT_PASSTHROUGH, is defined in clang/CMakeLists.txt. +To force the passing of the variables between stages, use the -DCLANG_BOOTSTRAP_PASSTHROUGH +CMake option, each variable separated by a ";". As an example: + +.. code-block:: console + + $ cmake -G Ninja -DCLANG_ENABLE_BOOTSTRAP=On -DCLANG_BOOTSTRAP_PASSTHROUGH="CMAKE_INSTALL_PREFIX;CMAKE_VERBOSE_MAKEFILE" + $ ninja stage2 + +CMake options starting with ``BOOTSTRAP_`` will be passed only to the stage2 build. +This gives the opportunity to use Clang specific build flags. +For example, the following CMake call will enable '-fno-addrsig' only during +the stage2 build for C and C++. + +.. code-block:: console + + $ cmake [..] -DBOOTSTRAP_CMAKE_CXX_FLAGS='-fno-addrsig' -DBOOTSTRAP_CMAKE_C_FLAGS='-fno-addrsig' [..] + +The clang build system refers to builds as stages. A stage1 build is a standard +build using the compiler installed on the host, and a stage2 build is built +using the stage1 compiler. This nomenclature holds up to more stages too. In +general a stage*n* build is built using the output from stage*n-1*. + +Apple Clang Builds (A More Complex Bootstrap) +============================================= + +Apple's Clang builds are a slightly more complicated example of the simple +bootstrapping scenario. Apple Clang is built using a 2-stage build. + +The stage1 compiler is a host-only compiler with some options set. The stage1 +compiler is a balance of optimization vs build time because it is a throwaway. +The stage2 compiler is the fully optimized compiler intended to ship to users.
+ +Setting up these compilers requires a lot of options. To simplify the +configuration the Apple Clang build settings are contained in CMake Cache files. +You can build an Apple Clang compiler using the following commands: + +.. code-block:: console + + $ cmake -G Ninja -C /cmake/caches/Apple-stage1.cmake + $ ninja stage2-distribution + +This CMake invocation configures the stage1 host compiler, and sets +CLANG_BOOTSTRAP_CMAKE_ARGS to pass the Apple-stage2.cmake cache script to the +stage2 configuration step. + +When you build the stage2-distribution target it builds the minimal stage1 +compiler and required tools, then configures and builds the stage2 compiler +based on the settings in Apple-stage2.cmake. + +This pattern of using cache scripts to set complex settings, and specifically to +make later stage builds include cache scripts is common in our more advanced +build configurations. + +Multi-stage PGO +=============== + +Profile-Guided Optimizations (PGO) is a really great way to optimize the code +clang generates. Our multi-stage PGO builds are a workflow for generating PGO +profiles that can be used to optimize clang. + +At a high level, the way PGO works is that you build an instrumented compiler, +then you run the instrumented compiler against sample source files. While the +instrumented compiler runs it will output a bunch of files containing +performance counters (.profraw files). After generating all the profraw files +you use llvm-profdata to merge the files into a single profdata file that you +can feed into the LLVM_PROFDATA_FILE option. + +Our PGO.cmake cache script automates that whole process. You can use it by +running: + +.. code-block:: console + + $ cmake -G Ninja -C /cmake/caches/PGO.cmake + $ ninja stage2-instrumented-generate-profdata + +If you let that run for a few hours or so, it will place a profdata file in your +build directory. 
This takes a really long time because it builds clang twice, +and you *must* have compiler-rt in your build tree. + +This process uses any source files under the perf-training directory as training +data as long as the source files are marked up with LIT-style RUN lines. + +After it finishes you can use “find . -name clang.profdata” to find it, but it +should be at a path something like: + +.. code-block:: console + + /tools/clang/stage2-instrumented-bins/utils/perf-training/clang.profdata + +You can feed that file into the LLVM_PROFDATA_FILE option when you build your +optimized compiler. + +The PGO CMake cache has a slightly different stage naming scheme than other +multi-stage builds. It generates three stages: stage1, stage2-instrumented, and +stage2. Both of the stage2 builds are built using the stage1 compiler. + +The PGO CMake cache generates the following additional targets: + +**stage2-instrumented** + Builds a stage1 x86 compiler, runtime, and required tools (llvm-config, + llvm-profdata) then uses that compiler to build an instrumented stage2 compiler. + +**stage2-instrumented-generate-profdata** + Depends on "stage2-instrumented" and will use the instrumented compiler to + generate profdata based on the training files in /utils/perf-training + +**stage2** + Depends on "stage2-instrumented-generate-profdata" and will use the stage1 + compiler with the stage2 profdata to build a PGO-optimized compiler. + +**stage2-check-llvm** + Depends on stage2 and runs check-llvm using the stage2 compiler. + +**stage2-check-clang** + Depends on stage2 and runs check-clang using the stage2 compiler. + +**stage2-check-all** + Depends on stage2 and runs check-all using the stage2 compiler. + +**stage2-test-suite** + Depends on stage2 and runs the test-suite using the stage3 compiler (requires + in-tree test-suite). + +3-Stage Non-Determinism +======================= + +In the ancient lore of compilers non-determinism is like the multi-headed hydra.
+Whenever its head pops up, terror and chaos ensue. + +Historically one of the tests to verify that a compiler was deterministic would +be a three stage build. The idea of a three stage build is you take your sources +and build a compiler (stage1), then use that compiler to rebuild the sources +(stage2), then you use that compiler to rebuild the sources a third time +(stage3) with an identical configuration to the stage2 build. At the end of +this, you have a stage2 and stage3 compiler that should be bit-for-bit +identical. + +You can perform one of these 3-stage builds with LLVM & clang using the +following commands: + +.. code-block:: console + + $ cmake -G Ninja -C /cmake/caches/3-stage.cmake + $ cmake --build . --target stage3 --parallel + +After the build you can compare the stage2 & stage3 compilers. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AliasAnalysis.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AliasAnalysis.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AliasAnalysis.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AliasAnalysis.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,680 @@ +================================== +LLVM Alias Analysis Infrastructure +================================== + +.. contents:: + :local: + +Introduction +============ + +Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt to +determine whether or not two pointers ever can point to the same object in +memory. There are many different algorithms for alias analysis and many +different ways of classifying them: flow-sensitive vs. flow-insensitive, +context-sensitive vs. context-insensitive, field-sensitive +vs. field-insensitive, unification-based vs. subset-based, etc. 
Traditionally, +alias analyses respond to a query with a `Must, May, or No`_ alias response, +indicating that two pointers always point to the same object, might point to the +same object, or are known to never point to the same object. + +The LLVM `AliasAnalysis +`__ class is the +primary interface used by clients and implementations of alias analyses in the +LLVM system. This class is the common interface between clients of alias +analysis information and the implementations providing it, and is designed to +support a wide range of implementations and clients (but currently all clients +are assumed to be flow-insensitive). In addition to simple alias analysis +information, this class exposes Mod/Ref information from those implementations +which can provide it, allowing for powerful analyses and transformations to work +well together. + +This document contains information necessary to successfully implement this +interface, use it, and to test both sides. It also explains some of the finer +points about what exactly results mean. + +``AliasAnalysis`` Class Overview +================================ + +The `AliasAnalysis `__ +class defines the interface that the various alias analysis implementations +should support. This class exports two important enums: ``AliasResult`` and +``ModRefResult`` which represent the result of an alias query or a mod/ref +query, respectively. + +The ``AliasAnalysis`` interface exposes information about memory, represented in +several different ways. In particular, memory objects are represented as a +starting address and size, and function calls are represented as the actual +``call`` or ``invoke`` instructions that performs the call. The +``AliasAnalysis`` interface also exposes some helper methods which allow you to +get mod/ref information for arbitrary instructions. 
+ +All ``AliasAnalysis`` interfaces require that in queries involving multiple +values, values which are not :ref:`constants ` are all +defined within the same function. + +Representation of Pointers +-------------------------- + +Most importantly, the ``AliasAnalysis`` class provides several methods which are +used to query whether or not two memory objects alias, whether function calls +can modify or read a memory object, etc. For all of these queries, memory +objects are represented as a pair of their starting address (a symbolic LLVM +``Value*``) and a static size. + +Representing memory objects as a starting address and a size is critically +important for correct Alias Analyses. For example, consider this (silly, but +possible) C code: + +.. code-block:: c++ + + int i; + char C[2]; + char A[10]; + /* ... */ + for (i = 0; i != 10; ++i) { + C[0] = A[i]; /* One byte store */ + C[1] = A[9-i]; /* One byte store */ + } + +In this case, the ``basic-aa`` pass will disambiguate the stores to ``C[0]`` and +``C[1]`` because they are accesses to two distinct locations one byte apart, and +the accesses are each one byte. In this case, the Loop Invariant Code Motion +(LICM) pass can use store motion to remove the stores from the loop. In +contrast, the following code: + +.. code-block:: c++ + + int i; + char C[2]; + char A[10]; + /* ... */ + for (i = 0; i != 10; ++i) { + ((short*)C)[0] = A[i]; /* Two byte store! */ + C[1] = A[9-i]; /* One byte store */ + } + +In this case, the two stores to C do alias each other, because the access to the +``&C[0]`` element is a two byte access. If size information wasn't available in +the query, even the first case would have to conservatively assume that the +accesses alias. + +.. _alias: + +The ``alias`` method +-------------------- + +The ``alias`` method is the primary interface used to determine whether or not +two memory objects alias each other. 
It takes two memory objects as input and +returns MustAlias, PartialAlias, MayAlias, or NoAlias as appropriate. + +Like all ``AliasAnalysis`` interfaces, the ``alias`` method requires that either +the two pointer values be defined within the same function, or at least one of +the values is a :ref:`constant `. + +.. _Must, May, or No: + +Must, May, and No Alias Responses +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``NoAlias`` response may be used when there is never an immediate dependence +between any memory reference *based* on one pointer and any memory reference +*based* on the other. The most obvious example is when the two pointers point to +non-overlapping memory ranges. Another is when the two pointers are only ever +used for reading memory. Another is when the memory is freed and reallocated +between accesses through one pointer and accesses through the other --- in this +case, there is a dependence, but it's mediated by the free and reallocation. + +An exception to this is with the :ref:`noalias ` keyword; +the "irrelevant" dependencies are ignored. + +The ``MayAlias`` response is used whenever the two pointers might refer to the +same object. + +The ``PartialAlias`` response is used when the two memory objects are known to +be overlapping in some way, regardless whether they start at the same address +or not. + +The ``MustAlias`` response may only be returned if the two memory objects are +guaranteed to always start at exactly the same location. A ``MustAlias`` +response does not imply that the pointers compare equal. + +The ``getModRefInfo`` methods +----------------------------- + +The ``getModRefInfo`` methods return information about whether the execution of +an instruction can read or modify a memory location. Mod/Ref information is +always conservative: if an instruction **might** read or write a location, +``ModRef`` is returned. + +The ``AliasAnalysis`` class also provides a ``getModRefInfo`` method for testing +dependencies between function calls.
This method takes two call sites (``CS1`` +& ``CS2``), returns ``NoModRef`` if neither call writes to memory read or +written by the other, ``Ref`` if ``CS1`` reads memory written by ``CS2``, +``Mod`` if ``CS1`` writes to memory read or written by ``CS2``, or ``ModRef`` if +``CS1`` might read or write memory written to by ``CS2``. Note that this +relation is not commutative. + +Other useful ``AliasAnalysis`` methods +-------------------------------------- + +Several other tidbits of information are often collected by various alias +analysis implementations and can be put to good use by various clients. + +The ``pointsToConstantMemory`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``pointsToConstantMemory`` method returns true if and only if the analysis +can prove that the pointer only points to unchanging memory locations +(functions, constant global variables, and the null pointer). This information +can be used to refine mod/ref information: it is impossible for an unchanging +memory location to be modified. + +.. _never access memory or only read memory: + +The ``doesNotAccessMemory`` and ``onlyReadsMemory`` methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These methods are used to provide very simple mod/ref information for function +calls. The ``doesNotAccessMemory`` method returns true for a function if the +analysis can prove that the function never reads or writes to memory, or if the +function only reads from constant memory. Functions with this property are +side-effect free and only depend on their input arguments, allowing them to be +eliminated if they form common subexpressions or be hoisted out of loops. Many +common functions behave this way (e.g., ``sin`` and ``cos``) but many others do +not (e.g., ``acos``, which modifies the ``errno`` variable). + +The ``onlyReadsMemory`` method returns true for a function if analysis can prove +that (at most) the function only reads from non-volatile memory. 
Functions with +this property are side-effect free, only depending on their input arguments and +the state of memory when they are called. This property allows calls to these +functions to be eliminated and moved around, as long as there is no store +instruction that changes the contents of memory. Note that all functions that +satisfy the ``doesNotAccessMemory`` method also satisfy ``onlyReadsMemory``. + +Writing a new ``AliasAnalysis`` Implementation +============================================== + +Writing a new alias analysis implementation for LLVM is quite straight-forward. +There are already several implementations that you can use for examples, and the +following information should help fill in any details. For examples, take a +look at the `various alias analysis implementations`_ included with LLVM. + +Different Pass styles +--------------------- + +The first step is to determine what type of :doc:`LLVM pass ` +you need to use for your Alias Analysis. As is the case with most other +analyses and transformations, the answer should be fairly obvious from what type +of problem you are trying to solve: + +#. If you require interprocedural analysis, it should be a ``Pass``. +#. If you are a function-local analysis, subclass ``FunctionPass``. +#. If you don't need to look at the program at all, subclass ``ImmutablePass``. + +In addition to the pass that you subclass, you should also inherit from the +``AliasAnalysis`` interface, of course, and use the ``RegisterAnalysisGroup`` +template to register as an implementation of ``AliasAnalysis``. + +Required initialization calls +----------------------------- + +Your subclass of ``AliasAnalysis`` is required to invoke two methods on the +``AliasAnalysis`` base class: ``getAnalysisUsage`` and +``InitializeAliasAnalysis``.
In particular, your implementation of +``getAnalysisUsage`` should explicitly call into the +``AliasAnalysis::getAnalysisUsage`` method in addition to declaring +any pass dependencies your pass has. Thus you should have something like this: + +.. code-block:: c++ + + void getAnalysisUsage(AnalysisUsage &AU) const { + AliasAnalysis::getAnalysisUsage(AU); + // declare your dependencies here. + } + +Additionally, you must invoke the ``InitializeAliasAnalysis`` method from your +analysis run method (``run`` for a ``Pass``, ``runOnFunction`` for a +``FunctionPass``, or ``InitializePass`` for an ``ImmutablePass``). For example +(as part of a ``Pass``): + +.. code-block:: c++ + + bool run(Module &M) { + InitializeAliasAnalysis(this); + // Perform analysis here... + return false; + } + +Required methods to override +---------------------------- + +You must override the ``getAdjustedAnalysisPointer`` method on all subclasses +of ``AliasAnalysis``. An example implementation of this method would look like: + +.. code-block:: c++ + + void *getAdjustedAnalysisPointer(const void* ID) override { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + +Interfaces which may be specified +--------------------------------- + +All of the `AliasAnalysis +`__ virtual methods +default to providing :ref:`chaining <aliasanalysis-chaining>` to another alias +analysis implementation, which ends up returning conservatively correct +information (returning "May" Alias and "Mod/Ref" for alias and mod/ref queries +respectively). Depending on the capabilities of the analysis you are +implementing, you just override the interfaces you can improve. + +.. _aliasanalysis-chaining: + +``AliasAnalysis`` chaining behavior +----------------------------------- + +Every alias analysis pass chains to another alias analysis implementation (for +example, the user can specify "``-basic-aa -ds-aa -licm``" to get the maximum +benefit from both alias analyses).
The alias analysis class automatically +takes care of most of this for methods that you don't override. For methods +that you do override, in code paths that return a conservative MayAlias or +Mod/Ref result, simply return whatever the superclass computes. For example: + +.. code-block:: c++ + + AliasResult alias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + if (...) + return NoAlias; + ... + + // Couldn't determine a must or no-alias result. + return AliasAnalysis::alias(V1, V1Size, V2, V2Size); + } + +In addition to analysis queries, you must make sure to unconditionally pass LLVM +`update notification`_ methods to the superclass as well if you override them, +which allows all alias analyses in a chain to be updated. + +.. _update notification: + +Updating analysis results for transformations +--------------------------------------------- + +Alias analysis information is initially computed for a static snapshot of the +program, but clients will use this information to make transformations to the +code. All but the most trivial forms of alias analysis will need to have their +analysis results updated to reflect the changes made by these transformations. + +The ``AliasAnalysis`` interface exposes four methods which are used to +communicate program changes from the clients to the analysis implementations. +Various alias analysis implementations should use these methods to ensure that +their internal data structures are kept up-to-date as the program changes (for +example, when an instruction is deleted), and clients of alias analysis must be +sure to call these interfaces appropriately. + +The ``deleteValue`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``deleteValue`` method is called by transformations when they remove an +instruction or any other value from the program (including values that do not +use pointers). Typically alias analyses keep data structures that have entries +for each value in the program.
When this method is called, they should remove +any entries for the specified value, if they exist. + +The ``copyValue`` method +^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``copyValue`` method is used when a new value is introduced into the +program. There is no way to introduce a value into the program that did not +exist before (this doesn't make sense for a safe compiler transformation), so +this is the only way to introduce a new value. This method indicates that the +new value has exactly the same properties as the value being copied. + +The ``replaceWithNewValue`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This method is a simple helper method that is provided to make clients easier to +use. It is implemented by copying the old analysis information to the new +value, then deleting the old value. This method cannot be overridden by alias +analysis implementations. + +The ``addEscapingUse`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``addEscapingUse`` method is used when the uses of a pointer value have +changed in ways that may invalidate precomputed analysis information. +Implementations may either use this callback to provide conservative responses +for points whose uses have changed since analysis time, or may recompute some or +all of their internal state to continue providing accurate responses. + +In general, any new use of a pointer value is considered an escaping use, and +must be reported through this callback, *except* for the uses below: + +* A ``bitcast`` or ``getelementptr`` of the pointer +* A ``store`` through the pointer (but not a ``store`` *of* the pointer) +* A ``load`` through the pointer + +Efficiency Issues +----------------- + +From the LLVM perspective, the only thing you need to do to provide an efficient +alias analysis is to make sure that alias analysis **queries** are serviced +quickly. The actual calculation of the alias analysis results (the "run" +method) is only performed once, but many (perhaps duplicate) queries may be +performed.
Because of this, try to move as much computation to the run method +as possible (within reason). + +Limitations +----------- + +The AliasAnalysis infrastructure has several limitations which make writing a +new ``AliasAnalysis`` implementation difficult. + +There is no way to override the default alias analysis. It would be very useful +to be able to do something like "``opt -my-aa -O2``" and have it use ``-my-aa`` +for all passes which need AliasAnalysis, but there is currently no support for +that, short of changing the source code and recompiling. Similarly, there is +also no way of setting a chain of analyses as the default. + +There is no way for transform passes to declare that they preserve +``AliasAnalysis`` implementations. The ``AliasAnalysis`` interface includes +``deleteValue`` and ``copyValue`` methods which are intended to allow a pass to +keep an AliasAnalysis consistent, however there's no way for a pass to declare +in its ``getAnalysisUsage`` that it does so. Some passes attempt to use +``AU.addPreserved<AliasAnalysis>``, however this doesn't actually have any +effect. + +Similarly, the ``opt -p`` option introduces ``ModulePass`` passes between each +pass, which prevents the use of ``FunctionPass`` alias analysis passes. + +The ``AliasAnalysis`` API does have functions for notifying implementations when +values are deleted or copied, however these aren't sufficient. There are many +other ways that LLVM IR can be modified which could be relevant to +``AliasAnalysis`` implementations which can not be expressed. + +The ``AliasAnalysisDebugger`` utility seems to suggest that ``AliasAnalysis`` +implementations can expect that they will be informed of any relevant ``Value`` +before it appears in an alias query. However, popular clients such as ``GVN`` +don't support this, and are known to trigger errors when run with the +``AliasAnalysisDebugger``. + +The ``AliasSetTracker`` class (which is used by ``LICM``) makes a +non-deterministic number of alias queries.
This can cause debugging techniques +involving pausing execution after a predetermined number of queries to be +unreliable. + +Many alias queries can be reformulated in terms of other alias queries. When +multiple ``AliasAnalysis`` queries are chained together, it would make sense to +start those queries from the beginning of the chain, with care taken to avoid +infinite looping, however currently an implementation which wants to do this can +only start such queries from itself. + +Using alias analysis results +============================ + +There are several different ways to use alias analysis results. In order of +preference, these are: + +Using the ``MemoryDependenceAnalysis`` Pass +------------------------------------------- + +The ``memdep`` pass uses alias analysis to provide high-level dependence +information about memory-using instructions. This will tell you which store +feeds into a load, for example. It uses caching and other techniques to be +efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations. + +.. _AliasSetTracker: + +Using the ``AliasSetTracker`` class +----------------------------------- + +Many transformations need information about alias **sets** that are active in +some scope, rather than information about pairwise aliasing. The +`AliasSetTracker `__ +class is used to efficiently build these Alias Sets from the pairwise alias +analysis information provided by the ``AliasAnalysis`` interface. + +First you initialize the AliasSetTracker by using the "``add``" methods to add +information about various potentially aliasing instructions in the scope you are +interested in. Once all of the alias sets are completed, your pass should +simply iterate through the constructed alias sets, using the ``AliasSetTracker`` +``begin()``/``end()`` methods. 
+ +The ``AliasSet``\s formed by the ``AliasSetTracker`` are guaranteed to be +disjoint, calculate mod/ref information and volatility for the set, and keep +track of whether or not all of the pointers in the set are Must aliases. The +AliasSetTracker also makes sure that sets are properly folded due to call +instructions, and can provide a list of pointers in each set. + +As an example user of this, the `Loop Invariant Code Motion +`_ pass uses ``AliasSetTracker``\s to calculate alias +sets for each loop nest. If an ``AliasSet`` in a loop is not modified, then all +load instructions from that set may be hoisted out of the loop. If any alias +sets are stored to **and** are must alias sets, then the stores may be sunk +to outside of the loop, promoting the memory location to a register for the +duration of the loop nest. Both of these transformations only apply if the +pointer argument is loop-invariant. + +The AliasSetTracker implementation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The AliasSetTracker class is implemented to be as efficient as possible. It +uses the union-find algorithm to efficiently merge AliasSets when a pointer is +inserted into the AliasSetTracker that aliases multiple sets. The primary data +structure is a hash table mapping pointers to the AliasSet they are in. + +The AliasSetTracker class must maintain a list of all of the LLVM ``Value*``\s +that are in each AliasSet. Since the hash table already has entries for each +LLVM ``Value*`` of interest, the AliasSets thread the linked list through +these hash-table nodes to avoid having to allocate memory unnecessarily, and to +make merging alias sets extremely efficient (the linked list merge is constant +time). + +You shouldn't need to understand these details if you are just a client of the +AliasSetTracker, but if you look at the code, hopefully this brief description +will help make sense of why things are designed the way they are.
+ +Using the ``AliasAnalysis`` interface directly +---------------------------------------------- + +If neither of these utility classes is what your pass needs, you should use the +interfaces exposed by the ``AliasAnalysis`` class directly. Try to use the +higher-level methods when possible (e.g., use mod/ref information instead of the +`alias`_ method directly if possible) to get the best precision and efficiency. + +Existing alias analysis implementations and clients +=================================================== + +If you're going to be working with the LLVM alias analysis infrastructure, you +should know what clients and implementations of alias analysis are available. +In particular, if you are implementing an alias analysis, you should be aware of +`the clients`_ that are useful for monitoring and evaluating different +implementations. + +.. _various alias analysis implementations: + +Available ``AliasAnalysis`` implementations +------------------------------------------- + +This section lists the various implementations of the ``AliasAnalysis`` +interface. All of these :ref:`chain <aliasanalysis-chaining>` to other +alias analysis implementations. + +The ``-basic-aa`` pass +^^^^^^^^^^^^^^^^^^^^^^ + +The ``-basic-aa`` pass is an aggressive local analysis that *knows* many +important facts: + +* Distinct globals, stack allocations, and heap allocations can never alias. +* Globals, stack allocations, and heap allocations never alias the null pointer. +* Different fields of a structure do not alias. +* Indexes into arrays with statically differing subscripts cannot alias. +* Many common standard C library functions `never access memory or only read + memory`_. +* Pointers that obviously point to constant globals "``pointToConstantMemory``". +* Function calls can not modify or reference stack allocations if they never + escape from the function that allocates them (a common case for automatic + arrays).
+ +The ``-globalsmodref-aa`` pass +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This pass implements a simple context-sensitive mod/ref and alias analysis for +internal global variables that don't "have their address taken". If a global +does not have its address taken, the pass knows that no pointers alias the +global. This pass also keeps track of functions that it knows never access +memory or never read memory. This allows certain optimizations (e.g. GVN) to +eliminate call instructions entirely. + +The real power of this pass is that it provides context-sensitive mod/ref +information for call instructions. This allows the optimizer to know that calls +to a function do not clobber or read the value of the global, allowing loads and +stores to be eliminated. + +.. note:: + + This pass is somewhat limited in its scope (it only supports non-address-taken + globals), but is a very quick analysis. + +The ``-steens-aa`` pass +^^^^^^^^^^^^^^^^^^^^^^^ + +The ``-steens-aa`` pass implements a variation on the well-known "Steensgaard's +algorithm" for interprocedural alias analysis. Steensgaard's algorithm is a +unification-based, flow-insensitive, context-insensitive, and field-insensitive +alias analysis that is also very scalable (effectively linear time). + +The LLVM ``-steens-aa`` pass implements a "speculatively field-**sensitive**" +version of Steensgaard's algorithm using the Data Structure Analysis framework. +This gives it substantially more precision than the standard algorithm while +maintaining excellent analysis scalability. + +.. note:: + + ``-steens-aa`` is available in the optional "poolalloc" module. It is not part + of the LLVM core. + +The ``-ds-aa`` pass +^^^^^^^^^^^^^^^^^^^ + +The ``-ds-aa`` pass implements the full Data Structure Analysis algorithm. Data +Structure Analysis is a modular unification-based, flow-insensitive, +context-**sensitive**, and speculatively field-**sensitive** alias +analysis that is also quite scalable, usually at ``O(n * log(n))``.
+ +This algorithm is capable of responding to a full variety of alias analysis +queries, and can provide context-sensitive mod/ref information as well. The +only major facility not implemented so far is support for must-alias +information. + +.. note:: + + ``-ds-aa`` is available in the optional "poolalloc" module. It is not part of + the LLVM core. + +The ``-scev-aa`` pass +^^^^^^^^^^^^^^^^^^^^^ + +The ``-scev-aa`` pass implements AliasAnalysis queries by translating them into +ScalarEvolution queries. This gives it a more complete understanding of +``getelementptr`` instructions and loop induction variables than other alias +analyses have. + +Alias analysis driven transformations +------------------------------------- + +LLVM includes several alias-analysis driven transformations which can be used +with any of the implementations above. + +The ``-adce`` pass +^^^^^^^^^^^^^^^^^^ + +The ``-adce`` pass, which implements Aggressive Dead Code Elimination, uses the +``AliasAnalysis`` interface to delete calls to functions that do not have +side-effects and are not used. + +The ``-licm`` pass +^^^^^^^^^^^^^^^^^^ + +The ``-licm`` pass implements various Loop Invariant Code Motion related +transformations. It uses the ``AliasAnalysis`` interface for several different +transformations: + +* It uses mod/ref information to hoist or sink load instructions out of loops if + there are no instructions in the loop that modify the memory loaded. + +* It uses mod/ref information to hoist function calls out of loops that do not + write to memory and are loop-invariant. + +* It uses alias information to promote memory objects that are loaded and stored + to in loops to live in a register instead. It can do this if there are no may + aliases to the loaded/stored memory location. + +The ``-argpromotion`` pass +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``-argpromotion`` pass promotes by-reference arguments to be passed in +by-value instead.
In particular, if pointer arguments are only loaded from, it +passes in the value loaded instead of the address to the function. This pass +uses alias information to make sure that the value loaded from the argument +pointer is not modified between the entry of the function and any load of the +pointer. + +The ``-gvn``, ``-memcpyopt``, and ``-dse`` passes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These passes use AliasAnalysis information to reason about loads and stores. + +.. _the clients: + +Clients for debugging and evaluation of implementations +------------------------------------------------------- + +These passes are useful for evaluating the various alias analysis +implementations. You can use them with commands like: + +.. code-block:: bash + + % opt -ds-aa -aa-eval foo.bc -disable-output -stats + +The ``-print-alias-sets`` pass +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``-print-alias-sets`` pass is exposed as part of the ``opt`` tool to print +out the Alias Sets formed by the `AliasSetTracker`_ class. This is useful if +you're using the ``AliasSetTracker`` class. To use it, use something like: + +.. code-block:: bash + + % opt -ds-aa -print-alias-sets -disable-output + +The ``-aa-eval`` pass +^^^^^^^^^^^^^^^^^^^^^ + +The ``-aa-eval`` pass simply iterates through all pairs of pointers in a +function and asks an alias analysis whether or not the pointers alias. This +gives an indication of the precision of the alias analysis. Statistics are +printed indicating the percent of no/may/must aliases found (a more precise +algorithm will have a lower number of may aliases). + +Memory Dependence Analysis +========================== + +.. note:: + + We are currently in the process of migrating things from + ``MemoryDependenceAnalysis`` to :doc:`MemorySSA`. Please try to use + that instead. + +If you're just looking to be a client of alias analysis information, consider +using the Memory Dependence Analysis interface instead.
MemDep is a lazy, +caching layer on top of alias analysis that is able to answer the question of +what preceding memory operations a given instruction depends on, either at an +intra- or inter-block level. Because of its laziness and caching policy, using +MemDep can be a significant performance win over accessing alias analysis +directly. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX1011.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX1011.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX1011.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX1011.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,92 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of gfx1011 and gfx1012 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of *instructions specific to gfx1011 and gfx1012*. + +For a description of other gfx1011 and gfx1012 instructions see :doc:`Syntax of Core GFX10 Instructions`. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +DPP16 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_dot2c_f32_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_dot4c_i32_i8_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + +DPP8 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_dot2c_f32_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` :ref:`dpp8_sel` :ref:`fi` + v_dot4c_i32_i8_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` :ref:`dpp8_sel` :ref:`fi` + +VOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_dot2c_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` + v_dot4c_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` + +VOP3P +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_dot2_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`src1`::ref:`f16x2`, :ref:`src2`::ref:`f32` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_dot2_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`src1`::ref:`i16x2`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot2_u32_u16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot4_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`src1`::ref:`i8x4`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot4_u32_u8 :ref:`vdst`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot8_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`src1`::ref:`i4x8`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot8_u32_u4 :ref:`vdst`, :ref:`src0`::ref:`u4x8`, :ref:`src1`::ref:`u4x8`, :ref:`src2`::ref:`u32` :ref:`clamp` + +.. |---| unicode:: U+02014 .. em dash + +.. 
toctree:: + :hidden: + + gfx1011_src + gfx1011_src_1 + gfx1011_src_2 + gfx1011_src_3 + gfx1011_type_deviation + gfx1011_vdst + gfx1011_vsrc diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,2255 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of Core GFX10 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of *core* GFX10 instructions. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +DPP16 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_co_ci_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_add_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_add_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_add_nc_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_and_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_ashrrev_i32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_bfrev_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_ceil_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` 
:ref:`bound_ctrl` :ref:`fi` + v_ceil_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cndmask_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cos_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cos_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f16_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f16_i16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f16_u16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f32_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f32_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f32_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f32_ubyte0_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f32_ubyte1_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f32_ubyte2_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_f32_ubyte3_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_flr_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_i16_f16_dpp 
:ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_norm_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_norm_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_off_f32_i4_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_rpi_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_cvt_u32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_exp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_exp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_ffbh_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_ffbh_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_ffbl_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_floor_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_floor_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_fmac_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, 
:ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_fmac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_fract_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_fract_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_frexp_exp_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_frexp_exp_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_frexp_mant_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_frexp_mant_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_ldexp_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`i16` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_log_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_log_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_lshlrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_lshrrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_max_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, 
:ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_max_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_max_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_max_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_min_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_min_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_min_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_min_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mov_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_movreld_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_movrels_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_movrelsd_2_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_movrelsd_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mul_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mul_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` 
:ref:`bound_ctrl` :ref:`fi` + v_mul_hi_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mul_hi_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mul_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mul_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_mul_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_not_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_or_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_rcp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_rcp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_rcp_iflag_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_rndne_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_rndne_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_rsq_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_rsq_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sat_pk_u8_i16_dpp :ref:`vdst`::ref:`u8x4`, :ref:`vsrc` :ref:`dpp16_ctrl` 
:ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sin_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sin_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sqrt_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sqrt_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sub_co_ci_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sub_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sub_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_sub_nc_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_subrev_co_ci_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_subrev_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_subrev_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_subrev_nc_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_trunc_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_trunc_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` 
:ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_xnor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + v_xor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp16_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` :ref:`fi` + +DPP8 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_co_ci_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp8_sel` :ref:`fi` + v_add_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_add_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_add_nc_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_and_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_ashrrev_i32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_bfrev_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_ceil_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_ceil_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cndmask_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` 
:ref:`dpp8_sel` :ref:`fi` + v_cos_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cos_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f16_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f16_i16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f16_u16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f32_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f32_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f32_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f32_ubyte0_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f32_ubyte1_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f32_ubyte2_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_f32_ubyte3_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_flr_i32_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_i16_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_i32_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_norm_i16_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_norm_u16_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_off_f32_i4_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_rpi_i32_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_u16_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_cvt_u32_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_exp_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_exp_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_ffbh_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_ffbh_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_ffbl_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_floor_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_floor_f32_dpp :ref:`vdst`, :ref:`vsrc` 
:ref:`dpp8_sel` :ref:`fi` + v_fmac_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_fmac_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_fract_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_fract_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_frexp_exp_i16_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_frexp_exp_i32_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_frexp_mant_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_frexp_mant_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_ldexp_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`::ref:`i16` :ref:`dpp8_sel` :ref:`fi` + v_log_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_log_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_lshlrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_lshrrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mac_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_max_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_max_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_max_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_max_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_min_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_min_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_min_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_min_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mov_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_movreld_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_movrels_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` 
:ref:`fi` + v_movrelsd_2_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_movrelsd_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_mul_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mul_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mul_hi_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mul_hi_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mul_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mul_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_mul_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_not_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_or_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_rcp_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_rcp_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_rcp_iflag_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_rndne_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_rndne_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_rsq_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_rsq_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_sat_pk_u8_i16_dpp :ref:`vdst`::ref:`u8x4`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_sin_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_sin_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_sqrt_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_sqrt_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_sub_co_ci_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp8_sel` :ref:`fi` + v_sub_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_sub_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` 
:ref:`dpp8_sel` :ref:`fi` + v_sub_nc_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_subrev_co_ci_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp8_sel` :ref:`fi` + v_subrev_f16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_subrev_f32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_subrev_nc_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_trunc_f16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_trunc_f32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp8_sel` :ref:`fi` + v_xnor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + v_xor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp8_sel` :ref:`fi` + +DS +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + ds_add_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_src2_u32 :ref:`vaddr` 
:ref:`offset` :ref:`gds` + ds_add_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_and_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_append :ref:`vdst` :ref:`offset` :ref:`gds` + ds_bpermute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_cmpst_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_condxchg32_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_consume :ref:`vdst` :ref:`offset` :ref:`gds` + ds_dec_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u64 :ref:`vaddr`, :ref:`vdata` 
:ref:`offset` :ref:`gds` + ds_gws_barrier :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_init :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_br :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_p :ref:`offset` :ref:`gds` + ds_gws_sema_release_all :ref:`offset` :ref:`gds` + ds_gws_sema_v :ref:`offset` :ref:`gds` + ds_inc_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` 
:ref:`gds` + ds_min_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_mskor_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_nop + ds_or_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_or_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_ordered_count :ref:`vdst`, :ref:`vaddr` :ref:`offset` 
:ref:`gds` + ds_permute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_read2_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read_addtid_b32 :ref:`vdst` :ref:`offset` :ref:`gds` + ds_read_b128 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b32 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b64 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b96 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u64 :ref:`vdst`, 
:ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_swizzle_b32 :ref:`vdst`, :ref:`vaddr` :ref:`pattern` :ref:`gds` + ds_wrap_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_write2_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write_addtid_b32 :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b128 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b96 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_write_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_wrxchg2_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + 
ds_wrxchg2st64_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_xor_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + +EXP +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + exp :ref:`tgt`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vsrc2`, :ref:`vsrc3` :ref:`done` :ref:`compr` :ref:`vm` + +FLAT +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + flat_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_fcmpswap :ref:`vdst`::ref:`opt`::ref:`f32`, :ref:`vaddr`, :ref:`vdata`::ref:`f32x2` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_fcmpswap_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64x2` :ref:`offset11` :ref:`glc` 
:ref:`slc` + flat_atomic_fmax :ref:`vdst`::ref:`opt`::ref:`f32`, :ref:`vaddr`, :ref:`vdata`::ref:`f32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_fmax_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_fmin :ref:`vdst`::ref:`opt`::ref:`f32`, :ref:`vaddr`, :ref:`vdata`::ref:`f32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_fmin_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_umax 
:ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` + flat_load_dword :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_dwordx2 :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_dwordx3 :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_dwordx4 :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_sbyte :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_short_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_short_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_sshort :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_ubyte :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_load_ushort 
:ref:`vdst`, :ref:`vaddr` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_byte :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_dword :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_dwordx2 :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_dwordx3 :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_dwordx4 :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_short :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + flat_store_short_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset11` :ref:`glc` :ref:`slc` :ref:`dlc` + global_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_fmax :ref:`vdst`::ref:`opt`::ref:`f32`, 
:ref:`vaddr`, :ref:`vdata`::ref:`f32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_fmax_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_fmin :ref:`vdst`::ref:`opt`::ref:`f32`, :ref:`vaddr`, :ref:`vdata`::ref:`f32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_fmin_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, 
:ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` + global_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` 
:ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_dword :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + global_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_sbyte 
:ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_dword :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + scratch_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` 
:ref:`offset12s` :ref:`glc` :ref:`slc` :ref:`dlc` + +MIMG +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + image_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_cmpswap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_dec :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_fcmpswap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_fmax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_fmin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` 
:ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_inc :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_smax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_smin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_umax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_umin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_gather4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lwe` :ref:`d16` + image_gather4_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` 
:ref:`lwe` :ref:`d16` + image_gather4_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lwe` :ref:`d16` + image_gather4_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lwe` :ref:`d16` + image_gather4_c :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_lz :ref:`vdst`, 
:ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_gather4_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_get_lod :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` + image_get_resinfo :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` 
:ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` + image_load :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_load_mip :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_load_mip_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` + image_load_mip_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` + image_load_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` + image_load_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` + image_sample :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c :ref:`vdst`, 
:ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd_cl_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd_cl_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` 
:ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cd_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_d_cl_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_d_cl_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_d_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + 
image_sample_c_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_d_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd_cl_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` 
:ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd_cl_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cd_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_d_cl_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + 
image_sample_d_cl_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_d_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_d_o_g16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_sample_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`d16` + image_store :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` :ref:`d16` + image_store_mip :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` 
:ref:`d16` + image_store_mip_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + image_store_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`dim` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`a16` :ref:`lwe` + +MTBUF +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + tbuffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` 
:ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + tbuffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`ufmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + +MUBUF +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + buffer_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_add_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap :ref:`vdata`::ref:`dst`::ref:`b32x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap_x2 :ref:`vdata`::ref:`dst`::ref:`b64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_dec :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` 
:ref:`slc` + buffer_atomic_dec_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fcmpswap :ref:`vdata`::ref:`dst`::ref:`f32x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fcmpswap_x2 :ref:`vdata`::ref:`dst`::ref:`f64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmax :ref:`vdata`::ref:`dst`::ref:`f32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmax_x2 :ref:`vdata`::ref:`dst`::ref:`f64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmin :ref:`vdata`::ref:`dst`::ref:`f32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmin_x2 :ref:`vdata`::ref:`dst`::ref:`f64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + 
buffer_atomic_smax_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_gl0_inv + buffer_gl1_inv + buffer_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lds` + buffer_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lds` + buffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` 
:ref:`slc` :ref:`dlc` + buffer_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lds` + buffer_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lds` + buffer_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lds` + buffer_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` + buffer_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`dlc` :ref:`lds` + buffer_store_byte :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_byte_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dword :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` 
:ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx2 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx3 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx4 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_short :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_short_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + +SDWA +----------------------- + 
+.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_co_ci_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_nc_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_and_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_bfrev_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f16_sdwa 
:ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cmp_class_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + 
v_cmp_ge_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` 
:ref:`src1_sel` + v_cmp_nlt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i32_sdwa 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + 
v_cmpx_lg_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f16_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f32_sdwa :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cndmask_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_cos_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cos_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_i16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_u16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` 
:ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte0_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte1_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte2_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte3_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_flr_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_norm_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_norm_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_off_f32_i4_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_rpi_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + 
v_ffbh_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbl_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ldexp_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`i16` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_log_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_lshlrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` 
:ref:`src1_sel` + v_max_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mov_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_movreld_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_movrels_b32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_movrelsd_2_b32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_movrelsd_b32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_mul_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` 
:ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_legacy_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_not_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_or_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_rcp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_iflag_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sat_pk_u8_i16_sdwa :ref:`vdst`::ref:`u8x4`, 
:ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sub_co_ci_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_nc_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_co_ci_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_nc_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_trunc_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` 
:ref:`dst_unused` :ref:`src0_sel` + v_trunc_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_xnor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_xor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + +SMEM +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_atc_probe :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atc_probe_buffer :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, 
:ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + 
s_buffer_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor :ref:`sdata`::ref:`dst`, 
:ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_buffer_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_buffer_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_buffer_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_buffer_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_buffer_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_dcache_discard :ref:`sbase`, :ref:`soffset` + s_dcache_discard_x2 :ref:`sbase`, :ref:`soffset` + s_dcache_inv + s_dcache_wb + s_get_waveid_in_workgroup :ref:`sdst` + s_gl1_inv + s_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_memrealtime :ref:`sdst`::ref:`b64` + s_memtime :ref:`sdst`::ref:`b64` + s_scratch_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_scratch_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_scratch_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` :ref:`dlc` + s_scratch_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dword :ref:`sdata`, 
:ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + +SOP1 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_abs_i32 :ref:`sdst`, :ref:`ssrc` + s_and_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_and_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_andn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_wrexec_b32 :ref:`sdst`, :ref:`ssrc` + s_andn1_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_andn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_wrexec_b32 :ref:`sdst`, :ref:`ssrc` + s_andn2_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bitreplicate_b64_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_bitset1_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset1_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_brev_b32 :ref:`sdst`, :ref:`ssrc` + s_brev_b64 :ref:`sdst`, :ref:`ssrc` + s_cmov_b32 :ref:`sdst`, :ref:`ssrc` + s_cmov_b64 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b64 :ref:`sdst`, :ref:`ssrc` + 
s_flbit_i32_i64 :ref:`sdst`, :ref:`ssrc` + s_getpc_b64 :ref:`sdst` + s_mov_b32 :ref:`sdst`, :ref:`ssrc` + s_mov_b64 :ref:`sdst`, :ref:`ssrc` + s_movreld_b32 :ref:`sdst`, :ref:`ssrc` + s_movreld_b64 :ref:`sdst`, :ref:`ssrc` + s_movrels_b32 :ref:`sdst`, :ref:`ssrc` + s_movrels_b64 :ref:`sdst`, :ref:`ssrc` + s_movrelsd_2_b32 :ref:`sdst`, :ref:`ssrc` + s_nand_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_nand_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_nor_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_nor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_not_b32 :ref:`sdst`, :ref:`ssrc` + s_not_b64 :ref:`sdst`, :ref:`ssrc` + s_or_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_or_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn1_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_orn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn2_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_orn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b32 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b64 :ref:`sdst`, :ref:`ssrc` + s_rfe_b64 :ref:`ssrc` + s_setpc_b64 :ref:`ssrc` + s_sext_i32_i16 :ref:`sdst`, :ref:`ssrc` + s_sext_i32_i8 :ref:`sdst`, :ref:`ssrc` + s_swappc_b64 :ref:`sdst`, :ref:`ssrc` + s_wqm_b32 :ref:`sdst`, :ref:`ssrc` + s_wqm_b64 :ref:`sdst`, :ref:`ssrc` + s_xnor_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_xnor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_xor_saveexec_b32 :ref:`sdst`, :ref:`ssrc` + s_xor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + +SOP2 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_absdiff_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_addc_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_ashr_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_ashr_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfe_u64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfm_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfm_b64 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_cselect_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl1_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl2_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl3_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl4_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshl_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b64 :ref:`sdst`, :ref:`ssrc0`, 
:ref:`ssrc1`::ref:`u32` + s_max_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_max_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_pack_hh_b32_b16 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_pack_lh_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_pack_ll_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_sub_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_sub_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_subb_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + +SOPC +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_bitcmp0_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp0_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bitcmp1_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp1_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_cmp_eq_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_u32 :ref:`ssrc0`, :ref:`ssrc1` + +SOPK +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_addk_i32 :ref:`sdst`, :ref:`imm16` + s_call_b64 :ref:`sdst`, :ref:`label` + s_cmovk_i32 :ref:`sdst`, :ref:`imm16` + s_cmpk_eq_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_eq_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_u32 :ref:`ssrc`, :ref:`imm16` + s_getreg_b32 :ref:`sdst`, :ref:`hwreg` + s_movk_i32 :ref:`sdst`, :ref:`imm16` + s_mulk_i32 :ref:`sdst`, :ref:`imm16` + s_setreg_b32 :ref:`hwreg`, :ref:`ssrc` + s_setreg_imm32_b32 :ref:`hwreg`, :ref:`simm32` + s_subvector_loop_begin :ref:`sdst`, :ref:`label` + s_subvector_loop_end :ref:`sdst`, :ref:`label` + s_version :ref:`imm16` + s_waitcnt_expcnt :ref:`ssrc`, :ref:`imm16` + s_waitcnt_lgkmcnt :ref:`ssrc`, :ref:`imm16` + s_waitcnt_vmcnt :ref:`ssrc`, :ref:`imm16` + s_waitcnt_vscnt :ref:`ssrc`, :ref:`imm16` + +SOPP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_barrier + s_branch :ref:`label` + s_cbranch_cdbgsys :ref:`label` + s_cbranch_cdbgsys_and_user :ref:`label` + s_cbranch_cdbgsys_or_user :ref:`label` + s_cbranch_cdbguser :ref:`label` + s_cbranch_execnz :ref:`label` + s_cbranch_execz :ref:`label` + s_cbranch_scc0 :ref:`label` + s_cbranch_scc1 :ref:`label` + s_cbranch_vccnz :ref:`label` + s_cbranch_vccz :ref:`label` + s_clause :ref:`imm16` + s_code_end + s_decperflevel :ref:`imm16` + s_denorm_mode :ref:`imm16` + s_endpgm + s_endpgm_ordered_ps_done + s_endpgm_saved + s_icache_inv + s_incperflevel :ref:`imm16` + s_inst_prefetch :ref:`imm16` + s_nop :ref:`imm16` + s_round_mode :ref:`imm16` + s_sendmsg :ref:`msg` + s_sendmsghalt :ref:`msg` + s_sethalt :ref:`imm16` + s_setkill :ref:`imm16` + s_setprio :ref:`imm16` + s_sleep :ref:`imm16` + s_trap :ref:`imm16` + s_ttracedata + s_ttracedata_imm :ref:`imm16` + s_waitcnt :ref:`waitcnt` + s_wakeup + +VINTRP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_interp_mov_f32 :ref:`vdst`, :ref:`param`::ref:`b32`, :ref:`attr`::ref:`b32` + v_interp_p1_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + v_interp_p2_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + +VOP1 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_bfrev_b32 :ref:`vdst`, :ref:`src` + v_ceil_f16 :ref:`vdst`, :ref:`src` + v_ceil_f32 :ref:`vdst`, :ref:`src` + v_ceil_f64 :ref:`vdst`, :ref:`src` + v_clrexcp + v_cos_f16 :ref:`vdst`, :ref:`src` + v_cos_f32 :ref:`vdst`, :ref:`src` + v_cvt_f16_f32 :ref:`vdst`, :ref:`src` + v_cvt_f16_i16 :ref:`vdst`, :ref:`src` + v_cvt_f16_u16 :ref:`vdst`, :ref:`src` + v_cvt_f32_f16 :ref:`vdst`, :ref:`src` + v_cvt_f32_f64 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32 :ref:`vdst`, :ref:`src` + v_cvt_f32_u32 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte0 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte1 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte2 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte3 :ref:`vdst`, :ref:`src` + v_cvt_f64_f32 :ref:`vdst`, :ref:`src` + v_cvt_f64_i32 :ref:`vdst`, :ref:`src` + v_cvt_f64_u32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32 
:ref:`vdst`, :ref:`src` + v_cvt_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_i32_f64 :ref:`vdst`, :ref:`src` + v_cvt_norm_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_norm_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_off_f32_i4 :ref:`vdst`, :ref:`src` + v_cvt_rpi_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_u32_f32 :ref:`vdst`, :ref:`src` + v_cvt_u32_f64 :ref:`vdst`, :ref:`src` + v_exp_f16 :ref:`vdst`, :ref:`src` + v_exp_f32 :ref:`vdst`, :ref:`src` + v_ffbh_i32 :ref:`vdst`, :ref:`src` + v_ffbh_u32 :ref:`vdst`, :ref:`src` + v_ffbl_b32 :ref:`vdst`, :ref:`src` + v_floor_f16 :ref:`vdst`, :ref:`src` + v_floor_f32 :ref:`vdst`, :ref:`src` + v_floor_f64 :ref:`vdst`, :ref:`src` + v_fract_f16 :ref:`vdst`, :ref:`src` + v_fract_f32 :ref:`vdst`, :ref:`src` + v_fract_f64 :ref:`vdst`, :ref:`src` + v_frexp_exp_i16_f16 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f32 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f64 :ref:`vdst`, :ref:`src` + v_frexp_mant_f16 :ref:`vdst`, :ref:`src` + v_frexp_mant_f32 :ref:`vdst`, :ref:`src` + v_frexp_mant_f64 :ref:`vdst`, :ref:`src` + v_log_f16 :ref:`vdst`, :ref:`src` + v_log_f32 :ref:`vdst`, :ref:`src` + v_mov_b32 :ref:`vdst`, :ref:`src` + v_movreld_b32 :ref:`vdst`, :ref:`src` + v_movrels_b32 :ref:`vdst`, :ref:`vsrc` + v_movrelsd_2_b32 :ref:`vdst`, :ref:`vsrc` + v_movrelsd_b32 :ref:`vdst`, :ref:`vsrc` + v_nop + v_not_b32 :ref:`vdst`, :ref:`src` + v_pipeflush + v_rcp_f16 :ref:`vdst`, :ref:`src` + v_rcp_f32 :ref:`vdst`, :ref:`src` + v_rcp_f64 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32 :ref:`vdst`, :ref:`src` + v_readfirstlane_b32 :ref:`sdst`, :ref:`src` + v_rndne_f16 :ref:`vdst`, :ref:`src` + v_rndne_f32 :ref:`vdst`, :ref:`src` + v_rndne_f64 :ref:`vdst`, :ref:`src` + v_rsq_f16 :ref:`vdst`, :ref:`src` + v_rsq_f32 :ref:`vdst`, :ref:`src` + v_rsq_f64 :ref:`vdst`, :ref:`src` + v_sat_pk_u8_i16 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_sin_f16 :ref:`vdst`, :ref:`src` + v_sin_f32 :ref:`vdst`, :ref:`src` + 
v_sqrt_f16 :ref:`vdst`, :ref:`src` + v_sqrt_f32 :ref:`vdst`, :ref:`src` + v_sqrt_f64 :ref:`vdst`, :ref:`src` + v_swap_b32 :ref:`vdst`, :ref:`vsrc` + v_swaprel_b32 :ref:`vdst`, :ref:`vsrc` + v_trunc_f16 :ref:`vdst`, :ref:`src` + v_trunc_f32 :ref:`vdst`, :ref:`src` + v_trunc_f64 :ref:`vdst`, :ref:`src` + +VOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_co_ci_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_nc_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_and_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_ashrrev_i32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_cndmask_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_cvt_pkrtz_f16_f32 :ref:`vdst`, :ref:`src0`::ref:`f32`, :ref:`vsrc1`::ref:`f32` + v_fmaak_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_fmaak_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_fmac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmamk_f16 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_fmamk_f32 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_ldexp_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`i16` + v_lshlrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, 
:ref:`vsrc1` + v_lshrrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_mac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_madak_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madmk_f32 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_pk_fmac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_co_ci_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_sub_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_nc_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_co_ci_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subrev_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_nc_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xnor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + +VOP3 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_co_ci_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_add_co_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_lshl_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_nc_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_add_nc_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_nc_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_nc_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_alignbit_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_alignbyte_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_and_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_and_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_ashrrev_i16 :ref:`vdst`, 
:ref:`src0`::ref:`u16`, :ref:`src1` + v_ashrrev_i32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_ashrrev_i64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_bcnt_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfe_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_bfe_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfi_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfrev_b32_e64 :ref:`vdst`, :ref:`src` + v_ceil_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_clrexcp_e64 + v_cmp_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + 
v_cmp_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f16_e64 :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_class_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + 
v_cmpx_eq_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_i16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_eq_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_eq_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_eq_u16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_eq_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_eq_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_f_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_f_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_f_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_f_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ge_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_i16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ge_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ge_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ge_u16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ge_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ge_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_gt_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_i16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_gt_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_gt_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_gt_u16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_gt_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_gt_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_le_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f32_e64 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_i16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_le_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_le_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_le_u16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_le_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_le_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_lg_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_i16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_lt_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_lt_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_lt_u16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_lt_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_lt_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ne_i16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ne_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ne_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ne_u16_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ne_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_ne_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_neq_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f32_e64 :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_t_i32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_t_i64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_t_u32_e64 :ref:`src0`, :ref:`src1` + v_cmpx_t_u64_e64 :ref:`src0`, :ref:`src1` + v_cmpx_tru_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f16_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f32_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f64_e64 :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cndmask_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`ssrc2` + v_cos_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cos_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubeid_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` 
:ref:`clamp` :ref:`omod` + v_cubema_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubesc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubetc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_i16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f16_u16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte0_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte1_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte2_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte3_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f64_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_flr_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_norm_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_norm_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_off_f32_i4_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_pk_i16_i32 :ref:`vdst`, :ref:`src0`::ref:`i32`, :ref:`src1`::ref:`i32` + v_cvt_pk_u16_u32 :ref:`vdst`, :ref:`src0`::ref:`u32`, 
:ref:`src1`::ref:`u32` + v_cvt_pk_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_cvt_pknorm_i16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, :ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + v_cvt_pknorm_i16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pknorm_u16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, :ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + v_cvt_pknorm_u16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pkrtz_f16_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` :ref:`clamp` + v_cvt_rpi_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_div_fixup_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_div_fixup_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_scale_f32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_div_scale_f64 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_exp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ffbh_i32_e64 :ref:`vdst`, :ref:`src` + v_ffbh_u32_e64 :ref:`vdst`, :ref:`src` + v_ffbl_b32_e64 :ref:`vdst`, :ref:`src` + 
v_floor_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_fma_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fmac_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_fmac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_exp_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_mant_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_interp_mov_f32_e64 :ref:`vdst`, :ref:`param`::ref:`b32`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_interp_p1_f32_e64 :ref:`vdst`, :ref:`vsrc`::ref:`m`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_interp_p1ll_f16 :ref:`vdst`::ref:`f32`, :ref:`vsrc`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32` :ref:`high` :ref:`clamp` :ref:`omod` + v_interp_p1lv_f16 :ref:`vdst`::ref:`f32`, :ref:`vsrc0`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32`, :ref:`vsrc2`::ref:`m`::ref:`f16x2` :ref:`high` :ref:`clamp` :ref:`omod` + v_interp_p2_f16 :ref:`vdst`, 
:ref:`vsrc0`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32`, :ref:`vsrc2`::ref:`m`::ref:`f32` :ref:`high` :ref:`clamp` + v_interp_p2_f32_e64 :ref:`vdst`, :ref:`vsrc`::ref:`m`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_ldexp_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i16` :ref:`clamp` :ref:`omod` + v_ldexp_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_ldexp_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_lerp_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`b32`, :ref:`src1`::ref:`b32`, :ref:`src2`::ref:`b32` + v_log_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_lshl_add_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_lshl_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2` + v_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshlrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshlrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshrrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_mac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mac_legacy_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_mad_i64_i32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, 
:ref:`src2`::ref:`i64` :ref:`clamp` + v_mad_legacy_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mad_u64_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u64` :ref:`clamp` + v_max3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_max3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_max3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_hi_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_lo_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_med3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_med3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_med3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + 
v_med3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_med3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_med3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_min3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_min3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mov_b32_e64 :ref:`vdst`, :ref:`src` + v_movreld_b32_e64 :ref:`vdst`, :ref:`src` + v_movrels_b32_e64 :ref:`vdst`, :ref:`vsrc` + v_movrelsd_2_b32_e64 :ref:`vdst`, :ref:`vsrc` + v_movrelsd_b32_e64 :ref:`vdst`, :ref:`vsrc` + v_mqsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_mqsad_u32_u8 :ref:`vdst`::ref:`u32x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`vsrc2`::ref:`u32x4` :ref:`clamp` + v_msad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`b32`, :ref:`src1`::ref:`b32`, :ref:`src2`::ref:`b32` :ref:`clamp` + v_mul_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`clamp` :ref:`omod` + v_mul_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_hi_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_mul_legacy_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_lo_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_mullit_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_nop_e64 + v_not_b32_e64 :ref:`vdst`, :ref:`src` + v_or3_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_or_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_pack_b32_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`op_sel` + v_perm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_permlane16_b32 :ref:`vdst`, :ref:`vdata`, :ref:`ssrc1`, :ref:`ssrc2` :ref:`dpp_op_sel` + v_permlanex16_b32 :ref:`vdst`, :ref:`vdata`, :ref:`ssrc1`, :ref:`ssrc2` :ref:`dpp_op_sel` + v_pipeflush_e64 + v_qsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_rcp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_iflag_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_readlane_b32 :ref:`sdst`, :ref:`src0`, :ref:`ssrc1` + v_rndne_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f64_e64 
:ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sad_hi_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u16 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_sad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sat_pk_u8_i16_e64 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_sin_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sin_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_co_ci_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_sub_co_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_nc_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_sub_nc_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_nc_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_nc_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_co_ci_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subrev_co_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + 
v_subrev_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_nc_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_trig_preop_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`u32` :ref:`clamp` :ref:`omod` + v_trunc_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_writelane_b32 :ref:`vdst`, :ref:`ssrc0`, :ref:`ssrc1` + v_xad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_xnor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_xor3_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_xor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOP3P +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_fma_mix_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixhi_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, 
:ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixlo_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_pk_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_add_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_add_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_fma_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` 
:ref:`op_sel` :ref:`op_sel_hi` + v_pk_sub_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + +VOPC +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_cmp_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16 :ref:`vcc`, :ref:`src0`, 
:ref:`vsrc1` + v_cmp_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i64 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_class_f16 :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f32 :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f64 :ref:`src0`, 
:ref:`vsrc1`::ref:`b32` + v_cmpx_eq_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16 :ref:`src0`, :ref:`vsrc1` + 
v_cmpx_lt_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f64 :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16 :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f32 :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f64 :ref:`src0`, :ref:`vsrc1` + +.. |---| unicode:: U+02014 .. em dash + +.. 
toctree:: + :hidden: + + gfx10_attr + gfx10_dst + gfx10_fx_operand + gfx10_hwreg + gfx10_imm16 + gfx10_imm16_1 + gfx10_imm16_2 + gfx10_label + gfx10_m + gfx10_m_1 + gfx10_msg + gfx10_opt + gfx10_param + gfx10_probe + gfx10_saddr + gfx10_saddr_1 + gfx10_sbase + gfx10_sbase_1 + gfx10_sbase_2 + gfx10_sdata + gfx10_sdata_1 + gfx10_sdata_2 + gfx10_sdata_3 + gfx10_sdata_4 + gfx10_sdata_5 + gfx10_sdst + gfx10_sdst_1 + gfx10_sdst_2 + gfx10_sdst_3 + gfx10_sdst_4 + gfx10_sdst_5 + gfx10_sdst_6 + gfx10_sdst_7 + gfx10_sdst_8 + gfx10_simm32 + gfx10_simm32_1 + gfx10_simm32_2 + gfx10_soffset + gfx10_soffset_1 + gfx10_soffset_2 + gfx10_src + gfx10_src_1 + gfx10_src_2 + gfx10_src_3 + gfx10_src_4 + gfx10_src_5 + gfx10_src_6 + gfx10_src_7 + gfx10_src_8 + gfx10_srsrc + gfx10_srsrc_1 + gfx10_ssamp + gfx10_ssrc + gfx10_ssrc_1 + gfx10_ssrc_2 + gfx10_ssrc_3 + gfx10_ssrc_4 + gfx10_ssrc_5 + gfx10_ssrc_6 + gfx10_ssrc_7 + gfx10_ssrc_8 + gfx10_tgt + gfx10_type_deviation + gfx10_vaddr + gfx10_vaddr_1 + gfx10_vaddr_2 + gfx10_vaddr_3 + gfx10_vaddr_4 + gfx10_vaddr_5 + gfx10_vcc + gfx10_vdata + gfx10_vdata0 + gfx10_vdata0_1 + gfx10_vdata1 + gfx10_vdata1_1 + gfx10_vdata_1 + gfx10_vdata_10 + gfx10_vdata_2 + gfx10_vdata_3 + gfx10_vdata_4 + gfx10_vdata_5 + gfx10_vdata_6 + gfx10_vdata_7 + gfx10_vdata_8 + gfx10_vdata_9 + gfx10_vdst + gfx10_vdst_1 + gfx10_vdst_10 + gfx10_vdst_11 + gfx10_vdst_12 + gfx10_vdst_13 + gfx10_vdst_2 + gfx10_vdst_3 + gfx10_vdst_4 + gfx10_vdst_5 + gfx10_vdst_6 + gfx10_vdst_7 + gfx10_vdst_8 + gfx10_vdst_9 + gfx10_vsrc + gfx10_vsrc_1 + gfx10_vsrc_2 + gfx10_vsrc_3 + gfx10_waitcnt diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX7.rst.txt 
2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,1448 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of GFX7 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of GFX7 instructions. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +DS +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + ds_add_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` 
:ref:`gds` + ds_and_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_and_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_append :ref:`vdst` :ref:`offset` :ref:`gds` + ds_cmpst_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_condxchg32_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_consume :ref:`vdst` :ref:`offset` :ref:`gds` + ds_dec_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_barrier :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_init :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_br :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_p :ref:`offset` :ref:`gds` + ds_gws_sema_release_all :ref:`offset` :ref:`gds` + 
ds_gws_sema_v :ref:`offset` :ref:`gds` + ds_inc_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` 
:ref:`gds` + ds_min_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_mskor_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_nop + ds_or_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_or_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_ordered_count :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read2_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` 
:ref:`offset1` :ref:`gds` + ds_read2st64_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read_b128 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b32 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b64 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b96 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_swizzle_b32 :ref:`vdst`, :ref:`vaddr` :ref:`pattern` :ref:`gds` + ds_wrap_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_write2_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` 
:ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write_b128 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b96 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_write_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_wrxchg2_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_xor_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + +EXP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + exp :ref:`tgt`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vsrc2`, :ref:`vsrc3` :ref:`done` :ref:`compr` :ref:`vm` + +FLAT +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + flat_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap 
:ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2` :ref:`glc` :ref:`slc` + flat_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`glc` :ref:`slc` + flat_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_fcmpswap :ref:`vdst`::ref:`opt`::ref:`f32`, :ref:`vaddr`, :ref:`vdata`::ref:`f32x2` :ref:`glc` :ref:`slc` + flat_atomic_fcmpswap_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64x2` :ref:`glc` :ref:`slc` + flat_atomic_fmax :ref:`vdst`::ref:`opt`::ref:`f32`, :ref:`vaddr`, :ref:`vdata`::ref:`f32` :ref:`glc` :ref:`slc` + flat_atomic_fmax_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64` :ref:`glc` :ref:`slc` + flat_atomic_fmin :ref:`vdst`::ref:`opt`::ref:`f32`, :ref:`vaddr`, :ref:`vdata`::ref:`f32` :ref:`glc` :ref:`slc` + flat_atomic_fmin_x2 :ref:`vdst`::ref:`opt`::ref:`f64`, :ref:`vaddr`, :ref:`vdata`::ref:`f64` :ref:`glc` :ref:`slc` + flat_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`glc` :ref:`slc` + flat_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`glc` :ref:`slc` + flat_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`glc` :ref:`slc` + flat_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`glc` :ref:`slc` + flat_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`glc` 
:ref:`slc` + flat_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`glc` :ref:`slc` + flat_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`glc` :ref:`slc` + flat_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_load_dword :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_dwordx2 :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_dwordx3 :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_dwordx4 :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_sbyte :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_sshort :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_ubyte :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_ushort :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_store_byte :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dword :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dwordx2 :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dwordx3 :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dwordx4 :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_short :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + +MIMG 
+----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + image_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_cmpswap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_dec :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_fcmpswap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_fmax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_fmin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_inc :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_or :ref:`vdata`::ref:`dst`, 
:ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_smax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_smin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_umax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_umin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_gather4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` 
:ref:`da` + image_gather4_c_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` 
:ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_gather4_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_get_lod :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_get_resinfo :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_mip :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_mip_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_mip_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` 
:ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_cd :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_cd_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_cd_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_cd :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_cd_cl :ref:`vdst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_cd_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample_o :ref:`vdst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_store :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_store_mip :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_store_mip_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_store_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + +MTBUF +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + tbuffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + 
tbuffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + +MUBUF +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + buffer_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_add_x2 
:ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap :ref:`vdata`::ref:`dst`::ref:`b32x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap_x2 :ref:`vdata`::ref:`dst`::ref:`b64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_dec :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_dec_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fcmpswap :ref:`vdata`::ref:`dst`::ref:`f32x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fcmpswap_x2 :ref:`vdata`::ref:`dst`::ref:`f64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmax :ref:`vdata`::ref:`dst`::ref:`f32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmax_x2 :ref:`vdata`::ref:`dst`::ref:`f64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmin :ref:`vdata`::ref:`dst`::ref:`f32`, 
:ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_fmin_x2 :ref:`vdata`::ref:`dst`::ref:`f64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` 
:ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` 
:ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_store_byte :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dword :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx2 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx3 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` 
:ref:`glc` :ref:`slc` + buffer_store_dwordx4 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_short :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`addr64` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_wbinvl1 + buffer_wbinvl1_vol + +SMRD +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_buffer_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_buffer_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_buffer_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_buffer_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_buffer_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_dcache_inv + s_dcache_inv_vol + s_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` + s_memtime :ref:`sdst`::ref:`b64` + +SOP1 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_abs_i32 :ref:`sdst`, :ref:`ssrc` + s_and_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_bitset1_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset1_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_brev_b32 :ref:`sdst`, :ref:`ssrc` + s_brev_b64 :ref:`sdst`, :ref:`ssrc` + s_cbranch_join :ref:`ssrc` + s_cmov_b32 :ref:`sdst`, :ref:`ssrc` + s_cmov_b64 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_i64 :ref:`sdst`, :ref:`ssrc` + s_getpc_b64 :ref:`sdst` + s_mov_b32 :ref:`sdst`, :ref:`ssrc` + s_mov_b64 :ref:`sdst`, :ref:`ssrc` + s_movreld_b32 :ref:`sdst`, :ref:`ssrc` + s_movreld_b64 :ref:`sdst`, :ref:`ssrc` + s_movrels_b32 :ref:`sdst`, :ref:`ssrc` + s_movrels_b64 :ref:`sdst`, :ref:`ssrc` + s_nand_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_nor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_not_b32 :ref:`sdst`, :ref:`ssrc` + s_not_b64 :ref:`sdst`, :ref:`ssrc` + s_or_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b32 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b64 
:ref:`sdst`, :ref:`ssrc` + s_rfe_b64 :ref:`ssrc` + s_setpc_b64 :ref:`ssrc` + s_sext_i32_i16 :ref:`sdst`, :ref:`ssrc` + s_sext_i32_i8 :ref:`sdst`, :ref:`ssrc` + s_swappc_b64 :ref:`sdst`, :ref:`ssrc` + s_wqm_b32 :ref:`sdst`, :ref:`ssrc` + s_wqm_b64 :ref:`sdst`, :ref:`ssrc` + s_xnor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_xor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + +SOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_absdiff_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_addc_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_ashr_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_ashr_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfe_u64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfm_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfm_b64 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_cbranch_g_fork :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl_b32 
:ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshl_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_max_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_max_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_sub_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_sub_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_subb_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + +SOPC +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_bitcmp0_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp0_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bitcmp1_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp1_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_cmp_eq_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_setvskip :ref:`ssrc0`, :ref:`ssrc1` + +SOPK +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_addk_i32 :ref:`sdst`, :ref:`imm16` + s_cbranch_i_fork :ref:`ssrc`, :ref:`label` + s_cmovk_i32 :ref:`sdst`, :ref:`imm16` + s_cmpk_eq_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_eq_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_u32 :ref:`ssrc`, :ref:`imm16` + s_getreg_b32 :ref:`sdst`, :ref:`hwreg` + s_movk_i32 :ref:`sdst`, :ref:`imm16` + s_mulk_i32 :ref:`sdst`, :ref:`imm16` + s_setreg_b32 :ref:`hwreg`, :ref:`ssrc` + s_setreg_imm32_b32 :ref:`hwreg`, :ref:`simm32` + +SOPP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_barrier + s_branch :ref:`label` + s_cbranch_cdbgsys :ref:`label` + s_cbranch_cdbgsys_and_user :ref:`label` + s_cbranch_cdbgsys_or_user :ref:`label` + s_cbranch_cdbguser :ref:`label` + s_cbranch_execnz :ref:`label` + s_cbranch_execz :ref:`label` + s_cbranch_scc0 :ref:`label` + s_cbranch_scc1 :ref:`label` + s_cbranch_vccnz :ref:`label` + s_cbranch_vccz :ref:`label` + s_decperflevel :ref:`imm16` + s_endpgm + s_icache_inv + s_incperflevel :ref:`imm16` + s_nop :ref:`imm16` + s_sendmsg :ref:`msg` + s_sendmsghalt :ref:`msg` + s_sethalt :ref:`imm16` + s_setkill :ref:`imm16` + s_setprio :ref:`imm16` + s_sleep :ref:`imm16` + s_trap :ref:`imm16` + s_ttracedata + s_waitcnt :ref:`waitcnt` + +VINTRP +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_interp_mov_f32 :ref:`vdst`, :ref:`param`::ref:`b32`, :ref:`attr`::ref:`b32` + v_interp_p1_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + v_interp_p2_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + +VOP1 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_bfrev_b32 :ref:`vdst`, :ref:`src` + v_ceil_f32 :ref:`vdst`, :ref:`src` + v_ceil_f64 :ref:`vdst`, :ref:`src` + v_clrexcp + v_cos_f32 :ref:`vdst`, :ref:`src` + v_cvt_f16_f32 :ref:`vdst`, :ref:`src` + v_cvt_f32_f16 :ref:`vdst`, :ref:`src` + v_cvt_f32_f64 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32 :ref:`vdst`, :ref:`src` + v_cvt_f32_u32 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte0 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte1 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte2 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte3 :ref:`vdst`, :ref:`src` + v_cvt_f64_f32 :ref:`vdst`, :ref:`src` + v_cvt_f64_i32 :ref:`vdst`, :ref:`src` + v_cvt_f64_u32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_i32_f64 :ref:`vdst`, :ref:`src` + v_cvt_off_f32_i4 :ref:`vdst`, :ref:`src` + v_cvt_rpi_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_u32_f32 :ref:`vdst`, :ref:`src` + v_cvt_u32_f64 :ref:`vdst`, :ref:`src` + v_exp_f32 :ref:`vdst`, :ref:`src` + v_exp_legacy_f32 :ref:`vdst`, :ref:`src` + v_ffbh_i32 :ref:`vdst`, :ref:`src` + v_ffbh_u32 :ref:`vdst`, :ref:`src` + v_ffbl_b32 :ref:`vdst`, :ref:`src` + v_floor_f32 :ref:`vdst`, :ref:`src` + v_floor_f64 :ref:`vdst`, :ref:`src` + v_fract_f32 :ref:`vdst`, :ref:`src` + v_fract_f64 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f32 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f64 :ref:`vdst`, :ref:`src` + v_frexp_mant_f32 :ref:`vdst`, :ref:`src` + v_frexp_mant_f64 :ref:`vdst`, :ref:`src` + v_log_clamp_f32 :ref:`vdst`, :ref:`src` + v_log_f32 :ref:`vdst`, :ref:`src` + v_log_legacy_f32 :ref:`vdst`, :ref:`src` + 
v_mov_b32 :ref:`vdst`, :ref:`src` + v_movreld_b32 :ref:`vdst`, :ref:`src` + v_movrels_b32 :ref:`vdst`, :ref:`vsrc` + v_movrelsd_b32 :ref:`vdst`, :ref:`vsrc` + v_nop + v_not_b32 :ref:`vdst`, :ref:`src` + v_rcp_clamp_f32 :ref:`vdst`, :ref:`src` + v_rcp_clamp_f64 :ref:`vdst`, :ref:`src` + v_rcp_f32 :ref:`vdst`, :ref:`src` + v_rcp_f64 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32 :ref:`vdst`, :ref:`src` + v_rcp_legacy_f32 :ref:`vdst`, :ref:`src` + v_readfirstlane_b32 :ref:`sdst`, :ref:`src` + v_rndne_f32 :ref:`vdst`, :ref:`src` + v_rndne_f64 :ref:`vdst`, :ref:`src` + v_rsq_clamp_f32 :ref:`vdst`, :ref:`src` + v_rsq_clamp_f64 :ref:`vdst`, :ref:`src` + v_rsq_f32 :ref:`vdst`, :ref:`src` + v_rsq_f64 :ref:`vdst`, :ref:`src` + v_rsq_legacy_f32 :ref:`vdst`, :ref:`src` + v_sin_f32 :ref:`vdst`, :ref:`src` + v_sqrt_f32 :ref:`vdst`, :ref:`src` + v_sqrt_f64 :ref:`vdst`, :ref:`src` + v_trunc_f32 :ref:`vdst`, :ref:`src` + v_trunc_f64 :ref:`vdst`, :ref:`src` + +VOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_i32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_addc_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_and_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_ashr_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`u32` + v_ashrrev_i32 :ref:`vdst`, :ref:`src0`::ref:`u32`, 
:ref:`vsrc1` + v_bcnt_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_bfm_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_cndmask_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_cvt_pk_i16_i32 :ref:`vdst`, :ref:`src0`::ref:`i32`, :ref:`vsrc1`::ref:`i32` + v_cvt_pk_u16_u32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1`::ref:`u32` + v_cvt_pkaccum_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`f32`, :ref:`vsrc1`::ref:`u32` + v_cvt_pknorm_i16_f32 :ref:`vdst`, :ref:`src0`::ref:`f32`, :ref:`vsrc1`::ref:`f32` + v_cvt_pknorm_u16_f32 :ref:`vdst`, :ref:`src0`::ref:`f32`, :ref:`vsrc1`::ref:`f32` + v_cvt_pkrtz_f16_f32 :ref:`vdst`, :ref:`src0`::ref:`f32`, :ref:`vsrc1`::ref:`f32` + v_ldexp_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`i32` + v_lshl_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`u32` + v_lshlrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshr_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`u32` + v_lshrrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_mac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_madak_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madmk_f32 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_max_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mbcnt_hi_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mbcnt_lo_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_i32_i24 :ref:`vdst`, 
:ref:`src0`, :ref:`vsrc1` + v_mul_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_readlane_b32 :ref:`sdst`, :ref:`src0`, :ref:`ssrc1` + v_sub_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_i32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_subb_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subbrev_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subrev_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_i32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_writelane_b32 :ref:`vdst`, :ref:`ssrc0`, :ref:`ssrc1` + v_xor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + +VOP3 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_i32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_addc_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` + v_alignbit_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + 
v_alignbyte_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_and_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_ashr_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32` + v_ashr_i64 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32` + v_ashrrev_i32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_bcnt_u32_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfe_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_bfe_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfi_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfm_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfrev_b32_e64 :ref:`vdst`, :ref:`src` + v_ceil_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_clrexcp_e64 + v_cmp_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i64_e64 :ref:`sdst`, 
:ref:`src0`, :ref:`src1` + v_cmp_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_ngt_f32_e64 :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmp_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` + v_cmps_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmps_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` + v_cmpsx_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_tru_f64_e64 :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpsx_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_le_f64_e64 :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_o_f32_e64 :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cmpx_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` + v_cndmask_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` + v_cos_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubeid_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubema_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubesc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubetc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_f32_f16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte0_e64 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte1_e64 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte2_e64 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte3_e64 :ref:`vdst`, :ref:`src` + v_cvt_f64_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f64_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` 
:ref:`omod` + v_cvt_flr_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_off_f32_i4_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_pk_i16_i32_e64 :ref:`vdst`, :ref:`src0`::ref:`i32`, :ref:`src1`::ref:`i32` + v_cvt_pk_u16_u32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1`::ref:`u32` + v_cvt_pk_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`f32`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_cvt_pkaccum_u8_f32_e64 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32` + v_cvt_pknorm_i16_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pknorm_u16_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pkrtz_f16_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_rpi_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_div_fixup_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_scale_f32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_div_scale_f64 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_exp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ffbh_i32_e64 :ref:`vdst`, :ref:`src` + v_ffbh_u32_e64 :ref:`vdst`, :ref:`src` + 
v_ffbl_b32_e64 :ref:`vdst`, :ref:`src` + v_floor_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_exp_i32_f32_e64 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_mant_f32_e64 :ref:`vdst`, :ref:`src` + v_frexp_mant_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ldexp_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_ldexp_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_lerp_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`b32`, :ref:`src1`::ref:`b32`, :ref:`src2`::ref:`b32` + v_log_clamp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_lshl_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32` + v_lshl_b64 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32` + v_lshlrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshr_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32` + v_lshr_b64 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32` + v_lshrrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_mac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mac_legacy_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` + v_mad_i64_i32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i64` + v_mad_legacy_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` + v_mad_u64_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u64` + v_max3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_max3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_legacy_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_hi_u32_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_lo_u32_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_med3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_med3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_med3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_min3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_i32_e64 
:ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_legacy_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mov_b32_e64 :ref:`vdst`, :ref:`src` + v_movreld_b32_e64 :ref:`vdst`, :ref:`src` + v_movrels_b32_e64 :ref:`vdst`, :ref:`vsrc` + v_movrelsd_b32_e64 :ref:`vdst`, :ref:`vsrc` + v_mqsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` + v_mqsad_u32_u8 :ref:`vdst`::ref:`u32x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`vsrc2`::ref:`u32x4` + v_msad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` + v_mul_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_hi_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_legacy_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_lo_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_lo_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mullit_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_nop_e64 + v_not_b32_e64 :ref:`vdst`, :ref:`src` + v_or_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_qsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` + v_rcp_clamp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_clamp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f32_e64 :ref:`vdst`, 
:ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_iflag_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_clamp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_clamp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sad_hi_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` + v_sad_u16 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` + v_sad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_sad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` + v_sin_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_i32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_subb_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` + v_subbrev_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` + v_subrev_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_i32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_trig_preop_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`u32` :ref:`clamp` :ref:`omod` + v_trunc_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_xor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOPC +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_cmp_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i32 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i64 :ref:`vcc`, :ref:`src0`, 
:ref:`vsrc1` + v_cmp_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmps_u_f32 :ref:`vcc`, :ref:`src0`, 
:ref:`vsrc1` + v_cmps_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpsx_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_eq_f32 :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u64 :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + +.. |---| unicode:: U+02014 .. em dash + +.. 
toctree:: + :hidden: + + gfx7_attr + gfx7_dst + gfx7_hwreg + gfx7_imm16 + gfx7_imm16_1 + gfx7_imm16_2 + gfx7_label + gfx7_m + gfx7_msg + gfx7_opt + gfx7_param + gfx7_sbase + gfx7_sbase_1 + gfx7_sdst + gfx7_sdst_1 + gfx7_sdst_2 + gfx7_sdst_3 + gfx7_sdst_4 + gfx7_sdst_5 + gfx7_sdst_6 + gfx7_sdst_7 + gfx7_simm32 + gfx7_simm32_1 + gfx7_soffset + gfx7_soffset_1 + gfx7_src + gfx7_src_1 + gfx7_src_10 + gfx7_src_2 + gfx7_src_3 + gfx7_src_4 + gfx7_src_5 + gfx7_src_6 + gfx7_src_7 + gfx7_src_8 + gfx7_src_9 + gfx7_srsrc + gfx7_srsrc_1 + gfx7_ssamp + gfx7_ssrc + gfx7_ssrc_1 + gfx7_ssrc_10 + gfx7_ssrc_2 + gfx7_ssrc_3 + gfx7_ssrc_4 + gfx7_ssrc_5 + gfx7_ssrc_6 + gfx7_ssrc_7 + gfx7_ssrc_8 + gfx7_ssrc_9 + gfx7_tgt + gfx7_type_deviation + gfx7_vaddr + gfx7_vaddr_1 + gfx7_vaddr_2 + gfx7_vaddr_3 + gfx7_vcc + gfx7_vdata + gfx7_vdata0 + gfx7_vdata0_1 + gfx7_vdata1 + gfx7_vdata1_1 + gfx7_vdata_1 + gfx7_vdata_2 + gfx7_vdata_3 + gfx7_vdata_4 + gfx7_vdata_5 + gfx7_vdata_6 + gfx7_vdata_7 + gfx7_vdata_8 + gfx7_vdata_9 + gfx7_vdst + gfx7_vdst_1 + gfx7_vdst_10 + gfx7_vdst_11 + gfx7_vdst_12 + gfx7_vdst_2 + gfx7_vdst_3 + gfx7_vdst_4 + gfx7_vdst_5 + gfx7_vdst_6 + gfx7_vdst_7 + gfx7_vdst_8 + gfx7_vdst_9 + gfx7_vsrc + gfx7_vsrc_1 + gfx7_vsrc_2 + gfx7_vsrc_3 + gfx7_waitcnt diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,1874 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +==================================================================================== +Syntax of GFX8 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of GFX8 instructions. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +DS +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + ds_add_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u64 
:ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_and_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_append :ref:`vdst` :ref:`offset` :ref:`gds` + ds_bpermute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_cmpst_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_condxchg32_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_consume :ref:`vdst` :ref:`offset` :ref:`gds` + ds_dec_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_barrier :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_init :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_br :ref:`vdata` 
:ref:`offset` :ref:`gds` + ds_gws_sema_p :ref:`offset` :ref:`gds` + ds_gws_sema_release_all :ref:`offset` :ref:`gds` + ds_gws_sema_v :ref:`offset` :ref:`gds` + ds_inc_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i64 
:ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_mskor_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_nop + ds_or_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_or_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_ordered_count :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_permute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_read2_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` 
:ref:`offset1` :ref:`gds` + ds_read2_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read_b128 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b32 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b64 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b96 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_swizzle_b32 :ref:`vdst`, :ref:`vaddr` :ref:`pattern` :ref:`gds` + ds_wrap_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_write2_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` 
:ref:`offset1` :ref:`gds` + ds_write2st64_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write_b128 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b96 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_write_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_wrxchg2_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_xor_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + +EXP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + exp :ref:`tgt`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vsrc2`, :ref:`vsrc3` :ref:`done` :ref:`compr` :ref:`vm` + +FLAT +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + flat_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap 
:ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2` :ref:`glc` :ref:`slc` + flat_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`glc` :ref:`slc` + flat_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`glc` :ref:`slc` + flat_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`glc` :ref:`slc` + flat_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`glc` :ref:`slc` + flat_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`glc` :ref:`slc` + flat_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`glc` :ref:`slc` + flat_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`glc` :ref:`slc` + flat_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` 
:ref:`glc` :ref:`slc` + flat_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`glc` :ref:`slc` + flat_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_load_dword :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_dwordx2 :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_dwordx3 :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_dwordx4 :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_sbyte :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_sshort :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_ubyte :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_load_ushort :ref:`vdst`, :ref:`vaddr` :ref:`glc` :ref:`slc` + flat_store_byte :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dword :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dwordx2 :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dwordx3 :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_dwordx4 :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + flat_store_short :ref:`vaddr`, :ref:`vdata` :ref:`glc` :ref:`slc` + +MIMG +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + image_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_cmpswap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_dec :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_inc :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_smax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_smin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, 
:ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_umax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_umin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_gather4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b_cl_o 
:ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_l_o 
:ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_get_lod :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_get_resinfo :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_load_mip :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_load_mip_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_mip_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b :ref:`vdst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd_o :ref:`vdst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd :ref:`vdst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_store :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` :ref:`d16` + image_store_mip :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` :ref:`d16` + image_store_mip_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + image_store_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`lwe` :ref:`da` + +MTBUF +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + tbuffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + +MUBUF +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + buffer_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_add_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap :ref:`vdata`::ref:`dst`::ref:`b32x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap_x2 :ref:`vdata`::ref:`dst`::ref:`b64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_dec :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + 
buffer_atomic_dec_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_store_byte :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dword :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx2 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx3 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + 
buffer_store_dwordx4 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_lds_dword :ref:`srsrc`, :ref:`soffset` :ref:`offset12` :ref:`lds` :ref:`glc` :ref:`slc` + buffer_store_short :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_wbinvl1 + buffer_wbinvl1_vol + +SMEM +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_atc_probe :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atc_probe_buffer :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_buffer_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_dcache_inv + s_dcache_inv_vol + s_dcache_wb + s_dcache_wb_vol + s_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_memrealtime :ref:`sdst`::ref:`b64` + s_memtime :ref:`sdst`::ref:`b64` + s_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + 
s_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + +SOP1 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_abs_i32 :ref:`sdst`, :ref:`ssrc` + s_and_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_bitset1_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset1_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_brev_b32 :ref:`sdst`, :ref:`ssrc` + s_brev_b64 :ref:`sdst`, :ref:`ssrc` + s_cbranch_join :ref:`ssrc` + s_cmov_b32 :ref:`sdst`, :ref:`ssrc` + s_cmov_b64 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_i64 :ref:`sdst`, :ref:`ssrc` + s_getpc_b64 :ref:`sdst` + s_mov_b32 :ref:`sdst`, :ref:`ssrc` + s_mov_b64 :ref:`sdst`, :ref:`ssrc` + s_movreld_b32 :ref:`sdst`, :ref:`ssrc` + s_movreld_b64 :ref:`sdst`, :ref:`ssrc` + s_movrels_b32 :ref:`sdst`, :ref:`ssrc` + s_movrels_b64 :ref:`sdst`, :ref:`ssrc` + s_nand_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_nor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_not_b32 :ref:`sdst`, :ref:`ssrc` + 
s_not_b64 :ref:`sdst`, :ref:`ssrc` + s_or_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b32 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b64 :ref:`sdst`, :ref:`ssrc` + s_rfe_b64 :ref:`ssrc` + s_set_gpr_idx_idx :ref:`ssrc` + s_setpc_b64 :ref:`ssrc` + s_sext_i32_i16 :ref:`sdst`, :ref:`ssrc` + s_sext_i32_i8 :ref:`sdst`, :ref:`ssrc` + s_swappc_b64 :ref:`sdst`, :ref:`ssrc` + s_wqm_b32 :ref:`sdst`, :ref:`ssrc` + s_wqm_b64 :ref:`sdst`, :ref:`ssrc` + s_xnor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_xor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + +SOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_absdiff_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_addc_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_ashr_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_ashr_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfe_u64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfm_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfm_b64 :ref:`sdst`, 
:ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_cbranch_g_fork :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshl_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_max_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_max_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_rfe_restore_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_sub_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_sub_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_subb_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + +SOPC +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_bitcmp0_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp0_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bitcmp1_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp1_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_cmp_eq_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_set_gpr_idx_on :ref:`ssrc`, :ref:`imask` + s_setvskip :ref:`ssrc0`, :ref:`ssrc1` + +SOPK +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_addk_i32 :ref:`sdst`, :ref:`imm16` + s_cbranch_i_fork :ref:`ssrc`, :ref:`label` + s_cmovk_i32 :ref:`sdst`, :ref:`imm16` + s_cmpk_eq_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_eq_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_u32 :ref:`ssrc`, :ref:`imm16` + s_getreg_b32 :ref:`sdst`, :ref:`hwreg` + s_movk_i32 :ref:`sdst`, :ref:`imm16` + s_mulk_i32 :ref:`sdst`, :ref:`imm16` + s_setreg_b32 :ref:`hwreg`, :ref:`ssrc` + s_setreg_imm32_b32 :ref:`hwreg`, :ref:`simm32` + +SOPP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_barrier + s_branch :ref:`label` + s_cbranch_cdbgsys :ref:`label` + s_cbranch_cdbgsys_and_user :ref:`label` + s_cbranch_cdbgsys_or_user :ref:`label` + s_cbranch_cdbguser :ref:`label` + s_cbranch_execnz :ref:`label` + s_cbranch_execz :ref:`label` + s_cbranch_scc0 :ref:`label` + s_cbranch_scc1 :ref:`label` + s_cbranch_vccnz :ref:`label` + s_cbranch_vccz :ref:`label` + s_decperflevel :ref:`imm16` + s_endpgm + s_endpgm_saved + s_icache_inv + s_incperflevel :ref:`imm16` + s_nop :ref:`imm16` + s_sendmsg :ref:`msg` + s_sendmsghalt :ref:`msg` + s_set_gpr_idx_mode :ref:`imask` + s_set_gpr_idx_off + s_sethalt :ref:`imm16` + s_setkill :ref:`imm16` + s_setprio :ref:`imm16` + s_sleep :ref:`imm16` + s_trap :ref:`imm16` + s_ttracedata + s_waitcnt :ref:`waitcnt` + s_wakeup + +VINTRP +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_interp_mov_f32 :ref:`vdst`, :ref:`param`::ref:`b32`, :ref:`attr`::ref:`b32` + v_interp_p1_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + v_interp_p2_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + +VOP1 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_bfrev_b32 :ref:`vdst`, :ref:`src` + v_bfrev_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_bfrev_b32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f16 :ref:`vdst`, :ref:`src` + v_ceil_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f32 :ref:`vdst`, :ref:`src` + v_ceil_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f64 :ref:`vdst`, :ref:`src` + v_clrexcp + v_cos_f16 :ref:`vdst`, :ref:`src` + v_cos_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cos_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cos_f32 :ref:`vdst`, :ref:`src` + v_cos_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + 
v_cos_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_f32 :ref:`vdst`, :ref:`src` + v_cvt_f16_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_i16 :ref:`vdst`, :ref:`src` + v_cvt_f16_i16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_i16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_u16 :ref:`vdst`, :ref:`src` + v_cvt_f16_u16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_u16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f16 :ref:`vdst`, :ref:`src` + v_cvt_f32_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f64 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_i32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_u32 :ref:`vdst`, :ref:`src` + v_cvt_f32_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_u32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte0 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte0_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte0_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` 
:ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte1 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte1_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte1_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte2 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte2_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte2_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte3 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte3_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte3_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f64_f32 :ref:`vdst`, :ref:`src` + v_cvt_f64_i32 :ref:`vdst`, :ref:`src` + v_cvt_f64_u32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_flr_i32_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i16_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i32_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f64 :ref:`vdst`, :ref:`src` + v_cvt_off_f32_i4 :ref:`vdst`, :ref:`src` + v_cvt_off_f32_i4_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` 
:ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_off_f32_i4_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_rpi_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_rpi_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_rpi_i32_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u16_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f32 :ref:`vdst`, :ref:`src` + v_cvt_u32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u32_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f64 :ref:`vdst`, :ref:`src` + v_exp_f16 :ref:`vdst`, :ref:`src` + v_exp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_f32 :ref:`vdst`, :ref:`src` + v_exp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_legacy_f32 :ref:`vdst`, :ref:`src` + v_exp_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_legacy_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_i32 :ref:`vdst`, :ref:`src` + v_ffbh_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_i32_sdwa :ref:`vdst`, 
:ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_u32 :ref:`vdst`, :ref:`src` + v_ffbh_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_u32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbl_b32 :ref:`vdst`, :ref:`src` + v_ffbl_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbl_b32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f16 :ref:`vdst`, :ref:`src` + v_floor_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f32 :ref:`vdst`, :ref:`src` + v_floor_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f64 :ref:`vdst`, :ref:`src` + v_fract_f16 :ref:`vdst`, :ref:`src` + v_fract_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f32 :ref:`vdst`, :ref:`src` + v_fract_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f64 :ref:`vdst`, :ref:`src` + v_frexp_exp_i16_f16 :ref:`vdst`, :ref:`src` + v_frexp_exp_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i16_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` 
:ref:`src0_sel` + v_frexp_exp_i32_f32 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i32_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f64 :ref:`vdst`, :ref:`src` + v_frexp_mant_f16 :ref:`vdst`, :ref:`src` + v_frexp_mant_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f32 :ref:`vdst`, :ref:`src` + v_frexp_mant_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f64 :ref:`vdst`, :ref:`src` + v_log_f16 :ref:`vdst`, :ref:`src` + v_log_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_f32 :ref:`vdst`, :ref:`src` + v_log_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_legacy_f32 :ref:`vdst`, :ref:`src` + v_log_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_legacy_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_mov_b32 :ref:`vdst`, :ref:`src` + v_mov_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mov_b32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` 
:ref:`dst_unused` :ref:`src0_sel` + v_nop + v_not_b32 :ref:`vdst`, :ref:`src` + v_not_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_not_b32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f16 :ref:`vdst`, :ref:`src` + v_rcp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f32 :ref:`vdst`, :ref:`src` + v_rcp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f64 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_iflag_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_readfirstlane_b32 :ref:`sdst`, :ref:`src` + v_rndne_f16 :ref:`vdst`, :ref:`src` + v_rndne_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f32 :ref:`vdst`, :ref:`src` + v_rndne_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f64 :ref:`vdst`, :ref:`src` + v_rsq_f16 :ref:`vdst`, :ref:`src` + v_rsq_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` 
:ref:`dst_unused` :ref:`src0_sel` + v_rsq_f32 :ref:`vdst`, :ref:`src` + v_rsq_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f64 :ref:`vdst`, :ref:`src` + v_sin_f16 :ref:`vdst`, :ref:`src` + v_sin_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f32 :ref:`vdst`, :ref:`src` + v_sin_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f16 :ref:`vdst`, :ref:`src` + v_sqrt_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f32 :ref:`vdst`, :ref:`src` + v_sqrt_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f64 :ref:`vdst`, :ref:`src` + v_trunc_f16 :ref:`vdst`, :ref:`src` + v_trunc_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f16_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_trunc_f32 :ref:`vdst`, :ref:`src` + v_trunc_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f32_sdwa :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + 
v_trunc_f64 :ref:`vdst`, :ref:`src` + +VOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u32 :ref:`vdst`, :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1` + v_add_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_addc_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_addc_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_addc_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_and_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_and_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_and_b32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_ashrrev_i16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`u16`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_ashrrev_i32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`u32`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_cndmask_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_cndmask_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cndmask_b32_sdwa :ref:`vdst`, 
:ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`, :ref:`vcc` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ldexp_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`i16` + v_ldexp_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`i16` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ldexp_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`::ref:`i16` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshlrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`u16`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshlrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`u32`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshrrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`u16`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshrrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`u32`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_f16_dpp 
:ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mac_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mac_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_madak_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madak_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madmk_f16 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_madmk_f32 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i32_dpp :ref:`vdst`, :ref:`vsrc0`, 
:ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_i16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` 
:ref:`bank_mask` :ref:`bound_ctrl` + v_min_i32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_hi_i32_i24_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` 
:ref:`bound_ctrl` + v_mul_hi_u32_u24_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_i32_i24_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_legacy_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_lo_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_lo_u16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_u32_u24_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_or_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_or_b32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` 
:ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_sub_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subb_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subb_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subb_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subbrev_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subbrev_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subbrev_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` 
:ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_f16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_f32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u16_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_xor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xor_b32_sdwa :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + +VOP3 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_addc_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_alignbit_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_alignbyte_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_and_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_ashrrev_i16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_ashrrev_i32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_ashrrev_i64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_bcnt_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfe_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_bfe_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfi_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + 
v_bfm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfrev_b32_e64 :ref:`vdst`, :ref:`src` + v_ceil_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_clrexcp_e64 + v_cmp_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f64_e64 :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f32_e64 :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + 
v_cmp_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + 
v_cmpx_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + 
v_cmpx_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u16_e64 :ref:`sdst`, 
:ref:`src0`, :ref:`src1` + v_cmpx_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cndmask_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` + v_cos_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cos_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubeid_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubema_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubesc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubetc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_i16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f16_u16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_i32_e64 
:ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte0_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte1_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte2_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte3_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f64_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_flr_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_off_f32_i4_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_pk_i16_i32 :ref:`vdst`, :ref:`src0`::ref:`i32`, :ref:`src1`::ref:`i32` + v_cvt_pk_u16_u32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1`::ref:`u32` + v_cvt_pk_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_cvt_pkaccum_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32` + v_cvt_pknorm_i16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pknorm_u16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pkrtz_f16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_rpi_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_div_fixup_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_scale_f32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_div_scale_f64 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_exp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ffbh_i32_e64 :ref:`vdst`, :ref:`src` + v_ffbh_u32_e64 :ref:`vdst`, :ref:`src` + v_ffbl_b32_e64 :ref:`vdst`, :ref:`src` + v_floor_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_exp_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_mant_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` 
:ref:`omod` + v_frexp_mant_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_interp_mov_f32_e64 :ref:`vdst`, :ref:`param`::ref:`b32`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_interp_p1_f32_e64 :ref:`vdst`, :ref:`vsrc`::ref:`m`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_interp_p1ll_f16 :ref:`vdst`::ref:`f32`, :ref:`vsrc`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32` :ref:`high` :ref:`clamp` :ref:`omod` + v_interp_p1lv_f16 :ref:`vdst`::ref:`f32`, :ref:`vsrc0`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32`, :ref:`vsrc2`::ref:`m`::ref:`f16x2` :ref:`high` :ref:`clamp` :ref:`omod` + v_interp_p2_f16 :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32`, :ref:`vsrc2`::ref:`m`::ref:`f32` :ref:`high` :ref:`clamp` + v_interp_p2_f32_e64 :ref:`vdst`, :ref:`vsrc`::ref:`m`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_ldexp_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i16` :ref:`clamp` :ref:`omod` + v_ldexp_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_ldexp_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_lerp_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`b32`, :ref:`src1`::ref:`b32`, :ref:`src2`::ref:`b32` + v_log_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_lshlrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshlrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshlrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshrrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + 
v_mac_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_mad_i64_i32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i64` :ref:`clamp` + v_mad_legacy_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mad_u64_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u64` :ref:`clamp` + v_max3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_max3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_hi_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_lo_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_med3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_med3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_med3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_min3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mov_b32_e64 :ref:`vdst`, :ref:`src` + v_mqsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_mqsad_u32_u8 :ref:`vdst`::ref:`u32x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`vsrc2`::ref:`u32x4` :ref:`clamp` + v_msad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mul_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_hi_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_mul_legacy_f32_e64 :ref:`vdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_lo_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_lo_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_nop_e64 + v_not_b32_e64 :ref:`vdst`, :ref:`src` + v_or_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_perm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_qsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_rcp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_iflag_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_readlane_b32 :ref:`sdst`, :ref:`src0`, :ref:`ssrc1` + v_rndne_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sad_hi_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u16 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_sad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sin_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sin_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f16_e64 :ref:`vdst`, 
:ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subb_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subbrev_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subrev_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_trig_preop_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`u32` :ref:`clamp` :ref:`omod` + v_trunc_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_writelane_b32 :ref:`vdst`, :ref:`ssrc0`, :ref:`ssrc1` + v_xor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOPC +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_cmp_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + 
v_cmp_eq_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f32 :ref:`vcc`, :ref:`src0`, 
:ref:`vsrc1` + v_cmp_le_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i32_sdwa :ref:`vcc`, 
:ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, 
:ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, 
:ref:`vsrc1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + 
v_cmpx_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + 
v_cmpx_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16 :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f64 :ref:`vcc`, :ref:`src0`, 
:ref:`vsrc1` + v_cmpx_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + 
v_cmpx_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f32_sdwa :ref:`vcc`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`clamp` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + +.. |---| unicode:: U+02014 .. em dash + +.. toctree:: + :hidden: + + gfx8_attr + gfx8_dst + gfx8_hwreg + gfx8_imask + gfx8_imm16 + gfx8_imm16_1 + gfx8_imm16_2 + gfx8_label + gfx8_m + gfx8_m_1 + gfx8_msg + gfx8_opt + gfx8_param + gfx8_probe + gfx8_sbase + gfx8_sbase_1 + gfx8_sdata + gfx8_sdata_1 + gfx8_sdata_2 + gfx8_sdst + gfx8_sdst_1 + gfx8_sdst_2 + gfx8_sdst_3 + gfx8_sdst_4 + gfx8_sdst_5 + gfx8_sdst_6 + gfx8_sdst_7 + gfx8_simm32 + gfx8_simm32_1 + gfx8_simm32_2 + gfx8_soffset + gfx8_soffset_1 + gfx8_soffset_2 + gfx8_src + gfx8_src_1 + gfx8_src_10 + gfx8_src_2 + gfx8_src_3 + gfx8_src_4 + gfx8_src_5 + gfx8_src_6 + gfx8_src_7 + gfx8_src_8 + gfx8_src_9 + gfx8_srsrc + gfx8_srsrc_1 + gfx8_ssamp + gfx8_ssrc + gfx8_ssrc_1 + gfx8_ssrc_2 + gfx8_ssrc_3 + gfx8_ssrc_4 + gfx8_ssrc_5 + gfx8_ssrc_6 + gfx8_ssrc_7 + gfx8_ssrc_8 + gfx8_tgt + gfx8_type_deviation + gfx8_vaddr + gfx8_vaddr_1 + gfx8_vaddr_2 + gfx8_vaddr_3 + gfx8_vcc + gfx8_vdata + gfx8_vdata0 + gfx8_vdata0_1 + gfx8_vdata1 + gfx8_vdata1_1 + gfx8_vdata_1 + gfx8_vdata_10 + gfx8_vdata_11 + gfx8_vdata_12 + gfx8_vdata_13 + gfx8_vdata_14 + gfx8_vdata_2 + gfx8_vdata_3 + gfx8_vdata_4 + gfx8_vdata_5 + gfx8_vdata_6 + gfx8_vdata_7 
+ gfx8_vdata_8 + gfx8_vdata_9 + gfx8_vdst + gfx8_vdst_1 + gfx8_vdst_10 + gfx8_vdst_11 + gfx8_vdst_12 + gfx8_vdst_13 + gfx8_vdst_14 + gfx8_vdst_15 + gfx8_vdst_16 + gfx8_vdst_17 + gfx8_vdst_2 + gfx8_vdst_3 + gfx8_vdst_4 + gfx8_vdst_5 + gfx8_vdst_6 + gfx8_vdst_7 + gfx8_vdst_8 + gfx8_vdst_9 + gfx8_vsrc + gfx8_vsrc_1 + gfx8_vsrc_2 + gfx8_vsrc_3 + gfx8_waitcnt diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX900.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX900.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX900.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX900.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,56 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of gfx900, gfx902 and gfx909 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of *instructions specific to gfx900, gfx902 and gfx909*. + +For a description of other gfx900, gfx902 and gfx909 instructions see :doc:`Syntax of Core GFX9 Instructions`. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +VOP3P +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_mad_mix_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_mad_mixhi_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_mad_mixlo_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + +.. |---| unicode:: U+02014 .. em dash + +.. toctree:: + :hidden: + + gfx900_fx_operand + gfx900_m + gfx900_src + gfx900_src_1 + gfx900_vdst diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX904.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX904.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX904.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX904.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,56 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of gfx904 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of *instructions specific to gfx904*. + +For a description of other gfx904 instructions see :doc:`Syntax of Core GFX9 Instructions`. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +VOP3P +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_fma_mix_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixhi_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` 
:ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixlo_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + +.. |---| unicode:: U+02014 .. em dash + +.. toctree:: + :hidden: + + gfx904_fx_operand + gfx904_m + gfx904_src + gfx904_src_1 + gfx904_vdst diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX906.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX906.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX906.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX906.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,92 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of gfx906 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of *instructions specific to gfx906*. + +For a description of other gfx906 instructions see :doc:`Syntax of Core GFX9 Instructions`. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +VOP2 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_fmac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xnor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xnor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xnor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + +VOP3 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_fmac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_xnor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOP3P +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_dot2_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`src1`::ref:`f16x2`, :ref:`src2`::ref:`f32` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_dot2_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`src1`::ref:`i16x2`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot2_u32_u16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot4_i32_i8 :ref:`vdst`, 
:ref:`src0`::ref:`i8x4`, :ref:`src1`::ref:`i8x4`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot4_u32_u8 :ref:`vdst`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot8_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`src1`::ref:`i4x8`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot8_u32_u4 :ref:`vdst`, :ref:`src0`::ref:`u4x8`, :ref:`src1`::ref:`u4x8`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_fma_mix_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixhi_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixlo_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + +.. |---| unicode:: U+02014 .. em dash + +.. toctree:: + :hidden: + + gfx906_fx_operand + gfx906_m + gfx906_m_1 + gfx906_src + gfx906_src_1 + gfx906_src_2 + gfx906_src_3 + gfx906_src_4 + gfx906_type_deviation + gfx906_vdst + gfx906_vsrc diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX908.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX908.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX908.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX908.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,164 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +==================================================================================== +Syntax of gfx908 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of *instructions specific to gfx908*. + +For a description of other gfx908 instructions see :doc:`Syntax of Core GFX9 Instructions`. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +FLAT +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + global_atomic_add_f32 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`slc` + global_atomic_pk_add_f16 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`slc` + +MUBUF +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + buffer_atomic_add_f32 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`slc` + buffer_atomic_pk_add_f16 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`slc` + +VOP2 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_dot2c_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` + v_dot2c_f32_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot2c_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`vsrc1`::ref:`i16x2` + v_dot2c_i32_i16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i16x2`, :ref:`vsrc1`::ref:`i16x2` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot4c_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` + v_dot4c_i32_i8_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot8c_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`vsrc1`::ref:`i4x8` + v_dot8c_i32_i4_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i4x8`, :ref:`vsrc1`::ref:`i4x8` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fmac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_pk_fmac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xnor_b32 
:ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xnor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xnor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + +VOP3 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_fmac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_xnor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOP3P +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_accvgpr_read_b32 :ref:`vdst`, :ref:`vsrc` + v_accvgpr_write_b32 :ref:`vdst`, :ref:`src` + v_dot2_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`src1`::ref:`f16x2`, :ref:`src2`::ref:`f32` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_dot2_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`src1`::ref:`i16x2`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot2_u32_u16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot4_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`src1`::ref:`i8x4`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot4_u32_u8 :ref:`vdst`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot8_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`src1`::ref:`i4x8`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot8_u32_u4 :ref:`vdst`, :ref:`src0`::ref:`u4x8`, :ref:`src1`::ref:`u4x8`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_fma_mix_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixhi_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, 
:ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixlo_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_mfma_f32_16x16x16f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`vsrc2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x1f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`vsrc2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x2bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`vsrc2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`vsrc2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`vsrc2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x8bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`vsrc2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x1f32 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`vsrc2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x2bf16 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`vsrc2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x2f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`vsrc2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`vsrc2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4f16 
:ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`vsrc2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`vsrc2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x1f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`vsrc2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x2bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`vsrc2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`vsrc2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_16x16x16i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`vsrc2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_16x16x4i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`vsrc2`::ref:`i32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x4i8 :ref:`vdst`::ref:`i32x32`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`vsrc2`::ref:`i32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x8i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`vsrc2`::ref:`i32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_4x4x4i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`vsrc2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + +.. |---| unicode:: U+02014 .. em dash + +.. 
toctree:: + :hidden: + + gfx908_dst + gfx908_fx_operand + gfx908_m + gfx908_m_1 + gfx908_opt + gfx908_saddr + gfx908_soffset + gfx908_src + gfx908_src_1 + gfx908_src_2 + gfx908_src_3 + gfx908_src_4 + gfx908_src_5 + gfx908_srsrc + gfx908_type_deviation + gfx908_vaddr + gfx908_vaddr_1 + gfx908_vdata + gfx908_vdata_1 + gfx908_vdst + gfx908_vdst_1 + gfx908_vdst_2 + gfx908_vdst_3 + gfx908_vdst_4 + gfx908_vdst_5 + gfx908_vsrc + gfx908_vsrc_1 + gfx908_vsrc_2 + gfx908_vsrc_3 + gfx908_vsrc_4 + gfx908_vsrc_5 + gfx908_vsrc_6 diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX90a.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX90a.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX90a.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX90a.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,2103 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of gfx90a Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of gfx90a instructions. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +DS +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + ds_add_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_append :ref:`vdst` :ref:`offset` :ref:`gds` + ds_bpermute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_cmpst_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` 
:ref:`offset` :ref:`gds` + ds_cmpst_f64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_condxchg32_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_consume :ref:`vdst` :ref:`offset` :ref:`gds` + ds_dec_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_barrier :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_init :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_br :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_p :ref:`offset` :ref:`gds` + ds_gws_sema_release_all :ref:`offset` :ref:`gds` + ds_gws_sema_v :ref:`offset` :ref:`gds` + ds_inc_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i32 :ref:`vdst`, :ref:`vaddr`, 
:ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_mskor_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_nop + ds_or_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_permute_b32 :ref:`vdst`, 
:ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_read2_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read_addtid_b32 :ref:`vdst` :ref:`offset` :ref:`gds` + ds_read_b128 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b32 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b64 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b96 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u64 :ref:`vaddr`, :ref:`vdata` 
:ref:`offset` :ref:`gds` + ds_swizzle_b32 :ref:`vdst`, :ref:`vaddr` :ref:`pattern` :ref:`gds` + ds_wrap_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_write2_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write_addtid_b32 :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b128 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b96 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg2_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b64 
:ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + +FLAT +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + flat_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_add_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_dec_x2 
:ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_max_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_min_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` 
:ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dword :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dwordx2 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dwordx3 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dwordx4 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sbyte :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_short_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_short_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sshort :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ubyte :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ushort :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_byte :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dword :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dwordx2 :ref:`vaddr`, 
:ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dwordx3 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dwordx4 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_short :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_short_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + global_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_add_f32 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_add_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` 
:ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_max_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_min_f64 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_pk_add_f16 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, 
:ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ushort :ref:`vdst`, :ref:`vaddr`, 
:ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dword :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` 
:ref:`slc` + scratch_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dword :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + +MIMG +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + image_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_cmpswap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_dec :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_inc :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_smax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_smin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` 
:ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_umax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_umin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_get_resinfo :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_load :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_load_mip :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_load_mip_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_load_mip_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_load_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_load_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_sample :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_store :ref:`vdata`, :ref:`vaddr`, 
:ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_store_mip :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_store_mip_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_store_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + +MTBUF +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + tbuffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` 
:ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + +MUBUF +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + buffer_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_add_f32 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_add_f64 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_add_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap :ref:`vdata`::ref:`dst`::ref:`b32x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap_x2 
:ref:`vdata`::ref:`dst`::ref:`b64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_dec :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_dec_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_max_f64 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_min_f64 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_pk_add_f16 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_invl2 + buffer_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + 
buffer_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_hi_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` 
:ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_store_byte :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_byte_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dword :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx2 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx3 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx4 
:ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_hi_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_lds_dword :ref:`srsrc`, :ref:`soffset` :ref:`offset12` :ref:`lds` + buffer_store_short :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_short_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_wbinvl1 + buffer_wbinvl1_vol + buffer_wbl2 + +SMEM +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_atc_probe :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atc_probe_buffer :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` 
+ s_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` 
:ref:`glc` + s_buffer_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` 
:ref:`glc` + s_buffer_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_dcache_discard :ref:`sbase`, :ref:`soffset` + s_dcache_discard_x2 :ref:`sbase`, :ref:`soffset` + s_dcache_inv + s_dcache_inv_vol + s_dcache_wb + s_dcache_wb_vol + s_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_memrealtime :ref:`sdst`::ref:`b64` + s_memtime :ref:`sdst`::ref:`b64` + s_scratch_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + +SOP1 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_abs_i32 :ref:`sdst`, :ref:`ssrc` + s_and_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bitreplicate_b64_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_bitset1_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset1_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_brev_b32 :ref:`sdst`, :ref:`ssrc` + s_brev_b64 :ref:`sdst`, :ref:`ssrc` + s_cbranch_join :ref:`ssrc` + s_cmov_b32 :ref:`sdst`, :ref:`ssrc` + s_cmov_b64 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_i64 :ref:`sdst`, :ref:`ssrc` + s_getpc_b64 :ref:`sdst` + s_mov_b32 :ref:`sdst`, :ref:`ssrc` + s_mov_b64 :ref:`sdst`, :ref:`ssrc` + s_movreld_b32 :ref:`sdst`, :ref:`ssrc` + s_movreld_b64 :ref:`sdst`, :ref:`ssrc` + s_movrels_b32 :ref:`sdst`, :ref:`ssrc` + s_movrels_b64 :ref:`sdst`, :ref:`ssrc` + s_nand_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_nor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_not_b32 :ref:`sdst`, :ref:`ssrc` 
+ s_not_b64 :ref:`sdst`, :ref:`ssrc` + s_or_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b32 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b64 :ref:`sdst`, :ref:`ssrc` + s_rfe_b64 :ref:`ssrc` + s_set_gpr_idx_idx :ref:`ssrc` + s_setpc_b64 :ref:`ssrc` + s_sext_i32_i16 :ref:`sdst`, :ref:`ssrc` + s_sext_i32_i8 :ref:`sdst`, :ref:`ssrc` + s_swappc_b64 :ref:`sdst`, :ref:`ssrc` + s_wqm_b32 :ref:`sdst`, :ref:`ssrc` + s_wqm_b64 :ref:`sdst`, :ref:`ssrc` + s_xnor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_xor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + +SOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_absdiff_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_addc_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_ashr_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_ashr_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfe_u64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfm_b32 :ref:`sdst`, 
:ref:`ssrc0`, :ref:`ssrc1` + s_bfm_b64 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_cbranch_g_fork :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl1_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl2_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl3_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl4_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshl_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_max_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_max_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_pack_hh_b32_b16 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_pack_lh_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_pack_ll_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_rfe_restore_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_sub_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_sub_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_subb_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b32 :ref:`sdst`, 
:ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + +SOPC +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_bitcmp0_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp0_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bitcmp1_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp1_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_cmp_eq_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_set_gpr_idx_on :ref:`ssrc`, :ref:`imask` + s_setvskip :ref:`ssrc0`, :ref:`ssrc1` + +SOPK +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_addk_i32 :ref:`sdst`, :ref:`imm16` + s_call_b64 :ref:`sdst`, :ref:`label` + s_cbranch_i_fork :ref:`ssrc`, :ref:`label` + s_cmovk_i32 :ref:`sdst`, :ref:`imm16` + s_cmpk_eq_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_eq_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_u32 :ref:`ssrc`, :ref:`imm16` + s_getreg_b32 :ref:`sdst`, :ref:`hwreg` + s_movk_i32 :ref:`sdst`, :ref:`imm16` + s_mulk_i32 :ref:`sdst`, :ref:`imm16` + s_setreg_b32 :ref:`hwreg`, :ref:`ssrc` + s_setreg_imm32_b32 :ref:`hwreg`, :ref:`simm32` + +SOPP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_barrier + s_branch :ref:`label` + s_cbranch_cdbgsys :ref:`label` + s_cbranch_cdbgsys_and_user :ref:`label` + s_cbranch_cdbgsys_or_user :ref:`label` + s_cbranch_cdbguser :ref:`label` + s_cbranch_execnz :ref:`label` + s_cbranch_execz :ref:`label` + s_cbranch_scc0 :ref:`label` + s_cbranch_scc1 :ref:`label` + s_cbranch_vccnz :ref:`label` + s_cbranch_vccz :ref:`label` + s_decperflevel :ref:`imm16` + s_endpgm + s_endpgm_ordered_ps_done + s_endpgm_saved + s_icache_inv + s_incperflevel :ref:`imm16` + s_nop :ref:`imm16` + s_sendmsg :ref:`msg` + s_sendmsghalt :ref:`msg` + s_set_gpr_idx_mode :ref:`imask` + s_set_gpr_idx_off + s_sethalt :ref:`imm16` + s_setkill :ref:`imm16` + s_setprio :ref:`imm16` + s_sleep :ref:`imm16` + s_trap :ref:`imm16` + s_ttracedata + s_waitcnt :ref:`waitcnt` + s_wakeup + +VOP1 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_accvgpr_mov_b32 :ref:`vdst`, :ref:`vsrc` + v_bfrev_b32 :ref:`vdst`, :ref:`src` + v_bfrev_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_bfrev_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f16 :ref:`vdst`, :ref:`src` + v_ceil_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f32 :ref:`vdst`, :ref:`src` + v_ceil_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f64 :ref:`vdst`, :ref:`src` + v_ceil_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_clrexcp + v_cos_f16 :ref:`vdst`, :ref:`src` + v_cos_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cos_f16_sdwa :ref:`vdst`, 
:ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cos_f32 :ref:`vdst`, :ref:`src` + v_cos_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cos_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_f32 :ref:`vdst`, :ref:`src` + v_cvt_f16_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_i16 :ref:`vdst`, :ref:`src` + v_cvt_f16_i16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_i16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_u16 :ref:`vdst`, :ref:`src` + v_cvt_f16_u16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_u16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f16 :ref:`vdst`, :ref:`src` + v_cvt_f32_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f64 :ref:`vdst`, :ref:`src` + v_cvt_f32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_i32 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_u32 :ref:`vdst`, 
:ref:`src` + v_cvt_f32_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte0 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte0_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte0_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte1 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte1_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte1_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte2 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte2_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte2_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte3 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte3_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte3_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f64_f32 :ref:`vdst`, :ref:`src` + v_cvt_f64_i32 :ref:`vdst`, :ref:`src` + v_cvt_f64_u32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_flr_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i16_f16_sdwa 
:ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f64 :ref:`vdst`, :ref:`src` + v_cvt_i32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_norm_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_norm_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_norm_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_off_f32_i4 :ref:`vdst`, :ref:`src` + v_cvt_off_f32_i4_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_off_f32_i4_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_rpi_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_rpi_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_rpi_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f32 
:ref:`vdst`, :ref:`src` + v_cvt_u32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f64 :ref:`vdst`, :ref:`src` + v_cvt_u32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f16 :ref:`vdst`, :ref:`src` + v_exp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_f32 :ref:`vdst`, :ref:`src` + v_exp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_legacy_f32 :ref:`vdst`, :ref:`src` + v_exp_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_legacy_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_i32 :ref:`vdst`, :ref:`src` + v_ffbh_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_u32 :ref:`vdst`, :ref:`src` + v_ffbh_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbl_b32 :ref:`vdst`, :ref:`src` + v_ffbl_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbl_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` 
:ref:`dst_unused` :ref:`src0_sel` + v_floor_f16 :ref:`vdst`, :ref:`src` + v_floor_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f32 :ref:`vdst`, :ref:`src` + v_floor_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f64 :ref:`vdst`, :ref:`src` + v_floor_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f16 :ref:`vdst`, :ref:`src` + v_fract_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f32 :ref:`vdst`, :ref:`src` + v_fract_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f64 :ref:`vdst`, :ref:`src` + v_fract_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i16_f16 :ref:`vdst`, :ref:`src` + v_frexp_exp_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f32 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i32_f32_sdwa :ref:`vdst`, 
:ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f64 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f16 :ref:`vdst`, :ref:`src` + v_frexp_mant_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f32 :ref:`vdst`, :ref:`src` + v_frexp_mant_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f64 :ref:`vdst`, :ref:`src` + v_frexp_mant_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f16 :ref:`vdst`, :ref:`src` + v_log_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_f32 :ref:`vdst`, :ref:`src` + v_log_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_legacy_f32 :ref:`vdst`, :ref:`src` + v_log_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_legacy_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_mov_b32 :ref:`vdst`, :ref:`src` + v_mov_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` 
:ref:`bound_ctrl` + v_mov_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_nop + v_not_b32 :ref:`vdst`, :ref:`src` + v_not_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_not_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f16 :ref:`vdst`, :ref:`src` + v_rcp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f32 :ref:`vdst`, :ref:`src` + v_rcp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f64 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_iflag_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_readfirstlane_b32 :ref:`sdst`, :ref:`vsrc` + v_rndne_f16 :ref:`vdst`, :ref:`src` + v_rndne_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f32 :ref:`vdst`, :ref:`src` + v_rndne_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f64 :ref:`vdst`, :ref:`src` + v_rsq_f16 :ref:`vdst`, :ref:`src` + v_rsq_f16_dpp :ref:`vdst`, 
:ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f32 :ref:`vdst`, :ref:`src` + v_rsq_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f64 :ref:`vdst`, :ref:`src` + v_sat_pk_u8_i16 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_sat_pk_u8_i16_dpp :ref:`vdst`::ref:`u8x4`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sat_pk_u8_i16_sdwa :ref:`vdst`::ref:`u8x4`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_screen_partition_4se_b32 :ref:`vdst`, :ref:`src` + v_screen_partition_4se_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_screen_partition_4se_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f16 :ref:`vdst`, :ref:`src` + v_sin_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f32 :ref:`vdst`, :ref:`src` + v_sin_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f16 :ref:`vdst`, :ref:`src` + v_sqrt_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f32 :ref:`vdst`, 
:ref:`src` + v_sqrt_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f64 :ref:`vdst`, :ref:`src` + v_swap_b32 :ref:`vdst`, :ref:`vsrc` + v_trunc_f16 :ref:`vdst`, :ref:`src` + v_trunc_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_trunc_f32 :ref:`vdst`, :ref:`src` + v_trunc_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_trunc_f64 :ref:`vdst`, :ref:`src` + v_trunc_f64_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + +VOP2 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_add_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u16 
:ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_addc_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_addc_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_addc_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_and_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_and_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_and_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_ashrrev_i16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_ashrrev_i32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, 
:ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_cndmask_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_cndmask_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cndmask_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_dot2c_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` + v_dot2c_f32_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`f16x2`, :ref:`vsrc1`::ref:`f16x2` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot2c_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`vsrc1`::ref:`i16x2` + v_dot2c_i32_i16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i16x2`, :ref:`vsrc1`::ref:`i16x2` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot4c_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` + v_dot4c_i32_i8_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_dot8c_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`vsrc1`::ref:`i4x8` + v_dot8c_i32_i4_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`i4x8`, :ref:`vsrc1`::ref:`i4x8` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fmac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fmac_f64 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_fmac_f64_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp64_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ldexp_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`i16` + v_ldexp_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`i16` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` 
+ v_ldexp_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`i16` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshlrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshlrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshrrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshrrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` 
+ v_madak_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madak_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madmk_f16 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_madmk_f32 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u32_dpp 
:ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u32_dpp :ref:`vdst`, :ref:`vsrc0`, 
:ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_hi_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_hi_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + 
v_mul_lo_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_lo_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_or_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_or_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_pk_fmac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_sub_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` 
:ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subb_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subb_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subb_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subbrev_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subbrev_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subbrev_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_subrev_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + 
v_subrev_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_xnor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xnor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xnor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_xor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp32_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + +VOP3 
+----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_add_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_lshl_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_addc_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_alignbit_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_alignbyte_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_and_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_and_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + 
v_ashrrev_i16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_ashrrev_i32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_ashrrev_i64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_bcnt_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfe_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_bfe_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfi_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfrev_b32_e64 :ref:`vdst`, :ref:`src` + v_ceil_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_clrexcp_e64 + v_cmp_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i32_e64 :ref:`sdst`, 
:ref:`src0`, :ref:`src1` + v_cmp_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u32_e64 :ref:`sdst`, :ref:`src0`, 
:ref:`src1` + v_cmp_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + 
v_cmp_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`clamp` + v_cmpx_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u32_e64 
:ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i64_e64 
:ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` 
+ v_cmpx_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cndmask_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` + v_cos_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cos_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubeid_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubema_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubesc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubetc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` 
:ref:`omod` + v_cvt_f16_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_i16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f16_u16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte0_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte1_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte2_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte3_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f64_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_flr_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_norm_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_norm_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_off_f32_i4_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_pk_i16_i32 :ref:`vdst`, :ref:`src0`::ref:`i32`, :ref:`src1`::ref:`i32` + v_cvt_pk_u16_u32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1`::ref:`u32` + v_cvt_pk_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_cvt_pkaccum_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32` + v_cvt_pknorm_i16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, :ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + 
v_cvt_pknorm_i16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pknorm_u16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, :ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + v_cvt_pknorm_u16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pkrtz_f16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_rpi_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_div_fixup_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_div_fixup_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_scale_f32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_div_scale_f64 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_exp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ffbh_i32_e64 :ref:`vdst`, :ref:`src` + v_ffbh_u32_e64 :ref:`vdst`, :ref:`src` + v_ffbl_b32_e64 :ref:`vdst`, :ref:`src` + v_floor_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + 
v_floor_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_fma_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fmac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_fmac_f64_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_exp_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_mant_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ldexp_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i16` :ref:`clamp` :ref:`omod` + v_ldexp_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_ldexp_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_lerp_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`b32`, :ref:`src1`::ref:`b32`, :ref:`src2`::ref:`b32` + v_log_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` 
:ref:`clamp` :ref:`omod` + v_log_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_lshl_add_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_lshl_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2` + v_lshlrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshlrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshlrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshrrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_mac_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_mad_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_mad_i64_i32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i64` :ref:`clamp` + v_mad_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_legacy_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_legacy_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_legacy_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u16 
:ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mad_u64_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u64` :ref:`clamp` + v_max3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_max3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_max3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_hi_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_lo_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_med3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_med3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_med3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_med3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_med3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_med3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_min3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_min3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mov_b32_e64 :ref:`vdst`, :ref:`src` + v_mqsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_mqsad_u32_u8 :ref:`vdst`::ref:`u32x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`vsrc2`::ref:`u32x4` :ref:`clamp` + v_msad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mul_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_hi_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + 
v_mul_legacy_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_lo_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_lo_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_nop_e64 + v_not_b32_e64 :ref:`vdst`, :ref:`src` + v_or3_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_or_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_pack_b32_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`op_sel` + v_perm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_qsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_rcp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_iflag_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_readlane_b32 :ref:`sdst`, :ref:`vsrc0`, :ref:`ssrc1` + v_rndne_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sad_hi_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u16 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_sad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + 
v_sat_pk_u8_i16_e64 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_screen_partition_4se_b32_e64 :ref:`vdst`, :ref:`src` + v_sin_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sin_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_sub_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subb_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subbrev_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subrev_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_trig_preop_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`u32` :ref:`clamp` :ref:`omod` + v_trunc_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + 
v_writelane_b32 :ref:`vdst`, :ref:`ssrc0`, :ref:`ssrc1` + v_xad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_xnor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_xor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOP3P +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_accvgpr_read_b32 :ref:`vdst`, :ref:`vsrc` + v_accvgpr_write_b32 :ref:`vdst`, :ref:`src` + v_dot2_f32_f16 :ref:`vdst`, :ref:`src0`::ref:`f16x2`, :ref:`src1`::ref:`f16x2`, :ref:`src2`::ref:`f32` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_dot2_i32_i16 :ref:`vdst`, :ref:`src0`::ref:`i16x2`, :ref:`src1`::ref:`i16x2`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot2_u32_u16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot4_i32_i8 :ref:`vdst`, :ref:`src0`::ref:`i8x4`, :ref:`src1`::ref:`i8x4`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_dot4_u32_u8 :ref:`vdst`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_dot8_i32_i4 :ref:`vdst`, :ref:`src0`::ref:`i4x8`, :ref:`src1`::ref:`i4x8`, :ref:`src2`::ref:`i32` 
:ref:`clamp` + v_dot8_u32_u4 :ref:`vdst`, :ref:`src0`::ref:`u4x8`, :ref:`src1`::ref:`u4x8`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_fma_mix_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixhi_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_fma_mixlo_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`fx`, :ref:`src1`::ref:`m`::ref:`fx`, :ref:`src2`::ref:`m`::ref:`fx` :ref:`m_op_sel` :ref:`m_op_sel_hi` :ref:`clamp` + v_mfma_f32_16x16x16bf16_1k :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x16f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x1f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x2bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4bf16_1k :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x4f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_16x16x8bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + 
v_mfma_f32_32x32x1f32 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x2bf16 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x2f32 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4bf16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4bf16_1k :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x4f16 :ref:`vdst`::ref:`f32x32`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8bf16_1k :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_32x32x8f16 :ref:`vdst`::ref:`f32x16`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x1f32 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f32`, :ref:`vsrc1`::ref:`f32`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x2bf16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x2`, :ref:`vsrc1`::ref:`bf16x2`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4bf16_1k :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`bf16x4`, :ref:`vsrc1`::ref:`bf16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f32_4x4x4f16 :ref:`vdst`::ref:`f32x4`, :ref:`vsrc0`::ref:`f16x4`, :ref:`vsrc1`::ref:`f16x4`, :ref:`src2`::ref:`f32x4` :ref:`cbsz` :ref:`abid` 
:ref:`blgp` + v_mfma_f64_16x16x4f64 :ref:`vdst`::ref:`f64x4`, :ref:`vsrc0`::ref:`f64`, :ref:`vsrc1`::ref:`f64`, :ref:`src2`::ref:`f64x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_f64_4x4x4f64 :ref:`vdst`::ref:`f64`, :ref:`vsrc0`::ref:`f64`, :ref:`vsrc1`::ref:`f64`, :ref:`src2`::ref:`f64` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_16x16x16i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_16x16x4i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x4i8 :ref:`vdst`::ref:`i32x32`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x32` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_32x32x8i8 :ref:`vdst`::ref:`i32x16`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x16` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_mfma_i32_4x4x4i8 :ref:`vdst`::ref:`i32x4`, :ref:`vsrc0`::ref:`i8x4`, :ref:`vsrc1`::ref:`i8x4`, :ref:`src2`::ref:`i32x4` :ref:`cbsz` :ref:`abid` :ref:`blgp` + v_pk_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_add_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_add_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_fma_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_fma_f32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + 
v_pk_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mov_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_sub_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + +VOPC +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_cmp_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f32_sdwa 
:ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i64 :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + 
v_cmpx_gt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` 
:ref:`src1_sel` + v_cmpx_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16_sdwa :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + +.. |---| unicode:: U+02014 .. em dash + +.. 
toctree:: + :hidden: + + gfx90a_dst + gfx90a_fx_operand + gfx90a_hwreg + gfx90a_imask + gfx90a_imm16 + gfx90a_imm16_1 + gfx90a_imm16_2 + gfx90a_label + gfx90a_m + gfx90a_m_1 + gfx90a_msg + gfx90a_opt + gfx90a_probe + gfx90a_saddr + gfx90a_saddr_1 + gfx90a_sbase + gfx90a_sbase_1 + gfx90a_sbase_2 + gfx90a_sdata + gfx90a_sdata_1 + gfx90a_sdata_2 + gfx90a_sdata_3 + gfx90a_sdata_4 + gfx90a_sdata_5 + gfx90a_sdst + gfx90a_sdst_1 + gfx90a_sdst_2 + gfx90a_sdst_3 + gfx90a_sdst_4 + gfx90a_sdst_5 + gfx90a_sdst_6 + gfx90a_sdst_7 + gfx90a_simm32 + gfx90a_simm32_1 + gfx90a_simm32_2 + gfx90a_soffset + gfx90a_soffset_1 + gfx90a_soffset_2 + gfx90a_src + gfx90a_src_1 + gfx90a_src_10 + gfx90a_src_11 + gfx90a_src_2 + gfx90a_src_3 + gfx90a_src_4 + gfx90a_src_5 + gfx90a_src_6 + gfx90a_src_7 + gfx90a_src_8 + gfx90a_src_9 + gfx90a_srsrc + gfx90a_srsrc_1 + gfx90a_ssamp + gfx90a_ssrc + gfx90a_ssrc_1 + gfx90a_ssrc_2 + gfx90a_ssrc_3 + gfx90a_ssrc_4 + gfx90a_ssrc_5 + gfx90a_ssrc_6 + gfx90a_ssrc_7 + gfx90a_ssrc_8 + gfx90a_type_deviation + gfx90a_vaddr + gfx90a_vaddr_1 + gfx90a_vaddr_2 + gfx90a_vaddr_3 + gfx90a_vaddr_4 + gfx90a_vaddr_5 + gfx90a_vcc + gfx90a_vdata + gfx90a_vdata0 + gfx90a_vdata0_1 + gfx90a_vdata1 + gfx90a_vdata1_1 + gfx90a_vdata_1 + gfx90a_vdata_10 + gfx90a_vdata_2 + gfx90a_vdata_3 + gfx90a_vdata_4 + gfx90a_vdata_5 + gfx90a_vdata_6 + gfx90a_vdata_7 + gfx90a_vdata_8 + gfx90a_vdata_9 + gfx90a_vdst + gfx90a_vdst_1 + gfx90a_vdst_10 + gfx90a_vdst_11 + gfx90a_vdst_12 + gfx90a_vdst_13 + gfx90a_vdst_14 + gfx90a_vdst_15 + gfx90a_vdst_16 + gfx90a_vdst_17 + gfx90a_vdst_18 + gfx90a_vdst_19 + gfx90a_vdst_2 + gfx90a_vdst_3 + gfx90a_vdst_4 + gfx90a_vdst_5 + gfx90a_vdst_6 + gfx90a_vdst_7 + gfx90a_vdst_8 + gfx90a_vdst_9 + gfx90a_vsrc + gfx90a_vsrc_1 + gfx90a_vsrc_2 + gfx90a_vsrc_3 + gfx90a_vsrc_4 + gfx90a_vsrc_5 + gfx90a_waitcnt diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX9.rst.txt 
llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/AMDGPUAsmGFX9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,2133 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +==================================================================================== +Syntax of Core GFX9 Instructions +==================================================================================== + +.. contents:: + :local: + +Introduction +============ + +This document describes the syntax of *core* GFX9 instructions. + +Notation +======== + +Notation used in this document is explained :ref:`here`. + +Overview +======== + +An overview of generic syntax and other features of AMDGPU instructions may be found :ref:`in this document`. + +Instructions +============ + + +DS +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + ds_add_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_add_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_add_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_and_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_and_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_append :ref:`vdst` :ref:`offset` :ref:`gds` + ds_bpermute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_cmpst_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + 
ds_cmpst_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_f64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_cmpst_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_condxchg32_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_consume :ref:`vdst` :ref:`offset` :ref:`gds` + ds_dec_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_dec_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_dec_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_barrier :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_init :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_br :ref:`vdata` :ref:`offset` :ref:`gds` + ds_gws_sema_p :ref:`offset` :ref:`gds` + ds_gws_sema_release_all :ref:`offset` :ref:`gds` + ds_gws_sema_v :ref:`offset` :ref:`gds` + ds_inc_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_inc_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_inc_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_f64 :ref:`vaddr`, :ref:`vdata` 
:ref:`offset` :ref:`gds` + ds_max_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_max_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_max_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_f64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_i64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_f64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_i64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_src2_f32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_f64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i32 
:ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_i64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_min_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_min_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_mskor_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_mskor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_nop + ds_or_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_or_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_or_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_ordered_count :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_permute_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` + ds_read2_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read2st64_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_read_addtid_b32 :ref:`vdst` :ref:`offset` :ref:`gds` + ds_read_b128 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b32 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b64 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_b96 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i16 :ref:`vdst`, :ref:`vaddr` 
:ref:`offset` :ref:`gds` + ds_read_i8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_i8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u16_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_read_u8_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_rsub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_rsub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_rtn_u64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_src2_u32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_src2_u64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_sub_u32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_sub_u64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_swizzle_b32 :ref:`vdst`, :ref:`vaddr` :ref:`pattern` :ref:`gds` + ds_wrap_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset` :ref:`gds` + ds_write2_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b32 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write2st64_b64 :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` 
:ref:`offset0` :ref:`offset1` :ref:`gds` + ds_write_addtid_b32 :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b128 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b16_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b8_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_b96 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_write_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_write_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_wrxchg2_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b32 :ref:`vdst`::ref:`b32x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg2st64_rtn_b64 :ref:`vdst`::ref:`b64x2`, :ref:`vaddr`, :ref:`vdata0`, :ref:`vdata1` :ref:`offset0` :ref:`offset1` :ref:`gds` + ds_wrxchg_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_wrxchg_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b32 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_b64 :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b32 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_rtn_b64 :ref:`vdst`, :ref:`vaddr`, :ref:`vdata` :ref:`offset` :ref:`gds` + ds_xor_src2_b32 :ref:`vaddr` :ref:`offset` :ref:`gds` + ds_xor_src2_b64 :ref:`vaddr` :ref:`offset` :ref:`gds` + +EXP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + exp :ref:`tgt`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vsrc2`, :ref:`vsrc3` :ref:`done` :ref:`compr` :ref:`vm` + +FLAT +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + flat_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, 
:ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_inc_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` 
+ flat_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dword :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dwordx2 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dwordx3 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_dwordx4 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sbyte :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_short_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_short_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_sshort :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ubyte :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_load_ushort :ref:`vdst`, 
:ref:`vaddr` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_byte :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dword :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dwordx2 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dwordx3 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_dwordx4 :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_short :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + flat_store_short_d16_hi :ref:`vaddr`, :ref:`vdata` :ref:`offset12` :ref:`glc` :ref:`slc` + global_atomic_add :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_add_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_and :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_and_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_cmpswap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b32x2`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_cmpswap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`::ref:`b64x2`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_dec :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_dec_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_inc :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_inc_x2 
:ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_or :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_or_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smax :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smax_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smin :ref:`vdst`::ref:`opt`::ref:`i32`, :ref:`vaddr`, :ref:`vdata`::ref:`i32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_smin_x2 :ref:`vdst`::ref:`opt`::ref:`i64`, :ref:`vaddr`, :ref:`vdata`::ref:`i64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_sub :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_sub_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_swap :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_swap_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umax :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umax_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, :ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umin :ref:`vdst`::ref:`opt`::ref:`u32`, :ref:`vaddr`, :ref:`vdata`::ref:`u32`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_umin_x2 :ref:`vdst`::ref:`opt`::ref:`u64`, :ref:`vaddr`, 
:ref:`vdata`::ref:`u64`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_xor :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_atomic_xor_x2 :ref:`vdst`::ref:`opt`, :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dword :ref:`vaddr`, 
:ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + global_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`saddr` 
:ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_byte :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_byte_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dword :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dwordx2 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dwordx3 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_dwordx4 :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_short :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + scratch_store_short_d16_hi :ref:`vaddr`, :ref:`vdata`, :ref:`saddr` :ref:`offset13s` :ref:`glc` :ref:`slc` + +MIMG +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + image_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` 
:ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_cmpswap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_dec :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_inc :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_smax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_smin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_umax :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_umin :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_gather4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` 
:ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` 
:ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_gather4_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_get_lod :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` + image_get_resinfo :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` + 
image_load :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_load_mip :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_load_mip_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_mip_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_pck :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` + image_load_pck_sgn :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` + image_sample :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b :ref:`vdst`, :ref:`vaddr`, 
:ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_b_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cd_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` 
:ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_c_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cd_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` 
:ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d_cl :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d_cl_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_d_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_l :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_l_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_lz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_lz_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_sample_o :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`ssamp` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`tfe` :ref:`lwe` :ref:`da` :ref:`d16` + image_store :ref:`vdata`, 
:ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_store_mip :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` :ref:`d16` + image_store_mip_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + image_store_pck :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc` :ref:`dmask` :ref:`unorm` :ref:`glc` :ref:`slc` :ref:`a16` :ref:`lwe` :ref:`da` + +MTBUF +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + tbuffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` 
:ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + tbuffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`fmt` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + +MUBUF +----------------------- + 
+.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **SRC3** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + buffer_atomic_add :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_add_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_and_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap :ref:`vdata`::ref:`dst`::ref:`b32x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_cmpswap_x2 :ref:`vdata`::ref:`dst`::ref:`b64x2`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_dec :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + 
buffer_atomic_dec_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_inc_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_or_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smax_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin :ref:`vdata`::ref:`dst`::ref:`i32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_smin_x2 :ref:`vdata`::ref:`dst`::ref:`i64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_sub_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_swap_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umax_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin :ref:`vdata`::ref:`dst`::ref:`u32`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_umin_x2 :ref:`vdata`::ref:`dst`::ref:`u64`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_atomic_xor_x2 :ref:`vdata`::ref:`dst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dword :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_dwordx2 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx3 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_dwordx4 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_hi_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_d16_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_x :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_format_xy :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyz :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_format_xyzw :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sbyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_sbyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sbyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_short_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_short_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_sshort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_load_ubyte :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` 
:ref:`lds` + buffer_load_ubyte_d16 :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_ubyte_d16_hi :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_load_ushort :ref:`vdst`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` :ref:`lds` + buffer_store_byte :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_byte_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dword :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx2 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx3 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_dwordx4 :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_hi_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_d16_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, 
:ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_x :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xy :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyz :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_format_xyzw :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_lds_dword :ref:`srsrc`, :ref:`soffset` :ref:`offset12` :ref:`lds` + buffer_store_short :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_store_short_d16_hi :ref:`vdata`, :ref:`vaddr`, :ref:`srsrc`, :ref:`soffset` :ref:`idxen` :ref:`offen` :ref:`offset12` :ref:`glc` :ref:`slc` + buffer_wbinvl1 + buffer_wbinvl1_vol + +SMEM +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_atc_probe :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atc_probe_buffer :ref:`probe`, :ref:`sbase`, :ref:`soffset` + s_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` 
+ s_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_umin_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_add_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_and :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_and_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_cmpswap :ref:`sdata`::ref:`dst`::ref:`b32x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_cmpswap_x2 :ref:`sdata`::ref:`dst`::ref:`b64x2`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_dec_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_inc :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` 
:ref:`glc` + s_buffer_atomic_inc_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_or_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smax_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin :ref:`sdata`::ref:`dst`::ref:`i32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_smin_x2 :ref:`sdata`::ref:`dst`::ref:`i64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_sub_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_swap_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umax_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin :ref:`sdata`::ref:`dst`::ref:`u32`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_umin_x2 :ref:`sdata`::ref:`dst`::ref:`u64`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_atomic_xor_x2 :ref:`sdata`::ref:`dst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` 
:ref:`glc` + s_buffer_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_buffer_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_dcache_discard :ref:`sbase`, :ref:`soffset` + s_dcache_discard_x2 :ref:`sbase`, :ref:`soffset` + s_dcache_inv + s_dcache_inv_vol + s_dcache_wb + s_dcache_wb_vol + s_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx16 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_load_dwordx8 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_memrealtime :ref:`sdst`::ref:`b64` + s_memtime :ref:`sdst`::ref:`b64` + s_scratch_load_dword :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_load_dwordx2 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_load_dwordx4 :ref:`sdst`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_scratch_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dword :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx2 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + s_store_dwordx4 :ref:`sdata`, :ref:`sbase`, :ref:`soffset` :ref:`glc` + +SOP1 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_abs_i32 :ref:`sdst`, :ref:`ssrc` + s_and_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn1_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_andn2_wrexec_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_bcnt1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_bitreplicate_b64_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset0_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_bitset1_b32 :ref:`sdst`, :ref:`ssrc` + s_bitset1_b64 :ref:`sdst`, :ref:`ssrc`::ref:`b32` + s_brev_b32 :ref:`sdst`, :ref:`ssrc` + s_brev_b64 :ref:`sdst`, :ref:`ssrc` + s_cbranch_join :ref:`ssrc` + s_cmov_b32 :ref:`sdst`, :ref:`ssrc` + s_cmov_b64 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff0_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_ff1_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b32 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_b64 :ref:`sdst`, :ref:`ssrc` + s_flbit_i32_i64 :ref:`sdst`, :ref:`ssrc` + s_getpc_b64 :ref:`sdst` + s_mov_b32 :ref:`sdst`, :ref:`ssrc` + s_mov_b64 :ref:`sdst`, :ref:`ssrc` + s_movreld_b32 :ref:`sdst`, :ref:`ssrc` + s_movreld_b64 :ref:`sdst`, :ref:`ssrc` + s_movrels_b32 :ref:`sdst`, :ref:`ssrc` + s_movrels_b64 :ref:`sdst`, :ref:`ssrc` + s_nand_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_nor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_not_b32 :ref:`sdst`, :ref:`ssrc` 
+ s_not_b64 :ref:`sdst`, :ref:`ssrc` + s_or_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn1_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_orn2_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b32 :ref:`sdst`, :ref:`ssrc` + s_quadmask_b64 :ref:`sdst`, :ref:`ssrc` + s_rfe_b64 :ref:`ssrc` + s_set_gpr_idx_idx :ref:`ssrc` + s_setpc_b64 :ref:`ssrc` + s_sext_i32_i16 :ref:`sdst`, :ref:`ssrc` + s_sext_i32_i8 :ref:`sdst`, :ref:`ssrc` + s_swappc_b64 :ref:`sdst`, :ref:`ssrc` + s_wqm_b32 :ref:`sdst`, :ref:`ssrc` + s_wqm_b64 :ref:`sdst`, :ref:`ssrc` + s_xnor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + s_xor_saveexec_b64 :ref:`sdst`, :ref:`ssrc` + +SOP2 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_absdiff_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_addc_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_and_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_andn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_ashr_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_ashr_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_i64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfe_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_bfe_u64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bfm_b32 :ref:`sdst`, 
:ref:`ssrc0`, :ref:`ssrc1` + s_bfm_b64 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_cbranch_g_fork :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_cselect_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl1_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl2_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl3_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl4_add_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_lshl_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshl_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_lshr_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_max_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_max_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_min_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_hi_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_mul_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nand_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_nor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_or_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_orn2_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_pack_hh_b32_b16 :ref:`sdst`, :ref:`ssrc0`::ref:`b32`, :ref:`ssrc1`::ref:`b32` + s_pack_lh_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_pack_ll_b32_b16 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_rfe_restore_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`b32` + s_sub_i32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_sub_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_subb_u32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b32 :ref:`sdst`, 
:ref:`ssrc0`, :ref:`ssrc1` + s_xnor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b32 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + s_xor_b64 :ref:`sdst`, :ref:`ssrc0`, :ref:`ssrc1` + +SOPC +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_bitcmp0_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp0_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_bitcmp1_b32 :ref:`ssrc0`, :ref:`ssrc1` + s_bitcmp1_b64 :ref:`ssrc0`, :ref:`ssrc1`::ref:`u32` + s_cmp_eq_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_eq_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_ge_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_gt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_le_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lg_u64 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_i32 :ref:`ssrc0`, :ref:`ssrc1` + s_cmp_lt_u32 :ref:`ssrc0`, :ref:`ssrc1` + s_set_gpr_idx_on :ref:`ssrc`, :ref:`imask` + s_setvskip :ref:`ssrc0`, :ref:`ssrc1` + +SOPK +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_addk_i32 :ref:`sdst`, :ref:`imm16` + s_call_b64 :ref:`sdst`, :ref:`label` + s_cbranch_i_fork :ref:`ssrc`, :ref:`label` + s_cmovk_i32 :ref:`sdst`, :ref:`imm16` + s_cmpk_eq_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_eq_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_ge_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_gt_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_le_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lg_u32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_i32 :ref:`ssrc`, :ref:`imm16` + s_cmpk_lt_u32 :ref:`ssrc`, :ref:`imm16` + s_getreg_b32 :ref:`sdst`, :ref:`hwreg` + s_movk_i32 :ref:`sdst`, :ref:`imm16` + s_mulk_i32 :ref:`sdst`, :ref:`imm16` + s_setreg_b32 :ref:`hwreg`, :ref:`ssrc` + s_setreg_imm32_b32 :ref:`hwreg`, :ref:`simm32` + +SOPP +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **SRC** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + s_barrier + s_branch :ref:`label` + s_cbranch_cdbgsys :ref:`label` + s_cbranch_cdbgsys_and_user :ref:`label` + s_cbranch_cdbgsys_or_user :ref:`label` + s_cbranch_cdbguser :ref:`label` + s_cbranch_execnz :ref:`label` + s_cbranch_execz :ref:`label` + s_cbranch_scc0 :ref:`label` + s_cbranch_scc1 :ref:`label` + s_cbranch_vccnz :ref:`label` + s_cbranch_vccz :ref:`label` + s_decperflevel :ref:`imm16` + s_endpgm + s_endpgm_ordered_ps_done + s_endpgm_saved + s_icache_inv + s_incperflevel :ref:`imm16` + s_nop :ref:`imm16` + s_sendmsg :ref:`msg` + s_sendmsghalt :ref:`msg` + s_set_gpr_idx_mode :ref:`imask` + s_set_gpr_idx_off + s_sethalt :ref:`imm16` + s_setkill :ref:`imm16` + s_setprio :ref:`imm16` + s_sleep :ref:`imm16` + s_trap :ref:`imm16` + s_ttracedata + s_waitcnt :ref:`waitcnt` + s_wakeup + +VINTRP +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_interp_mov_f32 :ref:`vdst`, :ref:`param`::ref:`b32`, :ref:`attr`::ref:`b32` + v_interp_p1_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + v_interp_p2_f32 :ref:`vdst`, :ref:`vsrc`, :ref:`attr`::ref:`b32` + +VOP1 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_bfrev_b32 :ref:`vdst`, :ref:`src` + v_bfrev_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_bfrev_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f16 :ref:`vdst`, :ref:`src` + v_ceil_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f32 :ref:`vdst`, :ref:`src` + v_ceil_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ceil_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ceil_f64 :ref:`vdst`, :ref:`src` + v_clrexcp + v_cos_f16 :ref:`vdst`, :ref:`src` + v_cos_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cos_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cos_f32 :ref:`vdst`, :ref:`src` + v_cos_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` 
:ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cos_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_f32 :ref:`vdst`, :ref:`src` + v_cvt_f16_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_i16 :ref:`vdst`, :ref:`src` + v_cvt_f16_i16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_i16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f16_u16 :ref:`vdst`, :ref:`src` + v_cvt_f16_u16_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f16_u16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f16 :ref:`vdst`, :ref:`src` + v_cvt_f32_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_f64 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32 :ref:`vdst`, :ref:`src` + v_cvt_f32_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_u32 :ref:`vdst`, :ref:`src` + v_cvt_f32_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte0 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte0_dpp 
:ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte0_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte1 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte1_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte1_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte2 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte2_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte2_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f32_ubyte3 :ref:`vdst`, :ref:`src` + v_cvt_f32_ubyte3_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_f32_ubyte3_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_f64_f32 :ref:`vdst`, :ref:`src` + v_cvt_f64_i32 :ref:`vdst`, :ref:`src` + v_cvt_f64_u32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_flr_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_flr_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` 
:ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_i32_f64 :ref:`vdst`, :ref:`src` + v_cvt_norm_i16_f16 :ref:`vdst`, :ref:`src` + v_cvt_norm_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_norm_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_norm_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_norm_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_off_f32_i4 :ref:`vdst`, :ref:`src` + v_cvt_off_f32_i4_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_off_f32_i4_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_rpi_i32_f32 :ref:`vdst`, :ref:`src` + v_cvt_rpi_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_rpi_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u16_f16 :ref:`vdst`, :ref:`src` + v_cvt_u16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f32 :ref:`vdst`, :ref:`src` + v_cvt_u32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cvt_u32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_cvt_u32_f64 :ref:`vdst`, :ref:`src` + v_exp_f16 :ref:`vdst`, :ref:`src` + v_exp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + 
v_exp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_f32 :ref:`vdst`, :ref:`src` + v_exp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_exp_legacy_f32 :ref:`vdst`, :ref:`src` + v_exp_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_exp_legacy_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_i32 :ref:`vdst`, :ref:`src` + v_ffbh_i32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_i32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbh_u32 :ref:`vdst`, :ref:`src` + v_ffbh_u32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbh_u32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_ffbl_b32 :ref:`vdst`, :ref:`src` + v_ffbl_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ffbl_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f16 :ref:`vdst`, :ref:`src` + v_floor_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_floor_f32 :ref:`vdst`, :ref:`src` + v_floor_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_floor_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` 
:ref:`src0_sel` + v_floor_f64 :ref:`vdst`, :ref:`src` + v_fract_f16 :ref:`vdst`, :ref:`src` + v_fract_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f32 :ref:`vdst`, :ref:`src` + v_fract_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_fract_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_fract_f64 :ref:`vdst`, :ref:`src` + v_frexp_exp_i16_f16 :ref:`vdst`, :ref:`src` + v_frexp_exp_i16_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i16_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f32 :ref:`vdst`, :ref:`src` + v_frexp_exp_i32_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_exp_i32_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_exp_i32_f64 :ref:`vdst`, :ref:`src` + v_frexp_mant_f16 :ref:`vdst`, :ref:`src` + v_frexp_mant_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f32 :ref:`vdst`, :ref:`src` + v_frexp_mant_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_frexp_mant_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_frexp_mant_f64 :ref:`vdst`, :ref:`src` + v_log_f16 :ref:`vdst`, :ref:`src` + v_log_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` 
:ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_f32 :ref:`vdst`, :ref:`src` + v_log_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_log_legacy_f32 :ref:`vdst`, :ref:`src` + v_log_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_log_legacy_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_mov_b32 :ref:`vdst`, :ref:`src` + v_mov_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mov_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_nop + v_not_b32 :ref:`vdst`, :ref:`src` + v_not_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_not_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f16 :ref:`vdst`, :ref:`src` + v_rcp_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f32 :ref:`vdst`, :ref:`src` + v_rcp_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rcp_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rcp_f64 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32 :ref:`vdst`, :ref:`src` + v_rcp_iflag_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` 
:ref:`bound_ctrl` + v_rcp_iflag_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_readfirstlane_b32 :ref:`sdst`, :ref:`src` + v_rndne_f16 :ref:`vdst`, :ref:`src` + v_rndne_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f32 :ref:`vdst`, :ref:`src` + v_rndne_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rndne_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rndne_f64 :ref:`vdst`, :ref:`src` + v_rsq_f16 :ref:`vdst`, :ref:`src` + v_rsq_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f32 :ref:`vdst`, :ref:`src` + v_rsq_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_rsq_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_rsq_f64 :ref:`vdst`, :ref:`src` + v_sat_pk_u8_i16 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_sat_pk_u8_i16_dpp :ref:`vdst`::ref:`u8x4`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sat_pk_u8_i16_sdwa :ref:`vdst`::ref:`u8x4`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_screen_partition_4se_b32 :ref:`vdst`, :ref:`src` + v_screen_partition_4se_b32_dpp :ref:`vdst`, :ref:`vsrc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_screen_partition_4se_b32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f16 
:ref:`vdst`, :ref:`src` + v_sin_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sin_f32 :ref:`vdst`, :ref:`src` + v_sin_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sin_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f16 :ref:`vdst`, :ref:`src` + v_sqrt_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f32 :ref:`vdst`, :ref:`src` + v_sqrt_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sqrt_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_sqrt_f64 :ref:`vdst`, :ref:`src` + v_swap_b32 :ref:`vdst`, :ref:`vsrc` + v_trunc_f16 :ref:`vdst`, :ref:`src` + v_trunc_f16_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f16_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_trunc_f32 :ref:`vdst`, :ref:`src` + v_trunc_f32_dpp :ref:`vdst`, :ref:`vsrc`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_trunc_f32_sdwa :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` + v_trunc_f64 :ref:`vdst`, :ref:`src` + +VOP2 +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_add_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u16 :ref:`vdst`, 
:ref:`src0`, :ref:`vsrc1` + v_add_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_add_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_add_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_add_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_addc_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_addc_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_addc_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_and_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_and_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_and_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_ashrrev_i16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ashrrev_i32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_ashrrev_i32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ashrrev_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` 
:ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_cndmask_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_cndmask_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_cndmask_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_ldexp_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`::ref:`i16` + v_ldexp_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`i16` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_ldexp_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`i16` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshlrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshlrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshlrev_b32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshlrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`vsrc1` + v_lshrrev_b16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`u16`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u16`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_lshrrev_b32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`vsrc1` + v_lshrrev_b32_dpp :ref:`vdst`, 
:ref:`vsrc0`::ref:`u32`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_lshrrev_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`u32`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mac_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mac_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mac_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_madak_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madak_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1`, :ref:`simm32` + v_madmk_f16 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_madmk_f32 :ref:`vdst`, :ref:`src0`, :ref:`simm32`, :ref:`vsrc2` + v_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + 
v_max_i32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_max_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_max_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_max_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_i16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_i32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_i32_dpp :ref:`vdst`, :ref:`vsrc0`, 
:ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_i32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_min_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_min_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_min_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_hi_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_hi_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_hi_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, 
:ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_hi_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_i32_i24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_i32_i24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_legacy_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_legacy_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_legacy_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_lo_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_lo_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_mul_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_mul_u32_u24_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_mul_u32_u24_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_or_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_or_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_sub_co_u32_dpp 
:ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_sub_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_sub_u32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_sub_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subb_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subb_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subb_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` 
:ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subbrev_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`, :ref:`vcc` + v_subbrev_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1`, :ref:`vcc` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subbrev_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`vcc` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_co_u32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_subrev_co_u32_dpp :ref:`vdst`, :ref:`vcc`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_co_u32_sdwa :ref:`vdst`, :ref:`vcc`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f16_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_f16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_f32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_f32_dpp :ref:`vdst`, :ref:`vsrc0`::ref:`m`, :ref:`vsrc1`::ref:`m` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_f32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u16 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u16_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u16_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_subrev_u32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_subrev_u32_dpp 
:ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_subrev_u32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + v_xor_b32 :ref:`vdst`, :ref:`src0`, :ref:`vsrc1` + v_xor_b32_dpp :ref:`vdst`, :ref:`vsrc0`, :ref:`vsrc1` :ref:`dpp_ctrl` :ref:`row_mask` :ref:`bank_mask` :ref:`bound_ctrl` + v_xor_b32_sdwa :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`dst_sel` :ref:`dst_unused` :ref:`src0_sel` :ref:`src1_sel` + +VOP3 +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST0** **DST1** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_add3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_add_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_add_i32 :ref:`vdst`, :ref:`src0`, 
:ref:`src1` :ref:`clamp` + v_add_lshl_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_add_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_add_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_addc_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_alignbit_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_alignbyte_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`b16` + v_and_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_and_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_ashrrev_i16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_ashrrev_i32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_ashrrev_i64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_bcnt_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfe_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_bfe_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfi_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_bfm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_bfrev_b32_e64 :ref:`vdst`, :ref:`src` + v_ceil_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ceil_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_clrexcp_e64 + v_cmp_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmp_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_i32_e64 :ref:`sdst`, 
:ref:`src0`, :ref:`src1` + v_cmp_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_gt_u32_e64 :ref:`sdst`, :ref:`src0`, 
:ref:`src1` + v_cmp_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f32_e64 
:ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u32_e64 
:ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmp_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmp_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_class_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_class_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`b32` + v_cmpx_eq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_eq_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_eq_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_f_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u32_e64 
:ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_f_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ge_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ge_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_gt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_gt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_le_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_le_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_lt_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_lt_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_ne_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_neq_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_neq_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nge_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_ngt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + 
v_cmpx_nle_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nle_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlg_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_nlt_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_o_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_t_i16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_i64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u16_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u32_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_t_u64_e64 :ref:`sdst`, :ref:`src0`, :ref:`src1` + v_cmpx_tru_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_tru_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f16_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f32_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cmpx_u_f64_e64 :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` + v_cndmask_b32_e64 :ref:`vdst`, :ref:`src0`, 
:ref:`src1`, :ref:`ssrc2` + v_cos_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cos_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubeid_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubema_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubesc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cubetc_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f16_i16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f16_u16_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f32_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte0_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte1_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte2_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f32_ubyte3_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_cvt_f64_i32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_f64_u32_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_flr_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_norm_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + 
v_cvt_norm_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_off_f32_i4_e64 :ref:`vdst`, :ref:`src` :ref:`clamp` :ref:`omod` + v_cvt_pk_i16_i32 :ref:`vdst`, :ref:`src0`::ref:`i32`, :ref:`src1`::ref:`i32` + v_cvt_pk_u16_u32 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1`::ref:`u32` + v_cvt_pk_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32`, :ref:`src2`::ref:`u32` + v_cvt_pkaccum_u8_f32 :ref:`vdst`::ref:`b32`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`u32` + v_cvt_pknorm_i16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, :ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + v_cvt_pknorm_i16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pknorm_u16_f16 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f16`, :ref:`src1`::ref:`m`::ref:`f16` :ref:`op_sel` + v_cvt_pknorm_u16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_pkrtz_f16_f32 :ref:`vdst`, :ref:`src0`::ref:`m`::ref:`f32`, :ref:`src1`::ref:`m`::ref:`f32` + v_cvt_rpi_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_cvt_u16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_cvt_u32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` + v_div_fixup_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_div_fixup_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fixup_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_fmas_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, 
:ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_div_scale_f32 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_div_scale_f64 :ref:`vdst`, :ref:`vcc`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_exp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_exp_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_ffbh_i32_e64 :ref:`vdst`, :ref:`src` + v_ffbh_u32_e64 :ref:`vdst`, :ref:`src` + v_ffbl_b32_e64 :ref:`vdst`, :ref:`src` + v_floor_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_floor_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_fma_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fma_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_fract_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_exp_i16_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_exp_i32_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` + v_frexp_mant_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_frexp_mant_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_interp_mov_f32_e64 :ref:`vdst`, :ref:`param`::ref:`b32`, :ref:`attr`::ref:`b32` 
:ref:`clamp` :ref:`omod` + v_interp_p1_f32_e64 :ref:`vdst`, :ref:`vsrc`::ref:`m`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_interp_p1ll_f16 :ref:`vdst`::ref:`f32`, :ref:`vsrc`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32` :ref:`high` :ref:`clamp` :ref:`omod` + v_interp_p1lv_f16 :ref:`vdst`::ref:`f32`, :ref:`vsrc0`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32`, :ref:`vsrc2`::ref:`m`::ref:`f16x2` :ref:`high` :ref:`clamp` :ref:`omod` + v_interp_p2_f16 :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32`, :ref:`vsrc2`::ref:`m`::ref:`f32` :ref:`high` :ref:`clamp` + v_interp_p2_f32_e64 :ref:`vdst`, :ref:`vsrc`::ref:`m`, :ref:`attr`::ref:`b32` :ref:`clamp` :ref:`omod` + v_interp_p2_legacy_f16 :ref:`vdst`, :ref:`vsrc0`::ref:`m`::ref:`f32`, :ref:`attr`::ref:`b32`, :ref:`vsrc2`::ref:`m`::ref:`f32` :ref:`high` :ref:`clamp` + v_ldexp_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i16` :ref:`clamp` :ref:`omod` + v_ldexp_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_ldexp_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`i32` :ref:`clamp` :ref:`omod` + v_lerp_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`b32`, :ref:`src1`::ref:`b32`, :ref:`src2`::ref:`b32` + v_log_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_log_legacy_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_lshl_add_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_lshl_or_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`::ref:`u32`, :ref:`src2` + v_lshlrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshlrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshlrev_b64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b16_e64 :ref:`vdst`, :ref:`src0`::ref:`u16`, :ref:`src1` + v_lshrrev_b32_e64 :ref:`vdst`, :ref:`src0`::ref:`u32`, :ref:`src1` + v_lshrrev_b64 :ref:`vdst`, 
:ref:`src0`::ref:`u32`, :ref:`src1` + v_mac_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mac_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_mad_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`op_sel` :ref:`clamp` + v_mad_i32_i24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i32` :ref:`clamp` + v_mad_i64_i32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`i64` :ref:`clamp` + v_mad_legacy_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_legacy_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_mad_legacy_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_legacy_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`op_sel` :ref:`clamp` + v_mad_u32_u24 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mad_u64_u32 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`src2`::ref:`u64` :ref:`clamp` + v_max3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_max3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_max3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_i32 :ref:`vdst`, 
:ref:`src0`, :ref:`src1`, :ref:`src2` + v_max3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_max3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_max_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_max_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_max_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_hi_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mbcnt_lo_u32_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_med3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_med3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_med3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_med3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_med3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_med3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`op_sel` :ref:`clamp` + v_min3_f32 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`, :ref:`src2`::ref:`m` :ref:`clamp` :ref:`omod` + v_min3_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min3_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` + v_min3_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_min_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_min_i16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_i32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_min_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mov_b32_e64 :ref:`vdst`, :ref:`src` + v_mqsad_pk_u16_u8 :ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_mqsad_u32_u8 :ref:`vdst`::ref:`u32x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`vsrc2`::ref:`u32x4` :ref:`clamp` + v_msad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_mul_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_hi_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_hi_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_i32_i24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_mul_legacy_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_mul_lo_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_lo_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_mul_u32_u24_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_nop_e64 + v_not_b32_e64 :ref:`vdst`, :ref:`src` + v_or3_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_or_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + v_pack_b32_f16 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`op_sel` + v_perm_b32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_qsad_pk_u16_u8 
:ref:`vdst`::ref:`u16x4`, :ref:`src0`::ref:`u8x8`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u16x4` :ref:`clamp` + v_rcp_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rcp_iflag_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_readlane_b32 :ref:`sdst`, :ref:`src0`, :ref:`ssrc1` + v_rndne_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rndne_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_rsq_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sad_hi_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u16 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u16x2`, :ref:`src1`::ref:`u16x2`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`clamp` + v_sad_u8 :ref:`vdst`::ref:`u32`, :ref:`src0`::ref:`u8x4`, :ref:`src1`::ref:`u8x4`, :ref:`src2`::ref:`u32` :ref:`clamp` + v_sat_pk_u8_i16_e64 :ref:`vdst`::ref:`u8x4`, :ref:`src` + v_screen_partition_4se_b32_e64 :ref:`vdst`, :ref:`src` + v_sin_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sin_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sqrt_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_sub_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`clamp` + v_sub_i32 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_sub_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subb_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subbrev_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1`, :ref:`ssrc2` :ref:`clamp` + v_subrev_co_u32_e64 :ref:`vdst`, :ref:`sdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_f16_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_f32_e64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`clamp` :ref:`omod` + v_subrev_u16_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_subrev_u32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`clamp` + v_trig_preop_f64 :ref:`vdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`u32` :ref:`clamp` :ref:`omod` + v_trunc_f16_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f32_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_trunc_f64_e64 :ref:`vdst`, :ref:`src`::ref:`m` :ref:`clamp` :ref:`omod` + v_writelane_b32 :ref:`vdst`, :ref:`ssrc0`, :ref:`ssrc1` + v_xad_u32 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` + v_xor_b32_e64 :ref:`vdst`, :ref:`src0`, :ref:`src1` + +VOP3P +----------------------- + +.. 
parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **SRC2** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_pk_add_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_add_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_add_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_ashrrev_i16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_fma_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_lshlrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_lshrrev_b16 :ref:`vdst`, :ref:`src0`::ref:`u16x2`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mad_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_mad_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1`, :ref:`src2` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_max_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_max_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_max_u16 
:ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_min_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_min_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_mul_f16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`neg_lo` :ref:`neg_hi` :ref:`clamp` + v_pk_mul_lo_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` + v_pk_sub_i16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + v_pk_sub_u16 :ref:`vdst`, :ref:`src0`, :ref:`src1` :ref:`op_sel` :ref:`op_sel_hi` :ref:`clamp` + +VOPC +----------------------- + +.. parsed-literal:: + + **INSTRUCTION** **DST** **SRC0** **SRC1** **MODIFIERS** + \ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---|\ |---| + v_cmp_class_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_class_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmp_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f16_sdwa 
:ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_eq_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_f_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u32 :ref:`vcc`, :ref:`src0`, 
:ref:`vsrc1` + v_cmp_f_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ge_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + 
v_cmp_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_gt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_le_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f16_sdwa 
:ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_lt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ne_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f32 :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1` + v_cmp_neq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_ngt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nle_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_nlt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, 
:ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_o_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_t_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_tru_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmp_u_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmp_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_class_f16 :ref:`vcc`, 
:ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_class_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m`::ref:`b32` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_class_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1`::ref:`b32` + v_cmpx_eq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_eq_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_eq_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + 
v_cmpx_f_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_f_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_f_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ge_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ge_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + 
v_cmpx_gt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_gt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_gt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_le_u32_sdwa :ref:`sdst`, 
:ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_le_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_lt_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_lt_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_i64 
:ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ne_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ne_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_neq_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_neq_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nge_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nge_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_ngt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_ngt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nle_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nle_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f16 :ref:`vcc`, :ref:`src0`, 
:ref:`vsrc1` + v_cmpx_nlg_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlg_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlg_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_nlt_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_nlt_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_o_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_o_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_i32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_i64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_t_u32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_t_u64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` 
:ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_tru_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_tru_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f16_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f32 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + v_cmpx_u_f32_sdwa :ref:`sdst`, :ref:`src0`::ref:`m`, :ref:`src1`::ref:`m` :ref:`src0_sel` :ref:`src1_sel` + v_cmpx_u_f64 :ref:`vcc`, :ref:`src0`, :ref:`vsrc1` + +.. |---| unicode:: U+02014 .. em dash + +.. toctree:: + :hidden: + + gfx9_attr + gfx9_dst + gfx9_hwreg + gfx9_imask + gfx9_imm16 + gfx9_imm16_1 + gfx9_imm16_2 + gfx9_label + gfx9_m + gfx9_m_1 + gfx9_msg + gfx9_opt + gfx9_param + gfx9_probe + gfx9_saddr + gfx9_saddr_1 + gfx9_sbase + gfx9_sbase_1 + gfx9_sbase_2 + gfx9_sdata + gfx9_sdata_1 + gfx9_sdata_2 + gfx9_sdata_3 + gfx9_sdata_4 + gfx9_sdata_5 + gfx9_sdst + gfx9_sdst_1 + gfx9_sdst_2 + gfx9_sdst_3 + gfx9_sdst_4 + gfx9_sdst_5 + gfx9_sdst_6 + gfx9_sdst_7 + gfx9_simm32 + gfx9_simm32_1 + gfx9_simm32_2 + gfx9_soffset + gfx9_soffset_1 + gfx9_soffset_2 + gfx9_src + gfx9_src_1 + gfx9_src_10 + gfx9_src_2 + gfx9_src_3 + gfx9_src_4 + gfx9_src_5 + gfx9_src_6 + gfx9_src_7 + gfx9_src_8 + gfx9_src_9 + gfx9_srsrc + gfx9_srsrc_1 + gfx9_ssamp + gfx9_ssrc + gfx9_ssrc_1 + gfx9_ssrc_2 + gfx9_ssrc_3 + gfx9_ssrc_4 + gfx9_ssrc_5 + gfx9_ssrc_6 + gfx9_ssrc_7 + gfx9_ssrc_8 + gfx9_tgt + gfx9_type_deviation + gfx9_vaddr + gfx9_vaddr_1 + gfx9_vaddr_2 + gfx9_vaddr_3 + gfx9_vaddr_4 + gfx9_vaddr_5 + gfx9_vcc + gfx9_vdata + gfx9_vdata0 + gfx9_vdata0_1 + gfx9_vdata1 + gfx9_vdata1_1 + gfx9_vdata_1 + gfx9_vdata_10 + gfx9_vdata_2 + gfx9_vdata_3 + gfx9_vdata_4 + gfx9_vdata_5 + gfx9_vdata_6 + gfx9_vdata_7 + gfx9_vdata_8 + gfx9_vdata_9 + gfx9_vdst + gfx9_vdst_1 + gfx9_vdst_10 + gfx9_vdst_11 + gfx9_vdst_12 + gfx9_vdst_13 + gfx9_vdst_2 + 
gfx9_vdst_3 + gfx9_vdst_4 + gfx9_vdst_5 + gfx9_vdst_6 + gfx9_vdst_7 + gfx9_vdst_8 + gfx9_vdst_9 + gfx9_vsrc + gfx9_vsrc_1 + gfx9_vsrc_2 + gfx9_vsrc_3 + gfx9_waitcnt diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx1011_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx1011_src_2: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx1011_src_3: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx1011_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx1011_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx1011_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx1011_vsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx1011_vsrc: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_attr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_attr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_attr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_attr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,29 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_attr: + +attr +==== + +Interpolation attribute and channel: + + ============== =================================== + Syntax Description + ============== =================================== + attr{0..32}.x Attribute 0..32 with *x* channel. + attr{0..32}.y Attribute 0..32 with *y* channel. + attr{0..32}.z Attribute 0..32 with *z* channel. + attr{0..32}.w Attribute 0..32 with *w* channel. + ============== =================================== + +Examples: + +.. 
parsed-literal:: + + v_interp_p1_f32 v1, v0, attr0.x + v_interp_p1_f32 v1, v0, attr32.w diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_dst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_dst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_dst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_dst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_dst: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_fx_operand.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_fx_operand.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_fx_operand.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_fx_operand.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,16 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_fx_operand: + +FX Operand +========== + +This is an *f32* or *f16* operand depending on instruction modifiers: + +* Operand size is controlled by :ref:`m_op_sel_hi`. +* Location of 16-bit operand is controlled by :ref:`m_op_sel`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_hwreg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_hwreg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_hwreg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_hwreg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,81 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_hwreg: + +hwreg +===== + +Bits of a hardware register being accessed. + +The bits of this operand have the following meaning: + + ======= ===================== ============ + Bits Description Value Range + ======= ===================== ============ + 5:0 Register *id*. 0..63 + 10:6 First bit *offset*. 0..31 + 15:11 *Size* in bits. 1..32 + ======= ===================== ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* An *hwreg* value described below. + + ==================================== ============================================================================ + Hwreg Value Syntax Description + ==================================== ============================================================================ + hwreg({0..63}) All bits of a register indicated by its *id*. + hwreg(<*name*>) All bits of a register indicated by its *name*. + hwreg({0..63}, {0..31}, {1..32}) Register bits indicated by register *id*, first bit *offset* and *size*. + hwreg(<*name*>, {0..31}, {1..32}) Register bits indicated by register *name*, first bit *offset* and *size*. 
+ ==================================== ============================================================================ + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. + +Defined register *names* include: + + =================== ========================================== + Name Description + =================== ========================================== + HW_REG_MODE Shader writeable mode bits. + HW_REG_STATUS Shader read-only status. + HW_REG_TRAPSTS Trap status. + HW_REG_HW_ID Id of wave, simd, compute unit, etc. + HW_REG_GPR_ALLOC Per-wave SGPR and VGPR allocation. + HW_REG_LDS_ALLOC Per-wave LDS allocation. + HW_REG_IB_STS Counters of outstanding instructions. + HW_REG_SH_MEM_BASES Memory aperture. + HW_REG_TBA_LO tba_lo register. + HW_REG_TBA_HI tba_hi register. + HW_REG_TMA_LO tma_lo register. + HW_REG_TMA_HI tma_hi register. + HW_REG_FLAT_SCR_LO flat_scratch_lo register. + HW_REG_FLAT_SCR_HI flat_scratch_hi register. + HW_REG_XNACK_MASK xnack_mask register. + HW_REG_POPS_PACKER pops_packer register. + =================== ========================================== + +Examples: + +.. 
parsed-literal:: + + reg = 1 + offset = 2 + size = 4 + hwreg_enc = reg | (offset << 6) | ((size - 1) << 11) + + s_getreg_b32 s2, 0x1881 + s_getreg_b32 s2, hwreg_enc // the same as above + s_getreg_b32 s2, hwreg(1, 2, 4) // the same as above + s_getreg_b32 s2, hwreg(reg, offset, size) // the same as above + + s_getreg_b32 s2, hwreg(15) + s_getreg_b32 s2, hwreg(51, 1, 31) + s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_imm16_1: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_imm16_2: + +imm16 +===== + +A 16-bit :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_imm16.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_imm16: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_label.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_label.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_label.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_label.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,36 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_label: + +label +===== + +A branch target which is a 16-bit signed integer treated as a PC-relative dword offset. + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. +* A :ref:`symbol` (for example, a label) representing a relocatable address in the same compilation unit from which it is referenced. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker. + +Examples: + +..
parsed-literal:: + + offset = 30 + label_1: + label_2 = . + 4 + + s_branch 32 + s_branch offset + 2 + s_branch label_1 + s_branch label_2 + s_branch label_3 + s_branch label_4 + + label_3 = label_2 + 4 + label_4: diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_m_1: + +m += + +This operand may be used with integer operand modifier :ref:`sext`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_m.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_m: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_msg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_msg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_msg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_msg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,101 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_msg: + +msg +=== + +A 16-bit message code. The bits of this operand have the following meaning: + + ============ =============================== =============== + Bits Description Value Range + ============ =============================== =============== + 3:0 Message *type*. 0..15 + 6:4 Optional *operation*. 0..7 + 7:7 Unused. \- + 9:8 Optional *stream*. 0..3 + 15:10 Unused. \- + ============ =============================== =============== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A *sendmsg* value described below. + + ==================================== ==================================================== + Sendmsg Value Syntax Description + ==================================== ==================================================== + sendmsg(<*type*>) A message identified by its *type*. + sendmsg(<*type*>,<*op*>) A message identified by its *type* and *operation*. + sendmsg(<*type*>,<*op*>,<*stream*>) A message identified by its *type* and *operation* + with a stream *id*. + ==================================== ==================================================== + +*Type* may be specified using message *name* or message *id*. + +*Op* may be specified using operation *name* or operation *id*. 
+ +Stream *id* is an integer in the range 0..3. + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. + +Each message type supports specific operations: + + =================== ========== ============================== ============ ========== + Message name Message Id Supported Operations Operation Id Stream Id + =================== ========== ============================== ============ ========== + MSG_INTERRUPT 1 \- \- \- + MSG_GS 2 GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_GS_DONE 3 GS_OP_NOP 0 \- + \ GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_SAVEWAVE 4 \- \- \- + MSG_STALL_WAVE_GEN 5 \- \- \- + MSG_HALT_WAVES 6 \- \- \- + MSG_ORDERED_PS_DONE 7 \- \- \- + MSG_GS_ALLOC_REQ 9 \- \- \- + MSG_GET_DOORBELL 10 \- \- \- + MSG_GET_DDID 11 \- \- \- + MSG_SYSMSG 15 SYSMSG_OP_ECC_ERR_INTERRUPT 1 \- + \ SYSMSG_OP_REG_RD 2 \- + \ SYSMSG_OP_HOST_TRAP_ACK 3 \- + \ SYSMSG_OP_TTRACE_PC 4 \- + =================== ========== ============================== ============ ========== + +*Sendmsg* arguments are validated depending on how the *type* value is specified: + +* If message *type* is specified by name, argument values must satisfy the limitations detailed in the table above. +* If message *type* is specified as a number, each argument must not exceed the corresponding value range (see the first table). + +Examples: + +..
parsed-literal:: + + // numeric message code + msg = 0x10 + s_sendmsg 0x12 + s_sendmsg msg + 2 + + // sendmsg with strict arguments validation + s_sendmsg sendmsg(MSG_INTERRUPT) + s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT) + s_sendmsg sendmsg(MSG_GS, 2) + s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1) + s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC) + s_sendmsg sendmsg(MSG_GET_DOORBELL) + + // sendmsg with validation of value range only + msg = 2 + op = 3 + stream = 1 + s_sendmsg sendmsg(msg, op, stream) + s_sendmsg sendmsg(2, GS_OP_CUT) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_opt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_opt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_opt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_opt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_opt: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_param.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_param.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_param.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_param.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx10_param: + +param +===== + +Interpolation parameter to read: + + ============ =================================== + Syntax Description + ============ =================================== + p0 Parameter *P0*. + p10 Parameter *P10*. + p20 Parameter *P20*. + ============ =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_probe.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_probe.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_probe.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_probe.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,24 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_probe: + +probe +===== + +A bit mask which indicates request permissions. + +This operand must be specified as an :ref:`integer_number` or an :ref:`absolute_expression`. +The value is truncated to 7 bits, but only 3 low bits are significant. + + ============ ============================== + Bit Number Description + ============ ============================== + 0 Request *read* permission. + 1 Request *write* permission. + 2 Request *execute* permission. + ============ ============================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_saddr_1: + +saddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +Either this operand or :ref:`vaddr` must be set to :ref:`off`. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_saddr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_saddr: + +saddr +===== + +An optional 64-bit flat global address. Must be specified as :ref:`off` if not used. + +See :ref:`vaddr` for description of available addressing modes. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx10_sbase_1: + +sbase +===== + +A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sbase_2: + +sbase +===== + +This operand is ignored by H/W and :ref:`flat_scratch` is supplied instead. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sbase.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sbase: + +sbase +===== + +A 64-bit base address for scalar memory operations. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdata_1: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdata_2: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdata_3: + +sdata +===== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdata_4: + +sdata +===== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdata_5: + +sdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdata.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdata: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_1: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_2: + +sdst +==== + +Instruction output. + +*Size:* 16 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_3: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_4: + +sdst +==== + +Instruction output. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_5: + +sdst +==== + +Instruction output. + +*Size:* 8 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_6: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_7: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst_8: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_sdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_sdst: + +sdst +==== + +Instruction output. + +*Size:* 1 dword if wavefront size is 32, otherwise 2 dwords. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_simm32_1: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f16* as described :ref:`here`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_simm32_2: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f32* as described :ref:`here`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_simm32.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_simm32: + +simm32 +====== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value is truncated to 32 bits. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_soffset_1: + +soffset +======= + +An offset added to the base address to get the memory address. + +* If the offset is specified as a register, it supplies an unsigned byte offset. +* If the offset is specified as a 21-bit immediate, it supplies a signed byte offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`m0`, :ref:`simm21` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_soffset_2: + +soffset +======= + +An unsigned 20-bit offset added to the base address to get the memory address. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`m0`, :ref:`uimm20` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_soffset.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_soffset: + +soffset +======= + +An unsigned byte offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`m0`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_2: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_3: + +src +=== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_5: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`lds_direct` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_6: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_7: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src_8: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_srsrc_1: + +srsrc +===== + +Buffer resource constant which defines the address and characteristics of the buffer in memory. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_srsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_srsrc: + +srsrc +===== + +Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format. + +*Size:* 8 dwords by default, 4 dwords if :ref:`r128` is specified. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssamp.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssamp.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssamp.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssamp.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssamp: + +ssamp +===== + +Sampler constant used to specify filtering options applied to the image data after it is read. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_1: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_2: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_3: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_4: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_5: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword if wavefront size is 32, otherwise 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_6: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_7: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc_8: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_ssrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_ssrc: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`vcc`, :ref:`ttmp`, :ref:`null`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_tgt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_tgt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_tgt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_tgt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,24 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_tgt: + +tgt +=== + +An export target: + + ============== =================================== + Syntax Description + ============== =================================== + pos{0..4} Copy vertex position 0..4. + param{0..31} Copy vertex parameter 0..31. + mrt{0..7} Copy pixel color to the MRTs 0..7. + mrtz Copy pixel depth (Z) data. + prim Copy primitive (connectivity) data. + null Copy nothing. + ============== =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx10_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vaddr_1: + +vaddr +===== + +A 64-bit flat address. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vaddr_2: + +vaddr +===== + +A 64-bit flat global address or a 32-bit offset depending on addressing mode: + +* Address = :ref:`vaddr` + :ref:`offset12s`. :ref:`vaddr` is a 64-bit address. This mode is indicated by :ref:`saddr` set to :ref:`off`. +* Address = :ref:`saddr` + :ref:`vaddr` + :ref:`offset12s`. :ref:`vaddr` is a 32-bit offset. This mode is used when :ref:`saddr` is not :ref:`off`. 
+ +*Size:* 1 or 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vaddr_3: + +vaddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +Either this operand or :ref:`saddr` must be set to :ref:`off`. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vaddr_4: + +vaddr +===== + +Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image. + +This operand may be specified using either :ref:`standard VGPR syntax` or special :ref:`NSA VGPR syntax`. + +*Size:* 1-13 dwords. Actual size depends on syntax, opcode, :ref:`dim` and :ref:`a16`. 
+ +* If specified using :ref:`NSA VGPR syntax`, the size is 1-13 dwords. +* If specified using :ref:`standard VGPR syntax`, the size is 1-8 dwords. Opcodes which require more than 8 dwords for address size must specify 16 dwords due to a limited range of supported register sequences. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vaddr_5: + +vaddr +===== + +This is an optional operand which may specify offset and/or index. + +*Size:* 0, 1 or 2 dwords. Size is controlled by modifiers :ref:`offen` and :ref:`idxen`: + +* If only :ref:`idxen` is specified, this operand supplies an index. Size is 1 dword. +* If only :ref:`offen` is specified, this operand supplies an offset. Size is 1 dword. +* If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords. +* If none of these modifiers are specified, this operand must be set to :ref:`off`. 
+ +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vaddr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vaddr: + +vaddr +===== + +An offset from the start of GDS/LDS memory. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vcc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vcc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vcc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vcc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,16 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vcc: + +vcc +=== + +Vector condition code. This operand depends on wavefront size: + +* Should be :ref:`vcc_lo` if wavefront size is 32. +* Should be :ref:`vcc` if wavefront size is 64. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata0_1: + +vdata0 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata0.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata0: + +vdata0 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_10: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata1_1: + +vdata1 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_1: + +vdata +===== + +Instruction input. + +*Size:* 2 dwords.
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata1: + +vdata1 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_2: + +vdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_3: + +vdata +===== + +Instruction input. + +*Size:* 3 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_4: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,26 @@ +..
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_5: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_6: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` specifies that data in registers are packed; each value occupies 16 bits.
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_7: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` which may specify from 1 to 4 data elements. Each data element occupies 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_8: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified.
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata_9: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdata.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdata: + +vdata +===== + +Instruction input. + +*Size:* 1 dword.
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_10: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_11.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_11.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_11.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_11.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_11: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 3 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_12.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_12.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_12.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_12.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_12: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_13.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_13.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_13.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_13.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_13: + +vdst +==== + +Instruction output: data read from a memory buffer. + +If :ref:`lds` is specified, this operand is ignored by H/W and data are stored directly into LDS. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + + Note that :ref:`tfe` and :ref:`lds` cannot be used together. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_1: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_2: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_3: + +vdst +==== + +Instruction output. + +*Size:* 3 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_4: + +vdst +==== + +Data returned by a 32-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_5: + +vdst +==== + +Data returned by a 64-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_6: + +vdst +==== + +Image data to load by an *image_gather4* instruction. + +*Size:* 4 data elements by default. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. + +:ref:`d16` and :ref:`tfe` affect operand size as follows: + +* :ref:`d16` specifies that data elements in registers are packed; each value occupies 16 bits. +* :ref:`tfe` adds one dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_7: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies 1 dword. 
+* :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_8: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask`, :ref:`tfe` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` specifies that data elements in registers are packed; each value occupies 16 bits. +* :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst_9: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,28 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vsrc_1: + +vsrc +==== + +Data to copy to export buffers. This is an optional operand. It must be specified as :ref:`off` if not used. + +The :ref:`compr` modifier indicates the use of compressed (16-bit) data. This limits the number of source operands from 4 to 2: + +* src0 and src1 must specify the first register (or :ref:`off`). +* src2 and src3 must specify the second register (or :ref:`off`). + +An example: + +.. parsed-literal:: + + exp mrtz v3, v3, off, off compr + +*Size:* 1 dword.
+ +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vsrc_2: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vsrc_3: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_vsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_vsrc: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_waitcnt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_waitcnt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx10_waitcnt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx10_waitcnt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,64 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx10_waitcnt: + +waitcnt +======= + +Counts of outstanding instructions to wait for. + +The bits of this operand have the following meaning: + + ========== ========= ================================================ ============ + High Bits Low Bits Description Value Range + ========== ========= ================================================ ============ + 15:14 3:0 VM_CNT: vector memory operations count. 0..63 + \- 6:4 EXP_CNT: export count. 0..7 + \- 11:8 LGKM_CNT: LDS, GDS, Constant and Message count. 0..15 + ========== ========= ================================================ ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A combination of *vmcnt*, *expcnt*, *lgkmcnt* and other values described below. 
+ + ====================== ====================================================================== + Syntax Description + ====================== ====================================================================== + vmcnt(<*N*>) A VM_CNT value. *N* must not exceed the largest VM_CNT value. + expcnt(<*N*>) An EXP_CNT value. *N* must not exceed the largest EXP_CNT value. + lgkmcnt(<*N*>) An LGKM_CNT value. *N* must not exceed the largest LGKM_CNT value. + vmcnt_sat(<*N*>) A VM_CNT value computed as min(*N*, the largest VM_CNT value). + expcnt_sat(<*N*>) An EXP_CNT value computed as min(*N*, the largest EXP_CNT value). + lgkmcnt_sat(<*N*>) An LGKM_CNT value computed as min(*N*, the largest LGKM_CNT value). + ====================== ====================================================================== + +These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators. + +*N* is either an +:ref:`integer number` or an +:ref:`absolute expression`. + +Examples: + +.. parsed-literal:: + + vm_cnt = 1 + exp_cnt = 2 + lgkm_cnt = 3 + cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8) + + s_waitcnt cnt + s_waitcnt 1 | (2 << 4) | (3 << 8) // the same as above + s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3) // the same as above + s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt) // the same as above + + s_waitcnt vmcnt(1) + s_waitcnt expcnt(2) lgkmcnt(3) + s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3) + s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_attr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_attr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_attr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_attr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,29 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_attr: + +attr +==== + +Interpolation attribute and channel: + + ============== =================================== + Syntax Description + ============== =================================== + attr{0..32}.x Attribute 0..32 with *x* channel. + attr{0..32}.y Attribute 0..32 with *y* channel. + attr{0..32}.z Attribute 0..32 with *z* channel. + attr{0..32}.w Attribute 0..32 with *w* channel. + ============== =================================== + +Examples: + +.. parsed-literal:: + + v_interp_p1_f32 v1, v0, attr0.x + v_interp_p1_f32 v1, v0, attr32.w diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_dst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_dst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_dst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_dst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_dst: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_hwreg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_hwreg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_hwreg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_hwreg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,72 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_hwreg: + +hwreg +===== + +Bits of a hardware register being accessed. + +The bits of this operand have the following meaning: + + ======= ===================== ============ + Bits Description Value Range + ======= ===================== ============ + 5:0 Register *id*. 0..63 + 10:6 First bit *offset*. 0..31 + 15:11 *Size* in bits. 1..32 + ======= ===================== ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* An *hwreg* value described below. + + ==================================== ============================================================================ + Hwreg Value Syntax Description + ==================================== ============================================================================ + hwreg({0..63}) All bits of a register indicated by its *id*. + hwreg(<*name*>) All bits of a register indicated by its *name*. + hwreg({0..63}, {0..31}, {1..32}) Register bits indicated by register *id*, first bit *offset* and *size*. + hwreg(<*name*>, {0..31}, {1..32}) Register bits indicated by register *name*, first bit *offset* and *size*. + ==================================== ============================================================================ + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. + +Defined register *names* include: + + =================== ========================================== + Name Description + =================== ========================================== + HW_REG_MODE Shader writeable mode bits. + HW_REG_STATUS Shader read-only status. + HW_REG_TRAPSTS Trap status. + HW_REG_HW_ID Id of wave, simd, compute unit, etc. 
+ HW_REG_GPR_ALLOC Per-wave SGPR and VGPR allocation. + HW_REG_LDS_ALLOC Per-wave LDS allocation. + HW_REG_IB_STS Counters of outstanding instructions. + =================== ========================================== + +Examples: + +.. parsed-literal:: + + reg = 1 + offset = 2 + size = 4 + hwreg_enc = reg | (offset << 6) | ((size - 1) << 11) + + s_getreg_b32 s2, 0x1881 + s_getreg_b32 s2, hwreg_enc // the same as above + s_getreg_b32 s2, hwreg(1, 2, 4) // the same as above + s_getreg_b32 s2, hwreg(reg, offset, size) // the same as above + + s_getreg_b32 s2, hwreg(15) + s_getreg_b32 s2, hwreg(51, 1, 31) + s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_imm16_1: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx7_imm16_2: + +imm16 +===== + +A 16-bit :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_imm16.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_imm16: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_label.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_label.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_label.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_label.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,36 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_label: + +label +===== + +A branch target which is a 16-bit signed integer treated as a PC-relative dword offset. + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. 
+* A :ref:`symbol` (for example, a label) representing a relocatable address in the same compilation unit from which it is referenced. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker. + +Examples: + +.. parsed-literal:: + + offset = 30 + label_1: + label_2 = . + 4 + + s_branch 32 + s_branch offset + 2 + s_branch label_1 + s_branch label_2 + s_branch label_3 + s_branch label_4 + + label_3 = label_2 + 4 + label_4: diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_m.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_m: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_msg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_msg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_msg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_msg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,93 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_msg: + +msg +=== + +A 16-bit message code.
The bits of this operand have the following meaning: + + ============ =============================== =============== + Bits Description Value Range + ============ =============================== =============== + 3:0 Message *type*. 0..15 + 6:4 Optional *operation*. 0..7 + 7:7 Unused. \- + 9:8 Optional *stream*. 0..3 + 15:10 Unused. \- + ============ =============================== =============== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A *sendmsg* value described below. + + ==================================== ==================================================== + Sendmsg Value Syntax Description + ==================================== ==================================================== + sendmsg(<*type*>) A message identified by its *type*. + sendmsg(<*type*>,<*op*>) A message identified by its *type* and *operation*. + sendmsg(<*type*>,<*op*>,<*stream*>) A message identified by its *type* and *operation* + with a stream *id*. + ==================================== ==================================================== + +*Type* may be specified using message *name* or message *id*. + +*Op* may be specified using operation *name* or operation *id*. + +Stream *id* is an integer in the range 0..3. + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. 
+ +Each message type supports specific operations: + + ================= ========== ============================== ============ ========== + Message name Message Id Supported Operations Operation Id Stream Id + ================= ========== ============================== ============ ========== + MSG_INTERRUPT 1 \- \- \- + MSG_GS 2 GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_GS_DONE 3 GS_OP_NOP 0 \- + \ GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_SYSMSG 15 SYSMSG_OP_ECC_ERR_INTERRUPT 1 \- + \ SYSMSG_OP_REG_RD 2 \- + \ SYSMSG_OP_HOST_TRAP_ACK 3 \- + \ SYSMSG_OP_TTRACE_PC 4 \- + ================= ========== ============================== ============ ========== + +*Sendmsg* arguments are validated depending on how the *type* value is specified: + +* If message *type* is specified by name, argument values must satisfy the limitations detailed in the table above. +* If message *type* is specified as a number, each argument must not exceed the corresponding value range (see the first table). + +Examples: + +..
parsed-literal:: + + // numeric message code + msg = 0x10 + s_sendmsg 0x12 + s_sendmsg msg + 2 + + // sendmsg with strict arguments validation + s_sendmsg sendmsg(MSG_INTERRUPT) + s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT) + s_sendmsg sendmsg(MSG_GS, 2) + s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1) + s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC) + + // sendmsg with validation of value range only + msg = 2 + op = 3 + stream = 1 + s_sendmsg sendmsg(msg, op, stream) + s_sendmsg sendmsg(2, GS_OP_CUT) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_opt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_opt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_opt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_opt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_opt: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_param.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_param.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_param.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_param.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx7_param: + +param +===== + +Interpolation parameter to read: + + ============ =================================== + Syntax Description + ============ =================================== + p0 Parameter *P0*. + p10 Parameter *P10*. + p20 Parameter *P20*. + ============ =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sbase_1: + +sbase +===== + +A 64-bit base address for scalar memory operations. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sbase.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sbase: + +sbase +===== + +A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst_1: + +sdst +==== + +Instruction output. + +*Size:* 16 dwords. + +*Operands:* :ref:`s` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst_2: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst_3: + +sdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst_4: + +sdst +==== + +Instruction output. + +*Size:* 8 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst_5: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst_6: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst_7: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_sdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_sdst: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_simm32_1: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f32* as described :ref:`here`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_simm32.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_simm32: + +simm32 +====== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value is truncated to 32 bits. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_soffset_1: + +soffset +======= + +An unsigned offset added to the base address to get the memory address. + +* If offset is specified as a register, it supplies an unsigned byte offset but the 2 least significant bits are ignored. +* If offset is specified as a :ref:`uimm32`, it supplies a 32-bit unsigned byte offset but the 2 least significant bits are ignored. +* If offset is specified as a :ref:`uimm8`, it supplies an 8-bit unsigned dword offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`uimm8`, :ref:`uimm32` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_soffset.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_soffset: + +soffset +======= + +An unsigned byte offset. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_10: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_1: + +src +=== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_2: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_3: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`lds_direct` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_5: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_6: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_7: + +src +=== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_8: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src_9: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_srsrc_1: + +srsrc +===== + +Buffer resource constant which defines the address and characteristics of the buffer in memory. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_srsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_srsrc: + +srsrc +===== + +Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format. + +*Size:* 8 dwords by default, 4 dwords if :ref:`r128` is specified. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssamp.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssamp.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssamp.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssamp.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssamp: + +ssamp +===== + +Sampler constant used to specify filtering options applied to the image data after it is read. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_10: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_1: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_2: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_3: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_4: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_5: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_6: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_7: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_8: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc_9: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_ssrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_ssrc: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_tgt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_tgt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_tgt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_tgt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,23 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_tgt: + +tgt +=== + +An export target: + + ============== =================================== + Syntax Description + ============== =================================== + pos{0..3} Copy vertex position 0..3. + param{0..31} Copy vertex parameter 0..31. + mrt{0..7} Copy pixel color to the MRTs 0..7. + mrtz Copy pixel depth (Z) data. + null Copy nothing. 
+ ============== =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vaddr_1: + +vaddr +===== + +A 64-bit flat address. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vaddr_2: + +vaddr +===== + +Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image. + +*Size:* 1, 2, 3, 4, 8 or 16 dwords. Actual size depends on opcode and specific image being handled. + + Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction. + + Note 2. The actual image address size may vary from 1 to 13 dwords, but the assembler currently supports only a limited range of register sequences. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,24 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vaddr_3: + +vaddr +===== + +This is an optional operand which may specify a 64-bit address, offset and/or index. + +*Size:* 0, 1 or 2 dwords. 
Size is controlled by modifiers :ref:`addr64`, :ref:`offen` and :ref:`idxen`: + +* If only :ref:`addr64` is specified, this operand supplies a 64-bit address. Size is 2 dwords. +* If only :ref:`idxen` is specified, this operand supplies an index. Size is 1 dword. +* If only :ref:`offen` is specified, this operand supplies an offset. Size is 1 dword. +* If both :ref:`idxen` and :ref:`offen` are specified, index is in the first register and offset is in the second. Size is 2 dwords. +* If none of these modifiers are specified, this operand must be set to :ref:`off`. +* All other combinations of these modifiers are illegal. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vaddr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vaddr: + +vaddr +===== + +An offset from the start of GDS/LDS memory. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vcc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vcc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vcc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vcc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vcc: + +vcc +=== + +Vector condition code. + +*Size:* 2 dwords. + +*Operands:* :ref:`vcc` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata0_1: + +vdata0 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata0.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata0: + +vdata0 +====== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata1_1: + +vdata1 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_1: + +vdata +===== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata1: + +vdata1 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_2: + +vdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_3: + +vdata +===== + +Instruction input. + +*Size:* 3 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_4: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_5: + +vdata +===== + +Input data for an atomic instruction. 
+ +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_6: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` which may specify from 1 to 4 data elements. Each data element occupies 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_7: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata_8: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx7_vdata_9: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdata.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdata: + +vdata +===== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_10: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 3 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_11.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_11.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_11.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_11.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_11: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_12.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_12.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_12.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_12.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_12: + +vdst +==== + +Instruction output: data read from a memory buffer. + +If :ref:`lds` is specified, this operand is ignored by H/W and data are stored directly into LDS. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + + Note that :ref:`tfe` and :ref:`lds` cannot be used together. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_1: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_2: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_3: + +vdst +==== + +Instruction output. + +*Size:* 3 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_4: + +vdst +==== + +Data returned by a 32-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_5: + +vdst +==== + +Data returned by a 64-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_6: + +vdst +==== + +Image data to load by an *image_gather4* instruction. + +*Size:* 4 data elements by default. Each data element occupies 1 dword. :ref:`tfe` adds one more dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_7: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_8: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst_9: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vsrc_1: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vsrc_2: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vsrc_3: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_vsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,28 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_vsrc: + +vsrc +==== + +Data to copy to export buffers. This is an optional operand. Must be specified as :ref:`off` if not used. + +:ref:`compr` modifier indicates use of compressed (16-bit) data. This limits number of source operands from 4 to 2: + +* src0 and src1 must specify the first register (or :ref:`off`). 
+* src2 and src3 must specify the second register (or :ref:`off`). + +An example: + +.. parsed-literal:: + + exp mrtz v3, v3, off, off compr + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_waitcnt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_waitcnt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx7_waitcnt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx7_waitcnt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,64 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx7_waitcnt: + +waitcnt +======= + +Counts of outstanding instructions to wait for. + +The bits of this operand have the following meaning: + + ===== ================================================ ============ + Bits Description Value Range + ===== ================================================ ============ + 3:0 VM_CNT: vector memory operations count. 0..15 + 6:4 EXP_CNT: export count. 0..7 + 12:8 LGKM_CNT: LDS, GDS, Constant and Message count. 0..31 + ===== ================================================ ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A combination of *vmcnt*, *expcnt*, *lgkmcnt* and other values described below. + + ====================== ====================================================================== + Syntax Description + ====================== ====================================================================== + vmcnt(<*N*>) A VM_CNT value. *N* must not exceed the largest VM_CNT value. + expcnt(<*N*>) An EXP_CNT value. *N* must not exceed the largest EXP_CNT value. 
+ lgkmcnt(<*N*>) An LGKM_CNT value. *N* must not exceed the largest LGKM_CNT value. + vmcnt_sat(<*N*>) A VM_CNT value computed as min(*N*, the largest VM_CNT value). + expcnt_sat(<*N*>) An EXP_CNT value computed as min(*N*, the largest EXP_CNT value). + lgkmcnt_sat(<*N*>) An LGKM_CNT value computed as min(*N*, the largest LGKM_CNT value). + ====================== ====================================================================== + +These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators. + +*N* is either an +:ref:`integer number` or an +:ref:`absolute expression`. + +Examples: + +.. parsed-literal:: + + vm_cnt = 1 + exp_cnt = 2 + lgkm_cnt = 3 + cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8) + + s_waitcnt cnt + s_waitcnt 1 | (2 << 4) | (3 << 8) // the same as above + s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3) // the same as above + s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt) // the same as above + + s_waitcnt vmcnt(1) + s_waitcnt expcnt(2) lgkmcnt(3) + s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3) + s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_attr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_attr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_attr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_attr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,29 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx8_attr: + +attr +==== + +Interpolation attribute and channel: + + ============== =================================== + Syntax Description + ============== =================================== + attr{0..32}.x Attribute 0..32 with *x* channel. + attr{0..32}.y Attribute 0..32 with *y* channel. + attr{0..32}.z Attribute 0..32 with *z* channel. + attr{0..32}.w Attribute 0..32 with *w* channel. + ============== =================================== + +Examples: + +.. parsed-literal:: + + v_interp_p1_f32 v1, v0, attr0.x + v_interp_p1_f32 v1, v0, attr32.w diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_dst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_dst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_dst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_dst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_dst: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_hwreg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_hwreg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_hwreg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_hwreg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,72 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx8_hwreg: + +hwreg +===== + +Bits of a hardware register being accessed. + +The bits of this operand have the following meaning: + + ======= ===================== ============ + Bits Description Value Range + ======= ===================== ============ + 5:0 Register *id*. 0..63 + 10:6 First bit *offset*. 0..31 + 15:11 *Size* in bits. 1..32 + ======= ===================== ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* An *hwreg* value described below. + + ==================================== ============================================================================ + Hwreg Value Syntax Description + ==================================== ============================================================================ + hwreg({0..63}) All bits of a register indicated by its *id*. + hwreg(<*name*>) All bits of a register indicated by its *name*. + hwreg({0..63}, {0..31}, {1..32}) Register bits indicated by register *id*, first bit *offset* and *size*. + hwreg(<*name*>, {0..31}, {1..32}) Register bits indicated by register *name*, first bit *offset* and *size*. + ==================================== ============================================================================ + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. + +Defined register *names* include: + + =================== ========================================== + Name Description + =================== ========================================== + HW_REG_MODE Shader writeable mode bits. + HW_REG_STATUS Shader read-only status. + HW_REG_TRAPSTS Trap status. + HW_REG_HW_ID Id of wave, simd, compute unit, etc. + HW_REG_GPR_ALLOC Per-wave SGPR and VGPR allocation. + HW_REG_LDS_ALLOC Per-wave LDS allocation. + HW_REG_IB_STS Counters of outstanding instructions. 
+ =================== ========================================== + +Examples: + +.. parsed-literal:: + + reg = 1 + offset = 2 + size = 4 + hwreg_enc = reg | (offset << 6) | ((size - 1) << 11) + + s_getreg_b32 s2, 0x1881 + s_getreg_b32 s2, hwreg_enc // the same as above + s_getreg_b32 s2, hwreg(1, 2, 4) // the same as above + s_getreg_b32 s2, hwreg(reg, offset, size) // the same as above + + s_getreg_b32 s2, hwreg(15) + s_getreg_b32 s2, hwreg(51, 1, 31) + s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imask.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imask.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imask.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imask.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,65 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_imask: + +imask +===== + +This operand is a mask which controls indexing mode for operands of subsequent instructions. +Bits 0, 1 and 2 control indexing of *src0*, *src1* and *src2*, while bit 3 controls indexing of *dst*. +Value 1 enables indexing and value 0 disables it. + + ===== ======================================== + Bit Meaning + ===== ======================================== + 0 Enables or disables *src0* indexing. + 1 Enables or disables *src1* indexing. + 2 Enables or disables *src2* indexing. + 3 Enables or disables *dst* indexing. + ===== ======================================== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..15. +* A *gpr_idx* value described below. 
+ + ==================================== =========================================== + Gpr_idx Value Syntax Description + ==================================== =========================================== + gpr_idx(*<operands>*) Enable indexing for specified *operands* + and disable it for the rest. + *Operands* is a comma-separated list of + values which may include: + + * "SRC0" - enable *src0* indexing. + + * "SRC1" - enable *src1* indexing. + + * "SRC2" - enable *src2* indexing. + + * "DST" - enable *dst* indexing. + + Each of these values may be specified only + once. + + *Operands* list may be empty; this syntax + disables indexing for all operands. + ==================================== =========================================== + +Examples: + +.. parsed-literal:: + + s_set_gpr_idx_mode 0 + s_set_gpr_idx_mode gpr_idx() // the same as above + + s_set_gpr_idx_mode 15 + s_set_gpr_idx_mode gpr_idx(DST,SRC0,SRC1,SRC2) // the same as above + s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) // the same as above + + s_set_gpr_idx_mode gpr_idx(DST,SRC1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_imm16_1: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..65535. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_imm16_2: + +imm16 +===== + +A 16-bit :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_imm16.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_imm16: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_label.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_label.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_label.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_label.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,36 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_label: + +label +===== + +A branch target which is a 16-bit signed integer treated as a PC-relative dword offset. + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. +* A :ref:`symbol` (for example, a label) representing a relocatable address in the same compilation unit where it is referred from. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker. + +Examples: + +.. parsed-literal:: + + offset = 30 + label_1: + label_2 = . + 4 + + s_branch 32 + s_branch offset + 2 + s_branch label_1 + s_branch label_2 + s_branch label_3 + s_branch label_4 + + label_3 = label_2 + 4 + label_4: diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx8_m_1: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_m.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_m: + +m += + +This operand may be used with integer operand modifier :ref:`sext`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_msg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_msg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_msg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_msg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,94 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_msg: + +msg +=== + +A 16-bit message code. The bits of this operand have the following meaning: + + ============ =============================== =============== + Bits Description Value Range + ============ =============================== =============== + 3:0 Message *type*. 0..15 + 6:4 Optional *operation*. 0..7 + 7:7 Unused. \- + 9:8 Optional *stream*. 0..3 + 15:10 Unused. 
\- + ============ =============================== =============== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A *sendmsg* value described below. + + ==================================== ==================================================== + Sendmsg Value Syntax Description + ==================================== ==================================================== + sendmsg(<*type*>) A message identified by its *type*. + sendmsg(<*type*>,<*op*>) A message identified by its *type* and *operation*. + sendmsg(<*type*>,<*op*>,<*stream*>) A message identified by its *type* and *operation* + with a stream *id*. + ==================================== ==================================================== + +*Type* may be specified using message *name* or message *id*. + +*Op* may be specified using operation *name* or operation *id*. + +Stream *id* is an integer in the range 0..3. + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. 
+ +Each message type supports specific operations: + + ================= ========== ============================== ============ ========== + Message name Message Id Supported Operations Operation Id Stream Id + ================= ========== ============================== ============ ========== + MSG_INTERRUPT 1 \- \- \- + MSG_GS 2 GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_GS_DONE 3 GS_OP_NOP 0 \- + \ GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_SAVEWAVE 4 \- \- \- + MSG_SYSMSG 15 SYSMSG_OP_ECC_ERR_INTERRUPT 1 \- + \ SYSMSG_OP_REG_RD 2 \- + \ SYSMSG_OP_HOST_TRAP_ACK 3 \- + \ SYSMSG_OP_TTRACE_PC 4 \- + ================= ========== ============================== ============ ========== + +*Sendmsg* arguments are validated depending on how the *type* value is specified: + +* If message *type* is specified by name, argument values must satisfy the limitations detailed in the table above. +* If message *type* is specified as a number, each argument must not exceed the corresponding value range (see the first table). + +Examples: + +.. 
parsed-literal:: + + // numeric message code + msg = 0x10 + s_sendmsg 0x12 + s_sendmsg msg + 2 + + // sendmsg with strict arguments validation + s_sendmsg sendmsg(MSG_INTERRUPT) + s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT) + s_sendmsg sendmsg(MSG_GS, 2) + s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1) + s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC) + + // sendmsg with validation of value range only + msg = 2 + op = 3 + stream = 1 + s_sendmsg sendmsg(msg, op, stream) + s_sendmsg sendmsg(2, GS_OP_CUT) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_opt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_opt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_opt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_opt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_opt: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_param.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_param.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_param.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_param.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx8_param: + +param +===== + +Interpolation parameter to read: + + ============ =================================== + Syntax Description + ============ =================================== + p0 Parameter *P0*. + p10 Parameter *P10*. + p20 Parameter *P20*. + ============ =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_probe.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_probe.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_probe.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_probe.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,24 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_probe: + +probe +===== + +A bit mask which indicates request permissions. + +This operand must be specified as an :ref:`integer_number` or an :ref:`absolute_expression`. +The value is truncated to 7 bits, but only 3 low bits are significant. + + ============ ============================== + Bit Number Description + ============ ============================== + 0 Request *read* permission. + 1 Request *write* permission. + 2 Request *execute* permission. + ============ ============================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sbase_1: + +sbase +===== + +A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sbase.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sbase: + +sbase +===== + +A 64-bit base address for scalar memory operations. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdata_1: + +sdata +===== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdata_2: + +sdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdata.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdata: + +sdata +===== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst_1: + +sdst +==== + +Instruction output. + +*Size:* 16 dwords. + +*Operands:* :ref:`s` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst_2: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst_3: + +sdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst_4: + +sdst +==== + +Instruction output. + +*Size:* 8 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst_5: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst_6: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst_7: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_sdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_sdst: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_simm32_1: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f16* as described :ref:`here`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_simm32_2: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f32* as described :ref:`here`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_simm32.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_simm32: + +simm32 +====== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value is truncated to 32 bits. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_soffset_1: + +soffset +======= + +An unsigned byte offset added to the base address to get memory address. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`uimm20` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_soffset_2: + +soffset +======= + +An unsigned byte offset added to the base address to get memory address. + +*Size:* 1 dword. + +*Operands:* :ref:`m0`, :ref:`uimm20` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_soffset.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_soffset: + +soffset +======= + +An unsigned byte offset. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_10: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_1: + +src +=== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_2: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_3: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`lds_direct` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_5: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_6: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_7: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_8: + +src +=== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src_9: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_srsrc_1: + +srsrc +===== + +Buffer resource constant which defines the address and characteristics of the buffer in memory. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_srsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_srsrc: + +srsrc +===== + +Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format. + +*Size:* 8 dwords by default, 4 dwords if :ref:`r128` is specified. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssamp.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssamp.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssamp.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssamp.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssamp: + +ssamp +===== + +Sampler constant used to specify filtering options applied to the image data after it is read. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_1: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_2: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_3: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_4: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_5: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_6: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_7: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc_8: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_ssrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_ssrc: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`trap`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_tgt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_tgt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_tgt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_tgt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,23 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_tgt: + +tgt +=== + +An export target: + + ============== =================================== + Syntax Description + ============== =================================== + pos{0..3} Copy vertex position 0..3. + param{0..31} Copy vertex parameter 0..31. + mrt{0..7} Copy pixel color to the MRTs 0..7. 
+ mrtz Copy pixel depth (Z) data. + null Copy nothing. + ============== =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vaddr_1: + +vaddr +===== + +A 64-bit flat address. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vaddr_2: + +vaddr +===== + +Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image. + +*Size:* 1, 2, 3, 4, 8 or 16 dwords. The actual size depends on the opcode and the specific image being handled. + + Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction. + + Note 2. The actual image address size may vary from 1 to 13 dwords, but the assembler currently supports only a limited range of register sequences. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vaddr_3: + +vaddr +===== + +This is an optional operand which may specify offset and/or index. + +*Size:* 0, 1 or 2 dwords.
Size is controlled by modifiers :ref:`offen` and :ref:`idxen`: + +* If only :ref:`idxen` is specified, this operand supplies an index. Size is 1 dword. +* If only :ref:`offen` is specified, this operand supplies an offset. Size is 1 dword. +* If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords. +* If none of these modifiers are specified, this operand must be set to :ref:`off`. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vaddr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vaddr: + +vaddr +===== + +An offset from the start of GDS/LDS memory. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vcc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vcc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vcc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vcc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vcc: + +vcc +=== + +Vector condition code. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`vcc` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata0_1: + +vdata0 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata0.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata0: + +vdata0 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_10: + +vdata +===== + +16-bit data to store by a buffer instruction. + +*Size:* depends on GFX8 GPU revision: + +* 3 dwords for GFX8.0. This H/W supports no packing. +* 2 dwords for GFX8.1+. This H/W supports data packing. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_11.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_11.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_11.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_11.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_11: + +vdata +===== + +16-bit data to store by a buffer instruction. + +*Size:* depends on GFX8 GPU revision: + +* 4 dwords for GFX8.0. This H/W supports no packing. +* 2 dwords for GFX8.1+. This H/W supports data packing. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx8_vdata1_1: + +vdata1 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_12.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_12.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_12.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_12.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_12: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_13.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_13.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_13.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_13.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_13: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_14.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_14.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_14.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_14.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_14: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_1: + +vdata +===== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata1: + +vdata1 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_2: + +vdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_3: + +vdata +===== + +Instruction input. + +*Size:* 3 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_4: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,26 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_5: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,23 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_6: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` has different meaning for GFX8.0 and GFX8.1: + + * For GFX8.0 this modifier does not affect size of data elements in registers. Data in registers are stored in low 16 bits, high 16 bits are unused. There is no packing. 
+ * Starting from GFX8.1 this modifier specifies that data elements in registers are packed; each value occupies 16 bits. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_7: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` which may specify from 1 to 4 data elements. Each data element occupies 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_8: + +vdata +===== + +16-bit data to store by a buffer instruction. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata_9: + +vdata +===== + +16-bit data to store by a buffer instruction. + +*Size:* depends on GFX8 GPU revision: + +* 2 dwords for GFX8.0. This H/W supports no packing. +* 1 dword for GFX8.1+. This H/W supports data packing. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdata.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdata: + +vdata +===== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_10: + +vdst +==== + +Instruction output: data read from a memory buffer and converted to a 16-bit format. + +*Size:* depends on GFX8 GPU revision and :ref:`tfe`: + +* 2 dwords for GFX8.0. This H/W supports no packing. +* 1 dword for GFX8.1+. This H/W supports data packing. +* :ref:`tfe` adds one dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_11.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_11.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_11.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_11.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_11: + +vdst +==== + +Instruction output: data read from a memory buffer and converted to a 16-bit format. + +*Size:* depends on GFX8 GPU revision and :ref:`tfe`: + +* 3 dwords for GFX8.0. This H/W supports no packing. +* 2 dwords for GFX8.1+. This H/W supports data packing. +* :ref:`tfe` adds one dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_12.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_12.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_12.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_12.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_12: + +vdst +==== + +Instruction output: data read from a memory buffer and converted to a 16-bit format. + +*Size:* depends on GFX8 GPU revision and :ref:`tfe`: + +* 4 dwords for GFX8.0. This H/W supports no packing. +* 2 dwords for GFX8.1+. This H/W supports data packing. +* :ref:`tfe` adds one dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_13.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_13.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_13.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_13.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_13: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_14.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_14.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_14.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_14.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_14: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_15.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_15.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_15.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_15.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_15: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 3 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_16.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_16.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_16.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_16.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_16: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_17.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_17.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_17.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_17.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_17: + +vdst +==== + +Instruction output: data read from a memory buffer. + +If :ref:`lds` is specified, this operand is ignored by H/W and data are stored directly into LDS. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + + Note that :ref:`tfe` and :ref:`lds` cannot be used together. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_1: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_2: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_3: + +vdst +==== + +Instruction output. + +*Size:* 3 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_4: + +vdst +==== + +Data returned by a 32-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_5: + +vdst +==== + +Data returned by a 64-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_6: + +vdst +==== + +Image data to load by an *image_gather4* instruction. + +*Size:* 4 data elements by default. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. + +:ref:`d16` and :ref:`tfe` affect operand size as follows: + +* :ref:`d16` has different meaning for GFX8.0 and GFX8.1: + + * For GFX8.0 this modifier does not affect size of data elements in registers. Data in registers are stored in low 16 bits, high 16 bits are unused. There is no packing. + * Starting from GFX8.1 this modifier specifies that data elements in registers are packed; each value occupies 16 bits. + +* :ref:`tfe` adds one dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx8_vdst_7: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,25 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_8: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask`, :ref:`tfe` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` has different meaning for GFX8.0 and GFX8.1: + + * For GFX8.0 this modifier does not affect size of data elements in registers. Data in registers are stored in low 16 bits, high 16 bits are unused. There is no packing. + * Starting from GFX8.1 this modifier specifies that data elements in registers are packed; each value occupies 16 bits. + +* :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst_9: + +vdst +==== + +Instruction output: data read from a memory buffer and converted to a 16-bit format. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vsrc_1: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vsrc_2: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vsrc_3: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_vsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,28 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx8_vsrc: + +vsrc +==== + +Data to copy to export buffers. This is an optional operand. Must be specified as :ref:`off` if not used. + +:ref:`compr` modifier indicates use of compressed (16-bit) data. This limits the number of source operands from 4 to 2: + +* src0 and src1 must specify the first register (or :ref:`off`). +* src2 and src3 must specify the second register (or :ref:`off`). + +An example: + +.. parsed-literal:: + + exp mrtz v3, v3, off, off compr + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_waitcnt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_waitcnt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx8_waitcnt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx8_waitcnt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,64 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx8_waitcnt: + +waitcnt +======= + +Counts of outstanding instructions to wait for. + +The bits of this operand have the following meaning: + + ===== ================================================ ============ + Bits Description Value Range + ===== ================================================ ============ + 3:0 VM_CNT: vector memory operations count. 0..15 + 6:4 EXP_CNT: export count. 0..7 + 11:8 LGKM_CNT: LDS, GDS, Constant and Message count. 0..15 + ===== ================================================ ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A combination of *vmcnt*, *expcnt*, *lgkmcnt* and other values described below. + + ====================== ====================================================================== + Syntax Description + ====================== ====================================================================== + vmcnt(<*N*>) A VM_CNT value. *N* must not exceed the largest VM_CNT value. + expcnt(<*N*>) An EXP_CNT value. *N* must not exceed the largest EXP_CNT value. + lgkmcnt(<*N*>) An LGKM_CNT value. *N* must not exceed the largest LGKM_CNT value. + vmcnt_sat(<*N*>) A VM_CNT value computed as min(*N*, the largest VM_CNT value). + expcnt_sat(<*N*>) An EXP_CNT value computed as min(*N*, the largest EXP_CNT value). + lgkmcnt_sat(<*N*>) An LGKM_CNT value computed as min(*N*, the largest LGKM_CNT value). + ====================== ====================================================================== + +These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators. + +*N* is either an +:ref:`integer number` or an +:ref:`absolute expression`. + +Examples: + +.. 
parsed-literal:: + + vm_cnt = 1 + exp_cnt = 2 + lgkm_cnt = 3 + cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8) + + s_waitcnt cnt + s_waitcnt 1 | (2 << 4) | (3 << 8) // the same as above + s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3) // the same as above + s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt) // the same as above + + s_waitcnt vmcnt(1) + s_waitcnt expcnt(2) lgkmcnt(3) + s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3) + s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_fx_operand.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_fx_operand.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_fx_operand.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_fx_operand.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,16 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx900_fx_operand: + +FX Operand +========== + +This is an *f32* or *f16* operand depending on instruction modifiers: + +* Operand size is controlled by :ref:`m_op_sel_hi`. +* Location of 16-bit operand is controlled by :ref:`m_op_sel`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_m.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx900_m: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx900_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx900_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx900_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx900_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx900_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_fx_operand.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_fx_operand.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_fx_operand.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_fx_operand.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,16 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx904_fx_operand: + +FX Operand +========== + +This is an *f32* or *f16* operand depending on instruction modifiers: + +* Operand size is controlled by :ref:`m_op_sel_hi`. +* Location of 16-bit operand is controlled by :ref:`m_op_sel`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_m.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx904_m: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx904_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx904_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx904_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx904_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx904_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_fx_operand.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_fx_operand.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_fx_operand.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_fx_operand.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,16 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_fx_operand: + +FX Operand +========== + +This is an *f32* or *f16* operand depending on instruction modifiers: + +* Operand size is controlled by :ref:`m_op_sel_hi`. +* Location of 16-bit operand is controlled by :ref:`m_op_sel`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_m_1: + +m += + +This operand may be used with integer operand modifier :ref:`sext`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_m.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_m: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_src_2: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_src_3: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_src_4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx906_vsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx906_vsrc: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_dst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_dst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_dst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_dst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_dst: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_fx_operand.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_fx_operand.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_fx_operand.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_fx_operand.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,16 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_fx_operand: + +FX Operand +========== + +This is an *f32* or *f16* operand depending on instruction modifiers: + +* Operand size is controlled by :ref:`m_op_sel_hi`. +* Location of 16-bit operand is controlled by :ref:`m_op_sel`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_m_1: + +m += + +This operand may be used with integer operand modifier :ref:`sext`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_m.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_m: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_opt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_opt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_opt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_opt.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_opt: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_saddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_saddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_saddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_saddr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_saddr: + +saddr +===== + +An optional 64-bit flat global address. Must be specified as :ref:`off` if not used. + +See :ref:`vaddr` for description of available addressing modes. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_soffset.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_soffset.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_soffset.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_soffset.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_soffset: + +soffset +======= + +An unsigned byte offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_src_2: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_src_3: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`iconst`, :ref:`fconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_src_4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_src_5: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_src.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_srsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_srsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_srsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_srsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_srsrc: + +srsrc +===== + +Buffer resource constant which defines the address and characteristics of the buffer in memory. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vaddr_1: + +vaddr +===== + +This is an optional operand which may specify offset and/or index. + +*Size:* 0, 1 or 2 dwords. Size is controlled by modifiers :ref:`offen` and :ref:`idxen`: + +* If only :ref:`idxen` is specified, this operand supplies an index. Size is 1 dword. +* If only :ref:`offen` is specified, this operand supplies an offset. Size is 1 dword. 
+* If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords. +* If none of these modifiers are specified, this operand must be set to :ref:`off`. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vaddr.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vaddr: + +vaddr +===== + +A 64-bit flat global address or a 32-bit offset depending on addressing mode: + +* Address = :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 64-bit address. This mode is indicated by :ref:`saddr` set to :ref:`off`. +* Address = :ref:`saddr` + :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 32-bit offset. This mode is used when :ref:`saddr` is not :ref:`off`. + +*Size:* 1 or 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx908_vdata_1: + +vdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdata.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vdata: + +vdata +===== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vdst_1: + +vdst +==== + +Instruction output. + +*Size:* 1 dword.
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vdst_2: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vdst_3: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vdst_4: + +vdst +==== + +Instruction output. + +*Size:* 16 dwords. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vdst_5: + +vdst +==== + +Instruction output. + +*Size:* 32 dwords. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vdst.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vdst: + +vdst +==== + +Data returned by a 32-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_1.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vsrc_1: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_2.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vsrc_2: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vsrc_3: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vsrc_4: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_5.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vsrc_5: + +vsrc +==== + +Instruction input. + +*Size:* 16 dwords. 
+ +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vsrc_6: + +vsrc +==== + +Instruction input. + +*Size:* 32 dwords. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx908_vsrc.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx908_vsrc: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_dst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_dst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_dst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_dst.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_dst: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_fx_operand.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_fx_operand.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_fx_operand.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_fx_operand.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,16 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_fx_operand: + +FX Operand +========== + +This is an *f32* or *f16* operand depending on instruction modifiers: + +* Operand size is controlled by :ref:`m_op_sel_hi`. +* Location of 16-bit operand is controlled by :ref:`m_op_sel`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_hwreg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_hwreg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_hwreg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_hwreg.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,73 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_hwreg: + +hwreg +===== + +Bits of a hardware register being accessed. 
+ +The bits of this operand have the following meaning: + + ======= ===================== ============ + Bits Description Value Range + ======= ===================== ============ + 5:0 Register *id*. 0..63 + 10:6 First bit *offset*. 0..31 + 15:11 *Size* in bits. 1..32 + ======= ===================== ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* An *hwreg* value described below. + + ==================================== ============================================================================ + Hwreg Value Syntax Description + ==================================== ============================================================================ + hwreg({0..63}) All bits of a register indicated by its *id*. + hwreg(<*name*>) All bits of a register indicated by its *name*. + hwreg({0..63}, {0..31}, {1..32}) Register bits indicated by register *id*, first bit *offset* and *size*. + hwreg(<*name*>, {0..31}, {1..32}) Register bits indicated by register *name*, first bit *offset* and *size*. + ==================================== ============================================================================ + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. + +Defined register *names* include: + + =================== ========================================== + Name Description + =================== ========================================== + HW_REG_MODE Shader writeable mode bits. + HW_REG_STATUS Shader read-only status. + HW_REG_TRAPSTS Trap status. + HW_REG_HW_ID Id of wave, simd, compute unit, etc. + HW_REG_GPR_ALLOC Per-wave SGPR and VGPR allocation. + HW_REG_LDS_ALLOC Per-wave LDS allocation. + HW_REG_IB_STS Counters of outstanding instructions. + HW_REG_SH_MEM_BASES Memory aperture. + =================== ========================================== + +Examples: + +.. 
parsed-literal:: + + reg = 1 + offset = 2 + size = 4 + hwreg_enc = reg | (offset << 6) | ((size - 1) << 11) + + s_getreg_b32 s2, 0x1881 + s_getreg_b32 s2, hwreg_enc // the same as above + s_getreg_b32 s2, hwreg(1, 2, 4) // the same as above + s_getreg_b32 s2, hwreg(reg, offset, size) // the same as above + + s_getreg_b32 s2, hwreg(15) + s_getreg_b32 s2, hwreg(51, 1, 31) + s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imask.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imask.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imask.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imask.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,65 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_imask: + +imask +===== + +This operand is a mask which controls indexing mode for operands of subsequent instructions. +Bits 0, 1 and 2 control indexing of *src0*, *src1* and *src2*, while bit 3 controls indexing of *dst*. +Value 1 enables indexing and value 0 disables it. + + ===== ======================================== + Bit Meaning + ===== ======================================== + 0 Enables or disables *src0* indexing. + 1 Enables or disables *src1* indexing. + 2 Enables or disables *src2* indexing. + 3 Enables or disables *dst* indexing. + ===== ======================================== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..15. +* A *gpr_idx* value described below. 
+ + ==================================== =========================================== + Gpr_idx Value Syntax Description + ==================================== =========================================== + gpr_idx(*<operands>*) Enable indexing for specified *operands* + and disable it for the rest. + *Operands* is a comma-separated list of + values which may include: + + * "SRC0" - enable *src0* indexing. + + * "SRC1" - enable *src1* indexing. + + * "SRC2" - enable *src2* indexing. + + * "DST" - enable *dst* indexing. + + Each of these values may be specified only + once. + + *Operands* list may be empty; this syntax + disables indexing for all operands. + ==================================== =========================================== + +Examples: + +.. parsed-literal:: + + s_set_gpr_idx_mode 0 + s_set_gpr_idx_mode gpr_idx() // the same as above + + s_set_gpr_idx_mode 15 + s_set_gpr_idx_mode gpr_idx(DST,SRC0,SRC1,SRC2) // the same as above + s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) // the same as above + + s_set_gpr_idx_mode gpr_idx(DST,SRC1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_imm16_1: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..65535.
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_imm16_2: + +imm16 +===== + +A 16-bit :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_imm16.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_imm16: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_label.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_label.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_label.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_label.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,36 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_label: + +label +===== + +A branch target which is a 16-bit signed integer treated as a PC-relative dword offset. + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. +* A :ref:`symbol` (for example, a label) representing a relocatable address in the same compilation unit where it is referred from. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker. + +Examples: + +.. parsed-literal:: + + offset = 30 + label_1: + label_2 = . + 4 + + s_branch 32 + s_branch offset + 2 + s_branch label_1 + s_branch label_2 + s_branch label_3 + s_branch label_4 + + label_3 = label_2 + 4 + label_4: diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_m_1: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_m.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_m: + +m += + +This operand may be used with integer operand modifier :ref:`sext`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_msg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_msg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_msg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_msg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,101 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_msg: + +msg +=== + +A 16-bit message code. The bits of this operand have the following meaning: + + ============ =============================== =============== + Bits Description Value Range + ============ =============================== =============== + 3:0 Message *type*. 0..15 + 6:4 Optional *operation*. 0..7 + 7:7 Unused. \- + 9:8 Optional *stream*. 0..3 + 15:10 Unused. 
\- + ============ =============================== =============== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A *sendmsg* value described below. + + ==================================== ==================================================== + Sendmsg Value Syntax Description + ==================================== ==================================================== + sendmsg(<*type*>) A message identified by its *type*. + sendmsg(<*type*>,<*op*>) A message identified by its *type* and *operation*. + sendmsg(<*type*>,<*op*>,<*stream*>) A message identified by its *type* and *operation* + with a stream *id*. + ==================================== ==================================================== + +*Type* may be specified using message *name* or message *id*. + +*Op* may be specified using operation *name* or operation *id*. + +Stream *id* is an integer in the range 0..3. + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. 
+ +Each message type supports specific operations: + + ====================== ========== ============================== ============ ========== + Message name Message Id Supported Operations Operation Id Stream Id + ====================== ========== ============================== ============ ========== + MSG_INTERRUPT 1 \- \- \- + MSG_GS 2 GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_GS_DONE 3 GS_OP_NOP 0 \- + \ GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_SAVEWAVE 4 \- \- \- + MSG_STALL_WAVE_GEN 5 \- \- \- + MSG_HALT_WAVES 6 \- \- \- + MSG_ORDERED_PS_DONE 7 \- \- \- + MSG_EARLY_PRIM_DEALLOC 8 \- \- \- + MSG_GS_ALLOC_REQ 9 \- \- \- + MSG_GET_DOORBELL 10 \- \- \- + MSG_SYSMSG 15 SYSMSG_OP_ECC_ERR_INTERRUPT 1 \- + \ SYSMSG_OP_REG_RD 2 \- + \ SYSMSG_OP_HOST_TRAP_ACK 3 \- + \ SYSMSG_OP_TTRACE_PC 4 \- + ====================== ========== ============================== ============ ========== + +*Sendmsg* arguments are validated depending on how the *type* value is specified: + +* If message *type* is specified by name, argument values must satisfy the limitations detailed in the table above. +* If message *type* is specified as a number, each argument must not exceed the corresponding value range (see the first table). + +Examples: + +..
parsed-literal:: + + // numeric message code + msg = 0x10 + s_sendmsg 0x12 + s_sendmsg msg + 2 + + // sendmsg with strict arguments validation + s_sendmsg sendmsg(MSG_INTERRUPT) + s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT) + s_sendmsg sendmsg(MSG_GS, 2) + s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1) + s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC) + s_sendmsg sendmsg(MSG_GET_DOORBELL) + + // sendmsg with validation of value range only + msg = 2 + op = 3 + stream = 1 + s_sendmsg sendmsg(msg, op, stream) + s_sendmsg sendmsg(2, GS_OP_CUT) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_opt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_opt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_opt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_opt.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_opt: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_probe.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_probe.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_probe.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_probe.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,24 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx90a_probe: + +probe +===== + +A bit mask which indicates request permissions. + +This operand must be specified as an :ref:`integer_number` or an :ref:`absolute_expression`. +The value is truncated to 7 bits, but only 3 low bits are significant. + + ============ ============================== + Bit Number Description + ============ ============================== + 0 Request *read* permission. + 1 Request *write* permission. + 2 Request *execute* permission. + ============ ============================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_saddr_1: + +saddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +Either this operand or :ref:`vaddr` must be set to :ref:`off`. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_saddr.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,19 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_saddr: + +saddr +===== + +An optional 64-bit flat global address. Must be specified as :ref:`off` if not used. + +See :ref:`vaddr` for description of available addressing modes. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sbase_1: + +sbase +===== + +A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx90a_sbase_2: + +sbase +===== + +This operand is ignored by H/W and :ref:`flat_scratch` is supplied instead. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sbase.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sbase: + +sbase +===== + +A 64-bit base address for scalar memory operations. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdata_1: + +sdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdata_2: + +sdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_3.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdata_3: + +sdata +===== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_4.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdata_4: + +sdata +===== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdata_5: + +sdata +===== + +Instruction input. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdata.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdata: + +sdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst_1: + +sdst +==== + +Instruction output. + +*Size:* 16 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst_2: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_3.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst_3: + +sdst +==== + +Instruction output. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_4.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst_4: + +sdst +==== + +Instruction output. + +*Size:* 8 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst_5: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_6.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst_6: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst_7.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst_7: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_sdst.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_sdst: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_simm32_1: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f16* as described :ref:`here`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_simm32_2: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f32* as described :ref:`here`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_simm32.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_simm32: + +simm32 +====== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value is truncated to 32 bits. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_soffset_1: + +soffset +======= + +An offset added to the base address to get memory address. + +* If offset is specified as a register, it supplies an unsigned byte offset. +* If offset is specified as a 21-bit immediate, it supplies a signed byte offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`simm21` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_soffset_2: + +soffset +======= + +An unsigned 20-bit offset added to the base address to get memory address. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`uimm20` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_soffset.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_soffset: + +soffset +======= + +An unsigned byte offset. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_10.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_10: + +src +=== + +Instruction input. + +*Size:* 8 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_11.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_11.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_11.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_11.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_11: + +src +=== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_2: + +src +=== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_3.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_3: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_4.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_5: + +src +=== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_6.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_6: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`iconst`, :ref:`fconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_7.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_7: + +src +=== + +Instruction input. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_8.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_8: + +src +=== + +Instruction input. + +*Size:* 16 dwords. + +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src_9.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src_9: + +src +=== + +Instruction input. + +*Size:* 32 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a`, :ref:`iconst`, :ref:`fconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_src.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_srsrc_1: + +srsrc +===== + +Buffer resource constant which defines the address and characteristics of the buffer in memory. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_srsrc.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_srsrc: + +srsrc +===== + +Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format. + +*Size:* 8 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssamp.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssamp.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssamp.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssamp.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssamp: + +ssamp +===== + +Sampler constant used to specify filtering options applied to the image data after it is read. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_1: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_2: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_3.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_3: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_4.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_4: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_5: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_6.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_6: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_7.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_7: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc_8.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc_8: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_ssrc.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_ssrc: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vaddr_1: + +vaddr +===== + +A 64-bit flat address. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vaddr_2: + +vaddr +===== + +A 64-bit flat global address or a 32-bit offset depending on addressing mode: + +* Address = :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 64-bit address. This mode is indicated by :ref:`saddr` set to :ref:`off`. +* Address = :ref:`saddr` + :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 32-bit offset. This mode is used when :ref:`saddr` is not :ref:`off`. + +*Size:* 1 or 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_3.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vaddr_3: + +vaddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +Either this operand or :ref:`saddr` must be set to :ref:`off`. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_4.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vaddr_4: + +vaddr +===== + +Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image. + +*Size:* 1, 2, 3, 4, 8 or 16 dwords. Actual size depends on opcode, specific image being handled and :ref:`a16`. + + Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction. + + Note 2. 
Actually image address size may vary from 1 to 13 dwords, but assembler currently supports a limited range of register sequences. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vaddr_5: + +vaddr +===== + +This is an optional operand which may specify offset and/or index. + +*Size:* 0, 1 or 2 dwords. Size is controlled by modifiers :ref:`offen` and :ref:`idxen`: + +* If only :ref:`idxen` is specified, this operand supplies an index. Size is 1 dword. +* If only :ref:`offen` is specified, this operand supplies an offset. Size is 1 dword. +* If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords. +* If none of these modifiers are specified, this operand must be set to :ref:`off`. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vaddr.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vaddr: + +vaddr +===== + +An offset from the start of GDS/LDS memory. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vcc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vcc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vcc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vcc.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vcc: + +vcc +=== + +Vector condition code. + +*Size:* 2 dwords. + +*Operands:* :ref:`vcc` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata0_1: + +vdata0 +====== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata0.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata0: + +vdata0 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_10.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_10: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata1_1: + +vdata1 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_1: + +vdata +===== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata1: + +vdata1 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_2: + +vdata +===== + +Instruction input. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_3.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_3: + +vdata +===== + +Instruction input. + +*Size:* 3 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_4.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_4: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_5: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_6.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx90a_vdata_6: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` specifies that data in registers are packed; each value occupies 16 bits. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_7.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_7: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` which may specify from 1 to 4 data elements. Each data element occupies 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_8.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx90a_vdata_8: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata_9.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata_9: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdata.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdata: + +vdata +===== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_10.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_10: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 3 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_11.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_11.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_11.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_11.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_11: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_12.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_12.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_12.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_12.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_12: + +vdst +==== + +Instruction output: data read from a memory buffer. + +If :ref:`lds` is specified, this operand is ignored by H/W and data are stored directly into LDS. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + + Note that :ref:`tfe` and :ref:`lds` cannot be used together. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_13.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_13.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_13.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_13.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_13: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_14.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_14.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_14.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_14.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_14: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_15.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_15.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_15.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_15.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_15: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_16.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_16.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_16.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_16.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_16: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_17.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_17.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_17.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_17.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_17: + +vdst +==== + +Instruction output. + +*Size:* 16 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_18.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_18.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_18.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_18.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_18: + +vdst +==== + +Instruction output. + +*Size:* 32 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_19.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_19.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_19.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_19.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_19: + +vdst +==== + +Instruction output. + +*Size:* 8 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_1: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_2: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_3.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_3: + +vdst +==== + +Instruction output. + +*Size:* 3 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_4.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_4: + +vdst +==== + +Data returned by a 32-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_5: + +vdst +==== + +Data returned by a 64-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_6.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_6: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_7.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_7: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask`, :ref:`tfe` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` specifies that data elements in registers are packed; each value occupies 16 bits. 
+* :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_8.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_8: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst_9.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst_9: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vdst.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_1.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vsrc_1: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_2.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vsrc_2: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_3.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vsrc_3: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_4.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vsrc_4: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc_5.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vsrc_5: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_vsrc.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_vsrc: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`a` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_waitcnt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_waitcnt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_waitcnt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx90a_waitcnt.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,64 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx90a_waitcnt: + +waitcnt +======= + +Counts of outstanding instructions to wait for. + +The bits of this operand have the following meaning: + + ========== ========= ================================================ ============ + High Bits Low Bits Description Value Range + ========== ========= ================================================ ============ + 15:14 3:0 VM_CNT: vector memory operations count. 0..63 + \- 6:4 EXP_CNT: export count. 0..7 + \- 11:8 LGKM_CNT: LDS, GDS, Constant and Message count. 0..15 + ========== ========= ================================================ ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A combination of *vmcnt*, *expcnt*, *lgkmcnt* and other values described below. + + ====================== ====================================================================== + Syntax Description + ====================== ====================================================================== + vmcnt(<*N*>) A VM_CNT value. *N* must not exceed the largest VM_CNT value. + expcnt(<*N*>) An EXP_CNT value. *N* must not exceed the largest EXP_CNT value. + lgkmcnt(<*N*>) An LGKM_CNT value. *N* must not exceed the largest LGKM_CNT value. + vmcnt_sat(<*N*>) A VM_CNT value computed as min(*N*, the largest VM_CNT value). + expcnt_sat(<*N*>) An EXP_CNT value computed as min(*N*, the largest EXP_CNT value). + lgkmcnt_sat(<*N*>) An LGKM_CNT value computed as min(*N*, the largest LGKM_CNT value). + ====================== ====================================================================== + +These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators. 
+ +*N* is either an +:ref:`integer number` or an +:ref:`absolute expression`. + +Examples: + +.. parsed-literal:: + + vm_cnt = 1 + exp_cnt = 2 + lgkm_cnt = 3 + cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8) + + s_waitcnt cnt + s_waitcnt 1 | (2 << 4) | (3 << 8) // the same as above + s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3) // the same as above + s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt) // the same as above + + s_waitcnt vmcnt(1) + s_waitcnt expcnt(2) lgkmcnt(3) + s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3) + s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_attr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_attr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_attr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_attr.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,29 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_attr: + +attr +==== + +Interpolation attribute and channel: + + ============== =================================== + Syntax Description + ============== =================================== + attr{0..32}.x Attribute 0..32 with *x* channel. + attr{0..32}.y Attribute 0..32 with *y* channel. + attr{0..32}.z Attribute 0..32 with *z* channel. + attr{0..32}.w Attribute 0..32 with *w* channel. + ============== =================================== + +Examples: + +.. 
parsed-literal:: + + v_interp_p1_f32 v1, v0, attr0.x + v_interp_p1_f32 v1, v0, attr32.w diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_dst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_dst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_dst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_dst.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_dst: + +dst +=== + +This is an input operand. It may optionally serve as a destination if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_hwreg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_hwreg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_hwreg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_hwreg.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,73 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_hwreg: + +hwreg +===== + +Bits of a hardware register being accessed. + +The bits of this operand have the following meaning: + + ======= ===================== ============ + Bits Description Value Range + ======= ===================== ============ + 5:0 Register *id*. 0..63 + 10:6 First bit *offset*. 0..31 + 15:11 *Size* in bits. 1..32 + ======= ===================== ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. 
The value must be in the range 0..0xFFFF. +* An *hwreg* value described below. + + ==================================== ============================================================================ + Hwreg Value Syntax Description + ==================================== ============================================================================ + hwreg({0..63}) All bits of a register indicated by its *id*. + hwreg(<*name*>) All bits of a register indicated by its *name*. + hwreg({0..63}, {0..31}, {1..32}) Register bits indicated by register *id*, first bit *offset* and *size*. + hwreg(<*name*>, {0..31}, {1..32}) Register bits indicated by register *name*, first bit *offset* and *size*. + ==================================== ============================================================================ + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. + +Defined register *names* include: + + =================== ========================================== + Name Description + =================== ========================================== + HW_REG_MODE Shader writeable mode bits. + HW_REG_STATUS Shader read-only status. + HW_REG_TRAPSTS Trap status. + HW_REG_HW_ID Id of wave, simd, compute unit, etc. + HW_REG_GPR_ALLOC Per-wave SGPR and VGPR allocation. + HW_REG_LDS_ALLOC Per-wave LDS allocation. + HW_REG_IB_STS Counters of outstanding instructions. + HW_REG_SH_MEM_BASES Memory aperture. + =================== ========================================== + +Examples: + +.. 
parsed-literal:: + + reg = 1 + offset = 2 + size = 4 + hwreg_enc = reg | (offset << 6) | ((size - 1) << 11) + + s_getreg_b32 s2, 0x1881 + s_getreg_b32 s2, hwreg_enc // the same as above + s_getreg_b32 s2, hwreg(1, 2, 4) // the same as above + s_getreg_b32 s2, hwreg(reg, offset, size) // the same as above + + s_getreg_b32 s2, hwreg(15) + s_getreg_b32 s2, hwreg(51, 1, 31) + s_getreg_b32 s2, hwreg(HW_REG_LDS_ALLOC, 0, 1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imask.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imask.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imask.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imask.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,65 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_imask: + +imask +===== + +This operand is a mask which controls indexing mode for operands of subsequent instructions. +Bits 0, 1 and 2 control indexing of *src0*, *src1* and *src2*, while bit 3 controls indexing of *dst*. +Value 1 enables indexing and value 0 disables it. + + ===== ======================================== + Bit Meaning + ===== ======================================== + 0 Enables or disables *src0* indexing. + 1 Enables or disables *src1* indexing. + 2 Enables or disables *src2* indexing. + 3 Enables or disables *dst* indexing. + ===== ======================================== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..15. +* A *gpr_idx* value described below. 
+ + ==================================== =========================================== + Gpr_idx Value Syntax Description + ==================================== =========================================== + gpr_idx(*<operands>*) Enable indexing for specified *operands* + and disable it for the rest. + *Operands* is a comma-separated list of + values which may include: + + * "SRC0" - enable *src0* indexing. + + * "SRC1" - enable *src1* indexing. + + * "SRC2" - enable *src2* indexing. + + * "DST" - enable *dst* indexing. + + Each of these values may be specified only + once. + + *Operands* list may be empty; this syntax + disables indexing for all operands. + ==================================== =========================================== + +Examples: + +.. parsed-literal:: + + s_set_gpr_idx_mode 0 + s_set_gpr_idx_mode gpr_idx() // the same as above + + s_set_gpr_idx_mode 15 + s_set_gpr_idx_mode gpr_idx(DST,SRC0,SRC1,SRC2) // the same as above + s_set_gpr_idx_mode gpr_idx(SRC0,SRC1,SRC2,DST) // the same as above + + s_set_gpr_idx_mode gpr_idx(DST,SRC1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_imm16_1: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..65535.
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_imm16_2: + +imm16 +===== + +A 16-bit :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_imm16.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_imm16: + +imm16 +===== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_label.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_label.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_label.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_label.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,36 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_label: + +label +===== + +A branch target which is a 16-bit signed integer treated as a PC-relative dword offset. + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range -32768..65535. +* A :ref:`symbol` (for example, a label) representing a relocatable address in the same compilation unit where it is referred from. The value is handled as a 16-bit PC-relative dword offset to be resolved by a linker. + +Examples: + +.. parsed-literal:: + + offset = 30 + label_1: + label_2 = . + 4 + + s_branch 32 + s_branch offset + 2 + s_branch label_1 + s_branch label_2 + s_branch label_3 + s_branch label_4 + + label_3 = label_2 + 4 + label_4: diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx9_m_1: + +m += + +This operand may be used with floating point operand modifiers :ref:`abs` and :ref:`neg`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_m.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_m: + +m += + +This operand may be used with integer operand modifier :ref:`sext`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_msg.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_msg.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_msg.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_msg.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,101 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_msg: + +msg +=== + +A 16-bit message code. The bits of this operand have the following meaning: + + ============ =============================== =============== + Bits Description Value Range + ============ =============================== =============== + 3:0 Message *type*. 0..15 + 6:4 Optional *operation*. 0..7 + 7:7 Unused. \- + 9:8 Optional *stream*. 0..3 + 15:10 Unused. 
\- + ============ =============================== =============== + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A *sendmsg* value described below. + + ==================================== ==================================================== + Sendmsg Value Syntax Description + ==================================== ==================================================== + sendmsg(<*type*>) A message identified by its *type*. + sendmsg(<*type*>,<*op*>) A message identified by its *type* and *operation*. + sendmsg(<*type*>,<*op*>,<*stream*>) A message identified by its *type* and *operation* + with a stream *id*. + ==================================== ==================================================== + +*Type* may be specified using message *name* or message *id*. + +*Op* may be specified using operation *name* or operation *id*. + +Stream *id* is an integer in the range 0..3. + +Numeric values may be specified as positive :ref:`integer numbers` +or :ref:`absolute expressions`. 
+ +Each message type supports specific operations: + + ====================== ========== ============================== ============ ========== + Message name Message Id Supported Operations Operation Id Stream Id + ====================== ========== ============================== ============ ========== + MSG_INTERRUPT 1 \- \- \- + MSG_GS 2 GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_GS_DONE 3 GS_OP_NOP 0 \- + \ GS_OP_CUT 1 Optional + \ GS_OP_EMIT 2 Optional + \ GS_OP_EMIT_CUT 3 Optional + MSG_SAVEWAVE 4 \- \- \- + MSG_STALL_WAVE_GEN 5 \- \- \- + MSG_HALT_WAVES 6 \- \- \- + MSG_ORDERED_PS_DONE 7 \- \- \- + MSG_EARLY_PRIM_DEALLOC 8 \- \- \- + MSG_GS_ALLOC_REQ 9 \- \- \- + MSG_GET_DOORBELL 10 \- \- \- + MSG_SYSMSG 15 SYSMSG_OP_ECC_ERR_INTERRUPT 1 \- + \ SYSMSG_OP_REG_RD 2 \- + \ SYSMSG_OP_HOST_TRAP_ACK 3 \- + \ SYSMSG_OP_TTRACE_PC 4 \- + ====================== ========== ============================== ============ ========== + +*Sendmsg* arguments are validated depending on how the *type* value is specified: + +* If message *type* is specified by name, argument values must satisfy the limitations detailed in the table above. +* If message *type* is specified as a number, each argument must not exceed the corresponding value range (see the first table). + +Examples: + +..
parsed-literal:: + + // numeric message code + msg = 0x10 + s_sendmsg 0x12 + s_sendmsg msg + 2 + + // sendmsg with strict arguments validation + s_sendmsg sendmsg(MSG_INTERRUPT) + s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT) + s_sendmsg sendmsg(MSG_GS, 2) + s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_EMIT_CUT, 1) + s_sendmsg sendmsg(MSG_SYSMSG, SYSMSG_OP_TTRACE_PC) + s_sendmsg sendmsg(MSG_GET_DOORBELL) + + // sendmsg with validation of value range only + msg = 2 + op = 3 + stream = 1 + s_sendmsg sendmsg(msg, op, stream) + s_sendmsg sendmsg(2, GS_OP_CUT) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_opt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_opt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_opt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_opt.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_opt: + +opt +=== + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_param.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_param.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_param.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_param.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. 
_amdgpu_synid_gfx9_param: + +param +===== + +Interpolation parameter to read: + + ============ =================================== + Syntax Description + ============ =================================== + p0 Parameter *P0*. + p10 Parameter *P10*. + p20 Parameter *P20*. + ============ =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_probe.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_probe.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_probe.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_probe.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,24 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_probe: + +probe +===== + +A bit mask which indicates request permissions. + +This operand must be specified as an :ref:`integer_number` or an :ref:`absolute_expression`. +The value is truncated to 7 bits, but only 3 low bits are significant. + + ============ ============================== + Bit Number Description + ============ ============================== + 0 Request *read* permission. + 1 Request *write* permission. + 2 Request *execute* permission. + ============ ============================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,19 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_saddr_1: + +saddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +Either this operand or :ref:`vaddr` must be set to :ref:`off`. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_saddr.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_saddr: + +saddr +===== + +An optional 64-bit flat global address. Must be specified as :ref:`off` if not used. + +See :ref:`vaddr` for description of available addressing modes. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! 
* + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sbase_1: + +sbase +===== + +A 128-bit buffer resource constant for scalar memory operations which provides a base address, a size and a stride. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sbase_2: + +sbase +===== + +This operand is ignored by H/W and :ref:`flat_scratch` is supplied instead. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sbase.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sbase: + +sbase +===== + +A 64-bit base address for scalar memory operations. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdata_1: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdata_2: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords.
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdata_3: + +sdata +===== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_4.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdata_4: + +sdata +===== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata_5.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdata_5: + +sdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdata.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdata: + +sdata +===== + +Input data for an atomic instruction. + +May optionally serve as output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword.
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst_1: + +sdst +==== + +Instruction output. + +*Size:* 16 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst_2: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst_3: + +sdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_4.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst_4: + +sdst +==== + +Instruction output. + +*Size:* 8 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_5.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst_5: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_6.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst_6: + +sdst +==== + +Instruction output. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst_7.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst_7: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_sdst.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_sdst: + +sdst +==== + +Instruction output. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_simm32_1: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f16* as described :ref:`here`. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,14 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_simm32_2: + +simm32 +====== + +A :ref:`floating-point_number`, an :ref:`integer_number`, or an :ref:`absolute_expression`. +The value is converted to *f32* as described :ref:`here`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_simm32.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_simm32: + +simm32 +====== + +An :ref:`integer_number` or an :ref:`absolute_expression`. The value is truncated to 32 bits. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_soffset_1: + +soffset +======= + +An offset added to the base address to get memory address. + +* If offset is specified as a register, it supplies an unsigned byte offset. +* If offset is specified as a 21-bit immediate, it supplies a signed byte offset. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`simm21` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_soffset_2: + +soffset +======= + +An unsigned 20-bit offset added to the base address to get memory address. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`uimm20` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_soffset.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_soffset: + +soffset +======= + +An unsigned byte offset. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_10.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_10: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_1: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_2: + +src +=== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_3: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_4.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_4: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_5.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_5: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`lds_direct` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_6.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_6: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`iconst`, :ref:`ival`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_7.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_7: + +src +=== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_8.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_8: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src_9.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src_9: + +src +=== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_src.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_src: + +src +=== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`lds_direct`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_srsrc_1: + +srsrc +===== + +Buffer resource constant which defines the address and characteristics of the buffer in memory. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_srsrc.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_srsrc: + +srsrc +===== + +Image resource constant which defines the location of the image buffer in memory, its dimensions, tiling, and data format. + +*Size:* 8 dwords. + +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssamp.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssamp.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssamp.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssamp.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssamp: + +ssamp +===== + +Sampler constant used to specify filtering options applied to the image data after it is read. + +*Size:* 4 dwords. 
+ +*Operands:* :ref:`s`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_1: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_2: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_3: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_4.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_4: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_5.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_5: + +ssrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_6.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_6: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_7.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_7: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`iconst` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc_8.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc_8: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_ssrc.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_ssrc: + +ssrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`s`, :ref:`flat_scratch`, :ref:`xnack_mask`, :ref:`vcc`, :ref:`ttmp`, :ref:`m0`, :ref:`exec`, :ref:`vccz`, :ref:`execz`, :ref:`scc`, :ref:`constant`, :ref:`literal` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_tgt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_tgt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_tgt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_tgt.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,23 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_tgt: + +tgt +=== + +An export target: + + ============== =================================== + Syntax Description + ============== =================================== + pos{0..3} Copy vertex position 0..3. + param{0..31} Copy vertex parameter 0..31. + mrt{0..7} Copy pixel color to the MRTs 0..7. 
+ mrtz Copy pixel depth (Z) data. + null Copy nothing. + ============== =================================== diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_type_deviation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_type_deviation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_type_deviation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_type_deviation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,13 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_type_deviation: + +Type Deviation +============== + +*Type* of this operand differs from *type* :ref:`implied by the opcode`. This tag specifies actual operand *type*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vaddr_1: + +vaddr +===== + +A 64-bit flat address. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vaddr_2: + +vaddr +===== + +A 64-bit flat global address or a 32-bit offset depending on addressing mode: + +* Address = :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 64-bit address. This mode is indicated by :ref:`saddr` set to :ref:`off`. +* Address = :ref:`saddr` + :ref:`vaddr` + :ref:`offset13s`. :ref:`vaddr` is a 32-bit offset. This mode is used when :ref:`saddr` is not :ref:`off`. + +*Size:* 1 or 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vaddr_3: + +vaddr +===== + +An optional 32-bit flat scratch offset. Must be specified as :ref:`off` if not used. + +Either this operand or :ref:`saddr` must be set to :ref:`off`. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_4.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vaddr_4: + +vaddr +===== + +Image address which includes from one to four dimensional coordinates and other data used to locate a position in the image. + +*Size:* 1, 2, 3, 4, 8 or 16 dwords. Actual size depends on opcode, specific image being handled and :ref:`a16`. + + Note 1. Image format and dimensions are encoded in the image resource constant but not in the instruction. + + Note 2. Actually image address size may vary from 1 to 13 dwords, but assembler currently supports a limited range of register sequences. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr_5.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vaddr_5: + +vaddr +===== + +This is an optional operand which may specify offset and/or index. 
+ +*Size:* 0, 1 or 2 dwords. Size is controlled by modifiers :ref:`offen` and :ref:`idxen`: + +* If only :ref:`idxen` is specified, this operand supplies an index. Size is 1 dword. +* If only :ref:`offen` is specified, this operand supplies an offset. Size is 1 dword. +* If both modifiers are specified, index is in the first register and offset is in the second. Size is 2 dwords. +* If none of these modifiers are specified, this operand must be set to :ref:`off`. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vaddr.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vaddr: + +vaddr +===== + +An offset from the start of GDS/LDS memory. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vcc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vcc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vcc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vcc.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vcc: + +vcc +=== + +Vector condition code. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`vcc` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata0_1: + +vdata0 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata0.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata0: + +vdata0 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_10.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_10: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata1_1: + +vdata1 +====== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_1: + +vdata +===== + +Instruction input. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata1: + +vdata1 +====== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_2: + +vdata +===== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_3: + +vdata +===== + +Instruction input. + +*Size:* 3 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_4.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,26 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_4: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 1 data element for 32-bit-per-pixel surfaces or 2 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_5.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,26 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_5: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify 2 data elements for 32-bit-per-pixel surfaces or 4 data elements for 64-bit-per-pixel surfaces. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. + + Note: the surface data format is indicated in the image resource constant but not in the instruction. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_6.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_6: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` specifies that data in registers are packed; each value occupies 16 bits. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_7.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_7: + +vdata +===== + +Image data to store by an *image_store* instruction. + +*Size:* depends on :ref:`dmask` which may specify from 1 to 4 data elements. Each data element occupies 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_8.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_8: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata_9.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata_9: + +vdata +===== + +Input data for an atomic instruction. + +Optionally may serve as an output data: + +* If :ref:`glc` is specified, gets the memory value before the operation. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdata.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdata: + +vdata +===== + +Instruction input. + +*Size:* 1 dword. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_10.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_10.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_10.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_10.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_10: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 2 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_11.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_11.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_11.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_11.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_11: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 3 dwords by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_12.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_12.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_12.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_12.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_12: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 4 dwords by default. :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_13.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_13.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_13.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_13.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_13: + +vdst +==== + +Instruction output: data read from a memory buffer. + +If :ref:`lds` is specified, this operand is ignored by H/W and data are stored directly into LDS. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. + + Note that :ref:`tfe` and :ref:`lds` cannot be used together. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_1: + +vdst +==== + +Instruction output. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_2: + +vdst +==== + +Instruction output. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_3: + +vdst +==== + +Instruction output. + +*Size:* 3 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_4.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_4.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_4.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_4.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_4: + +vdst +==== + +Data returned by a 32-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_5.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_5.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_5.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_5.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,19 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_5: + +vdst +==== + +Data returned by a 64-bit atomic flat instruction. + +This is an optional operand. It must be used if and only if :ref:`glc` is specified. + +*Size:* 2 dwords. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_6.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_6.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_6.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_6.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,22 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_6: + +vdst +==== + +Image data to load by an *image_gather4* instruction. + +*Size:* 4 data elements by default. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. + +:ref:`d16` and :ref:`tfe` affect operand size as follows: + +* :ref:`d16` specifies that data elements in registers are packed; each value occupies 16 bits. +* :ref:`tfe` adds one dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_7.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_7.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_7.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_7.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,20 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_7: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask` and :ref:`tfe`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies 1 dword. +* :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_8.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_8.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_8.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_8.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,21 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_8: + +vdst +==== + +Image data to load by an image instruction. + +*Size:* depends on :ref:`dmask`, :ref:`tfe` and :ref:`d16`: + +* :ref:`dmask` may specify from 1 to 4 data elements. Each data element occupies either 32 bits or 16 bits depending on :ref:`d16`. +* :ref:`d16` specifies that data elements in registers are packed; each value occupies 16 bits. +* :ref:`tfe` adds 1 dword if specified. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_9.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_9.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_9.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst_9.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst_9: + +vdst +==== + +Instruction output: data read from a memory buffer. + +*Size:* 1 dword by default. :ref:`tfe` adds 1 dword if specified. 
+ +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vdst.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vdst: + +vdst +==== + +Instruction output. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_1.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_1.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_1.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_1.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vsrc_1: + +vsrc +==== + +Instruction input. + +*Size:* 1 dword. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_2.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_2.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_2.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_2.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. 
+ ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vsrc_2: + +vsrc +==== + +Instruction input. + +*Size:* 4 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_3.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_3.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_3.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc_3.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,17 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vsrc_3: + +vsrc +==== + +Instruction input. + +*Size:* 2 dwords. + +*Operands:* :ref:`v` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_vsrc.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,28 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_vsrc: + +vsrc +==== + +Data to copy to export buffers. This is an optional operand. Must be specified as :ref:`off` if not used. + +:ref:`compr` modifier indicates use of compressed (16-bit) data. This limits number of source operands from 4 to 2: + +* src0 and src1 must specify the first register (or :ref:`off`). 
+* src2 and src3 must specify the second register (or :ref:`off`). + +An example: + +.. parsed-literal:: + + exp mrtz v3, v3, off, off compr + +*Size:* 1 dword. + +*Operands:* :ref:`v`, :ref:`off` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_waitcnt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_waitcnt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPU/gfx9_waitcnt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPU/gfx9_waitcnt.rst.txt 2021-07-28 16:49:04.000000000 +0000 @@ -0,0 +1,64 @@ +.. + ************************************************** + * * + * Automatically generated file, do not edit! * + * * + ************************************************** + +.. _amdgpu_synid_gfx9_waitcnt: + +waitcnt +======= + +Counts of outstanding instructions to wait for. + +The bits of this operand have the following meaning: + + ========== ========= ================================================ ============ + High Bits Low Bits Description Value Range + ========== ========= ================================================ ============ + 15:14 3:0 VM_CNT: vector memory operations count. 0..63 + \- 6:4 EXP_CNT: export count. 0..7 + \- 11:8 LGKM_CNT: LDS, GDS, Constant and Message count. 0..15 + ========== ========= ================================================ ============ + +This operand may be specified as one of the following: + +* An :ref:`integer_number` or an :ref:`absolute_expression`. The value must be in the range 0..0xFFFF. +* A combination of *vmcnt*, *expcnt*, *lgkmcnt* and other values described below. + + ====================== ====================================================================== + Syntax Description + ====================== ====================================================================== + vmcnt(<*N*>) A VM_CNT value. *N* must not exceed the largest VM_CNT value. 
+ expcnt(<*N*>) An EXP_CNT value. *N* must not exceed the largest EXP_CNT value. + lgkmcnt(<*N*>) An LGKM_CNT value. *N* must not exceed the largest LGKM_CNT value. + vmcnt_sat(<*N*>) A VM_CNT value computed as min(*N*, the largest VM_CNT value). + expcnt_sat(<*N*>) An EXP_CNT value computed as min(*N*, the largest EXP_CNT value). + lgkmcnt_sat(<*N*>) An LGKM_CNT value computed as min(*N*, the largest LGKM_CNT value). + ====================== ====================================================================== + +These values may be specified in any order. Spaces, ampersands and commas may be used as optional separators. + +*N* is either an +:ref:`integer number` or an +:ref:`absolute expression`. + +Examples: + +.. parsed-literal:: + + vm_cnt = 1 + exp_cnt = 2 + lgkm_cnt = 3 + cnt = vm_cnt | (exp_cnt << 4) | (lgkm_cnt << 8) + + s_waitcnt cnt + s_waitcnt 1 | (2 << 4) | (3 << 8) // the same as above + s_waitcnt vmcnt(1) expcnt(2) lgkmcnt(3) // the same as above + s_waitcnt vmcnt(vm_cnt) expcnt(exp_cnt) lgkmcnt(lgkm_cnt) // the same as above + + s_waitcnt vmcnt(1) + s_waitcnt expcnt(2) lgkmcnt(3) + s_waitcnt vmcnt(1), expcnt(2), lgkmcnt(3) + s_waitcnt vmcnt(1) & lgkmcnt_sat(100) & expcnt(2) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,4299 @@ +.. _amdgpu-dwarf-extensions-for-heterogeneous-debugging: + +******************************************** +DWARF Extensions For Heterogeneous Debugging +******************************************** + +.. 
contents:: + :local: + +.. warning:: + + This document describes **provisional extensions** to DWARF Version 5 + [:ref:`DWARF <amdgpu-dwarf-DWARF>`] to support heterogeneous debugging. It is + not currently fully implemented and is subject to change. + +.. _amdgpu-dwarf-introduction: + +Introduction +============ + +AMD [:ref:`AMD <amdgpu-dwarf-AMD>`] has been working on supporting heterogeneous +computing through the AMD Radeon Open Compute Platform (ROCm) [:ref:`AMD-ROCm +<amdgpu-dwarf-AMD-ROCm>`]. A heterogeneous computing program can be written in a +high level language such as C++ or Fortran with OpenMP pragmas, OpenCL, or HIP +(a portable C++ programming environment for heterogeneous computing [:ref:`HIP +<amdgpu-dwarf-HIP>`]). A heterogeneous compiler and runtime allows a program to +execute on multiple devices within the same native process. Devices could +include CPUs, GPUs, DSPs, FPGAs, or other special purpose accelerators. +Currently HIP programs execute on systems with CPUs and GPUs. + +ROCm is fully open sourced and includes contributions to open source projects +such as LLVM for compilation [:ref:`LLVM <amdgpu-dwarf-LLVM>`] and GDB for +debugging [:ref:`GDB <amdgpu-dwarf-GDB>`], as well as collaboration with other +third party projects such as the GCC compiler [:ref:`GCC <amdgpu-dwarf-GCC>`] +and the Perforce TotalView HPC debugger [:ref:`Perforce-TotalView +<amdgpu-dwarf-Perforce-TotalView>`]. + +To support debugging heterogeneous programs several features that are not +provided by current DWARF Version 5 [:ref:`DWARF <amdgpu-dwarf-DWARF>`] have +been identified. This document contains a collection of extensions to address +providing those features. + +The :ref:`amdgpu-dwarf-motivation` section describes the issues that are being +addressed for heterogeneous computing. That is followed by the +:ref:`amdgpu-dwarf-changes-relative-to-dwarf-version-5` section containing the +textual changes for the extensions relative to the DWARF Version 5 standard. +Then there is an :ref:`amdgpu-dwarf-examples` section that links to the AMD GPU +specific usage of the extensions that includes an example.
Finally, there is a +:ref:`amdgpu-dwarf-references` section. There are a number of notes included +that raise open questions, or provide alternative approaches considered. The +extensions seek to be general in nature and backwards compatible with DWARF +Version 5. The goal is to be applicable to meeting the needs of any +heterogeneous system and not be vendor or architecture specific. + +A fundamental aspect of the extensions is that they allow DWARF expression +location descriptions as stack elements. The extensions are based on DWARF +Version 5 and maintain compatibility with DWARF Version 5. After attempting +several alternatives, the current thinking is that such extensions to DWARF +Version 5 are the simplest and cleanest ways to support debugging optimized GPU +code. It also appears to be generally useful and may be able to address other +reported DWARF issues, as well as being helpful in providing better optimization +support for non-GPU code. + +General feedback on these extensions is sought, together with suggestions on how +to clarify, simplify, or organize them. If there is general interest then some +or all of these extensions could be submitted as future DWARF proposals. + +We are in the process of modifying LLVM and GDB to support these extensions +which is providing experience and insights. We plan to upstream the changes to +those projects for any final form of the extensions. + +The author very much appreciates the input provided so far by many others which +has been incorporated into this current version. + +.. _amdgpu-dwarf-motivation: + +Motivation +========== + +This document presents a set of backwards compatible extensions to DWARF Version +5 [:ref:`DWARF <amdgpu-dwarf-DWARF>`] to support heterogeneous debugging. + +The remainder of this section provides motivation for each extension in +terms of heterogeneous debugging on commercially available AMD GPU hardware +(AMDGPU).
The goal is to add support to the AMD [:ref:`AMD <amdgpu-dwarf-AMD>`] +open source Radeon Open Compute Platform (ROCm) [:ref:`AMD-ROCm +<amdgpu-dwarf-AMD-ROCm>`] which is an implementation of the industry standard +for heterogeneous computing devices defined by the Heterogeneous System +Architecture (HSA) Foundation [:ref:`HSA <amdgpu-dwarf-HSA>`]. ROCm includes the +LLVM compiler [:ref:`LLVM <amdgpu-dwarf-LLVM>`] with upstreamed support for +AMDGPU [:ref:`AMDGPU-LLVM <amdgpu-dwarf-AMDGPU-LLVM>`]. The goal is to also add +the GDB debugger [:ref:`GDB <amdgpu-dwarf-GDB>`] with upstreamed support for +AMDGPU [:ref:`AMD-ROCgdb <amdgpu-dwarf-AMD-ROCgdb>`]. In addition, the goal is +to work with third parties to enable support for AMDGPU debugging in the GCC +compiler [:ref:`GCC <amdgpu-dwarf-GCC>`] and the Perforce TotalView HPC debugger +[:ref:`Perforce-TotalView <amdgpu-dwarf-Perforce-TotalView>`]. + +However, the extensions are intended to be vendor and architecture neutral. They +are believed to apply to other heterogeneous hardware devices including GPUs, +DSPs, FPGAs, and other specialized hardware. These collectively include similar +characteristics and requirements as AMDGPU devices. Some of the extensions can +also apply to traditional CPU hardware that supports large vector registers. +Compilers can map source languages and extensions that describe large scale +parallel execution onto the lanes of the vector registers. This is common in +programming languages used in ML and HPC. The extensions also include improved +support for optimized code on any architecture. Some of the generalizations may +also benefit other issues that have been raised. + +The extensions have evolved through collaboration with many individuals and +active prototyping within the GDB debugger and LLVM compiler. Input has also +been very much appreciated from the developers working on the Perforce TotalView +HPC Debugger and GCC compiler. + +The AMDGPU has several features that require additional DWARF functionality in +order to support optimized code.
+ +AMDGPU optimized code may spill vector registers to non-global address space +memory, and this spilling may be done only for lanes that are active on entry +to the subprogram. To support this, a location description that can be created +as a masked select is required. See ``DW_OP_LLVM_select_bit_piece``. + +Since the active lane mask may be held in a register, a way to get the value +of a register on entry to a subprogram is required. To support this an +operation that returns the caller value of a register as specified by the Call +Frame Information (CFI) is required. See ``DW_OP_LLVM_call_frame_entry_reg`` +and :ref:`amdgpu-dwarf-call-frame-information`. + +Current DWARF uses an empty expression to indicate an undefined location +description. Since the masked select composite location description operation +takes more than one location description, it is necessary to have an explicit +way to specify an undefined location description. Otherwise it is not possible +to specify that a particular one of the input location descriptions is +undefined. See ``DW_OP_LLVM_undefined``. + +CFI describes restoring callee saved registers that are spilled. Currently CFI +only allows a location description that is a register, memory address, or +implicit location description. AMDGPU optimized code may spill scalar +registers into portions of vector registers. This requires extending CFI to +allow any location description. See +:ref:`amdgpu-dwarf-call-frame-information`. + +The vector registers of the AMDGPU are represented as their full wavefront +size, meaning the wavefront size times the dword size. This reflects the +actual hardware and allows the compiler to generate DWARF for languages that +map a thread to the complete wavefront. It also allows more efficient DWARF to +be generated to describe the CFI as only a single expression is required for +the whole vector register, rather than a separate expression for each lane's +dword of the vector register. 
It also allows the compiler to produce DWARF +that indexes the vector register if it spills scalar registers into portions +of a vector register. + +Since DWARF stack value entries have a base type and AMDGPU registers are a +vector of dwords, the ability to specify that a base type is a vector is +required. See ``DW_AT_LLVM_vector_size``. + +If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner, +then the variable DWARF location expressions must compute the location for a +single lane of the wavefront. Therefore, a DWARF operation is required to denote +the current lane, much like ``DW_OP_push_object_address`` denotes the current +object. The ``DW_OP_*piece`` operations only allow literal indices. Therefore, a +way to use a computed offset of an arbitrary location description (such as a +vector register) is required. See ``DW_OP_LLVM_push_lane``, +``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and +``DW_OP_LLVM_bit_offset``. + +If the source language is mapped onto the AMDGPU wavefronts in a SIMT manner +the compiler can use the AMDGPU execution mask register to control which lanes +are active. To describe the conceptual location of non-active lanes a DWARF +expression is needed that can compute a per lane PC. For efficiency, this is +done for the wavefront as a whole. This expression benefits by having a masked +select composite location description operation. This requires an attribute +for source location of each lane. The AMDGPU may update the execution mask for +whole wavefront operations and so needs an attribute that computes the current +active lane mask. See ``DW_OP_LLVM_select_bit_piece``, ``DW_OP_LLVM_extend``, +``DW_AT_LLVM_lane_pc``, and ``DW_AT_LLVM_active_lane``. + +AMDGPU needs to be able to describe addresses that are in different kinds of +memory. 
Optimized code may need to describe a variable that resides in pieces +that are in different kinds of storage which may include parts of registers, +memory that is in a mixture of memory kinds, implicit values, or be undefined. +DWARF has the concept of segment addresses. However, the segment cannot be +specified within a DWARF expression, which is only able to specify the offset +portion of a segment address. The segment index is only provided by the entity +that specifies the DWARF expression. Therefore, the segment index is a +property that can only be put on complete objects, such as a variable. That +makes it only suitable for describing an entity (such as variable or +subprogram code) that is in a single kind of memory. Therefore, AMDGPU uses +the DWARF concept of address spaces. For example, a variable may be allocated +in a register that is partially spilled to the call stack which is in the +private address space, and partially spilled to the local address space. + +DWARF uses the concept of an address in many expression operations but does not +define how it relates to address spaces. For example, +``DW_OP_push_object_address`` pushes the address of an object. Other contexts +implicitly push an address on the stack before evaluating an expression. For +example, the ``DW_AT_use_location`` attribute of the +``DW_TAG_ptr_to_member_type``. The expression that uses the address needs to +do so in a general way and not need to be dependent on the address space of +the address. For example, a pointer to member value may want to be applied to +an object that may reside in any address space. + +The number of registers and the cost of memory operations is much higher for +AMDGPU than a typical CPU. The compiler attempts to optimize whole variables +and arrays into registers. Currently DWARF only allows +``DW_OP_push_object_address`` and related operations to work with a global +memory location. 
To support AMDGPU optimized code it is required to generalize +DWARF to allow any location description to be used. This allows registers, or +composite location descriptions that may be a mixture of memory, registers, or +even implicit values. + +DWARF Version 5 does not allow location descriptions to be entries on the +DWARF stack. They can only be the final result of the evaluation of a DWARF +expression. However, by allowing a location description to be a first-class +entry on the DWARF stack it becomes possible to compose expressions containing +both values and location descriptions naturally. It allows objects to be +located in any kind of memory address space, in registers, be implicit values, +be undefined, or a composite of any of these. By extending DWARF carefully, +all existing DWARF expressions can retain their current semantic meaning. +DWARF has implicit conversions that convert from a value that represents an +address in the default address space to a memory location description. This +can be extended to allow a default address space memory location description +to be implicitly converted back to its address value. This allows all DWARF +Version 5 expressions to retain their same meaning, while adding the ability +to explicitly create memory location descriptions in non-default address +spaces and generalizing the power of composite location descriptions to any +kind of location description. See :ref:`amdgpu-dwarf-operation-expressions`. + +To allow composition of composite location descriptions, an explicit operation +that indicates the end of the definition of a composite location description +is required. This can be implied if the end of a DWARF expression is reached, +allowing current DWARF expressions to remain legal. See +``DW_OP_LLVM_piece_end``. 
+ +The ``DW_OP_plus`` and ``DW_OP_minus`` can be defined to operate on a memory +location description in the default target architecture specific address space +and a generic type value to produce an updated memory location description. This +allows them to continue to be used to offset an address. To generalize +offsetting to any location description, including location descriptions that +describe when bytes are in registers, are implicit, or a composite of these, the +``DW_OP_LLVM_offset``, ``DW_OP_LLVM_offset_uconst``, and +``DW_OP_LLVM_bit_offset`` offset operations are added. Unlike ``DW_OP_plus``, +``DW_OP_plus_uconst``, and ``DW_OP_minus`` arithmetic operations, these do not +define that integer overflow causes wrap-around. The offset operations can +operate on location storage of any size. For example, implicit location storage +could be any number of bits in size. It is simpler to define offsets that exceed +the size of the location storage as being an evaluation error, than having to +force an implementation to support potentially infinite precision offsets to +allow it to correctly track a series of positive and negative offsets that may +transiently overflow or underflow, but end up in range. This is simple for the +arithmetic operations as they are defined in terms of two's complement +arithmetic on a base type of a fixed size. + +Having the offset operations allows ``DW_OP_push_object_address`` to push a +location description that may be in a register, or be an implicit value, and the +DWARF expression of ``DW_TAG_ptr_to_member_type`` can contain them to offset +within it. ``DW_OP_LLVM_bit_offset`` generalizes DWARF to work with bit fields +which is not possible in DWARF Version 5. + +The DWARF ``DW_OP_xderef*`` operations allow a value to be converted into an +address of a specified address space which is then read. But it provides no +way to create a memory location description for an address in the non-default +address space.
For example, AMDGPU variables can be allocated in the local +address space at a fixed address. It is required to have an operation to +create an address in a specific address space that can be used to define the +location description of the variable. Defining this operation to produce a +location description allows the size of addresses in an address space to be +larger than the generic type. See ``DW_OP_LLVM_form_aspace_address``. + +If the ``DW_OP_LLVM_form_aspace_address`` operation had to produce a value +that can be implicitly converted to a memory location description, then it +would be limited to the size of the generic type which matches the size of the +default address space. Its value would be undefined and likely not match any +value in the actual program. By making the result a location description, it +allows a consumer great freedom in how it implements it. The implicit +conversion back to a value can be limited only to the default address space to +maintain compatibility with DWARF Version 5. For other address spaces the +producer can use the new operations that explicitly specify the address space. + +``DW_OP_breg*`` treats the register as containing an address in the default +address space. It is required to be able to specify the address space of the +register value. See ``DW_OP_LLVM_aspace_bregx``. + +Similarly, ``DW_OP_implicit_pointer`` treats its implicit pointer value as +being in the default address space. It is required to be able to specify the +address space of the pointer value. See +``DW_OP_LLVM_aspace_implicit_pointer``. + +Almost all uses of addresses in DWARF are limited to defining location +descriptions, or to be dereferenced to read memory. The exception is +``DW_CFA_val_offset`` which uses the address to set the value of a register. 
+By defining the CFA DWARF expression as being a memory location description, +it can maintain what address space it is, and that can be used to convert the +offset address back to an address in that address space. See +:ref:`amdgpu-dwarf-call-frame-information`. + +This approach allows all existing DWARF to have the identical semantics. It +allows the compiler to explicitly specify the address space it is using. For +example, a compiler could choose to access private memory in a swizzled manner +when mapping a source language to a wavefront in a SIMT manner, or to access +it in an unswizzled manner if mapping the same language with the wavefront +being the thread. It also allows the compiler to mix the address space it uses +to access private memory. For example, for SIMT it can still spill entire +vector registers in an unswizzled manner, while using a swizzled private +memory for SIMT variable access. This approach allows memory location +descriptions for different address spaces to be combined using the regular +``DW_OP_*piece`` operations. + +Location descriptions are an abstraction of storage, they give freedom to the +consumer on how to implement them. They allow the address space to encode lane +information so they can be used to read memory with only the memory +description and no extra arguments. The same set of operations can operate on +locations independent of their kind of storage. The ``DW_OP_deref*`` therefore +can be used on any storage kind. ``DW_OP_xderef*`` is unnecessary, except to +become a more compact way to convert a non-default address space address +followed by dereferencing it. + +In DWARF Version 5 a location description is defined as a single location +description or a location list. A location list is defined as either +effectively an undefined location description or as one or more single +location descriptions to describe an object with multiple places. 
The +``DW_OP_push_object_address`` and ``DW_OP_call*`` operations can put a +location description on the stack. Furthermore, debugger information entry +attributes such as ``DW_AT_data_member_location``, ``DW_AT_use_location``, and +``DW_AT_vtable_elem_location`` are defined as pushing a location description +on the expression stack before evaluating the expression. However, DWARF +Version 5 only allows the stack to contain values and so only a single memory +address can be on the stack which makes these incapable of handling location +descriptions with multiple places, or places other than memory. Since these +extensions allow the stack to contain location descriptions, the operations are +generalized to support location descriptions that can have multiple places. +This is backwards compatible with DWARF Version 5 and allows objects with +multiple places to be supported. For example, the expression that describes +how to access the field of an object can be evaluated with a location +description that has multiple places and will result in a location description +with multiple places as expected. With this change, the separate DWARF Version +5 sections that described DWARF expressions and location lists have been +unified into a single section that describes DWARF expressions in general. +This unification seems to be a natural consequence and a necessity of allowing +location descriptions to be part of the evaluation stack. + +For those familiar with the definition of location descriptions in DWARF Version +5, the definitions in these extensions are presented differently, but do +in fact define the same concept with the same fundamental semantics. However, +they do so in a way that allows the concept to extend to support address +spaces, bit addressing, the ability for composite location descriptions to be +composed of any kind of location description, and the ability to support +objects located at multiple places.
Collectively these changes expand the set +of processors that can be supported and improve support for optimized code. + +Several approaches were considered, and the one presented appears to be the +cleanest and offers the greatest improvement of DWARF's ability to support +optimized code. Examining the GDB debugger and LLVM compiler, it appears only +to require modest changes as they both already have to support general use of +location descriptions. It is anticipated that will also be the case for other +debuggers and compilers. + +As an experiment, GDB was modified to evaluate DWARF Version 5 expressions +with location descriptions as stack entries and implicit conversions. All GDB +tests have passed, except one that turned out to be an invalid test by DWARF +Version 5 rules. The code in GDB actually became simpler as all evaluation was +on the stack and there was no longer a need to maintain a separate structure +for the location description result. This gives confidence of the backwards +compatibility. + +Since the AMDGPU supports languages such as OpenCL [:ref:`OpenCL +<amdgpu-dwarf-OpenCL>`], there is a need to define source language address +classes so they can be used in a consistent way by consumers. It would also be +desirable to add support for using them in defining language types rather than +the current target architecture specific address spaces. See +:ref:`amdgpu-dwarf-segment_addresses`. + +A ``DW_AT_LLVM_augmentation`` attribute is added to a compilation unit +debugger information entry to indicate that there is additional target +architecture specific information in the debugging information entries of that +compilation unit. This allows a consumer to know what extensions are present +in the debugger information entries as is possible with the augmentation +string of other sections.
The format that should be used for the augmentation +string in the lookup by name table and CFI Common Information Entry is also +recommended to allow a consumer to parse the string when it contains +information from multiple vendors. + +The AMDGPU supports programming languages that include online compilation +where the source text may be created at runtime. Therefore, a way to embed the +source text in the debug information is required. For example, the OpenCL +language runtime supports online compilation. See +:ref:`amdgpu-dwarf-line-number-information`. + +Support to allow MD5 checksums to be optionally present in the line table is +added. This allows linking together compilation units where some have MD5 +checksums and some do not. In DWARF Version 5 the file timestamp and file size +can be optional, but if the MD5 checksum is present it must be valid for all +files. See :ref:`amdgpu-dwarf-line-number-information`. + +Support is added for the HIP programming language [:ref:`HIP +<amdgpu-dwarf-HIP>`] which is supported by the AMDGPU. See +:ref:`amdgpu-dwarf-language-names`. + +The following sections provide the definitions for the additional operations, +as well as clarifying how existing expression operations, CFI operations, and +attributes behave with respect to generalized location descriptions that +support address spaces and location descriptions that support multiple places. +It has been defined such that it is backwards compatible with DWARF Version 5. +The definitions are intended to fully define well-formed DWARF in a consistent +style based on the DWARF Version 5 specification. Non-normative text is shown +in *italics*. + +The names for the new operations, attributes, and constants include "\ +``LLVM``\ " and are encoded with vendor specific codes so these extensions can +be implemented as an LLVM vendor extension to DWARF Version 5. If accepted these +names would not include the "\ ``LLVM``\ " and would not use encodings in the +vendor range.
+ +The extensions are described in +:ref:`amdgpu-dwarf-changes-relative-to-dwarf-version-5` and are +organized to follow the section ordering of DWARF Version 5. It includes notes +to indicate the corresponding DWARF Version 5 sections to which they pertain. +Other notes describe additional changes that may be worth considering, and to +raise questions. + +.. _amdgpu-dwarf-changes-relative-to-dwarf-version-5: + +Changes Relative to DWARF Version 5 +=================================== + +General Description +------------------- + +Attribute Types +~~~~~~~~~~~~~~~ + +.. note:: + + This augments DWARF Version 5 section 2.2 and Table 2.2. + +The following table provides the additional attributes. See +:ref:`amdgpu-dwarf-debugging-information-entry-attributes`. + +.. table:: Attribute names + :name: amdgpu-dwarf-attribute-names-table + + =========================== ==================================== + Attribute Usage + =========================== ==================================== + ``DW_AT_LLVM_active_lane`` SIMD or SIMT active lanes + ``DW_AT_LLVM_augmentation`` Compilation unit augmentation string + ``DW_AT_LLVM_lane_pc`` SIMD or SIMT lane program location + ``DW_AT_LLVM_lanes`` SIMD or SIMT thread lane count + ``DW_AT_LLVM_vector_size`` Base type vector size + =========================== ==================================== + +.. _amdgpu-dwarf-expressions: + +DWARF Expressions +~~~~~~~~~~~~~~~~~ + +.. note:: + + This section, and its nested sections, replaces DWARF Version 5 section 2.5 + and section 2.6. The new DWARF expression operation extensions are defined as + well as clarifying the extensions to already existing DWARF Version 5 + operations. It is based on the text of the existing DWARF Version 5 standard. + +DWARF expressions describe how to compute a value or specify a location. 
+ +*The evaluation of a DWARF expression can provide the location of an object, the +value of an array bound, the length of a dynamic string, the desired value +itself, and so on.* + +If the evaluation of a DWARF expression does not encounter an error, then it can +either result in a value (see :ref:`amdgpu-dwarf-expression-value`) or a +location description (see :ref:`amdgpu-dwarf-location-description`). When a +DWARF expression is evaluated, it may be specified whether a value or location +description is required as the result kind. + +If a result kind is specified, and the result of the evaluation does not match +the specified result kind, then the implicit conversions described in +:ref:`amdgpu-dwarf-memory-location-description-operations` are performed if +valid. Otherwise, the DWARF expression is ill-formed. + +If the evaluation of a DWARF expression encounters an evaluation error, then the +result is an evaluation error. + +.. note:: + + Decided to define the concept of an evaluation error. An alternative is to + introduce an undefined value base type in a similar way to location + descriptions having an undefined location description. Then operations that + encounter an evaluation error can return the undefined location description or + value with an undefined base type. + + All operations that act on values would return an undefined entity if given an + undefined value. The expression would then always evaluate to completion, and + can be tested to determine if it is an undefined entity. + + However, this would add considerable additional complexity and does not match + that GDB throws an exception when these evaluation errors occur. + +If a DWARF expression is ill-formed, then the result is undefined. + +The following sections detail the rules for when a DWARF expression is +ill-formed or results in an evaluation error. 
+ +A DWARF expression can either be encoded as an operation expression (see +:ref:`amdgpu-dwarf-operation-expressions`), or as a location list expression +(see :ref:`amdgpu-dwarf-location-list-expressions`). + +.. _amdgpu-dwarf-expression-evaluation-context: + +DWARF Expression Evaluation Context ++++++++++++++++++++++++++++++++++++ + +A DWARF expression is evaluated in a context that can include a number of +context elements. If multiple context elements are specified then they must be +self consistent or the result of the evaluation is undefined. The context +elements that can be specified are: + +*A current result kind* + + The kind of result required by the DWARF expression evaluation. If specified + it can be a location description or a value. + +*A current thread* + + The target architecture thread identifier of the source program thread of + execution for which a user presented expression is currently being evaluated. + + It is required for operations that are related to target architecture threads. + + *For example, the* ``DW_OP_form_tls_address`` *operation and* + ``DW_OP_LLVM_form_aspace_address`` *operation when given an address space that + is thread specific.* + +*A current lane* + + The target architecture lane identifier of the source program thread of + execution for which a user presented expression is currently being evaluated. + This applies to languages that are implemented using a SIMD or SIMT execution + model. + + It is required for operations that are related to target architecture lanes. + + *For example, the* ``DW_OP_LLVM_push_lane`` *operation and* + ``DW_OP_LLVM_form_aspace_address`` *operation when given an address space that + is lane specific.* + + If specified, it must be consistent with any specified current thread and + current target architecture. It is consistent with a thread if it identifies a + lane of the thread. It is consistent with a target architecture if it is a + valid lane identifier of the target architecture. 
Otherwise the result is + undefined. + +*A current call frame* + + The target architecture call frame identifier. It identifies a call frame that + corresponds to an active invocation of a subprogram in the current thread. It + is identified by its address on the call stack. The address is referred to as + the Canonical Frame Address (CFA). The call frame information is used to + determine the CFA for the call frames of the current thread's call stack (see + :ref:`amdgpu-dwarf-call-frame-information`). + + It is required for operations that specify target architecture registers to + support virtual unwinding of the call stack. + + *For example, the* ``DW_OP_*reg*`` *operations.* + + If specified, it must be an active call frame in the current thread. If the + current lane is specified, then that lane must have been active on entry to + the call frame (see the ``DW_AT_LLVM_lane_pc`` attribute). Otherwise the + result is undefined. + + If it is the currently executing call frame, then it is termed the top call + frame. + +*A current program location* + + The target architecture program location corresponding to the current call + frame of the current thread. + + The program location of the top call frame is the target architecture program + counter for the current thread. The call frame information is used to obtain + the value of the return address register to determine the program location of + the other call frames (see :ref:`amdgpu-dwarf-call-frame-information`). + + It is required for the evaluation of location list expressions to select + amongst multiple program location ranges. It is required for operations that + specify target architecture registers to support virtual unwinding of the call + stack (see :ref:`amdgpu-dwarf-call-frame-information`). + + If specified: + + * If the current lane is not specified: + + * If the current call frame is the top call frame, it must be the current + target architecture program location. 
+ + * If the current call frame F is not the top call frame, it must be the + program location associated with the call site in the current caller frame + F that invoked the callee frame. + + * If the current lane is specified and the architecture program location LPC + computed by the ``DW_AT_LLVM_lane_pc`` attribute for the current lane is not + the undefined location description (indicating the lane was not active on + entry to the call frame), it must be LPC. + + * Otherwise the result is undefined. + +*A current compilation unit* + + The compilation unit debug information entry that contains the DWARF expression + being evaluated. + + It is required for operations that reference debug information associated with + the same compilation unit, including indicating if such references use the + 32-bit or 64-bit DWARF format. It can also provide the default address space + address size if no current target architecture is specified. + + *For example, the* ``DW_OP_constx`` *and* ``DW_OP_addrx`` *operations.* + + *Note that this compilation unit may not be the same as the compilation unit + determined from the loaded code object corresponding to the current program + location. For example, the evaluation of the expression E associated with a + ``DW_AT_location`` attribute of the debug information entry operand of the + ``DW_OP_call*`` operations is evaluated with the compilation unit that + contains E and not the one that contains the ``DW_OP_call*`` operation + expression.* + +*A current target architecture* + + The target architecture. + + It is required for operations that specify target architecture specific + entities. 
+ + *For example, target architecture specific entities include DWARF register + identifiers, DWARF lane identifiers, DWARF address space identifiers, the + default address space, and the address space address sizes.* + + If specified: + + * If the current thread is specified, then the current target architecture + must be the same as the target architecture of the current thread. + + * If the current compilation unit is specified, then the current target + architecture default address space address size must be the same as the + ``address_size`` field in the header of the current compilation unit and any + associated entry in the ``.debug_aranges`` section. + + * If the current program location is specified, then the current target + architecture must be the same as the target architecture of any line number + information entry (see :ref:`amdgpu-dwarf-line-number-information`) + corresponding to the current program location. + + * If the current program location is specified, then the current target + architecture default address space address size must be the same as the + ``address_size`` field in the header of any entry corresponding to the + current program location in the ``.debug_addr``, ``.debug_line``, + ``.debug_rnglists``, ``.debug_rnglists.dwo``, ``.debug_loclists``, and + ``.debug_loclists.dwo`` sections. + + * Otherwise the result is undefined. + +*A current object* + + The location description of a program object. + + It is required for the ``DW_OP_push_object_address`` operation. + + *For example, the* ``DW_AT_data_location`` *attribute on type debug + information entries specifies the program object corresponding to a + runtime descriptor as the current object when it evaluates its associated + expression.* + + The result is undefined if the location descriptor is invalid (see + :ref:`amdgpu-dwarf-location-description`).
+ +*An initial stack* + + This is a list of values or location descriptions that will be pushed on the + operation expression evaluation stack in the order provided before evaluation + of an operation expression starts. + + Some debugger information entries have attributes that evaluate their DWARF + expression value with initial stack entries. In all other cases the initial + stack is empty. + + The result is undefined if any location descriptors are invalid (see + :ref:`amdgpu-dwarf-location-description`). + +If the evaluation requires a context element that is not specified, then the +result of the evaluation is an error. + +*A DWARF expression for the location description may be able to be evaluated +without a thread, lane, call frame, program location, or architecture context. +For example, the location of a global variable may be able to be evaluated +without such context. If the expression evaluates with an error then it may +indicate the variable has been optimized and so requires more context.* + +*The DWARF expression for call frame information (see +:ref:`amdgpu-dwarf-call-frame-information`) operations are restricted to those +that do not require the compilation unit context to be specified.* + +The DWARF is ill-formed if all the ``address_size`` fields in the headers of all +the entries in the ``.debug_info``, ``.debug_addr``, ``.debug_line``, +``.debug_rnglists``, ``.debug_rnglists.dwo``, ``.debug_loclists``, and +``.debug_loclists.dwo`` sections corresponding to any given program location do +not match. + +.. _amdgpu-dwarf-expression-value: + +DWARF Expression Value +++++++++++++++++++++++ + +A value has a type and a literal value. It can represent a literal value of any +supported base type of the target architecture. The base type specifies the +size, encoding, and endianity of the literal value. + +.. note:: + + It may be desirable to add an implicit pointer base type encoding. 
It would be + used for the type of the value that is produced when the ``DW_OP_deref*`` + operation retrieves the full contents of an implicit pointer location storage + created by the ``DW_OP_implicit_pointer`` or + ``DW_OP_LLVM_aspace_implicit_pointer`` operations. The literal value would + record the debugging information entry and byte displacement specified by the + associated ``DW_OP_implicit_pointer`` or + ``DW_OP_LLVM_aspace_implicit_pointer`` operations. + +There is a distinguished base type termed the generic type, which is an integral +type that has the size of an address in the target architecture default address +space, a target architecture defined endianity, and unspecified signedness. + +*The generic type is the same as the unspecified type used for stack operations +defined in DWARF Version 4 and before.* + +An integral type is a base type that has an encoding of ``DW_ATE_signed``, +``DW_ATE_signed_char``, ``DW_ATE_unsigned``, ``DW_ATE_unsigned_char``, +``DW_ATE_boolean``, or any target architecture defined integral encoding in the +inclusive range ``DW_ATE_lo_user`` to ``DW_ATE_hi_user``. + +.. note:: + + It is unclear if ``DW_ATE_address`` is an integral type. GDB does not seem to + consider it as integral. + +.. _amdgpu-dwarf-location-description: + +DWARF Location Description +++++++++++++++++++++++++++ + +*Debugging information must provide consumers a way to find the location of +program variables, determine the bounds of dynamic arrays and strings, and +possibly to find the base address of a subprogram’s call frame or the return +address of a subprogram. 
Furthermore, to meet the needs of recent computer +architectures and optimization techniques, debugging information must be able to +describe the location of an object whose location changes over the object’s +lifetime, and may reside at multiple locations simultaneously during parts of an +object's lifetime.* + +Information about the location of program objects is provided by location +descriptions. + +Location descriptions can consist of one or more single location descriptions. + +A single location description specifies the location storage that holds a +program object and a position within the location storage where the program +object starts. The position within the location storage is expressed as a bit +offset relative to the start of the location storage. + +A location storage is a linear stream of bits that can hold values. Each +location storage has a size in bits and can be accessed using a zero-based bit +offset. The ordering of bits within a location storage uses the bit numbering +and direction conventions that are appropriate to the current language on the +target architecture. + +There are five kinds of location storage: + +*memory location storage* + Corresponds to the target architecture memory address spaces. + +*register location storage* + Corresponds to the target architecture registers. + +*implicit location storage* + Corresponds to fixed values that can only be read. + +*undefined location storage* + Indicates no value is available and therefore cannot be read or written. + +*composite location storage* + Allows a mixture of these where some bits come from one location storage and + some from another location storage, or from disjoint parts of the same + location storage. + +.. note:: + + It may be better to add an implicit pointer location storage kind used by the + ``DW_OP_implicit_pointer`` and ``DW_OP_LLVM_aspace_implicit_pointer`` + operations. 
It would specify the debugger information entry and byte offset + provided by the operations. + +*Location descriptions are a language independent representation of addressing +rules. They are created using DWARF operation expressions of arbitrary +complexity. They can be the result of evaluating a debugger information entry +attribute that specifies an operation expression. In this usage they can +describe the location of an object as long as its lifetime is either static or +the same as the lexical block (see DWARF Version 5 section 3.5) that owns it, +and it does not move during its lifetime. They can be the result of evaluating a +debugger information entry attribute that specifies a location list expression. +In this usage they can describe the location of an object that has a limited +lifetime, changes its location during its lifetime, or has multiple locations +over part or all of its lifetime.* + +If a location description has more than one single location description, the +DWARF expression is ill-formed if the object value held in each single location +description's position within the associated location storage is not the same +value, except for the parts of the value that are uninitialized. + +*A location description that has more than one single location description can +only be created by a location list expression that has overlapping program +location ranges, or certain expression operations that act on a location +description that has more than one single location description. There are no +operation expression operations that can directly create a location description +with more than one single location description.* + +*A location description with more than one single location description can be +used to describe objects that reside in more than one piece of storage at the +same time. An object may have more than one location as a result of +optimization. 
For example, a value that is only read may be promoted from memory +to a register for some region of code, but later code may revert to reading the +value from memory as the register may be used for other purposes. For the code +region where the value is in a register, any change to the object value must be +made in both the register and the memory so both regions of code will read the +updated value.* + +*A consumer of a location description with more than one single location +description can read the object's value from any of the single location +descriptions (since they all refer to location storage that has the same value), +but must write any changed value to all the single location descriptions.* + +The evaluation of an expression may require context elements to create a +location description. If such a location description is accessed, the storage it +denotes is that associated with the context element values specified when the +location description was created, which may differ from the context at the time +it is accessed. + +*For example, creating a register location description requires the thread +context: the location storage is for the specified register of that thread. +Creating a memory location description for an address space may require a +thread and a lane context: the location storage is the memory associated with +that thread and lane.* + +If any of the context elements required to create a location description change, +the location description becomes invalid and accessing it is undefined. + +*Examples of context that can invalidate a location description are:* + +* *The thread context is required and execution causes the thread to terminate.* +* *The call frame context is required and further execution causes the call + frame to return to the calling frame.* +* *The program location is required and further execution of the thread occurs.
+ That could change the location list entry or call frame information entry that + applies.* +* *An operation uses call frame information:* + + * *Any of the frames used in the virtual call frame unwinding return.* + * *The top call frame is used, the program location is used to select the call + frame information entry, and further execution of the thread occurs.* + +*A DWARF expression can be used to compute a location description for an object. +A subsequent DWARF expression evaluation can be given the object location +description as the object context or initial stack context to compute a +component of the object. The final result is undefined if the object location +description becomes invalid between the two expression evaluations.* + +A change of a thread's program location may not make a location description +invalid, yet may still render it as no longer meaningful. Accessing such a +location description, or using it as the object context or initial stack context +of an expression evaluation, may produce an undefined result. + +*For example, a location description may specify a register that no longer holds +the intended program object after a program location change. One way to avoid +such problems is to recompute location descriptions associated with threads when +their program locations change.* + +.. _amdgpu-dwarf-operation-expressions: + +DWARF Operation Expressions ++++++++++++++++++++++++++++ + +An operation expression is comprised of a stream of operations, each consisting +of an opcode followed by zero or more operands. The number of operands is +implied by the opcode. + +Operations represent a postfix operation on a simple stack machine. Each stack +entry can hold either a value or a location description. Operations can act on +entries on the stack, including adding entries and removing entries. 
If the kind +of a stack entry does not match the kind required by the operation and is not +implicitly convertible to the required kind (see +:ref:`amdgpu-dwarf-memory-location-description-operations`), then the DWARF +operation expression is ill-formed. + +Evaluation of an operation expression starts with an empty stack on which the +entries from the initial stack provided by the context are pushed in the order +provided. Then the operations are evaluated, starting with the first operation +of the stream. Evaluation continues until either an operation has an evaluation +error, or until one past the last operation of the stream is reached. + +The result of the evaluation is: + +* If an operation has an evaluation error, or an operation evaluates an + expression that has an evaluation error, then the result is an evaluation + error. + +* If the current result kind specifies a location description, then: + + * If the stack is empty, the result is a location description with one + undefined location description. + + *This rule is for backwards compatibility with DWARF Version 5 which has no + explicit operation to create an undefined location description, and uses an + empty operation expression for this purpose.* + + * If the top stack entry is a location description, or can be converted + to one (see :ref:`amdgpu-dwarf-memory-location-description-operations`), + then the result is that, possibly converted, location description. Any other + entries on the stack are discarded. + + * Otherwise the DWARF expression is ill-formed. + + .. note:: + + Could define this case as returning an implicit location description as + if the ``DW_OP_implicit`` operation is performed. + +* If the current result kind specifies a value, then: + + * If the top stack entry is a value, or can be converted to one (see + :ref:`amdgpu-dwarf-memory-location-description-operations`), then the result + is that, possibly converted, value. Any other entries on the stack are + discarded. 
+ + * Otherwise the DWARF expression is ill-formed. + +* If the current result kind is not specified, then: + + * If the stack is empty, the result is a location description with one + undefined location description. + + *This rule is for backwards compatibility with DWARF Version 5 which has no + explicit operation to create an undefined location description, and uses an + empty operation expression for this purpose.* + + .. note:: + + This rule is consistent with the rule above for when a location + description is requested. However, GDB appears to report this as an error + and no GDB tests appear to cause an empty stack for this case. + + * Otherwise, the top stack entry is returned. Any other entries on the stack + are discarded. + +An operation expression is encoded as a byte block with some form of prefix that +specifies the byte count. It can be used: + +* as the value of a debugging information entry attribute that is encoded using + class ``exprloc`` (see DWARF Version 5 section 7.5.5), + +* as the operand to certain operation expression operations, + +* as the operand to certain call frame information operations (see + :ref:`amdgpu-dwarf-call-frame-information`), + +* and in location list entries (see + :ref:`amdgpu-dwarf-location-list-expressions`). + +.. _amdgpu-dwarf-stack-operations: + +Stack Operations +################ + +The following operations manipulate the DWARF stack. Operations that index the +stack assume that the top of the stack (most recently added entry) has index 0. +They allow the stack entries to be either a value or location description. + +If any stack entry accessed by a stack operation is an incomplete composite +location description (see +:ref:`amdgpu-dwarf-composite-location-description-operations`), then the DWARF +expression is ill-formed. + +.. note:: + + These operations now support stack entries that are values and location + descriptions. + +.. 
note:: + + If it is desired to also make them work with incomplete composite location + descriptions, then would need to define that the composite location storage + specified by the incomplete composite location description is also replicated + when a copy is pushed. This ensures that each copy of the incomplete composite + location description can update the composite location storage they specify + independently. + +1. ``DW_OP_dup`` + + ``DW_OP_dup`` duplicates the stack entry at the top of the stack. + +2. ``DW_OP_drop`` + + ``DW_OP_drop`` pops the stack entry at the top of the stack and discards it. + +3. ``DW_OP_pick`` + + ``DW_OP_pick`` has a single unsigned 1-byte operand that represents an index + I. A copy of the stack entry with index I is pushed onto the stack. + +4. ``DW_OP_over`` + + ``DW_OP_over`` pushes a copy of the entry with index 1. + + *This is equivalent to a ``DW_OP_pick 1`` operation.* + +5. ``DW_OP_swap`` + + ``DW_OP_swap`` swaps the top two stack entries. The entry at the top of the + stack becomes the second stack entry, and the second stack entry becomes the + top of the stack. + +6. ``DW_OP_rot`` + + ``DW_OP_rot`` rotates the first three stack entries. The entry at the top of + the stack becomes the third stack entry, the second entry becomes the top of + the stack, and the third entry becomes the second entry. + +.. _amdgpu-dwarf-control-flow-operations: + +Control Flow Operations +####################### + +The following operations provide simple control of the flow of a DWARF operation +expression. + +1. ``DW_OP_nop`` + + ``DW_OP_nop`` is a place holder. It has no effect on the DWARF stack + entries. + +2. ``DW_OP_le``, ``DW_OP_ge``, ``DW_OP_eq``, ``DW_OP_lt``, ``DW_OP_gt``, + ``DW_OP_ne`` + + .. note:: + + The same as in DWARF Version 5 section 2.5.1.5. + +3. ``DW_OP_skip`` + + ``DW_OP_skip`` is an unconditional branch. Its single operand is a 2-byte + signed integer constant. 
The 2-byte constant is the number of bytes of the + DWARF expression to skip forward or backward from the current operation, + beginning after the 2-byte constant. + + If the updated position is at one past the end of the last operation, then + the operation expression evaluation is complete. + + Otherwise, the DWARF expression is ill-formed if the updated operation + position is not in the range of the first to last operation inclusive, or + not at the start of an operation. + +4. ``DW_OP_bra`` + + ``DW_OP_bra`` is a conditional branch. Its single operand is a 2-byte signed + integer constant. This operation pops the top of stack. If the value popped + is not the constant 0, the 2-byte constant operand is the number of bytes of + the DWARF operation expression to skip forward or backward from the current + operation, beginning after the 2-byte constant. + + If the updated position is at one past the end of the last operation, then + the operation expression evaluation is complete. + + Otherwise, the DWARF expression is ill-formed if the updated operation + position is not in the range of the first to last operation inclusive, or + not at the start of an operation. + +5. ``DW_OP_call2, DW_OP_call4, DW_OP_call_ref`` + + ``DW_OP_call2``, ``DW_OP_call4``, and ``DW_OP_call_ref`` perform DWARF + procedure calls during evaluation of a DWARF expression. + + ``DW_OP_call2`` and ``DW_OP_call4``, have one operand that is, respectively, + a 2-byte or 4-byte unsigned offset DR that represents the byte offset of a + debugging information entry D relative to the beginning of the current + compilation unit. + + ``DW_OP_call_ref`` has one operand that is a 4-byte unsigned value in the + 32-bit DWARF format, or an 8-byte unsigned value in the 64-bit DWARF format, + that represents the byte offset DR of a debugging information entry D + relative to the beginning of the ``.debug_info`` section that contains the + current compilation unit. D may not be in the current compilation unit. 
+ + .. note:: + + DWARF Version 5 states that DR can be an offset in a ``.debug_info`` + section other than the one that contains the current compilation unit. It + states that relocation of references from one executable or shared object + file to another must be performed by the consumer. But given that DR is + defined as an offset in a ``.debug_info`` section this seems impossible. + If DR was defined as an implementation defined value, then the consumer + could choose to interpret the value in an implementation defined manner to + reference a debug information in another executable or shared object. + + In ELF the ``.debug_info`` section is in a non-\ ``PT_LOAD`` segment so + standard dynamic relocations cannot be used. But even if they were loaded + segments and dynamic relocations were used, DR would need to be the + address of D, not an offset in a ``.debug_info`` section. That would also + need DR to be the size of a global address. So it would not be possible to + use the 32-bit DWARF format in a 64-bit global address space. In addition, + the consumer would need to determine what executable or shared object the + relocated address was in so it could determine the containing compilation + unit. + + GDB only interprets DR as an offset in the ``.debug_info`` section that + contains the current compilation unit. + + This comment also applies to ``DW_OP_implicit_pointer`` and + ``DW_OP_LLVM_aspace_implicit_pointer``. + + *Operand interpretation of* ``DW_OP_call2``\ *,* ``DW_OP_call4``\ *, and* + ``DW_OP_call_ref`` *is exactly like that for* ``DW_FORM_ref2``\ *,* + ``DW_FORM_ref4``\ *, and* ``DW_FORM_ref_addr``\ *, respectively.* + + The call operation is evaluated by: + + * If D has a ``DW_AT_location`` attribute that is encoded as a ``exprloc`` + that specifies an operation expression E, then execution of the current + operation expression continues from the first operation of E.
Execution + continues until one past the last operation of E is reached, at which + point execution continues with the operation following the call operation. + The operations of E are evaluated with the same current context, except + current compilation unit is the one that contains D and the stack is the + same as that being used by the call operation. After the call operation + has been evaluated, the stack is therefore as it is left by the evaluation + of the operations of E. Since E is evaluated on the same stack as the call + operation, E can use, and/or remove entries already on the stack, and can + add new entries to the stack. + + *Values on the stack at the time of the call may be used as parameters by + the called expression and values left on the stack by the called expression + may be used as return values by prior agreement between the calling and + called expressions.* + + * If D has a ``DW_AT_location`` attribute that is encoded as a ``loclist`` or + ``loclistsptr``, then the specified location list expression E is + evaluated. The evaluation of E uses the current context, except the result + kind is a location description, the compilation unit is the one that + contains D, and the initial stack is empty. The location description + result is pushed on the stack. + + .. note:: + + This rule avoids having to define how to execute a matched location list + entry operation expression on the same stack as the call when there are + multiple matches. But it allows the call to obtain the location + description for a variable or formal parameter which may use a location + list expression. + + An alternative is to treat the case when D has a ``DW_AT_location`` + attribute that is encoded as a ``loclist`` or ``loclistsptr``, and the + specified location list expression E' matches a single location list + entry with operation expression E, the same as the ``exprloc`` case and + evaluate on the same stack. 
+ + But this is not attractive as if the attribute is for a variable that + happens to end with a non-singleton stack, it will not simply put a + location description on the stack. Presumably the intent of using + ``DW_OP_call*`` on a variable or formal parameter debugger information + entry is to push just one location description on the stack. That + location description may have more than one single location description. + + The previous rule for ``exprloc`` also has the same problem as normally + a variable or formal parameter location expression may leave multiple + entries on the stack and only return the top entry. + + GDB implements ``DW_OP_call*`` by always executing E on the same stack. + If the location list has multiple matching entries, it simply picks the + first one and ignores the rest. This seems fundamentally at odds with + the desire to support multiple places for variables. + + So, it feels like ``DW_OP_call*`` should both support pushing a location + description on the stack for a variable or formal parameter, and also + support being able to execute an operation expression on the same stack. + Being able to specify a different operation expression for different + program locations seems a desirable feature to retain. + + A solution to that is to have a distinct ``DW_AT_LLVM_proc`` attribute + for the ``DW_TAG_dwarf_procedure`` debugging information entry. Then the + ``DW_AT_location`` attribute expression is always executed separately + and pushes a location description (that may have multiple single + location descriptions), and the ``DW_AT_LLVM_proc`` attribute expression + is always executed on the same stack and can leave anything on the + stack. + + The ``DW_AT_LLVM_proc`` attribute could have the new classes + ``exprproc``, ``loclistproc``, and ``loclistsptrproc`` to indicate that + the expression is executed on the same stack. ``exprproc`` is the same + encoding as ``exprloc``.
``loclistproc`` and ``loclistsptrproc`` are the + same encoding as their non-\ ``proc`` counterparts, except the DWARF is + ill-formed if the location list does not match exactly one location list + entry and a default entry is required. These forms indicate explicitly + that the matched single operation expression must be executed on the + same stack. This is better than ad hoc special rules for ``loclistproc`` + and ``loclistsptrproc`` which are currently clearly defined to always + return a location description. The producer then explicitly indicates + the intent through the attribute classes. + + Such a change would be a breaking change for how GDB implements + ``DW_OP_call*``. However, are the breaking cases actually occurring in + practice? GDB could implement the current approach for DWARF Version 5, + and the new semantics for DWARF Version 6 which has been done for some + other features. + + Another option is to limit the execution to be on the same stack only to + the evaluation of an expression E that is the value of a + ``DW_AT_location`` attribute of a ``DW_TAG_dwarf_procedure`` debugging + information entry. The DWARF would be ill-formed if E is a location list + expression that does not match exactly one location list entry. In all + other cases the evaluation of an expression E that is the value of a + ``DW_AT_location`` attribute would evaluate E with the current context, + except the result kind is a location description, the compilation unit + is the one that contains D, and the initial stack is empty. The location + description result is pushed on the stack. + + * If D has a ``DW_AT_const_value`` attribute with a value V, then it is as + if a ``DW_OP_implicit_value V`` operation was executed. + + *This allows a call operation to be used to compute the location + description for any variable or formal parameter regardless of whether the + producer has optimized it to a constant. 
This is consistent with the + ``DW_OP_implicit_pointer`` operation.* + + .. note:: + + Alternatively, could deprecate using ``DW_AT_const_value`` for + ``DW_TAG_variable`` and ``DW_TAG_formal_parameter`` debugger information + entries that are constants and instead use ``DW_AT_location`` with an + operation expression that results in a location description with one + implicit location description. Then this rule would not be required. + + * Otherwise, there is no effect and no changes are made to the stack. + + .. note:: + + In DWARF Version 5, if D does not have a ``DW_AT_location`` then + ``DW_OP_call*`` is defined to have no effect. It is unclear that this is + the right definition as a producer should be able to rely on using + ``DW_OP_call*`` to get a location description for any non-\ + ``DW_TAG_dwarf_procedure`` debugging information entries. Also, the + producer should not be creating DWARF with ``DW_OP_call*`` to a + ``DW_TAG_dwarf_procedure`` that does not have a ``DW_AT_location`` + attribute. So, should this case be defined as an ill-formed DWARF + expression? + + *The* ``DW_TAG_dwarf_procedure`` *debugging information entry can be used to + define DWARF procedures that can be called.* + +.. _amdgpu-dwarf-value-operations: + +Value Operations +################ + +This section describes the operations that push values on the stack. + +Each value stack entry has a type and a literal value and can represent a +literal value of any supported base type of the target architecture. The base +type specifies the size, encoding, and endianity of the literal value. + +The base type of value stack entries can be the distinguished generic type. + +.. _amdgpu-dwarf-literal-operations: + +Literal Operations +^^^^^^^^^^^^^^^^^^ + +The following operations all push a literal value onto the DWARF stack. + +Operations other than ``DW_OP_const_type`` push a value V with the generic type. 
+If V is larger than the generic type, then V is truncated to the generic type +size and the low-order bits used. + +1. ``DW_OP_lit0``, ``DW_OP_lit1``, ..., ``DW_OP_lit31`` + + ``DW_OP_lit`` operations encode an unsigned literal value N from 0 + through 31, inclusive. They push the value N with the generic type. + +2. ``DW_OP_const1u``, ``DW_OP_const2u``, ``DW_OP_const4u``, ``DW_OP_const8u`` + + ``DW_OP_constu`` operations have a single operand that is a 1, 2, 4, or + 8-byte unsigned integer constant U, respectively. They push the value U with + the generic type. + +3. ``DW_OP_const1s``, ``DW_OP_const2s``, ``DW_OP_const4s``, ``DW_OP_const8s`` + + ``DW_OP_consts`` operations have a single operand that is a 1, 2, 4, or + 8-byte signed integer constant S, respectively. They push the value S with + the generic type. + +4. ``DW_OP_constu`` + + ``DW_OP_constu`` has a single unsigned LEB128 integer operand N. It pushes + the value N with the generic type. + +5. ``DW_OP_consts`` + + ``DW_OP_consts`` has a single signed LEB128 integer operand N. It pushes the + value N with the generic type. + +6. ``DW_OP_constx`` + + ``DW_OP_constx`` has a single unsigned LEB128 integer operand that + represents a zero-based index into the ``.debug_addr`` section relative to + the value of the ``DW_AT_addr_base`` attribute of the associated compilation + unit. The value N in the ``.debug_addr`` section has the size of the generic + type. It pushes the value N with the generic type. + + *The* ``DW_OP_constx`` *operation is provided for constants that require + link-time relocation but should not be interpreted by the consumer as a + relocatable address (for example, offsets to thread-local storage).* + +7. ``DW_OP_const_type`` + + ``DW_OP_const_type`` has three operands. The first is an unsigned LEB128 + integer DR that represents the byte offset of a debugging information entry + D relative to the beginning of the current compilation unit, that provides + the type T of the constant value.
The second is a 1-byte unsigned integral + constant S. The third is a block of bytes B, with a length equal to S. + + TS is the bit size of the type T. The least significant TS bits of B are + interpreted as a value V of the type T. It pushes the value V with the type + T. + + The DWARF is ill-formed if D is not a ``DW_TAG_base_type`` debugging + information entry in the current compilation unit, or if TS divided by 8 + (the byte size) and rounded up to a whole number is not equal to S. + + *While the size of the byte block B can be inferred from the type D + definition, it is encoded explicitly into the operation so that the + operation can be parsed easily without reference to the* ``.debug_info`` + *section.* + +8. ``DW_OP_LLVM_push_lane`` *New* + + ``DW_OP_LLVM_push_lane`` pushes the target architecture lane identifier of + the current lane as a value with the generic type. + + *For languages that are implemented using a SIMD or SIMT execution model, + this is the lane number that corresponds to the source language thread of + execution upon which the user is focused.* + +.. _amdgpu-dwarf-arithmetic-logical-operations: + +Arithmetic and Logical Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section is the same as DWARF Version 5 section 2.5.1.4. + +.. _amdgpu-dwarf-type-conversions-operations: + +Type Conversion Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This section is the same as DWARF Version 5 section 2.5.1.6. + +.. _amdgpu-dwarf-general-operations: + +Special Value Operations +^^^^^^^^^^^^^^^^^^^^^^^^ + +There are these special value operations currently defined: + +1. ``DW_OP_regval_type`` + + ``DW_OP_regval_type`` has two operands. The first is an unsigned LEB128 + integer that represents a register number R.
The second is an unsigned + LEB128 integer DR that represents the byte offset of a debugging information + entry D relative to the beginning of the current compilation unit, that + provides the type T of the register value. + + The operation is equivalent to performing ``DW_OP_regx R; DW_OP_deref_type + DR``. + + .. note:: + + Should DWARF allow the type T to be a larger size than the size of the + register R? Restricting a larger bit size avoids any issue of conversion + as the, possibly truncated, bit contents of the register is simply + interpreted as a value of T. If a conversion is wanted it can be done + explicitly using a ``DW_OP_convert`` operation. + + GDB has a per register hook that allows a target specific conversion on a + register by register basis. It defaults to truncation of bigger registers. + Removing use of the target hook does not cause any test failures in common + architectures. If the compiler for a target architecture did want some + form of conversion, including a larger result type, it could always + explicitly use the ``DW_OP_convert`` operation. + + If T is a larger type than the register size, then the default GDB + register hook reads bytes from the next register (or reads out of bounds + for the last register!). Removing use of the target hook does not cause + any test failures in common architectures (except an illegal hand written + assembly test). If a target architecture requires this behavior, these + extensions allow a composite location description to be used to combine + multiple registers. + +2. ``DW_OP_deref`` + + S is the bit size of the generic type divided by 8 (the byte size) and + rounded up to a whole number. DR is the offset of a hypothetical debug + information entry D in the current compilation unit for a base type of the + generic type. + + The operation is equivalent to performing ``DW_OP_deref_type S, DR``. + +3.
``DW_OP_deref_size`` + + ``DW_OP_deref_size`` has a single 1-byte unsigned integral constant that + represents a byte result size S. + + TS is the smaller of the generic type bit size and S scaled by 8 (the byte + size). If TS is smaller than the generic type bit size then T is an unsigned + integral type of bit size TS, otherwise T is the generic type. DR is the + offset of a hypothetical debug information entry D in the current + compilation unit for a base type T. + + .. note:: + + Truncating the value when S is larger than the generic type matches what + GDB does. This allows the generic type size to not be an integral byte + size. It does allow S to be arbitrarily large. Should S be restricted to + the size of the generic type rounded up to a multiple of 8? + + The operation is equivalent to performing ``DW_OP_deref_type S, DR``, except + if T is not the generic type, the value V pushed is zero-extended to the + generic type bit size and its type changed to the generic type. + +4. ``DW_OP_deref_type`` + + ``DW_OP_deref_type`` has two operands. The first is a 1-byte unsigned + integral constant S. The second is an unsigned LEB128 integer DR that + represents the byte offset of a debugging information entry D relative to + the beginning of the current compilation unit, that provides the type T of + the result value. + + TS is the bit size of the type T. + + *While the size of the pushed value V can be inferred from the type T, it is + encoded explicitly as the operand S so that the operation can be parsed + easily without reference to the* ``.debug_info`` *section.* + + .. note:: + + It is unclear why the operand S is needed. Unlike ``DW_OP_const_type``, + the size is not needed for parsing. Any evaluation needs to get the base + type T to push with the value to know its encoding and bit size. + + It pops one stack entry that must be a location description L. 
+ + A value V of TS bits is retrieved from the location storage LS specified by + one of the single location descriptions SL of L. + + *If L, or the location description of any composite location description + part that is a subcomponent of L, has more than one single location + description, then any one of them can be selected as they are required to + all have the same value. For any single location description SL, bits are + retrieved from the associated storage location starting at the bit offset + specified by SL. For a composite location description, the retrieved bits + are the concatenation of the N bits from each composite location part PL, + where N is limited to the size of PL.* + + V is pushed on the stack with the type T. + + .. note:: + + This definition makes it an evaluation error if L is a register location + description that has less than TS bits remaining in the register storage. + Particularly since these extensions extend location descriptions to have + a bit offset, it would be odd to define this as performing sign extension + based on the type, or be target architecture dependent, as the number of + remaining bits could be any number. This matches the GDB implementation + for ``DW_OP_deref_type``. + + These extensions define ``DW_OP_*breg*`` in terms of + ``DW_OP_regval_type``. ``DW_OP_regval_type`` is defined in terms of + ``DW_OP_regx``, which uses a 0 bit offset, and ``DW_OP_deref_type``. + Therefore, it requires the register size to be greater or equal to the + address size of the address space. This matches the GDB implementation for + ``DW_OP_*breg*``. + + The DWARF is ill-formed if D is not in the current compilation unit, D is + not a ``DW_TAG_base_type`` debugging information entry, or if TS divided by + 8 (the byte size) and rounded up to a whole number is not equal to S. + + .. note:: + + This definition allows the base type to be a bit size since there seems no + reason to restrict it. 
+ + It is an evaluation error if any bit of the value is retrieved from the + undefined location storage or the offset of any bit exceeds the size of the + location storage LS specified by any single location description SL of L. + + See :ref:`amdgpu-dwarf-implicit-location-descriptions` for special rules + concerning implicit location descriptions created by the + ``DW_OP_implicit_pointer`` and ``DW_OP_LLVM_aspace_implicit_pointer`` + operations. + +5. ``DW_OP_xderef`` *Deprecated* + + ``DW_OP_xderef`` pops two stack entries. The first must be an integral type + value that represents an address A. The second must be an integral type + value that represents a target architecture specific address space + identifier AS. + + The operation is equivalent to performing ``DW_OP_swap; + DW_OP_LLVM_form_aspace_address; DW_OP_deref``. The value V retrieved is left + on the stack with the generic type. + + *This operation is deprecated as the* ``DW_OP_LLVM_form_aspace_address`` + *operation can be used and provides greater expressiveness.* + +6. ``DW_OP_xderef_size`` *Deprecated* + + ``DW_OP_xderef_size`` has a single 1-byte unsigned integral constant that + represents a byte result size S. + + It pops two stack entries. The first must be an integral type value that + represents an address A. The second must be an integral type value that + represents a target architecture specific address space identifier AS. + + The operation is equivalent to performing ``DW_OP_swap; + DW_OP_LLVM_form_aspace_address; DW_OP_deref_size S``. The zero-extended + value V retrieved is left on the stack with the generic type. + + *This operation is deprecated as the* ``DW_OP_LLVM_form_aspace_address`` + *operation can be used and provides greater expressiveness.* + +7. ``DW_OP_xderef_type`` *Deprecated* + + ``DW_OP_xderef_type`` has two operands. The first is a 1-byte unsigned + integral constant S.
The second operand is an unsigned LEB128 integer DR + that represents the byte offset of a debugging information entry D relative + to the beginning of the current compilation unit, that provides the type T + of the result value. + + It pops two stack entries. The first must be an integral type value that + represents an address A. The second must be an integral type value that + represents a target architecture specific address space identifier AS. + + The operation is equivalent to performing ``DW_OP_swap; + DW_OP_LLVM_form_aspace_address; DW_OP_deref_type S DR``. The value V + retrieved is left on the stack with the type T. + + *This operation is deprecated as the* ``DW_OP_LLVM_form_aspace_address`` + *operation can be used and provides greater expressiveness.* + +8. ``DW_OP_entry_value`` *Deprecated* + + ``DW_OP_entry_value`` pushes the value of an expression that is evaluated in + the context of the calling frame. + + *It may be used to determine the value of arguments on entry to the current + call frame provided they are not clobbered.* + + It has two operands. The first is an unsigned LEB128 integer S. The second + is a block of bytes, with a length equal to S, interpreted as a DWARF + operation expression E. + + E is evaluated with the current context, except the result kind is + unspecified, the call frame is the one that called the current frame, the + program location is the call site in the calling frame, the object is + unspecified, and the initial stack is empty. The calling frame information + is obtained by virtually unwinding the current call frame using the call + frame information (see :ref:`amdgpu-dwarf-call-frame-information`).
+ + If the result of E is a location description L (see + :ref:`amdgpu-dwarf-register-location-descriptions`), and the last operation + executed by E is a ``DW_OP_reg*`` for register R with a target architecture + specific base type of T, then the contents of the register are retrieved as + if a ``DW_OP_deref_type DR`` operation was performed where DR is the offset + of a hypothetical debug information entry in the current compilation unit + for T. The resulting value V is pushed on the stack. + + *Using* ``DW_OP_reg*`` *provides a more compact form for the case where the + value was in a register on entry to the subprogram.* + + .. note:: + + It is unclear how this provides a more compact expression, as + ``DW_OP_regval_type`` could be used which is marginally larger. + + If the result of E is a value V, then V is pushed on the stack. + + Otherwise, the DWARF expression is ill-formed. + + *The* ``DW_OP_entry_value`` *operation is deprecated as its main usage is + provided by other means. DWARF Version 5 added the* + ``DW_TAG_call_site_parameter`` *debugger information entry for call sites + that has* ``DW_AT_call_value``\ *,* ``DW_AT_call_data_location``\ *, and* + ``DW_AT_call_data_value`` *attributes that provide DWARF expressions to + compute actual parameter values at the time of the call, and requires the + producer to ensure the expressions are valid to evaluate even when virtually + unwound. The* ``DW_OP_LLVM_call_frame_entry_reg`` *operation provides access + to registers in the virtually unwound calling frame.* + + .. note:: + + GDB only implements ``DW_OP_entry_value`` when E is exactly + ``DW_OP_reg*`` or ``DW_OP_breg*; DW_OP_deref*``. + +.. _amdgpu-dwarf-location-description-operations: + +Location Description Operations +############################### + +This section describes the operations that push location descriptions on the +stack. + +General Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +1.
``DW_OP_LLVM_offset`` *New* + + ``DW_OP_LLVM_offset`` pops two stack entries. The first must be an integral + type value that represents a byte displacement B. The second must be a + location description L. + + It adds the value of B scaled by 8 (the byte size) to the bit offset of each + single location description SL of L, and pushes the updated L. + + It is an evaluation error if the updated bit offset of any SL is less than 0 + or greater than or equal to the size of the location storage specified by + SL. + +2. ``DW_OP_LLVM_offset_uconst`` *New* + + ``DW_OP_LLVM_offset_uconst`` has a single unsigned LEB128 integer operand + that represents a byte displacement B. + + The operation is equivalent to performing ``DW_OP_constu B; + DW_OP_LLVM_offset``. + + *This operation is supplied specifically to be able to encode more field + displacements in two bytes than can be done with* ``DW_OP_lit*; + DW_OP_LLVM_offset``\ *.* + + .. note:: + + Should this be named ``DW_OP_LLVM_offset_uconst`` to match + ``DW_OP_plus_uconst``, or ``DW_OP_LLVM_offset_constu`` to match + ``DW_OP_constu``? + +3. ``DW_OP_LLVM_bit_offset`` *New* + + ``DW_OP_LLVM_bit_offset`` pops two stack entries. The first must be an + integral type value that represents a bit displacement B. The second must be + a location description L. + + It adds the value of B to the bit offset of each single location description + SL of L, and pushes the updated L. + + It is an evaluation error if the updated bit offset of any SL is less than 0 + or greater than or equal to the size of the location storage specified by + SL. + +4. ``DW_OP_push_object_address`` + + ``DW_OP_push_object_address`` pushes the location description L of the + current object. + + *This object may correspond to an independent variable that is part of a + user presented expression that is being evaluated. 
The object location + description may be determined from the variable's own debugging information + entry or it may be a component of an array, structure, or class whose + address has been dynamically determined by an earlier step during user + expression evaluation.* + + *This operation provides explicit functionality (especially for arrays + involving descriptions) that is analogous to the implicit push of the base + location description of a structure prior to evaluation of a* + ``DW_AT_data_member_location`` *to access a data member of a structure.* + + .. note:: + + This operation could be removed and the object location description + specified as the initial stack as for ``DW_AT_data_member_location``. + + The only attribute that specifies a current object is + ``DW_AT_data_location`` so the non-normative text seems to overstate how + this is being used. Or are there other attributes that need to state they + pass an object? + +5. ``DW_OP_LLVM_call_frame_entry_reg`` *New* + + ``DW_OP_LLVM_call_frame_entry_reg`` has a single unsigned LEB128 integer + operand that represents a target architecture register number R. + + It pushes a location description L that holds the value of register R on + entry to the current subprogram as defined by the call frame information + (see :ref:`amdgpu-dwarf-call-frame-information`). + + *If there is no call frame information defined, then the default rules for + the target architecture are used. If the register rule is* undefined\ *, then + the undefined location description is pushed. If the register rule is* same + value\ *, then a register location description for R is pushed.* + +.. _amdgpu-dwarf-undefined-location-description-operations: + +Undefined Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +*The undefined location storage represents a piece or all of an object that is +present in the source but not in the object code (perhaps due to optimization).
+Neither reading nor writing to the undefined location storage is meaningful.* + +An undefined location description specifies the undefined location storage. +There is no concept of the size of the undefined location storage, nor of a bit +offset for an undefined location description. The ``DW_OP_LLVM_*offset`` +operations leave an undefined location description unchanged. The +``DW_OP_*piece`` operations can explicitly or implicitly specify an undefined +location description, allowing any size and offset to be specified, and results +in a part with all undefined bits. + +1. ``DW_OP_LLVM_undefined`` *New* + + ``DW_OP_LLVM_undefined`` pushes a location description L that comprises one + undefined location description SL. + +.. _amdgpu-dwarf-memory-location-description-operations: + +Memory Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each of the target architecture specific address spaces has a corresponding +memory location storage that denotes the linear addressable memory of that +address space. The size of each memory location storage corresponds to the range +of the addresses in the corresponding address space. + +*It is target architecture defined how address space location storage maps to +target architecture physical memory. For example, they may be independent +memory, or more than one location storage may alias the same physical memory +possibly at different offsets and with different interleaving. The mapping may +also be dictated by the source language address classes.* + +A memory location description specifies a memory location storage. The bit +offset corresponds to a bit position within a byte of the memory. Bits accessed +using a memory location description, access the corresponding target +architecture memory starting at the bit position within the byte specified by +the bit offset. 
+ +A memory location description that has a bit offset that is a multiple of 8 (the +byte size) is defined to be a byte address memory location description. It has a +memory byte address A that is equal to the bit offset divided by 8. + +A memory location description that does not have a bit offset that is a multiple +of 8 (the byte size) is defined to be a bit field memory location description. +It has a bit position B equal to the bit offset modulo 8, and a memory byte +address A equal to the bit offset minus B that is then divided by 8. + +The address space AS of a memory location description is defined to be the +address space that corresponds to the memory location storage associated with +the memory location description. + +A location description that is comprised of one byte address memory location +description SL is defined to be a memory byte address location description. It +has a byte address equal to A and an address space equal to AS of the +corresponding SL. + +``DW_ASPACE_none`` is defined as the target architecture default address space. + +If a stack entry is required to be a location description, but it is a value V +with the generic type, then it is implicitly converted to a location description +L with one memory location description SL. SL specifies the memory location +storage that corresponds to the target architecture default address space with a +bit offset equal to V scaled by 8 (the byte size). + +.. note:: + + If it is wanted to allow any integral type value to be implicitly converted to + a memory location description in the target architecture default address + space: + + If a stack entry is required to be a location description, but is a value V + with an integral type, then it is implicitly converted to a location + description L with one memory location description SL. If the type size of + V is less than the generic type size, then the value V is zero extended to + the size of the generic type.
The least significant generic type size bits + are treated as a twos-complement unsigned value to be used as an address A. + SL specifies memory location storage corresponding to the target + architecture default address space with a bit offset equal to A scaled by 8 + (the byte size). + + The implicit conversion could also be defined as target architecture specific. + For example, GDB checks if V is an integral type. If it is not it gives an + error. Otherwise, GDB zero-extends V to 64 bits. If the GDB target defines a + hook function, then it is called. The target specific hook function can modify + the 64-bit value, possibly sign extending based on the original value type. + Finally, GDB treats the 64-bit value V as a memory location address. + +If a stack entry is required to be a location description, but it is an implicit +pointer value IPV with the target architecture default address space, then it is +implicitly converted to a location description with one single location +description specified by IPV. See +:ref:`amdgpu-dwarf-implicit-location-descriptions`. + +.. note:: + + Is this rule required for DWARF Version 5 backwards compatibility? If not, it + can be eliminated, and the producer can use + ``DW_OP_LLVM_form_aspace_address``. + +If a stack entry is required to be a value, but it is a location description L +with one memory location description SL in the target architecture default +address space with a bit offset B that is a multiple of 8, then it is implicitly +converted to a value equal to B divided by 8 (the byte size) with the generic +type. + +1. ``DW_OP_addr`` + + ``DW_OP_addr`` has a single byte constant value operand, which has the size + of the generic type, that represents an address A. + + It pushes a location description L with one memory location description SL + on the stack. 
SL specifies the memory location storage corresponding to the + target architecture default address space with a bit offset equal to A + scaled by 8 (the byte size). + + *If the DWARF is part of a code object, then A may need to be relocated. For + example, in the ELF code object format, A must be adjusted by the difference + between the ELF segment virtual address and the virtual address at which the + segment is loaded.* + +2. ``DW_OP_addrx`` + + ``DW_OP_addrx`` has a single unsigned LEB128 integer operand that represents + a zero-based index into the ``.debug_addr`` section relative to the value of + the ``DW_AT_addr_base`` attribute of the associated compilation unit. The + address value A in the ``.debug_addr`` section has the size of the generic + type. + + It pushes a location description L with one memory location description SL + on the stack. SL specifies the memory location storage corresponding to the + target architecture default address space with a bit offset equal to A + scaled by 8 (the byte size). + + *If the DWARF is part of a code object, then A may need to be relocated. For + example, in the ELF code object format, A must be adjusted by the difference + between the ELF segment virtual address and the virtual address at which the + segment is loaded.* + +3. ``DW_OP_LLVM_form_aspace_address`` *New* + + ``DW_OP_LLVM_form_aspace_address`` pops top two stack entries. The first + must be an integral type value that represents a target architecture + specific address space identifier AS. The second must be an integral type + value that represents an address A. + + The address size S is defined as the address bit size of the target + architecture specific address space that corresponds to AS. + + A is adjusted to S bits by zero extending if necessary, and then treating the + least significant S bits as a twos-complement unsigned value A'. + + It pushes a location description L with one memory location description SL + on the stack. 
SL specifies the memory location storage LS that corresponds + to AS with a bit offset equal to A' scaled by 8 (the byte size). + + If AS is an address space that is specific to context elements, then LS + corresponds to the location storage associated with the current context. + + *For example, if AS is for per thread storage then LS is the location + storage for the current thread. For languages that are implemented using a + SIMD or SIMT execution model, then if AS is for per lane storage then LS is + the location storage for the current lane of the current thread. Therefore, + if L is accessed by an operation, the location storage selected when the + location description was created is accessed, and not the location storage + associated with the current context of the access operation.* + + The DWARF expression is ill-formed if AS is not one of the values defined by + the target architecture specific ``DW_ASPACE_*`` values. + + See :ref:`amdgpu-dwarf-implicit-location-descriptions` for special rules + concerning implicit pointer values produced by dereferencing implicit + location descriptions created by the ``DW_OP_implicit_pointer`` and + ``DW_OP_LLVM_aspace_implicit_pointer`` operations. + +4. ``DW_OP_form_tls_address`` + + ``DW_OP_form_tls_address`` pops one stack entry that must be an integral + type value and treats it as a thread-local storage address TA. + + It pushes a location description L with one memory location description SL + on the stack. SL is the target architecture specific memory location + description that corresponds to the thread-local storage address TA. + + The meaning of the thread-local storage address TA is defined by the + run-time environment. If the run-time environment supports multiple + thread-local storage blocks for a single thread, then the block + corresponding to the executable or shared library containing this DWARF + expression is used.
+ + *Some implementations of C, C++, Fortran, and other languages support a + thread-local storage class. Variables with this storage class have distinct + values and addresses in distinct threads, much as automatic variables have + distinct values and addresses in each subprogram invocation. Typically, + there is a single block of storage containing all thread-local variables + declared in the main executable, and a separate block for the variables + declared in each shared library. Each thread-local variable can then be + accessed in its block using an identifier. This identifier is typically a + byte offset into the block and pushed onto the DWARF stack by one of the* + ``DW_OP_const*`` *operations prior to the* ``DW_OP_form_tls_address`` + *operation. Computing the address of the appropriate block can be complex + (in some cases, the compiler emits a function call to do it), and difficult + to describe using ordinary DWARF location descriptions. Instead of forcing + complex thread-local storage calculations into the DWARF expressions, the* + ``DW_OP_form_tls_address`` *allows the consumer to perform the computation + based on the target architecture specific run-time environment.* + +5. ``DW_OP_call_frame_cfa`` + + ``DW_OP_call_frame_cfa`` pushes the location description L of the Canonical + Frame Address (CFA) of the current subprogram, obtained from the call frame + information on the stack. See :ref:`amdgpu-dwarf-call-frame-information`. + + *Although the value of the* ``DW_AT_frame_base`` *attribute of the debugger + information entry corresponding to the current subprogram can be computed + using a location list expression, in some cases this would require an + extensive location list because the values of the registers used in + computing the CFA change during a subprogram execution. 
If the call frame + information is present, then it already encodes such changes, and it is + space efficient to reference that using the* ``DW_OP_call_frame_cfa`` + *operation.* + +6. ``DW_OP_fbreg`` + + ``DW_OP_fbreg`` has a single signed LEB128 integer operand that represents a + byte displacement B. + + The location description L for the *frame base* of the current subprogram is + obtained from the ``DW_AT_frame_base`` attribute of the debugger information + entry corresponding to the current subprogram as described in + :ref:`amdgpu-dwarf-debugging-information-entry-attributes`. + + The location description L is updated as if the ``DW_OP_LLVM_offset_uconst + B`` operation was applied. The updated L is pushed on the stack. + +7. ``DW_OP_breg0``, ``DW_OP_breg1``, ..., ``DW_OP_breg31`` + + The ``DW_OP_breg`` operations encode the numbers of up to 32 registers, + numbered from 0 through 31, inclusive. The register number R corresponds to + the N in the operation name. + + They have a single signed LEB128 integer operand that represents a byte + displacement B. + + The address space identifier AS is defined as the one corresponding to the + target architecture specific default address space. + + The address size S is defined as the address bit size of the target + architecture specific address space corresponding to AS. + + The contents of the register specified by R are retrieved as if a + ``DW_OP_regval_type R, DR`` operation was performed where DR is the offset + of a hypothetical debug information entry in the current compilation unit + for an unsigned integral base type of size S bits. B is added and the least + significant S bits are treated as an unsigned value to be used as an address + A. + + They push a location description L comprising one memory location + description LS on the stack. LS specifies the memory location storage that + corresponds to AS with a bit offset equal to A scaled by 8 (the byte size). + +8. 
``DW_OP_bregx`` + + ``DW_OP_bregx`` has two operands. The first is an unsigned LEB128 integer + that represents a register number R. The second is a signed LEB128 + integer that represents a byte displacement B. + + The action is the same as for ``DW_OP_breg``, except that R is used as + the register number and B is used as the byte displacement. + +9. ``DW_OP_LLVM_aspace_bregx`` *New* + + ``DW_OP_LLVM_aspace_bregx`` has two operands. The first is an unsigned + LEB128 integer that represents a register number R. The second is a signed + LEB128 integer that represents a byte displacement B. It pops one stack + entry that is required to be an integral type value that represents a target + architecture specific address space identifier AS. + + The action is the same as for ``DW_OP_breg``, except that R is used as + the register number, B is used as the byte displacement, and AS is used as + the address space identifier. + + The DWARF expression is ill-formed if AS is not one of the values defined by + the target architecture specific ``DW_ASPACE_*`` values. + + .. note:: + + Could also consider adding ``DW_OP_aspace_breg0, DW_OP_aspace_breg1, ..., + DW_OP_aspace_breg31`` which would save encoding size. + +.. _amdgpu-dwarf-register-location-descriptions: + +Register Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There is a register location storage that corresponds to each of the target +architecture registers. The size of each register location storage corresponds +to the size of the corresponding target architecture register. + +A register location description specifies a register location storage. The bit +offset corresponds to a bit position within the register. Bits accessed using a +register location description access the corresponding target architecture +register starting at the specified bit offset. + +1.
``DW_OP_reg0``, ``DW_OP_reg1``, ..., ``DW_OP_reg31`` + + ``DW_OP_reg`` operations encode the numbers of up to 32 registers, + numbered from 0 through 31, inclusive. The target architecture register + number R corresponds to the N in the operation name. + + The operation is equivalent to performing ``DW_OP_regx R``. + +2. ``DW_OP_regx`` + + ``DW_OP_regx`` has a single unsigned LEB128 integer operand that represents + a target architecture register number R. + + If the current call frame is the top call frame, it pushes a location + description L that specifies one register location description SL on the + stack. SL specifies the register location storage that corresponds to R with + a bit offset of 0 for the current thread. + + If the current call frame is not the top call frame, call frame information + (see :ref:`amdgpu-dwarf-call-frame-information`) is used to determine the + location description that holds the register for the current call frame and + current program location of the current thread. The resulting location + description L is pushed. + + *Note that if call frame information is used, the resulting location + description may be register, memory, or undefined.* + + *An implementation may evaluate the call frame information immediately, or + may defer evaluation until L is accessed by an operation. If evaluation is + deferred, R and the current context can be recorded in L. When accessed, the + recorded context is used to evaluate the call frame information, not the + current context of the access operation.* + +*These operations obtain a register location. To fetch the contents of a +register, it is necessary to use* ``DW_OP_regval_type``\ *, use one of the* +``DW_OP_breg*`` *register-based addressing operations, or use* ``DW_OP_deref*`` +*on a register location description.* + +.. 
_amdgpu-dwarf-implicit-location-descriptions: + +Implicit Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Implicit location storage represents a piece or all of an object which has no +actual location in the program but whose contents are nonetheless known, either +as a constant or can be computed from other locations and values in the program. + +An implicit location description specifies an implicit location storage. The bit +offset corresponds to a bit position within the implicit location storage. Bits +accessed using an implicit location description, access the corresponding +implicit storage value starting at the bit offset. + +1. ``DW_OP_implicit_value`` + + ``DW_OP_implicit_value`` has two operands. The first is an unsigned LEB128 + integer that represents a byte size S. The second is a block of bytes with a + length equal to S treated as a literal value V. + + An implicit location storage LS is created with the literal value V and a + size of S. + + It pushes location description L with one implicit location description SL + on the stack. SL specifies LS with a bit offset of 0. + +2. ``DW_OP_stack_value`` + + ``DW_OP_stack_value`` pops one stack entry that must be a value V. + + An implicit location storage LS is created with the literal value V using + the size, encoding, and endianity specified by V's base type. + + It pushes a location description L with one implicit location description SL + on the stack. SL specifies LS with a bit offset of 0. + + *The* ``DW_OP_stack_value`` *operation specifies that the object does not + exist in memory, but its value is nonetheless known.
In this form, the + location description specifies the actual value of the object, rather than + specifying the memory or register storage that holds the value.* + + See :ref:`amdgpu-dwarf-implicit-location-descriptions` for special rules + concerning implicit pointer values produced by dereferencing implicit + location descriptions created by the ``DW_OP_implicit_pointer`` and + ``DW_OP_LLVM_aspace_implicit_pointer`` operations. + + .. note:: + + Since location descriptions are allowed on the stack, the + ``DW_OP_stack_value`` operation no longer terminates the DWARF operation + expression execution as in DWARF Version 5. + +3. ``DW_OP_implicit_pointer`` + + *An optimizing compiler may eliminate a pointer, while still retaining the + value that the pointer addressed.* ``DW_OP_implicit_pointer`` *allows a + producer to describe this value.* + + ``DW_OP_implicit_pointer`` *specifies an object is a pointer to the target + architecture default address space that cannot be represented as a real + pointer, even though the value it would point to can be described. In this + form, the location description specifies a debugging information entry that + represents the actual location description of the object to which the + pointer would point. Thus, a consumer of the debug information would be able + to access the dereferenced pointer, even when it cannot access the pointer + itself.* + + ``DW_OP_implicit_pointer`` has two operands. The first operand is a 4-byte + unsigned value in the 32-bit DWARF format, or an 8-byte unsigned value in + the 64-bit DWARF format, that represents the byte offset DR of a debugging + information entry D relative to the beginning of the ``.debug_info`` section + that contains the current compilation unit. The second operand is a signed + LEB128 integer that represents a byte displacement B.
+ + *Note that D may not be in the current compilation unit.* + + *The first operand interpretation is exactly like that for* + ``DW_FORM_ref_addr``\ *.* + + The address space identifier AS is defined as the one corresponding to the + target architecture specific default address space. + + The address size S is defined as the address bit size of the target + architecture specific address space corresponding to AS. + + An implicit location storage LS is created with the debugging information + entry D, address space AS, and size of S. + + It pushes a location description L that comprises one implicit location + description SL on the stack. SL specifies LS with a bit offset of 0. + + It is an evaluation error if a ``DW_OP_deref*`` operation pops a location + description L', and retrieves S bits, such that any retrieved bits come from + an implicit location storage that is the same as LS, unless both the + following conditions are met: + + 1. All retrieved bits come from an implicit location description that + refers to an implicit location storage that is the same as LS. + + *Note that all bits do not have to come from the same implicit location + description, as L' may involve composite location descriptors.* + + 2. The bits come from consecutive ascending offsets within their respective + implicit location storage. + + *These rules are equivalent to retrieving the complete contents of LS.* + + If both the above conditions are met, then the value V pushed by the + ``DW_OP_deref*`` operation is an implicit pointer value IPV with a target + architecture specific address space of AS, a debugging information entry of + D, and a base type of T. If AS is the target architecture default address + space, then T is the generic type. Otherwise, T is a target architecture + specific integral type with a bit size equal to S. 
+ + If IPV is either implicitly converted to a location description (only done + if AS is the target architecture default address space) or used by + ``DW_OP_LLVM_form_aspace_address`` (only done if the address space popped by + ``DW_OP_LLVM_form_aspace_address`` is AS), then the resulting location + description RL is: + + * If D has a ``DW_AT_location`` attribute, the DWARF expression E from the + ``DW_AT_location`` attribute is evaluated with the current context, except + that the result kind is a location description, the compilation unit is + the one that contains D, the object is unspecified, and the initial stack + is empty. RL is the expression result. + + *Note that E is evaluated with the context of the expression accessing + IPV, and not the context of the expression that contained the* + ``DW_OP_implicit_pointer`` *or* ``DW_OP_LLVM_aspace_implicit_pointer`` + *operation that created L.* + + * If D has a ``DW_AT_const_value`` attribute, then an implicit location + storage RLS is created from the ``DW_AT_const_value`` attribute's value + with a size matching the size of the ``DW_AT_const_value`` attribute's + value. RL comprises one implicit location description SRL. SRL specifies + RLS with a bit offset of 0. + + .. note:: + + If using ``DW_AT_const_value`` for variables and formal parameters is + deprecated and instead ``DW_AT_location`` is used with an implicit + location description, then this rule would not be required. + + * Otherwise, it is an evaluation error. + + The bit offset of RL is updated as if the ``DW_OP_LLVM_offset_uconst B`` + operation was applied. + + If a ``DW_OP_stack_value`` operation pops a value that is the same as IPV, + then it pushes a location description that is the same as L. + + It is an evaluation error if LS or IPV is accessed in any other manner. 
+ + *The restrictions on how an implicit pointer location description created + by* ``DW_OP_implicit_pointer`` *and* ``DW_OP_LLVM_aspace_implicit_pointer`` + *can be used are to simplify the DWARF consumer. Similarly, for an implicit + pointer value created by* ``DW_OP_deref*`` *and* ``DW_OP_stack_value``\ *.* + +4. ``DW_OP_LLVM_aspace_implicit_pointer`` *New* + + ``DW_OP_LLVM_aspace_implicit_pointer`` has two operands that are the same as + for ``DW_OP_implicit_pointer``. + + It pops one stack entry that must be an integral type value that represents + a target architecture specific address space identifier AS. + + The location description L that is pushed on the stack is the same as for + ``DW_OP_implicit_pointer``, except that the address space identifier used is + AS. + + The DWARF expression is ill-formed if AS is not one of the values defined by + the target architecture specific ``DW_ASPACE_*`` values. + + .. note:: + + This definition of ``DW_OP_LLVM_aspace_implicit_pointer`` may change when + full support for address classes is added as required for languages such + as OpenCL/SyCL. + +*Typically a* ``DW_OP_implicit_pointer`` *or* +``DW_OP_LLVM_aspace_implicit_pointer`` *operation is used in a DWARF expression +E*\ :sub:`1` *of a* ``DW_TAG_variable`` *or* ``DW_TAG_formal_parameter`` +*debugging information entry D*\ :sub:`1`\ *'s* ``DW_AT_location`` *attribute. +The debugging information entry referenced by the* ``DW_OP_implicit_pointer`` +*or* ``DW_OP_LLVM_aspace_implicit_pointer`` *operations is typically itself a* +``DW_TAG_variable`` *or* ``DW_TAG_formal_parameter`` *debugging information +entry D*\ :sub:`2` *whose* ``DW_AT_location`` *attribute gives a second DWARF +expression E*\ :sub:`2`\ *.* + +*D*\ :sub:`1` *and E*\ :sub:`1` *are describing the location of a pointer type +object.
D*\ :sub:`2` *and E*\ :sub:`2` *are describing the location of the +object pointed to by that pointer object.* + +*However, D*\ :sub:`2` *may be any debugging information entry that contains a* +``DW_AT_location`` *or* ``DW_AT_const_value`` *attribute (for example,* +``DW_TAG_dwarf_procedure``\ *). By using E*\ :sub:`2`\ *, a consumer can +reconstruct the value of the object when asked to dereference the pointer +described by E*\ :sub:`1` *which contains the* ``DW_OP_implicit_pointer`` or +``DW_OP_LLVM_aspace_implicit_pointer`` *operation.* + +.. _amdgpu-dwarf-composite-location-description-operations: + +Composite Location Description Operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A composite location storage represents an object or value which may be +contained in part of another location storage or contained in parts of more +than one location storage. + +Each part has a part location description L and a part bit size S. L can have +one or more single location descriptions SL. If there are more than one SL then +that indicates that part is located in more than one place. The bits of each +place of the part comprise S contiguous bits from the location storage LS +specified by SL starting at the bit offset specified by SL. All the bits must +be within the size of LS or the DWARF expression is ill-formed. + +A composite location storage can have zero or more parts. The parts are +contiguous such that the zero-based location storage bit index will range over +each part with no gaps between them. Therefore, the size of a composite location +storage is the sum of the size of its parts. The DWARF expression is ill-formed +if the size of the contiguous location storage is larger than the size of the +memory location storage corresponding to the largest target architecture +specific address space. + +A composite location description specifies a composite location storage. The bit +offset corresponds to a bit position within the composite location storage. 
+ +There are operations that create a composite location storage. + +There are other operations that allow a composite location storage to be +incrementally created. Each part is created by a separate operation. There may +be one or more operations to create the final composite location storage. A +series of such operations describes the parts of the composite location storage +that are in the order that the associated part operations are executed. + +To support incremental creation, a composite location storage can be in an +incomplete state. When an incremental operation operates on an incomplete +composite location storage, it adds a new part, otherwise it creates a new +composite location storage. The ``DW_OP_LLVM_piece_end`` operation explicitly +makes an incomplete composite location storage complete. + +A composite location description that specifies a composite location storage +that is incomplete is termed an incomplete composite location description. A +composite location description that specifies a composite location storage that +is complete is termed a complete composite location description. + +If the top stack entry is a location description that has one incomplete +composite location description SL after the execution of an operation expression +has completed, SL is converted to a complete composite location description. + +*Note that this conversion does not happen after the completion of an operation +expression that is evaluated on the same stack by the* ``DW_OP_call*`` +*operations. Such executions are not a separate evaluation of an operation +expression, but rather the continued evaluation of the same operation expression +that contains the* ``DW_OP_call*`` *operation.* + +If a stack entry is required to be a location description L, but L has an +incomplete composite location description, then the DWARF expression is +ill-formed. 
The exception is for the operations involved in incrementally +creating a composite location description as described below. + +*Note that a DWARF operation expression may arbitrarily compose composite +location descriptions from any other location description, including those that +have multiple single location descriptions, and those that have composite +location descriptions.* + +*The incremental composite location description operations are defined to be +compatible with the definitions in DWARF Version 5.* + +1. ``DW_OP_piece`` + + ``DW_OP_piece`` has a single unsigned LEB128 integer that represents a byte + size S. + + The action is based on the context: + + * If the stack is empty, then a location description L comprised of one + incomplete composite location description SL is pushed on the stack. + + An incomplete composite location storage LS is created with a single part + P. P specifies a location description PL and has a bit size of S scaled by + 8 (the byte size). PL is comprised of one undefined location description + PSL. + + SL specifies LS with a bit offset of 0. + + * Otherwise, if the top stack entry is a location description L comprised of + one incomplete composite location description SL, then the incomplete + composite location storage LS that SL specifies is updated to append a new + part P. P specifies a location description PL and has a bit size of S + scaled by 8 (the byte size). PL is comprised of one undefined location + description PSL. L is left on the stack. + + * Otherwise, if the top stack entry is a location description or can be + converted to one, then it is popped and treated as a part location + description PL. Then: + + * If the top stack entry (after popping PL) is a location description L + comprised of one incomplete composite location description SL, then the + incomplete composite location storage LS that SL specifies is updated to + append a new part P. 
P specifies the location description PL and has a + bit size of S scaled by 8 (the byte size). L is left on the stack. + + * Otherwise, a location description L comprised of one incomplete + composite location description SL is pushed on the stack. + + An incomplete composite location storage LS is created with a single + part P. P specifies the location description PL and has a bit size of S + scaled by 8 (the byte size). + + SL specifies LS with a bit offset of 0. + + * Otherwise, the DWARF expression is ill-formed + + *Many compilers store a single variable in sets of registers or store a + variable partially in memory and partially in registers.* ``DW_OP_piece`` + *provides a way of describing where a part of a variable is located.* + + *If a non-0 byte displacement is required, the* ``DW_OP_LLVM_offset`` + *operation can be used to update the location description before using it as + the part location description of a* ``DW_OP_piece`` *operation.* + + *The evaluation rules for the* ``DW_OP_piece`` *operation allow it to be + compatible with the DWARF Version 5 definition.* + + .. note:: + + Since these extensions allow location descriptions to be entries on the + stack, a simpler operation to create composite location descriptions could + be defined. For example, just one operation that specifies how many parts, + and pops pairs of stack entries for the part size and location + description. Not only would this be a simpler operation and avoid the + complexities of incomplete composite location descriptions, but it may + also have a smaller encoding in practice. However, the desire for + compatibility with DWARF Version 5 is likely a stronger consideration. + +2. ``DW_OP_bit_piece`` + + ``DW_OP_bit_piece`` has two operands. The first is an unsigned LEB128 + integer that represents the part bit size S. The second is an unsigned + LEB128 integer that represents a bit displacement B. 
+ + The action is the same as for ``DW_OP_piece``, except that any part created + has the bit size S, and the location description PL of any created part is + updated as if the ``DW_OP_constu B; DW_OP_LLVM_bit_offset`` operations were + applied. + + ``DW_OP_bit_piece`` *is used instead of* ``DW_OP_piece`` *when the piece to + be assembled is not byte-sized or is not at the start of the part location + description.* + + *If a computed bit displacement is required, the* ``DW_OP_LLVM_bit_offset`` + *operation can be used to update the location description before using it as + the part location description of a* ``DW_OP_bit_piece`` *operation.* + + .. note:: + + The bit offset operand is not needed as ``DW_OP_LLVM_bit_offset`` can be + used on the part's location description. + +3. ``DW_OP_LLVM_piece_end`` *New* + + If the top stack entry is not a location description L comprised of one + incomplete composite location description SL, then the DWARF expression is + ill-formed. + + Otherwise, the incomplete composite location storage LS specified by SL is + updated to be a complete composite location storage with the same parts. + +4. ``DW_OP_LLVM_extend`` *New* + + ``DW_OP_LLVM_extend`` has two operands. The first is an unsigned LEB128 + integer that represents the element bit size S. The second is an unsigned + LEB128 integer that represents a count C. + + It pops one stack entry that must be a location description and is treated + as the part location description PL. + + A location description L comprised of one complete composite location + description SL is pushed on the stack. + + A complete composite location storage LS is created with C identical parts + P. Each P specifies PL and has a bit size of S. + + SL specifies LS with a bit offset of 0. + + The DWARF expression is ill-formed if the element bit size or count are 0. + +5. ``DW_OP_LLVM_select_bit_piece`` *New* + + ``DW_OP_LLVM_select_bit_piece`` has two operands.
The first is an unsigned + LEB128 integer that represents the element bit size S. The second is an + unsigned LEB128 integer that represents a count C. + + It pops three stack entries. The first must be an integral type value that + represents a bit mask value M. The second must be a location description + that represents the one-location description L1. The third must be a + location description that represents the zero-location description L0. + + A complete composite location storage LS is created with C parts P\ :sub:`N` + ordered in ascending N from 0 to C-1 inclusive. Each P\ :sub:`N` specifies + location description PL\ :sub:`N` and has a bit size of S. + + PL\ :sub:`N` is as if the ``DW_OP_LLVM_bit_offset N*S`` operation was + applied to PLX\ :sub:`N`\ . + + PLX\ :sub:`N` is the same as L0 if the N\ :sup:`th` least significant bit of + M is a zero, otherwise it is the same as L1. + + A location description L comprised of one complete composite location + description SL is pushed on the stack. SL specifies LS with a bit offset of + 0. + + The DWARF expression is ill-formed if S or C are 0, or if the bit size of M + is less than C. + +.. _amdgpu-dwarf-location-list-expressions: + +DWARF Location List Expressions ++++++++++++++++++++++++++++++++ + +*To meet the needs of recent computer architectures and optimization techniques, +debugging information must be able to describe the location of an object whose +location changes over the object’s lifetime, and may reside at multiple +locations during parts of an object's lifetime. Location list expressions are +used in place of operation expressions whenever the object whose location is +being described has these requirements.* + +A location list expression consists of a series of location list entries. 
Each +location list entry is one of the following kinds: + +*Bounded location description* + + This kind of location list entry provides an operation expression that + evaluates to the location description of an object that is valid over a + lifetime bounded by a starting and ending address. The starting address is the + lowest address of the address range over which the location is valid. The + ending address is the address of the first location past the highest address + of the address range. + + The location list entry matches when the current program location is within + the given range. + + There are several kinds of bounded location description entries which differ + in the way that they specify the starting and ending addresses. + +*Default location description* + + This kind of location list entry provides an operation expression that + evaluates to the location description of an object that is valid when no + bounded location description entry applies. + + The location list entry matches when the current program location is not + within the range of any bounded location description entry. + +*Base address* + + This kind of location list entry provides an address to be used as the base + address for beginning and ending address offsets given in certain kinds of + bounded location description entries. The applicable base address of a bounded + location description entry is the address specified by the closest preceding + base address entry in the same location list. If there is no preceding base + address entry, then the applicable base address defaults to the base address + of the compilation unit (see DWARF Version 5 section 3.1.1). + + In the case of a compilation unit where all of the machine code is contained + in a single contiguous section, no base address entry is needed. + +*End-of-list* + + This kind of location list entry marks the end of the location list + expression. 
+ +The address ranges defined by the bounded location description entries of a +location list expression may overlap. When they do, they describe a situation in +which an object exists simultaneously in more than one place. + +If all of the address ranges in a given location list expression do not +collectively cover the entire range over which the object in question is +defined, and there is no following default location description entry, it is +assumed that the object is not available for the portion of the range that is +not covered. + +The result of the evaluation of a DWARF location list expression is: + +* If the current program location is not specified, then it is an evaluation + error. + + .. note:: + + If the location list only has a single default entry, should that be + considered a match if there is no program location? If there are non-default + entries then it seems it has to be an evaluation error when there is no + program location as that indicates the location depends on the program + location which is not known. + +* If there are no matching location list entries, then the result is a location + description that comprises one undefined location description. + +* Otherwise, the operation expression E of each matching location list entry is + evaluated with the current context, except that the result kind is a location + description, the object is unspecified, and the initial stack is empty. The + location list entry result is the location description returned by the + evaluation of E. + + The result is a location description that is comprised of the union of the + single location descriptions of the location description result of each + matching location list entry. + +A location list expression can only be used as the value of a debugger +information entry attribute that is encoded using class ``loclist`` or +``loclistsptr`` (see DWARF Version 5 section 7.5.5). 
The value of the attribute +provides an index into a separate object file section called ``.debug_loclists`` +or ``.debug_loclists.dwo`` (for split DWARF object files) that contains the +location list entries. + +A ``DW_OP_call*`` and ``DW_OP_implicit_pointer`` operation can be used to +specify a debugger information entry attribute that has a location list +expression. Several debugger information entry attributes allow DWARF +expressions that are evaluated with an initial stack that includes a location +description that may originate from the evaluation of a location list +expression. + +*This location list representation, the* ``loclist`` *and* ``loclistsptr`` +*class, and the related* ``DW_AT_loclists_base`` *attribute are new in DWARF +Version 5. Together they eliminate most, or all of the code object relocations +previously needed for location list expressions.* + +.. note:: + + The rest of this section is the same as DWARF Version 5 section 2.6.2. + +.. _amdgpu-dwarf-segment_addresses: + +Segmented Addresses +~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This augments DWARF Version 5 section 2.12. + +DWARF address classes are used for source languages that have the concept of +memory spaces. They are used in the ``DW_AT_address_class`` attribute for +pointer type, reference type, subprogram, and subprogram type debugger +information entries. + +Each DWARF address class is conceptually a separate source language memory space +with its own lifetime and aliasing rules. DWARF address classes are used to +specify the source language memory spaces that pointer type and reference type +values refer, and to specify the source language memory space in which variables +are allocated. + +The set of currently defined source language DWARF address classes, together +with source language mappings, is given in +:ref:`amdgpu-dwarf-address-class-table`. 
+ +Vendor defined source language address classes may be defined using codes in the +range ``DW_ADDR_LLVM_lo_user`` to ``DW_ADDR_LLVM_hi_user``. + +.. table:: Address class + :name: amdgpu-dwarf-address-class-table + + ========================= ============ ========= ========= ========= + Address Class Name Meaning C/C++ OpenCL CUDA/HIP + ========================= ============ ========= ========= ========= + ``DW_ADDR_none`` generic *default* generic *default* + ``DW_ADDR_LLVM_global`` global global + ``DW_ADDR_LLVM_constant`` constant constant constant + ``DW_ADDR_LLVM_group`` thread-group local shared + ``DW_ADDR_LLVM_private`` thread private + ``DW_ADDR_LLVM_lo_user`` + ``DW_ADDR_LLVM_hi_user`` + ========================= ============ ========= ========= ========= + +DWARF address spaces correspond to target architecture specific linear +addressable memory areas. They are used in DWARF expression location +descriptions to describe in which target architecture specific memory area data +resides. + +*Target architecture specific DWARF address spaces may correspond to hardware +supported facilities such as memory utilizing base address registers, scratchpad +memory, and memory with special interleaving. The size of addresses in these +address spaces may vary. Their access and allocation may be hardware managed +with each thread or group of threads having access to independent storage. For +these reasons they may have properties that do not allow them to be viewed as +part of the unified global virtual address space accessible by all threads.* + +*It is target architecture specific whether multiple DWARF address spaces are +supported and how source language DWARF address classes map to target +architecture specific DWARF address spaces. A target architecture may map +multiple source language DWARF address classes to the same target architecture +specific DWARF address class. 
Optimization may determine that variable lifetime +and access pattern allow them to be allocated in faster scratchpad memory +represented by a different DWARF address space.* + +Although DWARF address space identifiers are target architecture specific, +``DW_ASPACE_none`` is a common address space supported by all target +architectures. + +DWARF address space identifiers are used by: + +* The DWARF expression operations: ``DW_OP_LLVM_aspace_bregx``, + ``DW_OP_LLVM_form_aspace_address``, ``DW_OP_LLVM_aspace_implicit_pointer``, + and ``DW_OP_xderef*``. + +* The CFI instructions: ``DW_CFA_LLVM_def_aspace_cfa`` and + ``DW_CFA_LLVM_def_aspace_cfa_sf``. + +.. note:: + + With the definition of DWARF address classes and DWARF address spaces in these + extensions, DWARF Version 5 table 2.7 needs to be updated. It seems it is an + example of DWARF address spaces and not DWARF address classes. + +.. note:: + + With the expanded support for DWARF address spaces in these extensions, it may + be worth examining if DWARF segments can be eliminated and DWARF address + spaces used instead. + + That may involve extending DWARF address spaces to also be used to specify + code locations. In target architectures that use different memory areas for + code and data this would seem a natural use for DWARF address spaces. This + would allow DWARF expression location descriptions to be used to describe the + location of subprograms and entry points that are used in expressions + involving subprogram pointer type values. + + Currently, DWARF expressions assume data and code reside in the same default + DWARF address space, and only the address ranges in DWARF location list + entries and in the ``.debug_aranges`` section for accelerated access for + addresses allow DWARF segments to be used to distinguish. + +.. note:: + + Currently, DWARF defines address class values as being target architecture + specific.
It is unclear how language specific memory spaces are intended to be + represented in DWARF using these. + + For example, OpenCL defines memory spaces (called address spaces in OpenCL) + for ``global``, ``local``, ``constant``, and ``private``. These are part of + the type system and are modifiers to pointer types. In addition, OpenCL + defines ``generic`` pointers that can reference either the ``global``, + ``local``, or ``private`` memory spaces. To support the OpenCL language the + debugger would want to support casting pointers between the ``generic`` and + other memory spaces, querying what memory space a ``generic`` pointer value is + currently referencing, and possibly using pointer casting to form an address + for a specific memory space out of an integral value. + + The method to use to dereference a pointer type or reference type value is + defined in DWARF expressions using ``DW_OP_xderef*`` which uses a target + architecture specific address space. + + DWARF defines the ``DW_AT_address_class`` attribute on pointer type and + reference type debugger information entries. It specifies the method to use to + dereference them. Why is the value of this not the same as the address space + value used in ``DW_OP_xderef*``? In both cases it is target architecture + specific and the architecture presumably will use the same set of methods to + dereference pointers in both cases. + + Since ``DW_AT_address_class`` uses a target architecture specific value, it + cannot in general capture the source language memory space type modifier + concept. On some architectures all source language memory space modifiers may + actually use the same method for dereferencing pointers. + + One possibility is for DWARF to add an ``DW_TAG_LLVM_address_class_type`` + debugger information entry type modifier that can be applied to a pointer type + and reference type. 
The ``DW_AT_address_class`` attribute could be re-defined + to not be target architecture specific and instead define generalized language + values (as presented above for DWARF address classes in the table + :ref:`amdgpu-dwarf-address-class-table`) that will support OpenCL and other + languages using memory spaces. The ``DW_AT_address_class`` attribute could be + defined to not be applied to pointer types or reference types, but instead + only to the new ``DW_TAG_LLVM_address_class_type`` type modifier debugger + information entry. + + If a pointer type or reference type is not modified by + ``DW_TAG_LLVM_address_class_type`` or if ``DW_TAG_LLVM_address_class_type`` + has no ``DW_AT_address_class`` attribute, then the pointer type or reference + type would be defined to use the ``DW_ADDR_none`` address class as currently. + Since modifiers can be chained, it would need to be defined if multiple + ``DW_TAG_LLVM_address_class_type`` modifiers were legal, and if so if the + outermost one is the one that takes precedence. + + A target architecture implementation that supports multiple address spaces + would need to map ``DW_ADDR_none`` appropriately to support CUDA-like + languages that have no address classes in the type system but do support + variable allocation in address classes. Such variable allocation would result + in the variable's location description needing an address space. + + The approach presented in :ref:`amdgpu-dwarf-address-class-table` is to define + the default ``DW_ADDR_none`` to be the generic address class and not the + global address class. This matches how CLANG and LLVM have added support for + CUDA-like languages on top of existing C++ language support. This allows all + addresses to be generic by default which matches CUDA-like languages. + + An alternative approach is to define ``DW_ADDR_none`` as being the global + address class and then change ``DW_ADDR_LLVM_global`` to + ``DW_ADDR_LLVM_generic``. 
This would match the reality that languages that do + not support multiple memory spaces only have one default global memory space. + Generally, in these languages if they expose that the target architecture + supports multiple address spaces, the default one is still the global memory + space. Then a language that does support multiple memory spaces has to + explicitly indicate which pointers have the added ability to reference more + than the global memory space. However, compilers generating DWARF for + CUDA-like languages would then have to define every CUDA-like language pointer + type or reference type using ``DW_TAG_LLVM_address_class_type`` with a + ``DW_AT_address_class`` attribute of ``DW_ADDR_LLVM_generic`` to match the + language semantics. + + A new ``DW_AT_LLVM_address_space`` attribute could be defined that can be + applied to pointer type, reference type, subprogram, and subprogram type to + describe how objects having the given type are dereferenced or called (the + role that ``DW_AT_address_class`` currently provides). The values of + ``DW_AT_LLVM_address_space`` would be target architecture specific and the same as + used in ``DW_OP_xderef*``. + +.. note:: + + Some additional changes will be made to support languages such as OpenCL/SYCL + that allow address class pointer casting and queries. + + This requires the compiler to provide the mapping from address space to + address class which may be runtime and not target architecture dependent. Some + implementations may have a one-to-one mapping from source language address + class to target architecture address space, and some may have a many-to-one + mapping which requires knowledge of the address class when determining if + pointer address class casts are allowed. + + The changes will likely add an attribute that has an expression provided by + the compiler to map from address class to address space.
The + ``DW_OP_implicit_pointer`` and ``DW_OP_LLVM_aspace_implicit_pointer`` + operations may be changed as the current IPV definition may not provide enough + information when used to cast between address classes. Other attributes and + operations may be needed. The legal casts between address classes may need to + be defined on a per language address class basis. + +.. _amdgpu-dwarf-debugging-information-entry-attributes: + +Debugging Information Entry Attributes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This section provides changes to existing debugger information entry + attributes and defines attributes added by these extensions. These would be + incorporated into the appropriate DWARF Version 5 chapter 2 sections. + +1. ``DW_AT_location`` + + Any debugging information entry describing a data object (which includes + variables and parameters) or common blocks may have a ``DW_AT_location`` + attribute, whose value is a DWARF expression E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. The result of the evaluation is the location + description of the base of the data object. + + See :ref:`amdgpu-dwarf-control-flow-operations` for special evaluation rules + used by the ``DW_OP_call*`` operations. + + .. note:: + + Delete the description of how the ``DW_OP_call*`` operations evaluate a + ``DW_AT_location`` attribute as that is now described in the operations. + + .. note:: + + See the discussion about the ``DW_AT_location`` attribute in the + ``DW_OP_call*`` operation. Having each attribute only have a single + purpose and single execution semantics seems desirable. It makes it easier + for the consumer that no longer has to track the context.
It makes it + easier for the producer as it can rely on a single semantics for each + attribute. + + For that reason, limiting the ``DW_AT_location`` attribute to only + supporting evaluating the location description of an object, and using a + different attribute and encoding class for the evaluation of DWARF + expression *procedures* on the same operation expression stack seems + desirable. + +2. ``DW_AT_const_value`` + + .. note:: + + Could deprecate using the ``DW_AT_const_value`` attribute for + ``DW_TAG_variable`` or ``DW_TAG_formal_parameter`` debugger information + entries that have been optimized to a constant. Instead, + ``DW_AT_location`` could be used with a DWARF expression that produces an + implicit location description now that any location description can be + used within a DWARF expression. This allows the ``DW_OP_call*`` operations + to be used to push the location description of any variable regardless of + how it is optimized. + +3. ``DW_AT_frame_base`` + + A ``DW_TAG_subprogram`` or ``DW_TAG_entry_point`` debugger information entry + may have a ``DW_AT_frame_base`` attribute, whose value is a DWARF expression + E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. + + The DWARF is ill-formed if E contains a ``DW_OP_fbreg`` operation, or the + resulting location description L is not comprised of one single location + description SL. + + If SL is a register location description for register R, then L is replaced + with the result of evaluating a ``DW_OP_bregx R, 0`` operation. This + computes the frame base memory location description in the target + architecture default address space.
+ + *This allows the more compact* ``DW_OP_reg*`` *to be used instead of* + ``DW_OP_breg* 0``\ *.* + + .. note:: + + This rule could be removed and require the producer to create the required + location description directly using ``DW_OP_call_frame_cfa``, + ``DW_OP_breg*``, or ``DW_OP_LLVM_aspace_bregx``. This would also then + allow a target to implement the call frames within a large register. + + Otherwise, the DWARF is ill-formed if SL is not a memory location + description in any of the target architecture specific address spaces. + + The resulting L is the *frame base* for the subprogram or entry point. + + *Typically, E will use the* ``DW_OP_call_frame_cfa`` *operation or be a + stack pointer register plus or minus some offset.* + +4. ``DW_AT_data_member_location`` + + For a ``DW_AT_data_member_location`` attribute there are two cases: + + 1. If the attribute is an integer constant B, it provides the offset in + bytes from the beginning of the containing entity. + + The result of the attribute is obtained by evaluating a + ``DW_OP_LLVM_offset B`` operation with an initial stack comprising the + location description of the beginning of the containing entity. The + result of the evaluation is the location description of the base of the + member entry. + + *If the beginning of the containing entity is not byte aligned, then the + beginning of the member entry has the same bit displacement within a + byte.* + + 2. Otherwise, the attribute must be a DWARF expression E which is evaluated + with a context that has a result kind of a location description, an + unspecified object, the compilation unit that contains E, an initial + stack comprising the location description of the beginning of the + containing entity, and other context elements corresponding to the + source language thread of execution upon which the user is focused, if + any. The result of the evaluation is the location description of the + base of the member entry. + + ..
note:: + + The beginning of the containing entity can now be any location + description, including those with more than one single location + description, and those with single location descriptions that are of any + kind and have any bit offset. + +5. ``DW_AT_use_location`` + + The ``DW_TAG_ptr_to_member_type`` debugging information entry has a + ``DW_AT_use_location`` attribute whose value is a DWARF expression E. It is + used to compute the location description of the member of the class to which + the pointer to member entry points. + + *The method used to find the location description of a given member of a + class, structure, or union is common to any instance of that class, + structure, or union and to any instance of the pointer to member type. The + method is thus associated with the pointer to member type, rather than with + each object that has a pointer to member type.* + + The ``DW_AT_use_location`` DWARF expression is used in conjunction with the + location description for a particular object of the given pointer to member + type and for a particular structure or class instance. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an initial stack comprising two entries, + and other context elements corresponding to the source language thread of + execution upon which the user is focused, if any. The first stack entry is + the value of the pointer to member object itself. The second stack entry is + the location description of the base of the entire class, structure, or + union instance containing the member whose location is being calculated. The + result of the evaluation is the location description of the member of the + class to which the pointer to member entry points. + +6. 
``DW_AT_data_location`` + + The ``DW_AT_data_location`` attribute may be used with any type that + provides one or more levels of hidden indirection and/or run-time parameters + in its representation. Its value is a DWARF operation expression E which + computes the location description of the data for an object. When this + attribute is omitted, the location description of the data is the same as + the location description of the object. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an object that is the location + description of the data descriptor, the compilation unit that contains E, an + empty initial stack, and other context elements corresponding to the source + language thread of execution upon which the user is focused, if any. The + result of the evaluation is the location description of the base of the + member entry. + + *E will typically involve an operation expression that begins with a* + ``DW_OP_push_object_address`` *operation which loads the location + description of the object which can then serve as a description in + subsequent calculation.* + + .. note:: + + Since ``DW_AT_data_member_location``, ``DW_AT_use_location``, and + ``DW_AT_vtable_elem_location`` allow both operation expressions and + location list expressions, why does ``DW_AT_data_location`` not allow + both? In all cases they apply to data objects so less likely that + optimization would cause different operation expressions for different + program location ranges. But if supporting for some then should be for + all. + + It seems odd this attribute is not the same as + ``DW_AT_data_member_location`` in having an initial stack with the + location description of the object since the expression has to need it. + +7. ``DW_AT_vtable_elem_location`` + + An entry for a virtual function also has a ``DW_AT_vtable_elem_location`` + attribute whose value is a DWARF expression E. 
+ + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an initial stack comprising the location + description of the object of the enclosing type, and other context elements + corresponding to the source language thread of execution upon which the user + is focused, if any. The result of the evaluation is the location description + of the slot for the function within the virtual function table for the + enclosing class. + +8. ``DW_AT_static_link`` + + If a ``DW_TAG_subprogram`` or ``DW_TAG_entry_point`` debugger information + entry is lexically nested, it may have a ``DW_AT_static_link`` attribute, + whose value is a DWARF expression E. + + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. The result of the evaluation is the location + description L of the *canonical frame address* (see + :ref:`amdgpu-dwarf-call-frame-information`) of the relevant call frame of + the subprogram instance that immediately lexically encloses the current call + frame's subprogram or entry point. + + The DWARF is ill-formed if L is not comprised of one memory location + description for one of the target architecture specific address spaces. + +9. ``DW_AT_return_addr`` + + A ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or + ``DW_TAG_entry_point`` debugger information entry may have a + ``DW_AT_return_addr`` attribute, whose value is a DWARF expression E.
+ + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. The result of the evaluation is the location + description L of the place where the return address for the current call + frame's subprogram or entry point is stored. + + The DWARF is ill-formed if L is not comprised of one memory location + description for one of the target architecture specific address spaces. + + .. note:: + + It is unclear why ``DW_TAG_inlined_subroutine`` has a + ``DW_AT_return_addr`` attribute but not a ``DW_AT_frame_base`` or + ``DW_AT_static_link`` attribute. Seems it would either have all of them or + none. Since inlined subprograms do not have a call frame it seems they + would have none of these attributes. + +10. ``DW_AT_call_value``, ``DW_AT_call_data_location``, and + ``DW_AT_call_data_value`` + + A ``DW_TAG_call_site_parameter`` debugger information entry may have a + ``DW_AT_call_value`` attribute, whose value is a DWARF operation expression + E\ :sub:`1`\ . + + The result of the ``DW_AT_call_value`` attribute is obtained by evaluating + E\ :sub:`1` with a context that has a result kind of a value, an unspecified + object, the compilation unit that contains E, an empty initial stack, and + other context elements corresponding to the source language thread of + execution upon which the user is focused, if any. The resulting value V\ + :sub:`1` is the value of the parameter at the time of the call made by the + call site. 
+ + For parameters passed by reference, where the code passes a pointer to a + location which contains the parameter, or for reference type parameters, the + ``DW_TAG_call_site_parameter`` debugger information entry may also have a + ``DW_AT_call_data_location`` attribute whose value is a DWARF operation + expression E\ :sub:`2`\ , and a ``DW_AT_call_data_value`` attribute whose + value is a DWARF operation expression E\ :sub:`3`\ . + + The value of the ``DW_AT_call_data_location`` attribute is obtained by + evaluating E\ :sub:`2` with a context that has a result kind of a location + description, an unspecified object, the compilation unit that contains E, an + empty initial stack, and other context elements corresponding to the source + language thread of execution upon which the user is focused, if any. The + resulting location description L\ :sub:`2` is the location where the + referenced parameter lives during the call made by the call site. If E\ + :sub:`2` would just be a ``DW_OP_push_object_address``, then the + ``DW_AT_call_data_location`` attribute may be omitted. + + The value of the ``DW_AT_call_data_value`` attribute is obtained by + evaluating E\ :sub:`3` with a context that has a result kind of a value, an + unspecified object, the compilation unit that contains E, an empty initial + stack, and other context elements corresponding to the source language + thread of execution upon which the user is focused, if any. The resulting + value V\ :sub:`3` is the value in L\ :sub:`2` at the time of the call made + by the call site. + + The result of these attributes is undefined if the current call frame is + not for the subprogram containing the ``DW_TAG_call_site_parameter`` + debugger information entry or the current program location is not for the + call site containing the ``DW_TAG_call_site_parameter`` debugger information + entry in the current call frame. 
+ + *The consumer may have to virtually unwind to the call site (see* + :ref:`amdgpu-dwarf-call-frame-information`\ *) in order to evaluate these + attributes. This will ensure the source language thread of execution upon + which the user is focused corresponds to the call site needed to evaluate + the expression.* + + If it is not possible to avoid the expressions of these attributes from + accessing registers or memory locations that might be clobbered by the + subprogram being called by the call site, then the associated attribute + should not be provided. + + *The reason for the restriction is that the parameter may need to be + accessed during the execution of the callee. The consumer may virtually + unwind from the called subprogram back to the caller and then evaluate the + attribute expressions. The call frame information (see* + :ref:`amdgpu-dwarf-call-frame-information`\ *) will not be able to restore + registers that have been clobbered, and clobbered memory will no longer have + the value at the time of the call.* + +11. ``DW_AT_LLVM_lanes`` *New* + + For languages that are implemented using a SIMD or SIMT execution model, a + ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or + ``DW_TAG_entry_point`` debugger information entry may have a + ``DW_AT_LLVM_lanes`` attribute whose value is an integer constant that is + the number of lanes per thread. This is the static number of lanes per + thread. It is not the dynamic number of lanes with which the thread was + initiated, for example, due to smaller or partial work-groups. + + If not present, the default value of 1 is used. + + The DWARF is ill-formed if the value is 0. + +12. ``DW_AT_LLVM_lane_pc`` *New* + + For languages that are implemented using a SIMD or SIMT execution model, a + ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or + ``DW_TAG_entry_point`` debugging information entry may have a + ``DW_AT_LLVM_lane_pc`` attribute whose value is a DWARF expression E. 
+ + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a location description, an unspecified object, the + compilation unit that contains E, an empty initial stack, and other context + elements corresponding to the source language thread of execution upon which + the user is focused, if any. + + The resulting location description L is for a thread lane count sized vector + of generic type elements. The thread lane count is the value of the + ``DW_AT_LLVM_lanes`` attribute. Each element holds the conceptual program + location of the corresponding lane, where the least significant element + corresponds to the first target architecture specific lane identifier and so + forth. If the lane was not active when the current subprogram was called, + its element is an undefined location description. + + ``DW_AT_LLVM_lane_pc`` *allows the compiler to indicate conceptually where + each lane of a SIMT thread is positioned even when it is in divergent + control flow that is not active.* + + *Typically, the result is a location description with one composite location + description with each part being a location description with either one + undefined location description or one memory location description.* + + If not present, the thread is not being used in a SIMT manner, and the + thread's current program location is used. + +13. ``DW_AT_LLVM_active_lane`` *New* + + For languages that are implemented using a SIMD or SIMT execution model, a + ``DW_TAG_subprogram``, ``DW_TAG_inlined_subroutine``, or + ``DW_TAG_entry_point`` debugger information entry may have a + ``DW_AT_LLVM_active_lane`` attribute whose value is a DWARF expression E. 
+ + The result of the attribute is obtained by evaluating E with a context that + has a result kind of a value, an unspecified object, the compilation unit + that contains E, an empty initial stack, and other context elements + corresponding to the source language thread of execution upon which the user + is focused, if any. + + The DWARF is ill-formed if the resulting value V is not an integral value. + + The resulting V is a bit mask of active lanes for the current program + location. The N\ :sup:`th` least significant bit of the mask corresponds to + the N\ :sup:`th` lane. If the bit is 1 the lane is active, otherwise it is + inactive. + + *Some targets may update the target architecture execution mask for regions + of code that must execute with different sets of lanes than the current + active lanes. For example, some code must execute with all lanes made + temporarily active.* ``DW_AT_LLVM_active_lane`` *allows the compiler to + provide the means to determine the source language active lanes.* + + If not present and ``DW_AT_LLVM_lanes`` is greater than 1, then the target + architecture execution mask is used. + +14. ``DW_AT_LLVM_vector_size`` *New* + + A ``DW_TAG_base_type`` debugger information entry for a base type T may have + a ``DW_AT_LLVM_vector_size`` attribute whose value is an integer constant + that is the vector type size N. + + The representation of a vector base type is as N contiguous elements, each + one having the representation of a base type T' that is the same as T + without the ``DW_AT_LLVM_vector_size`` attribute. + + If a ``DW_TAG_base_type`` debugger information entry does not have a + ``DW_AT_LLVM_vector_size`` attribute, then the base type is not a vector + type. + + The DWARF is ill-formed if N is not greater than 0. + + .. note:: + + LLVM has mention of a non-upstreamed debugger information entry that is + intended to support vector types. 
However, that was not for a base type so + would not be suitable as the type of a stack value entry. But perhaps that + could be replaced by using this attribute. + +15. ``DW_AT_LLVM_augmentation`` *New* + + A ``DW_TAG_compile_unit`` debugger information entry for a compilation unit + may have a ``DW_AT_LLVM_augmentation`` attribute, whose value is an + augmentation string. + + *The augmentation string allows producers to indicate that there is + additional vendor or target specific information in the debugging + information entries. For example, this might be information about the + version of vendor specific extensions that are being used.* + + If not present, or if the string is empty, then the compilation unit has no + augmentation string. + + The format for the augmentation string is: + + | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + + Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y + version number of the extensions used, and *options* is an optional string + providing additional information about the extensions. The version number + must conform to semantic versioning [:ref:`SEMVER <amdgpu-dwarf-SEMVER>`]. + The *options* string must not contain the "\ ``]``\ " character. + + For example: + + :: + + [abc:v0.0][def:v1.2:feature-a=on,feature-b=3] + +Program Scope Entities +---------------------- + +.. _amdgpu-dwarf-language-names: + +Unit Entities +~~~~~~~~~~~~~ + +.. note:: + + This augments DWARF Version 5 section 3.1.1 and Table 3.1. + +Additional language codes defined for use with the ``DW_AT_language`` attribute +are defined in :ref:`amdgpu-dwarf-language-names-table`. + +.. table:: Language Names + :name: amdgpu-dwarf-language-names-table + + ==================== ============================= + Language Name Meaning + ==================== ============================= + ``DW_LANG_LLVM_HIP`` HIP Language.
+ ==================== ============================= + +The HIP language [:ref:`HIP <amdgpu-dwarf-HIP>`] can be supported by extending +the C++ language. + +Other Debugger Information +-------------------------- + +Accelerated Access +~~~~~~~~~~~~~~~~~~ + +.. _amdgpu-dwarf-lookup-by-name: + +Lookup By Name +++++++++++++++ + +Contents of the Name Index +########################## + +.. note:: + + The following provides changes to DWARF Version 5 section 6.1.1.1. + + The rule for debugger information entries included in the name index in the + optional ``.debug_names`` section is extended to also include named + ``DW_TAG_variable`` debugging information entries with a ``DW_AT_location`` + attribute that includes a ``DW_OP_LLVM_form_aspace_address`` operation. + +The name index must contain an entry for each debugging information entry that +defines a named subprogram, label, variable, type, or namespace, subject to the +following rules: + +* ``DW_TAG_variable`` debugging information entries with a ``DW_AT_location`` + attribute that includes a ``DW_OP_addr``, ``DW_OP_LLVM_form_aspace_address``, + or ``DW_OP_form_tls_address`` operation are included; otherwise, they are + excluded. + +Data Representation of the Name Index +##################################### + +Section Header +^^^^^^^^^^^^^^ + +.. note:: + + The following provides an addition to DWARF Version 5 section 6.1.1.4.1 item + 14 ``augmentation_string``. + +A null-terminated UTF-8 vendor specific augmentation string, which provides +additional information about the contents of this index. If provided, the +recommended format for augmentation string is: + + | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + +Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y +version number of the extensions used in the DWARF of the compilation unit, and +*options* is an optional string providing additional information about the +extensions.
The version number must conform to semantic versioning [:ref:`SEMVER +<amdgpu-dwarf-SEMVER>`]. The *options* string must not contain the "\ ``]``\ " +character. + +For example: + + :: + + [abc:v0.0][def:v1.2:feature-a=on,feature-b=3] + +.. note:: + + This is different to the definition in DWARF Version 5 but is consistent with + the other augmentation strings and allows multiple vendor extensions to be + supported. + +.. _amdgpu-dwarf-line-number-information: + +Line Number Information +~~~~~~~~~~~~~~~~~~~~~~~ + +The Line Number Program Header +++++++++++++++++++++++++++++++ + +Standard Content Descriptions +############################# + +.. note:: + + This augments DWARF Version 5 section 6.2.4.1. + +.. _amdgpu-dwarf-line-number-information-dw-lnct-llvm-source: + +1. ``DW_LNCT_LLVM_source`` + + The component is a null-terminated UTF-8 source text string with "\ ``\n``\ + " line endings. This content code is paired with the same forms as + ``DW_LNCT_path``. It can be used for file name entries. + + The value is an empty null-terminated string if no source is available. If + the source is available but is an empty file then the value is a + null-terminated single "\ ``\n``\ ". + + *When the source field is present, consumers can use the embedded source + instead of attempting to discover the source on disk using the file path + provided by the* ``DW_LNCT_path`` *field. When the source field is absent, + consumers can access the file to get the source text.* + + *This is particularly useful for programming languages that support runtime + compilation and runtime generation of source text. In these cases, the + source text does not reside in any permanent file. For example, the OpenCL + language [:ref:`OpenCL <amdgpu-dwarf-OpenCL>`] supports online compilation.* + +2. ``DW_LNCT_LLVM_is_MD5`` + + ``DW_LNCT_LLVM_is_MD5`` indicates if the ``DW_LNCT_MD5`` content kind, if + present, is valid: when 0 it is not valid and when 1 it is valid.
If + ``DW_LNCT_LLVM_is_MD5`` content kind is not present, and ``DW_LNCT_MD5`` + content kind is present, then the MD5 checksum is valid. + + ``DW_LNCT_LLVM_is_MD5`` is always paired with the ``DW_FORM_udata`` form. + + *This allows a compilation unit to have a mixture of files with and without + MD5 checksums. This can happen when multiple relocatable files are linked + together.* + +.. _amdgpu-dwarf-call-frame-information: + +Call Frame Information +~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This section provides changes to existing call frame information and defines + instructions added by these extensions. Additional support is added for + address spaces. Register unwind DWARF expressions are generalized to allow any + location description, including those with composite and implicit location + descriptions. + + These changes would be incorporated into the DWARF Version 5 section 6.4. + +.. _amdgpu-dwarf-structure_of-call-frame-information: + +Structure of Call Frame Information ++++++++++++++++++++++++++++++++++++ + +The register rules are: + +*undefined* + A register that has this rule has no recoverable value in the previous frame. + The previous value of this register is the undefined location description (see + :ref:`amdgpu-dwarf-undefined-location-description-operations`). + + *By convention, the register is not preserved by a callee.* + +*same value* + This register has not been modified from the previous caller frame. + + If the current frame is the top frame, then the previous value of this + register is the location description L that specifies one register location + description SL. SL specifies the register location storage that corresponds to + the register with a bit offset of 0 for the current thread.
+ + If the current frame is not the top frame, then the previous value of this + register is the location description obtained using the call frame information + for the callee frame and callee program location invoked by the current caller + frame for the same register. + + *By convention, the register is preserved by the callee, but the callee has + not modified it.* + +*offset(N)* + N is a signed byte offset. The previous value of this register is saved at the + location description computed as if the DWARF operation expression + ``DW_OP_LLVM_offset N`` is evaluated with the current context, except the + result kind is a location description, the compilation unit is unspecified, + the object is unspecified, and an initial stack comprising the location + description of the current CFA (see + :ref:`amdgpu-dwarf-operation-expressions`). + +*val_offset(N)* + N is a signed byte offset. The previous value of this register is the memory + byte address of the location description computed as if the DWARF operation + expression ``DW_OP_LLVM_offset N`` is evaluated with the current context, + except the result kind is a location description, the compilation unit is + unspecified, the object is unspecified, and an initial stack comprising the + location description of the current CFA (see + :ref:`amdgpu-dwarf-operation-expressions`). + + The DWARF is ill-formed if the CFA location description is not a memory byte + address location description, or if the register size does not match the size + of an address in the address space of the current CFA location description. + + *Since the CFA location description is required to be a memory byte address + location description, the value of val_offset(N) will also be a memory byte + address location description since it is offsetting the CFA location + description by N bytes. Furthermore, the value of val_offset(N) will be a + memory byte address in the same address space as the CFA location + description.* + + .. 
note:: + + Should DWARF allow the address size to be a different size to the size of + the register? Requiring them to be the same bit size avoids any issue of + conversion as the bit contents of the register is simply interpreted as a + value of the address. + + GDB has a per register hook that allows a target specific conversion on a + register by register basis. It defaults to truncation of bigger registers, + and to actually reading bytes from the next register (or reads out of bounds + for the last register) for smaller registers. There are no GDB tests that + read a register out of bounds (except an illegal hand written assembly + test). + +*register(R)* + This register has been stored in another register numbered R. + + The previous value of this register is the location description obtained using + the call frame information for the current frame and current program location + for register R. + + The DWARF is ill-formed if the size of this register does not match the size + of register R or if there is a cyclic dependency in the call frame + information. + + .. note:: + + Should this also allow R to be larger than this register? If so is the value + stored in the low order bits and it is undefined what is stored in the + extra upper bits? + +*expression(E)* + The previous value of this register is located at the location description + produced by evaluating the DWARF operation expression E (see + :ref:`amdgpu-dwarf-operation-expressions`). + + E is evaluated with the current context, except the result kind is a location + description, the compilation unit is unspecified, the object is unspecified, + and an initial stack comprising the location description of the current CFA + (see :ref:`amdgpu-dwarf-operation-expressions`). + +*val_expression(E)* + The previous value of this register is the value produced by evaluating the + DWARF operation expression E (see :ref:`amdgpu-dwarf-operation-expressions`). 
+ + E is evaluated with the current context, except the result kind is a value, + the compilation unit is unspecified, the object is unspecified, and an initial + stack comprising the location description of the current CFA (see + :ref:`amdgpu-dwarf-operation-expressions`). + + The DWARF is ill-formed if the resulting value type size does not match the + register size. + + .. note:: + + This has limited usefulness as the DWARF expression E can only produce + values up to the size of the generic type. This is due to not allowing any + operations that specify a type in a CFI operation expression. This makes it + unusable for registers that are larger than the generic type. However, + *expression(E)* can be used to create an implicit location description of + any size. + +*architectural* + The rule is defined externally to this specification by the augmenter. + +A Common Information Entry (CIE) holds information that is shared among many +Frame Description Entries (FDE). There is at least one CIE in every non-empty +``.debug_frame`` section. A CIE contains the following fields, in order: + +1. ``length`` (initial length) + + A constant that gives the number of bytes of the CIE structure, not + including the length field itself. The size of the length field plus the + value of length must be an integral multiple of the address size specified + in the ``address_size`` field. + +2. ``CIE_id`` (4 or 8 bytes, see + :ref:`amdgpu-dwarf-32-bit-and-64-bit-dwarf-formats`) + + A constant that is used to distinguish CIEs from FDEs. + + In the 32-bit DWARF format, the value of the CIE id in the CIE header is + 0xffffffff; in the 64-bit DWARF format, the value is 0xffffffffffffffff. + +3. ``version`` (ubyte) + + A version number. This number is specific to the call frame information and + is independent of the DWARF version number. + + The value of the CIE version number is 4. + + .. note:: + + Would this be increased to 5 to reflect the changes in these extensions? + +4. 
``augmentation`` (sequence of UTF-8 characters) + + A null-terminated UTF-8 string that identifies the augmentation to this CIE + or to the FDEs that use it. If a reader encounters an augmentation string + that is unexpected, then only the following fields can be read: + + * CIE: length, CIE_id, version, augmentation + * FDE: length, CIE_pointer, initial_location, address_range + + If there is no augmentation, this value is a zero byte. + + *The augmentation string allows users to indicate that there is additional + vendor and target architecture specific information in the CIE or FDE which + is needed to virtually unwind a stack frame. For example, this might be + information about dynamically allocated data which needs to be freed on exit + from the routine.* + + *Because the* ``.debug_frame`` *section is useful independently of any* + ``.debug_info`` *section, the augmentation string always uses UTF-8 + encoding.* + + The recommended format for the augmentation string is: + + | ``[``\ *vendor*\ ``:v``\ *X*\ ``.``\ *Y*\ [\ ``:``\ *options*\ ]\ ``]``\ * + + Where *vendor* is the producer, ``vX.Y`` specifies the major X and minor Y + version number of the extensions used, and *options* is an optional string + providing additional information about the extensions. The version number + must conform to semantic versioning [:ref:`SEMVER <amdgpu-dwarf-SEMVER>`]. + The *options* string must not contain the "\ ``]``\ " character. + + For example: + + :: + + [abc:v0.0][def:v1.2:feature-a=on,feature-b=3] + +5. ``address_size`` (ubyte) + + The size of a target address in this CIE and any FDEs that use it, in bytes. + If a compilation unit exists for this frame, its address size must match the + address size here. + +6. ``segment_selector_size`` (ubyte) + + The size of a segment selector in this CIE and any FDEs that use it, in + bytes. + +7.
``code_alignment_factor`` (unsigned LEB128) + + A constant that is factored out of all advance location instructions (see + :ref:`amdgpu-dwarf-row-creation-instructions`). The resulting value is + ``(operand * code_alignment_factor)``. + +8. ``data_alignment_factor`` (signed LEB128) + + A constant that is factored out of certain offset instructions (see + :ref:`amdgpu-dwarf-cfa-definition-instructions` and + :ref:`amdgpu-dwarf-register-rule-instructions`). The resulting value is + ``(operand * data_alignment_factor)``. + +9. ``return_address_register`` (unsigned LEB128) + + An unsigned LEB128 constant that indicates which column in the rule table + represents the return address of the subprogram. Note that this column might + not correspond to an actual machine register. + + The value of the return address register is used to determine the program + location of the caller frame. The program location of the top frame is the + target architecture program counter value of the current thread. + +10. ``initial_instructions`` (array of ubyte) + + A sequence of rules that are interpreted to create the initial setting of + each column in the table. + + The default rule for all columns before interpretation of the initial + instructions is the undefined rule. However, an ABI authoring body or a + compilation system authoring body may specify an alternate default value for + any or all columns. + +11. ``padding`` (array of ubyte) + + Enough ``DW_CFA_nop`` instructions to make the size of this entry match the + length value above. + +An FDE contains the following fields, in order: + +1. ``length`` (initial length) + + A constant that gives the number of bytes of the header and instruction + stream for this subprogram, not including the length field itself. The size + of the length field plus the value of length must be an integral multiple of + the address size. + +2. 
``CIE_pointer`` (4 or 8 bytes, see + :ref:`amdgpu-dwarf-32-bit-and-64-bit-dwarf-formats`) + + A constant offset into the ``.debug_frame`` section that denotes the CIE + that is associated with this FDE. + +3. ``initial_location`` (segment selector and target address) + + The address of the first location associated with this table entry. If the + segment_selector_size field of this FDE’s CIE is non-zero, the initial + location is preceded by a segment selector of the given length. + +4. ``address_range`` (target address) + + The number of bytes of program instructions described by this entry. + +5. ``instructions`` (array of ubyte) + + A sequence of table defining instructions that are described in + :ref:`amdgpu-dwarf-call-frame-instructions`. + +6. ``padding`` (array of ubyte) + + Enough ``DW_CFA_nop`` instructions to make the size of this entry match the + length value above. + +.. _amdgpu-dwarf-call-frame-instructions: + +Call Frame Instructions ++++++++++++++++++++++++ + +Some call frame instructions have operands that are encoded as DWARF operation +expressions E (see :ref:`amdgpu-dwarf-operation-expressions`). The DWARF +operations that can be used in E have the following restrictions: + +* ``DW_OP_addrx``, ``DW_OP_call2``, ``DW_OP_call4``, ``DW_OP_call_ref``, + ``DW_OP_const_type``, ``DW_OP_constx``, ``DW_OP_convert``, + ``DW_OP_deref_type``, ``DW_OP_fbreg``, ``DW_OP_implicit_pointer``, + ``DW_OP_regval_type``, ``DW_OP_reinterpret``, and ``DW_OP_xderef_type`` + operations are not allowed because the call frame information must not depend + on other debug sections. + +* ``DW_OP_push_object_address`` is not allowed because there is no object + context to provide a value to push. + +* ``DW_OP_LLVM_push_lane`` is not allowed because the call frame instructions + describe the actions for the whole thread, not the lanes independently. + +* ``DW_OP_call_frame_cfa`` and ``DW_OP_entry_value`` are not allowed because + their use would be circular. 
+ +* ``DW_OP_LLVM_call_frame_entry_reg`` is not allowed if evaluating E causes a + circular dependency between ``DW_OP_LLVM_call_frame_entry_reg`` operations. + + *For example, if a register R1 has a* ``DW_CFA_def_cfa_expression`` + *instruction that evaluates a* ``DW_OP_LLVM_call_frame_entry_reg`` *operation + that specifies register R2, and register R2 has a* + ``DW_CFA_def_cfa_expression`` *instruction that evaluates a* + ``DW_OP_LLVM_call_frame_entry_reg`` *operation that specifies register R1.* + +*Call frame instructions to which these restrictions apply include* +``DW_CFA_def_cfa_expression``\ *,* ``DW_CFA_expression``\ *, and* +``DW_CFA_val_expression``\ *.* + +.. _amdgpu-dwarf-row-creation-instructions: + +Row Creation Instructions +######################### + +.. note:: + + These instructions are the same as in DWARF Version 5 section 6.4.2.1. + +.. _amdgpu-dwarf-cfa-definition-instructions: + +CFA Definition Instructions +########################### + +1. ``DW_CFA_def_cfa`` + + The ``DW_CFA_def_cfa`` instruction takes two unsigned LEB128 operands + representing a register number R and a (non-factored) byte displacement B. + AS is set to the target architecture default address space identifier. The + required action is to define the current CFA rule to be the result of + evaluating the DWARF operation expression ``DW_OP_constu AS; + DW_OP_LLVM_aspace_bregx R, B`` as a location description. + +2. ``DW_CFA_def_cfa_sf`` + + The ``DW_CFA_def_cfa_sf`` instruction takes two operands: an unsigned LEB128 + value representing a register number R and a signed LEB128 factored byte + displacement B. AS is set to the target architecture default address space + identifier. The required action is to define the current CFA rule to be the + result of evaluating the DWARF operation expression ``DW_OP_constu AS; + DW_OP_LLVM_aspace_bregx R, B*data_alignment_factor`` as a location description. 
+ + *The action is the same as* ``DW_CFA_def_cfa``\ *, except that the second + operand is signed and factored.* + +3. ``DW_CFA_LLVM_def_aspace_cfa`` *New* + + The ``DW_CFA_LLVM_def_aspace_cfa`` instruction takes three unsigned LEB128 + operands representing a register number R, a (non-factored) byte + displacement B, and a target architecture specific address space identifier + AS. The required action is to define the current CFA rule to be the result + of evaluating the DWARF operation expression ``DW_OP_constu AS; + DW_OP_LLVM_aspace_bregx R, B`` as a location description. + + If AS is not one of the values defined by the target architecture specific + ``DW_ASPACE_*`` values, then the DWARF expression is ill-formed. + +4. ``DW_CFA_LLVM_def_aspace_cfa_sf`` *New* + + The ``DW_CFA_LLVM_def_aspace_cfa_sf`` instruction takes three operands: an unsigned + LEB128 value representing a register number R, a signed LEB128 factored byte + displacement B, and an unsigned LEB128 value representing a target + architecture specific address space identifier AS. The required action is to + define the current CFA rule to be the result of evaluating the DWARF + operation expression ``DW_OP_constu AS; DW_OP_LLVM_aspace_bregx R, + B*data_alignment_factor`` as a location description. + + If AS is not one of the values defined by the target architecture specific + ``DW_ASPACE_*`` values, then the DWARF expression is ill-formed. + + *The action is the same as* ``DW_CFA_LLVM_def_aspace_cfa``\ *, except that the + second operand is signed and factored.* + +5. ``DW_CFA_def_cfa_register`` + + The ``DW_CFA_def_cfa_register`` instruction takes a single unsigned LEB128 + operand representing a register number R. The required action is to define + the current CFA rule to be the result of evaluating the DWARF operation + expression ``DW_OP_constu AS; DW_OP_LLVM_aspace_bregx R, B`` as a location + description. B and AS are the old CFA byte displacement and address space + respectively. 
+ + If the subprogram has no current CFA rule, or the rule was defined by a + ``DW_CFA_def_cfa_expression`` instruction, then the DWARF is ill-formed. + +6. ``DW_CFA_def_cfa_offset`` + + The ``DW_CFA_def_cfa_offset`` instruction takes a single unsigned LEB128 + operand representing a (non-factored) byte displacement B. The required + action is to define the current CFA rule to be the result of evaluating the + DWARF operation expression ``DW_OP_constu AS; DW_OP_LLVM_aspace_bregx R, B`` as a + location description. R and AS are the old CFA register number and address + space respectively. + + If the subprogram has no current CFA rule, or the rule was defined by a + ``DW_CFA_def_cfa_expression`` instruction, then the DWARF is ill-formed. + +7. ``DW_CFA_def_cfa_offset_sf`` + + The ``DW_CFA_def_cfa_offset_sf`` instruction takes a signed LEB128 operand + representing a factored byte displacement B. The required action is to + define the current CFA rule to be the result of evaluating the DWARF + operation expression ``DW_OP_constu AS; DW_OP_LLVM_aspace_bregx R, + B*data_alignment_factor`` as a location description. R and AS are the old + CFA register number and address space respectively. + + If the subprogram has no current CFA rule, or the rule was defined by a + ``DW_CFA_def_cfa_expression`` instruction, then the DWARF is ill-formed. + + *The action is the same as* ``DW_CFA_def_cfa_offset``\ *, except that the + operand is signed and factored.* + +8. ``DW_CFA_def_cfa_expression`` + + The ``DW_CFA_def_cfa_expression`` instruction takes a single operand encoded + as a ``DW_FORM_exprloc`` value representing a DWARF operation expression E. + The required action is to define the current CFA rule to be the result of + evaluating E with the current context, except the result kind is a location + description, the compilation unit is unspecified, the object is unspecified, + and an empty initial stack. 
+ + *See* :ref:`amdgpu-dwarf-call-frame-instructions` *regarding restrictions on + the DWARF expression operations that can be used in E.* + + The DWARF is ill-formed if the result of evaluating E is not a memory byte + address location description. + +.. _amdgpu-dwarf-register-rule-instructions: + +Register Rule Instructions +########################## + +1. ``DW_CFA_undefined`` + + The ``DW_CFA_undefined`` instruction takes a single unsigned LEB128 operand + that represents a register number R. The required action is to set the rule + for the register specified by R to ``undefined``. + +2. ``DW_CFA_same_value`` + + The ``DW_CFA_same_value`` instruction takes a single unsigned LEB128 operand + that represents a register number R. The required action is to set the rule + for the register specified by R to ``same value``. + +3. ``DW_CFA_offset`` + + The ``DW_CFA_offset`` instruction takes two operands: a register number R + (encoded with the opcode) and an unsigned LEB128 constant representing a + factored displacement B. The required action is to change the rule for the + register specified by R to be an *offset(B\*data_alignment_factor)* rule. + + .. note:: + + Seems this should be named ``DW_CFA_offset_uf`` since the offset is + unsigned factored. + +4. ``DW_CFA_offset_extended`` + + The ``DW_CFA_offset_extended`` instruction takes two unsigned LEB128 + operands representing a register number R and a factored displacement B. + This instruction is identical to ``DW_CFA_offset``, except for the encoding + and size of the register operand. + + .. note:: + + Seems this should be named ``DW_CFA_offset_extended_uf`` since the + displacement is unsigned factored. + +5. ``DW_CFA_offset_extended_sf`` + + The ``DW_CFA_offset_extended_sf`` instruction takes two operands: an + unsigned LEB128 value representing a register number R and a signed LEB128 + factored displacement B. This instruction is identical to + ``DW_CFA_offset_extended``, except that B is signed. + +6. 
``DW_CFA_val_offset`` + + The ``DW_CFA_val_offset`` instruction takes two unsigned LEB128 operands + representing a register number R and a factored displacement B. The required + action is to change the rule for the register indicated by R to be a + *val_offset(B\*data_alignment_factor)* rule. + + .. note:: + + Seems this should be named ``DW_CFA_val_offset_uf`` since the displacement + is unsigned factored. + + .. note:: + + An alternative is to define ``DW_CFA_val_offset`` to implicitly use the + target architecture default address space, and add another operation that + specifies the address space. + +7. ``DW_CFA_val_offset_sf`` + + The ``DW_CFA_val_offset_sf`` instruction takes two operands: an unsigned + LEB128 value representing a register number R and a signed LEB128 factored + displacement B. This instruction is identical to ``DW_CFA_val_offset``, + except that B is signed. + +8. ``DW_CFA_register`` + + The ``DW_CFA_register`` instruction takes two unsigned LEB128 operands + representing register numbers R1 and R2 respectively. The required action is + to set the rule for the register specified by R1 to be a *register(R2)* rule. + +9. ``DW_CFA_expression`` + + The ``DW_CFA_expression`` instruction takes two operands: an unsigned LEB128 + value representing a register number R, and a ``DW_FORM_block`` value + representing a DWARF operation expression E. The required action is to + change the rule for the register specified by R to be an *expression(E)* + rule. + + *That is, E computes the location description where the register value can + be retrieved.* + + *See* :ref:`amdgpu-dwarf-call-frame-instructions` *regarding restrictions on + the DWARF expression operations that can be used in E.* + +10. ``DW_CFA_val_expression`` + + The ``DW_CFA_val_expression`` instruction takes two operands: an unsigned + LEB128 value representing a register number R, and a ``DW_FORM_block`` value + representing a DWARF operation expression E. 
The required action is to + change the rule for the register specified by R to be a *val_expression(E)* + rule. + + *That is, E computes the value of register R.* + + *See* :ref:`amdgpu-dwarf-call-frame-instructions` *regarding restrictions on + the DWARF expression operations that can be used in E.* + + If the result of evaluating E is not a value with a base type size that + matches the register size, then the DWARF is ill-formed. + +11. ``DW_CFA_restore`` + + The ``DW_CFA_restore`` instruction takes a single operand (encoded with the + opcode) that represents a register number R. The required action is to + change the rule for the register specified by R to the rule assigned it by + the ``initial_instructions`` in the CIE. + +12. ``DW_CFA_restore_extended`` + + The ``DW_CFA_restore_extended`` instruction takes a single unsigned LEB128 + operand that represents a register number R. This instruction is identical + to ``DW_CFA_restore``, except for the encoding and size of the register + operand. + +Row State Instructions +###################### + +.. note:: + + These instructions are the same as in DWARF Version 5 section 6.4.2.4. + +Padding Instruction +################### + +.. note:: + + These instructions are the same as in DWARF Version 5 section 6.4.2.5. + +Call Frame Instruction Usage +++++++++++++++++++++++++++++ + +.. note:: + + The same as in DWARF Version 5 section 6.4.3. + +.. _amdgpu-dwarf-call-frame-calling-address: + +Call Frame Calling Address +++++++++++++++++++++++++++ + +.. note:: + + The same as in DWARF Version 5 section 6.4.4. + +Data Representation +------------------- + +.. _amdgpu-dwarf-32-bit-and-64-bit-dwarf-formats: + +32-Bit and 64-Bit DWARF Formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This augments DWARF Version 5 section 7.4. + +1. Within the body of the ``.debug_info`` section, certain forms of attribute + value depend on the choice of DWARF format as follows. 
For the 32-bit DWARF + format, the value is a 4-byte unsigned integer; for the 64-bit DWARF format, + the value is an 8-byte unsigned integer. + + .. table:: ``.debug_info`` section attribute form roles + :name: amdgpu-dwarf-debug-info-section-attribute-form-roles-table + + ================================== =================================== + Form Role + ================================== =================================== + DW_FORM_line_strp offset in ``.debug_line_str`` + DW_FORM_ref_addr offset in ``.debug_info`` + DW_FORM_sec_offset offset in a section other than + ``.debug_info`` or ``.debug_str`` + DW_FORM_strp offset in ``.debug_str`` + DW_FORM_strp_sup offset in ``.debug_str`` section of + supplementary object file + DW_OP_call_ref offset in ``.debug_info`` + DW_OP_implicit_pointer offset in ``.debug_info`` + DW_OP_LLVM_aspace_implicit_pointer offset in ``.debug_info`` + ================================== =================================== + +Format of Debugging Information +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Attribute Encodings ++++++++++++++++++++ + +.. note:: + + This augments DWARF Version 5 section 7.5.4 and Table 7.5. + +The following table gives the encoding of the additional debugging information +entry attributes. + +.. table:: Attribute encodings + :name: amdgpu-dwarf-attribute-encodings-table + + ================================== ====== =================================== + Attribute Name Value Classes + ================================== ====== =================================== + DW_AT_LLVM_active_lane 0x3e08 exprloc, loclist + DW_AT_LLVM_augmentation 0x3e09 string + DW_AT_LLVM_lanes 0x3e0a constant + DW_AT_LLVM_lane_pc 0x3e0b exprloc, loclist + DW_AT_LLVM_vector_size 0x3e0c constant + ================================== ====== =================================== + +DWARF Expressions +~~~~~~~~~~~~~~~~~ + +.. note:: + + Rename DWARF Version 5 section 7.7 to reflect the unification of location + descriptions into DWARF expressions. 
+ +Operation Expressions ++++++++++++++++++++++ + +.. note:: + + Rename DWARF Version 5 section 7.7.1 and delete section 7.7.2 to reflect the + unification of location descriptions into DWARF expressions. + + This augments DWARF Version 5 section 7.7.1 and Table 7.9. + +The following table gives the encoding of the additional DWARF expression +operations. + +.. table:: DWARF Operation Encodings + :name: amdgpu-dwarf-operation-encodings-table + + ================================== ===== ======== =============================== + Operation Code Number Notes + of + Operands + ================================== ===== ======== =============================== + DW_OP_LLVM_form_aspace_address 0xe1 0 + DW_OP_LLVM_push_lane 0xe2 0 + DW_OP_LLVM_offset 0xe3 0 + DW_OP_LLVM_offset_uconst 0xe4 1 ULEB128 byte displacement + DW_OP_LLVM_bit_offset 0xe5 0 + DW_OP_LLVM_call_frame_entry_reg 0xe6 1 ULEB128 register number + DW_OP_LLVM_undefined 0xe7 0 + DW_OP_LLVM_aspace_bregx 0xe8 2 ULEB128 register number, + ULEB128 byte displacement + DW_OP_LLVM_aspace_implicit_pointer 0xe9 2 4-byte or 8-byte offset of DIE, + SLEB128 byte displacement + DW_OP_LLVM_piece_end 0xea 0 + DW_OP_LLVM_extend 0xeb 2 ULEB128 bit size, + ULEB128 count + DW_OP_LLVM_select_bit_piece 0xec 2 ULEB128 bit size, + ULEB128 count + ================================== ===== ======== =============================== + +Location List Expressions ++++++++++++++++++++++++++ + +.. note:: + + Rename DWARF Version 5 section 7.7.3 to reflect that location lists are a kind + of DWARF expression. + +Source Languages +~~~~~~~~~~~~~~~~ + +.. note:: + + This augments DWARF Version 5 section 7.12 and Table 7.17. + +The following table gives the encoding of the additional DWARF languages. + +.. 
table:: Language encodings + :name: amdgpu-dwarf-language-encodings-table + + ==================== ====== =================== + Language Name Value Default Lower Bound + ==================== ====== =================== + ``DW_LANG_LLVM_HIP`` 0x8100 0 + ==================== ====== =================== + +Address Class and Address Space Encodings +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This replaces DWARF Version 5 section 7.13. + +The encodings of the constants used for the currently defined address classes +are given in :ref:`amdgpu-dwarf-address-class-encodings-table`. + +.. table:: Address class encodings + :name: amdgpu-dwarf-address-class-encodings-table + + ========================== ====== + Address Class Name Value + ========================== ====== + ``DW_ADDR_none`` 0x0000 + ``DW_ADDR_LLVM_global`` 0x0001 + ``DW_ADDR_LLVM_constant`` 0x0002 + ``DW_ADDR_LLVM_group`` 0x0003 + ``DW_ADDR_LLVM_private`` 0x0004 + ``DW_ADDR_LLVM_lo_user`` 0x8000 + ``DW_ADDR_LLVM_hi_user`` 0xffff + ========================== ====== + +Line Number Information +~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This augments DWARF Version 5 section 7.22 and Table 7.27. + +The following table gives the encoding of the additional line number header +entry formats. + +.. table:: Line number header entry format encodings + :name: amdgpu-dwarf-line-number-header-entry-format-encodings-table + + ==================================== ==================== + Line number header entry format name Value + ==================================== ==================== + ``DW_LNCT_LLVM_source`` 0x2001 + ``DW_LNCT_LLVM_is_MD5`` 0x2002 + ==================================== ==================== + +Call Frame Information +~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This augments DWARF Version 5 section 7.24 and Table 7.29. + +The following table gives the encoding of the additional call frame information +instructions. + +.. 
table:: Call frame instruction encodings + :name: amdgpu-dwarf-call-frame-instruction-encodings-table + + ============================= ====== ====== ================ ================ ===================== + Instruction High 2 Low 6 Operand 1 Operand 2 Operand 3 + Bits Bits + ============================= ====== ====== ================ ================ ===================== + DW_CFA_LLVM_def_aspace_cfa 0 0x30 ULEB128 register ULEB128 offset ULEB128 address space + DW_CFA_LLVM_def_aspace_cfa_sf 0 0x31 ULEB128 register SLEB128 offset ULEB128 address space + ============================= ====== ====== ================ ================ ===================== + +Attributes by Tag Value (Informative) +------------------------------------- + +.. note:: + + This augments DWARF Version 5 Appendix A and Table A.1. + +The following table provides the additional attributes that are applicable to +debugger information entries. + +.. table:: Attributes by tag value + :name: amdgpu-dwarf-attributes-by-tag-value-table + + ============================= ============================= + Tag Name Applicable Attributes + ============================= ============================= + ``DW_TAG_base_type`` * ``DW_AT_LLVM_vector_size`` + ``DW_TAG_compile_unit`` * ``DW_AT_LLVM_augmentation`` + ``DW_TAG_entry_point`` * ``DW_AT_LLVM_active_lane`` + * ``DW_AT_LLVM_lane_pc`` + * ``DW_AT_LLVM_lanes`` + ``DW_TAG_inlined_subroutine`` * ``DW_AT_LLVM_active_lane`` + * ``DW_AT_LLVM_lane_pc`` + * ``DW_AT_LLVM_lanes`` + ``DW_TAG_subprogram`` * ``DW_AT_LLVM_active_lane`` + * ``DW_AT_LLVM_lane_pc`` + * ``DW_AT_LLVM_lanes`` + ============================= ============================= + +.. _amdgpu-dwarf-examples: + +Examples +======== + +The AMD GPU specific usage of the features in these extensions, including +examples, is available at *User Guide for AMDGPU Backend* section +:ref:`amdgpu-dwarf-debug-information`. + +.. 
note:: + + Change examples to use ``DW_OP_LLVM_offset`` instead of ``DW_OP_add`` when + acting on a location description. + + Need to provide examples of new features. + +.. _amdgpu-dwarf-references: + +References +========== + + .. _amdgpu-dwarf-AMD: + +1. [AMD] `Advanced Micro Devices `__ + + .. _amdgpu-dwarf-AMD-ROCm: + +2. [AMD-ROCm] `AMD ROCm Platform `__ + + .. _amdgpu-dwarf-AMD-ROCgdb: + +3. [AMD-ROCgdb] `AMD ROCm Debugger (ROCgdb) `__ + + .. _amdgpu-dwarf-AMDGPU-LLVM: + +4. [AMDGPU-LLVM] `User Guide for AMDGPU LLVM Backend `__ + + .. _amdgpu-dwarf-CUDA: + +5. [CUDA] `Nvidia CUDA Language `__ + + .. _amdgpu-dwarf-DWARF: + +6. [DWARF] `DWARF Debugging Information Format `__ + + .. _amdgpu-dwarf-ELF: + +7. [ELF] `Executable and Linkable Format (ELF) `__ + + .. _amdgpu-dwarf-GCC: + +8. [GCC] `GCC: The GNU Compiler Collection `__ + + .. _amdgpu-dwarf-GDB: + +9. [GDB] `GDB: The GNU Project Debugger `__ + + .. _amdgpu-dwarf-HIP: + +10. [HIP] `HIP Programming Guide `__ + + .. _amdgpu-dwarf-HSA: + +11. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ + + .. _amdgpu-dwarf-LLVM: + +12. [LLVM] `The LLVM Compiler Infrastructure `__ + + .. _amdgpu-dwarf-OpenCL: + +13. [OpenCL] `The OpenCL Specification Version 2.0 `__ + + .. _amdgpu-dwarf-Perforce-TotalView: + +14. [Perforce-TotalView] `Perforce TotalView HPC Debugging Software `__ + + .. _amdgpu-dwarf-SEMVER: + +15. 
[SEMVER] `Semantic Versioning `__ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUInstructionNotation.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUInstructionNotation.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUInstructionNotation.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUInstructionNotation.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,135 @@ +============================ +AMDGPU Instructions Notation +============================ + +.. contents:: + :local: + +.. _amdgpu_syn_instruction_notation: + +Introduction +============ + +This is an overview of notation used to describe syntax of AMDGPU assembler instructions. + +This notation mimics the :ref:`syntax of assembler instructions` +except that instead of real operands and modifiers it provides references to their description. + +Instructions +============ + +Notation +~~~~~~~~ + +This is the notation used to describe AMDGPU instructions: + + ``<``\ :ref:`opcode description`\ ``> <``\ :ref:`operands description`\ ``> <``\ :ref:`modifiers description`\ ``>`` + +.. _amdgpu_syn_opcode_notation: + +Opcode +====== + +Notation +~~~~~~~~ + +TBD + +.. _amdgpu_syn_instruction_operands_notation: + +Operands +======== + +An instruction may have zero or more *operands*. They are comma-separated in the description: + + ``<``\ :ref:`description of operand 0`\ ``>, <``\ :ref:`description of operand 1`\ ``>, ...`` + +The order of *operands* is fixed. *Operands* cannot be omitted +except for special cases described below. + +.. _amdgpu_syn_instruction_operand_notation: + +Notation +~~~~~~~~ + +An operand is described using the following notation: + + *...* + +Where: + +* *kind* is an optional prefix describing operand :ref:`kind`. +* *name* is a link to a description of the operand. +* *tags* are optional. They are used to indicate :ref:`special operand properties`. 
+ +.. _amdgpu_syn_instruction_operand_kinds: + +Operand Kinds +^^^^^^^^^^^^^ + +Operand kind indicates which values are accepted by the operand. + +* Operands which only accept *vector* registers are labelled with 'v' prefix. +* Operands which only accept *scalar* values are labelled with 's' prefix. +* Operands which accept both *vector* registers and *scalar* values have no prefix. + +Examples: + +.. parsed-literal:: + + vdata // operand only accepts vector registers + sdst // operand only accepts scalar registers + src1 // operand accepts both scalar and vector registers + +.. _amdgpu_syn_instruction_operand_tags: + +Operand Tags +^^^^^^^^^^^^ + +Operand tags indicate special operand properties. + + ============== ================================================================================= + Operand tag Meaning + ============== ================================================================================= + :opt An optional operand. + :m An operand which may be used with + :ref:`VOP3 operand modifiers` or + :ref:`SDWA operand modifiers`. + :dst An input operand which may also serve as a destination + if :ref:`glc` modifier is specified. + :fx This is an *f32* or *f16* operand depending on + :ref:`m_op_sel_hi` modifier. + : Operand *type* differs from *type* + :ref:`implied by the opcode name`. + This tag specifies actual operand *type*. + ============== ================================================================================= + +Examples: + +.. parsed-literal:: + + src1:m // src1 operand may be used with operand modifiers + vdata:dst // vdata operand may be used as both source and destination + vdst:u32 // vdst operand has u32 type + +.. _amdgpu_syn_instruction_modifiers_notation: + +Modifiers +========= + +An instruction may have zero or more optional *modifiers*. They are space-separated in the description: + + ``<``\ :ref:`description of modifier 0`\ ``> <``\ :ref:`description of modifier 1`\ ``> ...`` + +The order of *modifiers* is fixed. 
+ +.. _amdgpu_syn_instruction_modifier_notation: + +Notation +~~~~~~~~ + +A *modifier* is described using the following notation: + + *<name>* + +Where *name* is a link to a description of the *modifier*. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUInstructionSyntax.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUInstructionSyntax.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUInstructionSyntax.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUInstructionSyntax.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,191 @@ +========================= +AMDGPU Instruction Syntax +========================= + +.. contents:: + :local: + +.. _amdgpu_syn_instructions: + +Instructions +============ + +Syntax +~~~~~~ + +An instruction has the following syntax: + + ``<``\ *opcode mnemonic*\ ``> <``\ *operand0*\ ``>, <``\ *operand1*\ ``>,... <``\ *modifier0*\ ``> <``\ *modifier1*\ ``>...`` + +:doc:`Operands<AMDGPUOperandSyntax>` are normally comma-separated while +:doc:`modifiers<AMDGPUModifierSyntax>` are space-separated. + +The order of *operands* and *modifiers* is fixed. +Most *modifiers* are optional and may be omitted. + +.. _amdgpu_syn_instruction_mnemo: + +Opcode Mnemonic +~~~~~~~~~~~~~~~ + +Opcode mnemonic describes opcode semantics and may include one or more suffices in this order: + +* :ref:`Packing suffix<amdgpu_syn_instruction_pk>`. +* :ref:`Destination operand type suffix<amdgpu_syn_instruction_type>`. +* :ref:`Source operand type suffix<amdgpu_syn_instruction_type>`. +* :ref:`Encoding suffix<amdgpu_syn_instruction_enc>`. + +.. _amdgpu_syn_instruction_pk: + +Packing Suffix +~~~~~~~~~~~~~~ + +Most instructions which operate on packed data have a *_pk* suffix. +Unless otherwise :ref:`noted<amdgpu_syn_instruction_operand_tags>`, +these instructions operate on and produce packed data composed of +two values. The type of values is indicated by +:ref:`type suffices<amdgpu_syn_instruction_type>`. + +For example, the following instruction sums up two pairs of f16 values +and produces a pair of f16 values: + +.. 
parsed-literal:: + + v_pk_add_f16 v1, v2, v3 // Each operand has f16x2 type + +.. _amdgpu_syn_instruction_type: + +Type and Size Suffices +~~~~~~~~~~~~~~~~~~~~~~ + +Instructions which operate with data have an implied type of *data* operands. +This data type is specified as a suffix of instruction mnemonic. + +There are instructions which have 2 type suffices: +the first is the data type of the destination operand, +the second is the data type of source *data* operand(s). + +Note that data type specified by an instruction does not apply +to other kinds of operands such as *addresses*, *offsets* and so on. + +The following table enumerates the most frequently used type suffices. + + ============================================ ======================= ============================ + Type Suffices Packed instruction? Data Type + ============================================ ======================= ============================ + _b512, _b256, _b128, _b64, _b32, _b16, _b8 No Bits. + _u64, _u32, _u16, _u8 No Unsigned integer. + _i64, _i32, _i16, _i8 No Signed integer. + _f64, _f32, _f16 No Floating-point. + _b16, _u16, _i16, _f16 Yes Packed (b16x2, u16x2, etc). + ============================================ ======================= ============================ + +Instructions which have no type suffices are assumed to operate with typeless data. 
+The size of data is specified by size suffices: + + ================= =================== ===================================== + Size Suffix Implied data type Required register size in dwords + ================= =================== ===================================== + \- b32 1 + x2 b64 2 + x3 b96 3 + x4 b128 4 + x8 b256 8 + x16 b512 16 + x b32 1 + xy b64 2 + xyz b96 3 + xyzw b128 4 + d16_x b16 1 + d16_xy b16x2 2 for GFX8.0, 1 for GFX8.1 and GFX9+ + d16_xyz b16x3 3 for GFX8.0, 2 for GFX8.1 and GFX9+ + d16_xyzw b16x4 4 for GFX8.0, 2 for GFX8.1 and GFX9+ + ================= =================== ===================================== + +.. WARNING:: + There are exceptions from rules described above. + Operands which have type different from type specified by the opcode are + :ref:`tagged` in the description. + +Examples of instructions with different types of source and destination operands: + +.. parsed-literal:: + + s_bcnt0_i32_b64 + v_cvt_f32_u32 + +Examples of instructions with one data type: + +.. parsed-literal:: + + v_max3_f32 + v_max3_i16 + +Examples of instructions which operate with packed data: + +.. parsed-literal:: + + v_pk_add_u16 + v_pk_add_i16 + v_pk_add_f16 + +Examples of typeless instructions which operate on b128 data: + +.. parsed-literal:: + + buffer_store_dwordx4 + flat_load_dwordx4 + +.. _amdgpu_syn_instruction_enc: + +Encoding Suffices +~~~~~~~~~~~~~~~~~ + +Most *VOP1*, *VOP2* and *VOPC* instructions have several variants: +they may also be encoded in *VOP3*, *DPP* and *SDWA* formats. + +The assembler will automatically use optimal encoding based on instruction operands. 
+To force specific encoding, one can add a suffix to the opcode of the instruction: + + =================================================== ================= + Encoding Encoding Suffix + =================================================== ================= + *VOP1*, *VOP2* and *VOPC* (32-bit) encoding _e32 + *VOP3* (64-bit) encoding _e64 + *DPP* encoding _dpp + *SDWA* encoding _sdwa + =================================================== ================= + +These suffices are used in this reference to indicate the assumed encoding. +When no suffix is specified, native instruction encoding is implied. + +Operands +======== + +Syntax +~~~~~~ + +Syntax of generic operands is described :doc:`in this document`. + +For detailed information about operands follow *operand links* in GPU-specific documents: + +* :doc:`GFX7` +* :doc:`GFX8` +* :doc:`GFX9` +* :doc:`GFX10` + +Modifiers +========= + +Syntax +~~~~~~ + +Syntax of modifiers is described :doc:`in this document`. + +Information about modifiers supported for individual instructions may be found in GPU-specific documents: + +* :doc:`GFX7` +* :doc:`GFX8` +* :doc:`GFX9` +* :doc:`GFX10` + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUModifierSyntax.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUModifierSyntax.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUModifierSyntax.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUModifierSyntax.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,1957 @@ +====================================== +Syntax of AMDGPU Instruction Modifiers +====================================== + +.. 
contents:: + :local: + +Conventions +=========== + +The following notation is used throughout this document: + + =================== ============================================================= + Notation Description + =================== ============================================================= + {0..N} Any integer value in the range from 0 to N (inclusive). + Syntax and meaning of *x* is explained elsewhere. + =================== ============================================================= + +.. _amdgpu_syn_modifiers: + +Modifiers +========= + +DS Modifiers +------------ + +.. _amdgpu_synid_ds_offset80: + +offset0 +~~~~~~~ + +Specifies first 8-bit offset, in bytes. The default value is 0. + +Used with DS instructions that expect two addresses. + + =================== ==================================================================== + Syntax Description + =================== ==================================================================== + offset0:{0..0xFF} Specifies an unsigned 8-bit offset as a positive + :ref:`integer number ` + or an :ref:`absolute expression`. + =================== ==================================================================== + +Examples: + +.. parsed-literal:: + + offset0:0xff + offset0:2-x + offset0:-x-y + +.. _amdgpu_synid_ds_offset81: + +offset1 +~~~~~~~ + +Specifies second 8-bit offset, in bytes. The default value is 0. + +Used with DS instructions that expect two addresses. + + =================== ==================================================================== + Syntax Description + =================== ==================================================================== + offset1:{0..0xFF} Specifies an unsigned 8-bit offset as a positive + :ref:`integer number ` + or an :ref:`absolute expression`. + =================== ==================================================================== + +Examples: + +.. parsed-literal:: + + offset1:0xff + offset1:2-x + offset1:-x-y + +.. 
_amdgpu_synid_ds_offset16: + +offset +~~~~~~ + +Specifies a 16-bit offset, in bytes. The default value is 0. + +Used with DS instructions that expect a single address. + + ==================== ==================================================================== + Syntax Description + ==================== ==================================================================== + offset:{0..0xFFFF} Specifies an unsigned 16-bit offset as a positive + :ref:`integer number <amdgpu_synid_integer_number>` + or an :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + ==================== ==================================================================== + +Examples: + +.. parsed-literal:: + + offset:65535 + offset:0xffff + offset:-x-y + +.. _amdgpu_synid_sw_offset16: + +swizzle pattern +~~~~~~~~~~~~~~~ + +This is a special modifier which may be used with the *ds_swizzle_b32* instruction only. +It specifies a swizzle pattern in numeric or symbolic form. The default value is 0. + +See AMD documentation for more information. + + ======================================================= =========================================================== + Syntax Description + ======================================================= =========================================================== + offset:{0..0xFFFF} Specifies a 16-bit swizzle pattern. + offset:swizzle(QUAD_PERM,{0..3},{0..3},{0..3},{0..3}) Specifies a quad permute mode pattern. + + Each number is a lane *id*. + offset:swizzle(BITMASK_PERM, "<mask>") Specifies a bitmask permute mode pattern. + + The pattern converts a 5-bit lane *id* to another + lane *id* with which the lane interacts. + + *mask* is a 5-character sequence which + specifies how to transform the bits of the + lane *id*. + + The following characters are allowed: + + * "0" - set bit to 0. + + * "1" - set bit to 1. + + * "p" - preserve bit. + + * "i" - invert bit. + + offset:swizzle(BROADCAST,{2..32},{0..N}) Specifies a broadcast mode. + + Broadcasts the value of any particular lane to + all lanes in its group. 
+ + The first numeric parameter is a group + size and must be equal to 2, 4, 8, 16 or 32. + + The second numeric parameter is an index of the + lane being broadcast. + + The index must not exceed group size. + offset:swizzle(SWAP,{1..16}) Specifies a swap mode. + + Swaps the neighboring groups of + 1, 2, 4, 8 or 16 lanes. + offset:swizzle(REVERSE,{2..32}) Specifies a reverse mode. + + Reverses the lanes for groups of 2, 4, 8, 16 or 32 lanes. + ======================================================= =========================================================== + +Note: numeric values may be specified as either :ref:`integer numbers<amdgpu_synid_integer_number>` or +:ref:`absolute expressions<amdgpu_synid_absolute_expression>`. + +Examples: + +.. parsed-literal:: + + offset:255 + offset:0xffff + offset:swizzle(QUAD_PERM, 0, 1, 2, 3) + offset:swizzle(BITMASK_PERM, "01pi0") + offset:swizzle(BROADCAST, 2, 0) + offset:swizzle(SWAP, 8) + offset:swizzle(REVERSE, 30 + 2) + +.. _amdgpu_synid_gds: + +gds +~~~ + +Specifies whether to use GDS or LDS memory (LDS is the default). + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + gds Use GDS memory. + ======================================== ================================================ + + +EXP Modifiers +------------- + +.. _amdgpu_synid_done: + +done +~~~~ + +Specifies if this is the last export from the shader to the target. By default, +the *exp* instruction does not finish an export sequence. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + done Indicates the last export operation. + ======================================== ================================================ + +.. 
_amdgpu_synid_compr: + +compr +~~~~~ + +Indicates if the data are compressed (data are not compressed by default). + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + compr Data are compressed. + ======================================== ================================================ + +.. _amdgpu_synid_vm: + +vm +~~ + +Specifies valid mask flag state (off by default). + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + vm Set valid mask flag. + ======================================== ================================================ + +FLAT Modifiers +-------------- + +.. _amdgpu_synid_flat_offset12: + +offset12 +~~~~~~~~ + +Specifies an immediate unsigned 12-bit offset, in bytes. The default value is 0. + +Cannot be used with *global/scratch* opcodes. GFX9 only. + + ================= ==================================================================== + Syntax Description + ================= ==================================================================== + offset:{0..4095} Specifies a 12-bit unsigned offset as a positive + :ref:`integer number ` + or an :ref:`absolute expression`. + ================= ==================================================================== + +Examples: + +.. parsed-literal:: + + offset:4095 + offset:x-0xff + +.. _amdgpu_synid_flat_offset13s: + +offset13s +~~~~~~~~~ + +Specifies an immediate signed 13-bit offset, in bytes. The default value is 0. + +Can be used with *global/scratch* opcodes only. GFX9 only. 
+ + ===================== ==================================================================== + Syntax Description + ===================== ==================================================================== + offset:{-4096..4095} Specifies a 13-bit signed offset as an + :ref:`integer number ` + or an :ref:`absolute expression`. + ===================== ==================================================================== + +Examples: + +.. parsed-literal:: + + offset:-4000 + offset:0x10 + offset:-x + +.. _amdgpu_synid_flat_offset12s: + +offset12s +~~~~~~~~~ + +Specifies an immediate signed 12-bit offset, in bytes. The default value is 0. + +Can be used with *global/scratch* opcodes only. + +GFX10 only. + + ===================== ==================================================================== + Syntax Description + ===================== ==================================================================== + offset:{-2048..2047} Specifies a 12-bit signed offset as an + :ref:`integer number ` + or an :ref:`absolute expression`. + ===================== ==================================================================== + +Examples: + +.. parsed-literal:: + + offset:-2000 + offset:0x10 + offset:-x+y + +.. _amdgpu_synid_flat_offset11: + +offset11 +~~~~~~~~ + +Specifies an immediate unsigned 11-bit offset, in bytes. The default value is 0. + +Cannot be used with *global/scratch* opcodes. + +GFX10 only. + + ================= ==================================================================== + Syntax Description + ================= ==================================================================== + offset:{0..2047} Specifies an 11-bit unsigned offset as a positive + :ref:`integer number ` + or an :ref:`absolute expression`. + ================= ==================================================================== + +Examples: + +.. parsed-literal:: + + offset:2047 + offset:x+0xff + +dlc +~~~ + +See a description :ref:`here`. GFX10 only. 
+ +glc +~~~ + +See a description :ref:`here<amdgpu_synid_glc>`. + +lds +~~~ + +See a description :ref:`here<amdgpu_synid_lds>`. GFX10 only. + +slc +~~~ + +See a description :ref:`here<amdgpu_synid_slc>`. + +tfe +~~~ + +See a description :ref:`here<amdgpu_synid_tfe>`. + +nv +~~ + +See a description :ref:`here<amdgpu_synid_nv>`. + +MIMG Modifiers +-------------- + +.. _amdgpu_synid_dmask: + +dmask +~~~~~ + +Specifies which channels (image components) are used by the operation. By default, no channels +are used. + + =============== ==================================================================== + Syntax Description + =============== ==================================================================== + dmask:{0..15} Specifies image channels as a positive + :ref:`integer number <amdgpu_synid_integer_number>` + or an :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + + Each bit corresponds to one of 4 image components (RGBA). + + If the specified bit value is 0, the component is not used; + value 1 means that the component is used. + =============== ==================================================================== + +This modifier has some limitations depending on instruction kind: + + =================================================== ======================== + Instruction Kind Valid dmask Values + =================================================== ======================== + 32-bit atomic *cmpswap* 0x3 + 32-bit atomic instructions except for *cmpswap* 0x1 + 64-bit atomic *cmpswap* 0xF + 64-bit atomic instructions except for *cmpswap* 0x3 + *gather4* 0x1, 0x2, 0x4, 0x8 + Other instructions any value + =================================================== ======================== + +Examples: + +.. parsed-literal:: + + dmask:0xf + dmask:0b1111 + dmask:x|y|z + +.. _amdgpu_synid_unorm: + +unorm +~~~~~ + +Specifies whether the address is normalized or not (the address is normalized by default). + + ======================== ======================================== + Syntax Description + ======================== ======================================== + unorm Force the address to be unnormalized. 
+ ======================== ======================================== + +glc +~~~ + +See a description :ref:`here<amdgpu_synid_glc>`. + +slc +~~~ + +See a description :ref:`here<amdgpu_synid_slc>`. + +.. _amdgpu_synid_r128: + +r128 +~~~~ + +Specifies texture resource size. The default size is 256 bits. + +GFX7, GFX8 and GFX10 only. + + =================== ================================================ + Syntax Description + =================== ================================================ + r128 Specifies a 128-bit texture resource size. + =================== ================================================ + +.. WARNING:: Using this modifier should decrease *rsrc* operand size from 8 to 4 dwords, but the assembler does not currently support this feature. + +tfe +~~~ + +See a description :ref:`here<amdgpu_synid_tfe>`. + +.. _amdgpu_synid_lwe: + +lwe +~~~ + +Specifies LOD warning status (LOD warning is disabled by default). + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + lwe Enables LOD warning. + ======================================== ================================================ + +.. _amdgpu_synid_da: + +da +~~ + +Specifies if an array index must be sent to TA. By default, array index is not sent. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + da Send an array-index to TA. + ======================================== ================================================ + +.. _amdgpu_synid_d16: + +d16 +~~~ + +Specifies data size: 16 or 32 bits (32 bits by default). Not supported by GFX7. 
+ + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + d16 Enables 16-bit data mode. + + On loads, convert data in memory to 16-bit + format before storing it in VGPRs. + + For stores, convert 16-bit data in VGPRs to + 32 bits before going to memory. + + Note that GFX8.0 does not support data packing. + Each 16-bit data element occupies 1 VGPR. + + GFX8.1, GFX9 and GFX10 support data packing. + Each pair of 16-bit data elements + occupies 1 VGPR. + ======================================== ================================================ + +.. _amdgpu_synid_a16: + +a16 +~~~ + +Specifies size of image address components: 16 or 32 bits (32 bits by default). +GFX9 and GFX10 only. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + a16 Enables 16-bit image address components. + ======================================== ================================================ + +.. _amdgpu_synid_dim: + +dim +~~~ + +Specifies surface dimension. This is a mandatory modifier. There is no default value. + +GFX10 only. + + =============================== ========================================================= + Syntax Description + =============================== ========================================================= + dim:1D One-dimensional image. + dim:2D Two-dimensional image. + dim:3D Three-dimensional image. + dim:CUBE Cubemap array. + dim:1D_ARRAY One-dimensional image array. + dim:2D_ARRAY Two-dimensional image array. + dim:2D_MSAA Two-dimensional multi-sample anti-aliasing image. + dim:2D_MSAA_ARRAY Two-dimensional multi-sample anti-aliasing image array. 
+ =============================== ========================================================= + +The following table defines an alternative syntax which is supported +for compatibility with the SP3 assembler: + + =============================== ========================================================= + Syntax Description + =============================== ========================================================= + dim:SQ_RSRC_IMG_1D One-dimensional image. + dim:SQ_RSRC_IMG_2D Two-dimensional image. + dim:SQ_RSRC_IMG_3D Three-dimensional image. + dim:SQ_RSRC_IMG_CUBE Cubemap array. + dim:SQ_RSRC_IMG_1D_ARRAY One-dimensional image array. + dim:SQ_RSRC_IMG_2D_ARRAY Two-dimensional image array. + dim:SQ_RSRC_IMG_2D_MSAA Two-dimensional multi-sample anti-aliasing image. + dim:SQ_RSRC_IMG_2D_MSAA_ARRAY Two-dimensional multi-sample anti-aliasing image array. + =============================== ========================================================= + +dlc +~~~ + +See a description :ref:`here<amdgpu_synid_dlc>`. GFX10 only. + +Miscellaneous Modifiers +----------------------- + +.. _amdgpu_synid_dlc: + +dlc +~~~ + +Controls device level cache policy for memory operations. Used for synchronization. +When specified, forces operation to bypass device level cache making the operation device +level coherent. By default, instructions use device level cache. + +GFX10 only. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + dlc Bypass device level cache. + ======================================== ================================================ + +.. _amdgpu_synid_glc: + +glc +~~~ + +This modifier has a different meaning for loads, stores, and atomic operations. +The default value is off (0). + +See AMD documentation for details. 
+ + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + glc Set glc bit to 1. + ======================================== ================================================ + +.. _amdgpu_synid_lds: + +lds +~~~ + +Specifies where to store the result: VGPRs or LDS (VGPRs by default). + + ======================================== =========================== + Syntax Description + ======================================== =========================== + lds Store result in LDS. + ======================================== =========================== + +.. _amdgpu_synid_nv: + +nv +~~ + +Specifies if instruction is operating on non-volatile memory. By default, memory is volatile. + +GFX9 only. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + nv Indicates that instruction operates on + non-volatile memory. + ======================================== ================================================ + +.. _amdgpu_synid_slc: + +slc +~~~ + +Specifies cache policy. The default value is off (0). + +See AMD documentation for details. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + slc Set slc bit to 1. + ======================================== ================================================ + +.. _amdgpu_synid_tfe: + +tfe +~~~ + +Controls access to partially resident textures. The default value is off (0). + +See AMD documentation for details. 
+ + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + tfe Set tfe bit to 1. + ======================================== ================================================ + +MUBUF/MTBUF Modifiers +--------------------- + +.. _amdgpu_synid_idxen: + +idxen +~~~~~ + +Specifies whether address components include an index. By default, no components are used. + +Can be used together with :ref:`offen<amdgpu_synid_offen>`. + +Cannot be used with :ref:`addr64<amdgpu_synid_addr64>`. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + idxen Address components include an index. + ======================================== ================================================ + +.. _amdgpu_synid_offen: + +offen +~~~~~ + +Specifies whether address components include an offset. By default, no components are used. + +Can be used together with :ref:`idxen<amdgpu_synid_idxen>`. + +Cannot be used with :ref:`addr64<amdgpu_synid_addr64>`. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + offen Address components include an offset. + ======================================== ================================================ + +.. _amdgpu_synid_addr64: + +addr64 +~~~~~~ + +Specifies whether a 64-bit address is used. By default, no address is used. + +GFX7 only. Cannot be used with :ref:`offen<amdgpu_synid_offen>` and +:ref:`idxen<amdgpu_synid_idxen>` modifiers. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + addr64 A 64-bit address is used. 
+ ======================================== ================================================ + +.. _amdgpu_synid_buf_offset12: + +offset12 +~~~~~~~~ + +Specifies an immediate unsigned 12-bit offset, in bytes. The default value is 0. + + ================== ==================================================================== + Syntax Description + ================== ==================================================================== + offset:{0..0xFFF} Specifies a 12-bit unsigned offset as a positive + :ref:`integer number <amdgpu_synid_integer_number>` + or an :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + ================== ==================================================================== + +Examples: + +.. parsed-literal:: + + offset:x+y + offset:0x10 + +glc +~~~ + +See a description :ref:`here<amdgpu_synid_glc>`. + +slc +~~~ + +See a description :ref:`here<amdgpu_synid_slc>`. + +lds +~~~ + +See a description :ref:`here<amdgpu_synid_lds>`. + +dlc +~~~ + +See a description :ref:`here<amdgpu_synid_dlc>`. GFX10 only. + +tfe +~~~ + +See a description :ref:`here<amdgpu_synid_tfe>`. + +.. _amdgpu_synid_fmt: + +fmt +~~~ + +Specifies data and numeric formats used by the operation. +The default numeric format is BUF_NUM_FORMAT_UNORM. +The default data format is BUF_DATA_FORMAT_8. + + ========================================= =============================================================== + Syntax Description + ========================================= =============================================================== + format:{0..127} Use format specified as either an + :ref:`integer number<amdgpu_synid_integer_number>` or an + :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + format:[<data format>] Use the specified data format and + default numeric format. + format:[<numeric format>] Use the specified numeric format and + default data format. + format:[<data format>, <numeric format>] Use the specified data and numeric formats. + format:[<numeric format>, <data format>] Use the specified data and numeric formats. + ========================================= =============================================================== + +.. 
_amdgpu_synid_format_data: + +Supported data formats are defined in the following table: + + ========================================= =============================== + Syntax Note + ========================================= =============================== + BUF_DATA_FORMAT_INVALID + BUF_DATA_FORMAT_8 Default value. + BUF_DATA_FORMAT_16 + BUF_DATA_FORMAT_8_8 + BUF_DATA_FORMAT_32 + BUF_DATA_FORMAT_16_16 + BUF_DATA_FORMAT_10_11_11 + BUF_DATA_FORMAT_11_11_10 + BUF_DATA_FORMAT_10_10_10_2 + BUF_DATA_FORMAT_2_10_10_10 + BUF_DATA_FORMAT_8_8_8_8 + BUF_DATA_FORMAT_32_32 + BUF_DATA_FORMAT_16_16_16_16 + BUF_DATA_FORMAT_32_32_32 + BUF_DATA_FORMAT_32_32_32_32 + BUF_DATA_FORMAT_RESERVED_15 + ========================================= =============================== + +.. _amdgpu_synid_format_num: + +Supported numeric formats are defined below: + + ========================================= =============================== + Syntax Note + ========================================= =============================== + BUF_NUM_FORMAT_UNORM Default value. + BUF_NUM_FORMAT_SNORM + BUF_NUM_FORMAT_USCALED + BUF_NUM_FORMAT_SSCALED + BUF_NUM_FORMAT_UINT + BUF_NUM_FORMAT_SINT + BUF_NUM_FORMAT_SNORM_OGL GFX7 only. + BUF_NUM_FORMAT_RESERVED_6 GFX8 and GFX9 only. + BUF_NUM_FORMAT_FLOAT + ========================================= =============================== + +Examples: + +.. parsed-literal:: + + format:0 + format:127 + format:[BUF_DATA_FORMAT_16] + format:[BUF_DATA_FORMAT_16,BUF_NUM_FORMAT_SSCALED] + format:[BUF_NUM_FORMAT_FLOAT] + +.. _amdgpu_synid_ufmt: + +ufmt +~~~~ + +Specifies a unified format used by the operation. +The default format is BUF_FMT_8_UNORM. +GFX10 only. 
+ + ========================================= =============================================================== + Syntax Description + ========================================= =============================================================== + format:{0..127} Use unified format specified as either an + :ref:`integer number<amdgpu_synid_integer_number>` or an + :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + Note that unified format numbers are not compatible with + format numbers used for pre-GFX10 ISA. + format:[<unified format>] Use the specified unified format. + ========================================= =============================================================== + +Unified format is a replacement for :ref:`data<amdgpu_synid_format_data>` +and :ref:`numeric<amdgpu_synid_format_num>` formats. For compatibility with older ISA, +:ref:`syntax with data and numeric formats<amdgpu_synid_fmt>` is still accepted +provided that the combination of formats can be mapped to a unified format. + +Supported unified formats and equivalent combinations of data and numeric formats +are defined below: + + ============================== ============================== ============================= + Syntax Equivalent Data Format Equivalent Numeric Format + ============================== ============================== ============================= + BUF_FMT_INVALID BUF_DATA_FORMAT_INVALID BUF_NUM_FORMAT_UNORM + + BUF_FMT_8_UNORM BUF_DATA_FORMAT_8 BUF_NUM_FORMAT_UNORM + BUF_FMT_8_SNORM BUF_DATA_FORMAT_8 BUF_NUM_FORMAT_SNORM + BUF_FMT_8_USCALED BUF_DATA_FORMAT_8 BUF_NUM_FORMAT_USCALED + BUF_FMT_8_SSCALED BUF_DATA_FORMAT_8 BUF_NUM_FORMAT_SSCALED + BUF_FMT_8_UINT BUF_DATA_FORMAT_8 BUF_NUM_FORMAT_UINT + BUF_FMT_8_SINT BUF_DATA_FORMAT_8 BUF_NUM_FORMAT_SINT + + BUF_FMT_16_UNORM BUF_DATA_FORMAT_16 BUF_NUM_FORMAT_UNORM + BUF_FMT_16_SNORM BUF_DATA_FORMAT_16 BUF_NUM_FORMAT_SNORM + BUF_FMT_16_USCALED BUF_DATA_FORMAT_16 BUF_NUM_FORMAT_USCALED + BUF_FMT_16_SSCALED BUF_DATA_FORMAT_16 BUF_NUM_FORMAT_SSCALED + BUF_FMT_16_UINT BUF_DATA_FORMAT_16 BUF_NUM_FORMAT_UINT + BUF_FMT_16_SINT BUF_DATA_FORMAT_16 BUF_NUM_FORMAT_SINT + 
BUF_FMT_16_FLOAT BUF_DATA_FORMAT_16 BUF_NUM_FORMAT_FLOAT + + BUF_FMT_8_8_UNORM BUF_DATA_FORMAT_8_8 BUF_NUM_FORMAT_UNORM + BUF_FMT_8_8_SNORM BUF_DATA_FORMAT_8_8 BUF_NUM_FORMAT_SNORM + BUF_FMT_8_8_USCALED BUF_DATA_FORMAT_8_8 BUF_NUM_FORMAT_USCALED + BUF_FMT_8_8_SSCALED BUF_DATA_FORMAT_8_8 BUF_NUM_FORMAT_SSCALED + BUF_FMT_8_8_UINT BUF_DATA_FORMAT_8_8 BUF_NUM_FORMAT_UINT + BUF_FMT_8_8_SINT BUF_DATA_FORMAT_8_8 BUF_NUM_FORMAT_SINT + + BUF_FMT_32_UINT BUF_DATA_FORMAT_32 BUF_NUM_FORMAT_UINT + BUF_FMT_32_SINT BUF_DATA_FORMAT_32 BUF_NUM_FORMAT_SINT + BUF_FMT_32_FLOAT BUF_DATA_FORMAT_32 BUF_NUM_FORMAT_FLOAT + + BUF_FMT_16_16_UNORM BUF_DATA_FORMAT_16_16 BUF_NUM_FORMAT_UNORM + BUF_FMT_16_16_SNORM BUF_DATA_FORMAT_16_16 BUF_NUM_FORMAT_SNORM + BUF_FMT_16_16_USCALED BUF_DATA_FORMAT_16_16 BUF_NUM_FORMAT_USCALED + BUF_FMT_16_16_SSCALED BUF_DATA_FORMAT_16_16 BUF_NUM_FORMAT_SSCALED + BUF_FMT_16_16_UINT BUF_DATA_FORMAT_16_16 BUF_NUM_FORMAT_UINT + BUF_FMT_16_16_SINT BUF_DATA_FORMAT_16_16 BUF_NUM_FORMAT_SINT + BUF_FMT_16_16_FLOAT BUF_DATA_FORMAT_16_16 BUF_NUM_FORMAT_FLOAT + + BUF_FMT_10_11_11_UNORM BUF_DATA_FORMAT_10_11_11 BUF_NUM_FORMAT_UNORM + BUF_FMT_10_11_11_SNORM BUF_DATA_FORMAT_10_11_11 BUF_NUM_FORMAT_SNORM + BUF_FMT_10_11_11_USCALED BUF_DATA_FORMAT_10_11_11 BUF_NUM_FORMAT_USCALED + BUF_FMT_10_11_11_SSCALED BUF_DATA_FORMAT_10_11_11 BUF_NUM_FORMAT_SSCALED + BUF_FMT_10_11_11_UINT BUF_DATA_FORMAT_10_11_11 BUF_NUM_FORMAT_UINT + BUF_FMT_10_11_11_SINT BUF_DATA_FORMAT_10_11_11 BUF_NUM_FORMAT_SINT + BUF_FMT_10_11_11_FLOAT BUF_DATA_FORMAT_10_11_11 BUF_NUM_FORMAT_FLOAT + + BUF_FMT_11_11_10_UNORM BUF_DATA_FORMAT_11_11_10 BUF_NUM_FORMAT_UNORM + BUF_FMT_11_11_10_SNORM BUF_DATA_FORMAT_11_11_10 BUF_NUM_FORMAT_SNORM + BUF_FMT_11_11_10_USCALED BUF_DATA_FORMAT_11_11_10 BUF_NUM_FORMAT_USCALED + BUF_FMT_11_11_10_SSCALED BUF_DATA_FORMAT_11_11_10 BUF_NUM_FORMAT_SSCALED + BUF_FMT_11_11_10_UINT BUF_DATA_FORMAT_11_11_10 BUF_NUM_FORMAT_UINT + BUF_FMT_11_11_10_SINT BUF_DATA_FORMAT_11_11_10 BUF_NUM_FORMAT_SINT 
+ BUF_FMT_11_11_10_FLOAT BUF_DATA_FORMAT_11_11_10 BUF_NUM_FORMAT_FLOAT + + BUF_FMT_10_10_10_2_UNORM BUF_DATA_FORMAT_10_10_10_2 BUF_NUM_FORMAT_UNORM + BUF_FMT_10_10_10_2_SNORM BUF_DATA_FORMAT_10_10_10_2 BUF_NUM_FORMAT_SNORM + BUF_FMT_10_10_10_2_USCALED BUF_DATA_FORMAT_10_10_10_2 BUF_NUM_FORMAT_USCALED + BUF_FMT_10_10_10_2_SSCALED BUF_DATA_FORMAT_10_10_10_2 BUF_NUM_FORMAT_SSCALED + BUF_FMT_10_10_10_2_UINT BUF_DATA_FORMAT_10_10_10_2 BUF_NUM_FORMAT_UINT + BUF_FMT_10_10_10_2_SINT BUF_DATA_FORMAT_10_10_10_2 BUF_NUM_FORMAT_SINT + + BUF_FMT_2_10_10_10_UNORM BUF_DATA_FORMAT_2_10_10_10 BUF_NUM_FORMAT_UNORM + BUF_FMT_2_10_10_10_SNORM BUF_DATA_FORMAT_2_10_10_10 BUF_NUM_FORMAT_SNORM + BUF_FMT_2_10_10_10_USCALED BUF_DATA_FORMAT_2_10_10_10 BUF_NUM_FORMAT_USCALED + BUF_FMT_2_10_10_10_SSCALED BUF_DATA_FORMAT_2_10_10_10 BUF_NUM_FORMAT_SSCALED + BUF_FMT_2_10_10_10_UINT BUF_DATA_FORMAT_2_10_10_10 BUF_NUM_FORMAT_UINT + BUF_FMT_2_10_10_10_SINT BUF_DATA_FORMAT_2_10_10_10 BUF_NUM_FORMAT_SINT + + BUF_FMT_8_8_8_8_UNORM BUF_DATA_FORMAT_8_8_8_8 BUF_NUM_FORMAT_UNORM + BUF_FMT_8_8_8_8_SNORM BUF_DATA_FORMAT_8_8_8_8 BUF_NUM_FORMAT_SNORM + BUF_FMT_8_8_8_8_USCALED BUF_DATA_FORMAT_8_8_8_8 BUF_NUM_FORMAT_USCALED + BUF_FMT_8_8_8_8_SSCALED BUF_DATA_FORMAT_8_8_8_8 BUF_NUM_FORMAT_SSCALED + BUF_FMT_8_8_8_8_UINT BUF_DATA_FORMAT_8_8_8_8 BUF_NUM_FORMAT_UINT + BUF_FMT_8_8_8_8_SINT BUF_DATA_FORMAT_8_8_8_8 BUF_NUM_FORMAT_SINT + + BUF_FMT_32_32_UINT BUF_DATA_FORMAT_32_32 BUF_NUM_FORMAT_UINT + BUF_FMT_32_32_SINT BUF_DATA_FORMAT_32_32 BUF_NUM_FORMAT_SINT + BUF_FMT_32_32_FLOAT BUF_DATA_FORMAT_32_32 BUF_NUM_FORMAT_FLOAT + + BUF_FMT_16_16_16_16_UNORM BUF_DATA_FORMAT_16_16_16_16 BUF_NUM_FORMAT_UNORM + BUF_FMT_16_16_16_16_SNORM BUF_DATA_FORMAT_16_16_16_16 BUF_NUM_FORMAT_SNORM + BUF_FMT_16_16_16_16_USCALED BUF_DATA_FORMAT_16_16_16_16 BUF_NUM_FORMAT_USCALED + BUF_FMT_16_16_16_16_SSCALED BUF_DATA_FORMAT_16_16_16_16 BUF_NUM_FORMAT_SSCALED + BUF_FMT_16_16_16_16_UINT BUF_DATA_FORMAT_16_16_16_16 BUF_NUM_FORMAT_UINT + 
BUF_FMT_16_16_16_16_SINT BUF_DATA_FORMAT_16_16_16_16 BUF_NUM_FORMAT_SINT + BUF_FMT_16_16_16_16_FLOAT BUF_DATA_FORMAT_16_16_16_16 BUF_NUM_FORMAT_FLOAT + + BUF_FMT_32_32_32_UINT BUF_DATA_FORMAT_32_32_32 BUF_NUM_FORMAT_UINT + BUF_FMT_32_32_32_SINT BUF_DATA_FORMAT_32_32_32 BUF_NUM_FORMAT_SINT + BUF_FMT_32_32_32_FLOAT BUF_DATA_FORMAT_32_32_32 BUF_NUM_FORMAT_FLOAT + BUF_FMT_32_32_32_32_UINT BUF_DATA_FORMAT_32_32_32_32 BUF_NUM_FORMAT_UINT + BUF_FMT_32_32_32_32_SINT BUF_DATA_FORMAT_32_32_32_32 BUF_NUM_FORMAT_SINT + BUF_FMT_32_32_32_32_FLOAT BUF_DATA_FORMAT_32_32_32_32 BUF_NUM_FORMAT_FLOAT + ============================== ============================== ============================= + +Examples: + +.. parsed-literal:: + + format:0 + format:[BUF_FMT_32_UINT] + +SMRD/SMEM Modifiers +------------------- + +glc +~~~ + +See a description :ref:`here<amdgpu_synid_glc>`. + +nv +~~ + +See a description :ref:`here<amdgpu_synid_nv>`. GFX9 only. + +dlc +~~~ + +See a description :ref:`here<amdgpu_synid_dlc>`. GFX10 only. + +VINTRP Modifiers +---------------- + +.. _amdgpu_synid_high: + +high +~~~~ + +Specifies which half of the LDS word to use. Low half of LDS word is used by default. +GFX9 and GFX10 only. + + ======================================== ================================ + Syntax Description + ======================================== ================================ + high Use high half of LDS word. + ======================================== ================================ + +DPP8 Modifiers +-------------- + +GFX10 only. + +.. _amdgpu_synid_dpp8_sel: + +dpp8_sel +~~~~~~~~ + +Selects which lanes to pull data from, within a group of 8 lanes. This is a mandatory modifier. +There is no default value. + +GFX10 only. + +The *dpp8_sel* modifier must specify exactly 8 values. +First value selects which lane to read from to supply data into lane 0. +Second value controls lane 1 and so on. + +Each value may be specified as either +an :ref:`integer number<amdgpu_synid_integer_number>` or +an :ref:`absolute expression<amdgpu_synid_absolute_expression>`. 
+ + =============================================================== =========================== + Syntax Description + =============================================================== =========================== + dpp8:[{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7}] Select lanes to read from. + =============================================================== =========================== + +Examples: + +.. parsed-literal:: + + dpp8:[7,6,5,4,3,2,1,0] + dpp8:[0,1,0,1,0,1,0,1] + +.. _amdgpu_synid_fi8: + +fi +~~ + +Controls interaction with inactive lanes for *dpp8* instructions. The default value is zero. + +Note: *inactive* lanes are those whose :ref:`exec<amdgpu_synid_exec>` mask bit is zero. + +GFX10 only. + + ==================================== ===================================================== + Syntax Description + ==================================== ===================================================== + fi:0 Fetch zero when accessing data from inactive lanes. + fi:1 Fetch pre-existing values from inactive lanes. + ==================================== ===================================================== + +Note: numeric values may be specified as either :ref:`integer numbers<amdgpu_synid_integer_number>` or +:ref:`absolute expressions<amdgpu_synid_absolute_expression>`. + +DPP Modifiers +------------- + +GFX8, GFX9 and GFX10 only. + +.. _amdgpu_synid_dpp_ctrl: + +dpp_ctrl +~~~~~~~~ + +Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value. + +GFX8 and GFX9 only. Use :ref:`dpp16_ctrl<amdgpu_synid_dpp16_ctrl>` for GFX10. + +Note: the lanes of a wavefront are organized in four *rows* and four *banks*. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + quad_perm:[{0..3},{0..3},{0..3},{0..3}] Full permute of 4 threads. + row_mirror Mirror threads within row. + row_half_mirror Mirror threads within 1/2 row (8 threads). 
+ row_bcast:15 Broadcast 15th thread of each row to next row. + row_bcast:31 Broadcast thread 31 to rows 2 and 3. + wave_shl:1 Wavefront left shift by 1 thread. + wave_rol:1 Wavefront left rotate by 1 thread. + wave_shr:1 Wavefront right shift by 1 thread. + wave_ror:1 Wavefront right rotate by 1 thread. + row_shl:{1..15} Row shift left by 1-15 threads. + row_shr:{1..15} Row shift right by 1-15 threads. + row_ror:{1..15} Row rotate right by 1-15 threads. + ======================================== ================================================ + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + quad_perm:[0, 1, 2, 3] + row_shl:3 + +.. _amdgpu_synid_dpp16_ctrl: + +dpp16_ctrl +~~~~~~~~~~ + +Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value. + +GFX10 only. Use :ref:`dpp_ctrl` for GFX8 and GFX9. + +Note: the lanes of a wavefront are organized in four *rows* and four *banks*. +(There are only two rows in *wave32* mode.) + + ======================================== ==================================================== + Syntax Description + ======================================== ==================================================== + quad_perm:[{0..3},{0..3},{0..3},{0..3}] Full permute of 4 threads. + row_mirror Mirror threads within row. + row_half_mirror Mirror threads within 1/2 row (8 threads). + row_share:{0..15} Share the value from the specified lane with other + lanes in the row. + row_xmask:{0..15} Fetch from XOR(current lane id, specified lane id). + row_shl:{1..15} Row shift left by 1-15 threads. + row_shr:{1..15} Row shift right by 1-15 threads. + row_ror:{1..15} Row rotate right by 1-15 threads. + ======================================== ==================================================== + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. 
+ +Examples: + +.. parsed-literal:: + + quad_perm:[0, 1, 2, 3] + row_shl:3 + +.. _amdgpu_synid_dpp32_ctrl: + +dpp32_ctrl +~~~~~~~~~~ + +Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value. + +May be used only with GFX90A 32-bit instructions. + +Note: the lanes of a wavefront are organized in four *rows* and four *banks*. + + ======================================== ================================================== + Syntax Description + ======================================== ================================================== + quad_perm:[{0..3},{0..3},{0..3},{0..3}] Full permute of 4 threads. + row_mirror Mirror threads within row. + row_half_mirror Mirror threads within 1/2 row (8 threads). + row_bcast:15 Broadcast 15th thread of each row to next row. + row_bcast:31 Broadcast thread 31 to rows 2 and 3. + wave_shl:1 Wavefront left shift by 1 thread. + wave_rol:1 Wavefront left rotate by 1 thread. + wave_shr:1 Wavefront right shift by 1 thread. + wave_ror:1 Wavefront right rotate by 1 thread. + row_shl:{1..15} Row shift left by 1-15 threads. + row_shr:{1..15} Row shift right by 1-15 threads. + row_ror:{1..15} Row rotate right by 1-15 threads. + row_newbcast:{1..15} Broadcast a thread within a row to the whole row. + ======================================== ================================================== + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + quad_perm:[0, 1, 2, 3] + row_shl:3 + + +.. _amdgpu_synid_dpp64_ctrl: + +dpp64_ctrl +~~~~~~~~~~ + +Specifies how data are shared between threads. This is a mandatory modifier. +There is no default value. + +May be used only with GFX90A 64-bit instructions. + +Note: the lanes of a wavefront are organized in four *rows* and four *banks*. 
+ + ======================================== ================================================== + Syntax Description + ======================================== ================================================== + row_newbcast:{1..15} Broadcast a thread within a row to the whole row. + ======================================== ================================================== + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + row_newbcast:3 + + +.. _amdgpu_synid_row_mask: + +row_mask +~~~~~~~~ + +Controls which rows are enabled for data sharing. By default, all rows are enabled. + +Note: the lanes of a wavefront are organized in four *rows* and four *banks*. +(There are only two rows in *wave32* mode.) + + ================= ==================================================================== + Syntax Description + ================= ==================================================================== + row_mask:{0..15} Specifies a *row mask* as a positive + :ref:`integer number ` + or an :ref:`absolute expression`. + + Each of 4 bits in the mask controls one row + (0 - disabled, 1 - enabled). + + In *wave32* mode the values should be limited to 0..7. + ================= ==================================================================== + +Examples: + +.. parsed-literal:: + + row_mask:0xf + row_mask:0b1010 + row_mask:x|y + +.. _amdgpu_synid_bank_mask: + +bank_mask +~~~~~~~~~ + +Controls which banks are enabled for data sharing. By default, all banks are enabled. + +Note: the lanes of a wavefront are organized in four *rows* and four *banks*. +(There are only two rows in *wave32* mode.) 
+ + ================== ==================================================================== + Syntax Description + ================== ==================================================================== + bank_mask:{0..15} Specifies a *bank mask* as a positive + :ref:`integer number <amdgpu_synid_integer_number>` + or an :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + + Each of 4 bits in the mask controls one bank + (0 - disabled, 1 - enabled). + ================== ==================================================================== + +Examples: + +.. parsed-literal:: + + bank_mask:0x3 + bank_mask:0b0011 + bank_mask:x&y + +.. _amdgpu_synid_bound_ctrl: + +bound_ctrl +~~~~~~~~~~ + +Controls data sharing when accessing an invalid lane. By default, data sharing with +invalid lanes is disabled. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + bound_ctrl:1 Enables data sharing with invalid lanes. + + Accessing data from an invalid lane will + return zero. + ======================================== ================================================ + +.. _amdgpu_synid_fi16: + +fi +~~ + +Controls interaction with *inactive* lanes for *dpp16* instructions. The default value is zero. + +Note: *inactive* lanes are those whose :ref:`exec<amdgpu_synid_exec>` mask bit is zero. + +GFX10 only. + + ======================================== ================================================== + Syntax Description + ======================================== ================================================== + fi:0 Interaction with inactive lanes is controlled by + :ref:`bound_ctrl<amdgpu_synid_bound_ctrl>`. + + fi:1 Fetch pre-existing values from inactive lanes. + ======================================== ================================================== + +Note: numeric values may be specified as either :ref:`integer numbers<amdgpu_synid_integer_number>` or +:ref:`absolute expressions<amdgpu_synid_absolute_expression>`. + +SDWA Modifiers +-------------- + +GFX8, GFX9 and GFX10 only.
+ +clamp +~~~~~ + +See a description :ref:`here`. + +omod +~~~~ + +See a description :ref:`here`. + +GFX9 and GFX10 only. + +.. _amdgpu_synid_dst_sel: + +dst_sel +~~~~~~~ + +Selects which bits in the destination are affected. By default, all bits are affected. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + dst_sel:DWORD Use bits 31:0. + dst_sel:BYTE_0 Use bits 7:0. + dst_sel:BYTE_1 Use bits 15:8. + dst_sel:BYTE_2 Use bits 23:16. + dst_sel:BYTE_3 Use bits 31:24. + dst_sel:WORD_0 Use bits 15:0. + dst_sel:WORD_1 Use bits 31:16. + ======================================== ================================================ + +.. _amdgpu_synid_dst_unused: + +dst_unused +~~~~~~~~~~ + +Controls what to do with the bits in the destination which are not selected +by :ref:`dst_sel`. +By default, unused bits are preserved. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + dst_unused:UNUSED_PAD Pad with zeros. + dst_unused:UNUSED_SEXT Sign-extend upper bits, zero lower bits. + dst_unused:UNUSED_PRESERVE Preserve bits. + ======================================== ================================================ + +.. _amdgpu_synid_src0_sel: + +src0_sel +~~~~~~~~ + +Controls which bits in the src0 are used. By default, all bits are used. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + src0_sel:DWORD Use bits 31:0. + src0_sel:BYTE_0 Use bits 7:0. + src0_sel:BYTE_1 Use bits 15:8. + src0_sel:BYTE_2 Use bits 23:16. + src0_sel:BYTE_3 Use bits 31:24. + src0_sel:WORD_0 Use bits 15:0. + src0_sel:WORD_1 Use bits 31:16. 
+ ======================================== ================================================ + +.. _amdgpu_synid_src1_sel: + +src1_sel +~~~~~~~~ + +Controls which bits in the src1 are used. By default, all bits are used. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + src1_sel:DWORD Use bits 31:0. + src1_sel:BYTE_0 Use bits 7:0. + src1_sel:BYTE_1 Use bits 15:8. + src1_sel:BYTE_2 Use bits 23:16. + src1_sel:BYTE_3 Use bits 31:24. + src1_sel:WORD_0 Use bits 15:0. + src1_sel:WORD_1 Use bits 31:16. + ======================================== ================================================ + +.. _amdgpu_synid_sdwa_operand_modifiers: + +SDWA Operand Modifiers +---------------------- + +Operand modifiers are not used separately. They are applied to source operands. + +GFX8, GFX9 and GFX10 only. + +abs +~~~ + +See a description :ref:`here`. + +neg +~~~ + +See a description :ref:`here`. + +.. _amdgpu_synid_sext: + +sext +~~~~ + +Sign-extends value of a (sub-dword) operand to fill all 32 bits. +Has no effect for 32-bit operands. + +Valid for integer operands only. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + sext() Sign-extend operand value. + ======================================== ================================================ + +Examples: + +.. parsed-literal:: + + sext(v4) + sext(v255) + +VOP3 Modifiers +-------------- + +.. _amdgpu_synid_vop3_op_sel: + +op_sel +~~~~~~ + +Selects the low [15:0] or high [31:16] operand bits for source and destination operands. +By default, low bits are used for all operands. + +The number of values specified with the op_sel modifier must match the number of instruction +operands (both source and destination). 
First value controls src0, second value controls src1 +and so on, except that the last value controls destination. +The value 0 selects the low bits, while 1 selects the high bits. + +Note: op_sel modifier affects 16-bit operands only. For 32-bit operands the value specified +by op_sel must be 0. + +GFX9 and GFX10 only. + + ======================================== ============================================================ + Syntax Description + ======================================== ============================================================ + op_sel:[{0..1},{0..1}] Select operand bits for instructions with 1 source operand. + op_sel:[{0..1},{0..1},{0..1}] Select operand bits for instructions with 2 source operands. + op_sel:[{0..1},{0..1},{0..1},{0..1}] Select operand bits for instructions with 3 source operands. + ======================================== ============================================================ + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + op_sel:[0,0] + op_sel:[0,1] + +.. _amdgpu_synid_dpp_op_sel: + +dpp_op_sel +~~~~~~~~~~ + +Special version of *op_sel* used for *permlane* opcodes to specify +dpp-like mode bits - :ref:`fi` and +:ref:`bound_ctrl`. + +GFX10 only. + + ======================================== ============================================================ + Syntax Description + ======================================== ============================================================ + op_sel:[{0..1},{0..1}] First bit specifies :ref:`fi`, second + bit specifies :ref:`bound_ctrl`. + ======================================== ============================================================ + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + op_sel:[0,0] + +.. _amdgpu_synid_clamp: + +clamp +~~~~~ + +Clamp meaning depends on instruction. 
+ +For *v_cmp* instructions, clamp modifier indicates that the compare signals +if a floating point exception occurs. By default, signaling is disabled. +Not supported by GFX7. + +For integer operations, clamp modifier indicates that the result must be clamped +to the largest and smallest representable value. By default, there is no clamping. +Integer clamping is not supported by GFX7. + +For floating point operations, clamp modifier indicates that the result must be clamped +to the range [0.0, 1.0]. By default, there is no clamping. + +Note: clamp modifier is applied after :ref:`output modifiers` (if any). + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + clamp Enables clamping (or signaling). + ======================================== ================================================ + +.. _amdgpu_synid_omod: + +omod +~~~~ + +Specifies if an output modifier must be applied to the result. +By default, no output modifiers are applied. + +Note: output modifiers are applied before :ref:`clamping` (if any). + +Output modifiers are valid for f32 and f64 floating point results only. +They must not be used with f16. + +Note: *v_cvt_f16_f32* is an exception. This instruction produces f16 result +but accepts output modifiers. + + ======================================== ================================================ + Syntax Description + ======================================== ================================================ + mul:2 Multiply the result by 2. + mul:4 Multiply the result by 4. + div:2 Multiply the result by 0.5. + ======================================== ================================================ + +Note: numeric values may be specified as either :ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + mul:2 + mul:x // x must be equal to 2 or 4 + +.. 
_amdgpu_synid_vop3_operand_modifiers: + +VOP3 Operand Modifiers +---------------------- + +Operand modifiers are not used separately. They are applied to source operands. + +.. _amdgpu_synid_abs: + +abs +~~~ + +Computes the absolute value of its operand. Must be applied before :ref:`neg` +(if any). Valid for floating point operands only. + + ======================================== ==================================================== + Syntax Description + ======================================== ==================================================== + abs() Get the absolute value of a floating-point operand. + \|| The same as above (an SP3 syntax). + ======================================== ==================================================== + +Note: avoid using SP3 syntax with operands specified as expressions because the trailing '|' +may be misinterpreted. Such operands should be enclosed into additional parentheses as shown +in examples below. + +Examples: + +.. parsed-literal:: + + abs(v36) + \|v36| + abs(x|y) // ok + \|(x|y)| // additional parentheses are required + +.. _amdgpu_synid_neg: + +neg +~~~ + +Computes the negative value of its operand. Must be applied after :ref:`abs` +(if any). Valid for floating point operands only. + + ================== ==================================================== + Syntax Description + ================== ==================================================== + neg() Get the negative value of a floating-point operand. + The operand may include an optional + :ref:`abs` modifier. + - The same as above (an SP3 syntax). + ================== ==================================================== + +Note: SP3 syntax is supported with limitations because of a potential ambiguity. +Currently it is allowed in the following cases: + +* Before a register. +* Before an :ref:`abs` modifier. +* Before an SP3 :ref:`abs` modifier. + +In all other cases "-" is handled as a part of an expression that follows the sign. + +Examples: + +.. 
parsed-literal:: + + // Operands with negate modifiers + neg(v[0]) + neg(1.0) + neg(abs(v0)) + -v5 + -abs(v5) + -\|v5| + + // Operands without negate modifiers + -1 + -x+y + +VOP3P Modifiers +--------------- + +This section describes modifiers of *regular* VOP3P instructions. + +*v_mad_mix\** and *v_fma_mix\** +instructions use these modifiers :ref:`in a special manner`. + +GFX9 and GFX10 only. + +.. _amdgpu_synid_op_sel: + +op_sel +~~~~~~ + +Selects the low [15:0] or high [31:16] operand bits as input to the operation +which results in the lower-half of the destination. +By default, low bits are used for all operands. + +The number of values specified by the *op_sel* modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on. + +The value 0 selects the low bits, while 1 selects the high bits. + + ================================= ============================================================= + Syntax Description + ================================= ============================================================= + op_sel:[{0..1}] Select operand bits for instructions with 1 source operand. + op_sel:[{0..1},{0..1}] Select operand bits for instructions with 2 source operands. + op_sel:[{0..1},{0..1},{0..1}] Select operand bits for instructions with 3 source operands. + ================================= ============================================================= + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + op_sel:[0,0] + op_sel:[0,1,0] + +.. _amdgpu_synid_op_sel_hi: + +op_sel_hi +~~~~~~~~~ + +Selects the low [15:0] or high [31:16] operand bits as input to the operation +which results in the upper-half of the destination. +By default, high bits are used for all operands. + +The number of values specified by the *op_sel_hi* modifier must match the number of source +operands. 
First value controls src0, second value controls src1 and so on. + +The value 0 selects the low bits, while 1 selects the high bits. + + =================================== ============================================================= + Syntax Description + =================================== ============================================================= + op_sel_hi:[{0..1}] Select operand bits for instructions with 1 source operand. + op_sel_hi:[{0..1},{0..1}] Select operand bits for instructions with 2 source operands. + op_sel_hi:[{0..1},{0..1},{0..1}] Select operand bits for instructions with 3 source operands. + =================================== ============================================================= + +Note: numeric values may be specified as either +:ref:`integer numbers<amdgpu_synid_integer_number>` or +:ref:`absolute expressions<amdgpu_synid_absolute_expression>`. + +Examples: + +.. parsed-literal:: + + op_sel_hi:[0,0] + op_sel_hi:[0,0,1] + +.. _amdgpu_synid_neg_lo: + +neg_lo +~~~~~~ + +Specifies whether to change sign of operand values selected by +:ref:`op_sel<amdgpu_synid_op_sel>`. These values are then used +as input to the operation which results in the lower-half of the destination. + +The number of values specified by this modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on. + +The value 0 indicates that the corresponding operand value is used unmodified, +the value 1 indicates that negative value of the operand must be used. + +By default, operand values are used unmodified. + +This modifier is valid for floating point operands only. + + ================================ ================================================================== + Syntax Description + ================================ ================================================================== + neg_lo:[{0..1}] Select affected operands for instructions with 1 source operand. + neg_lo:[{0..1},{0..1}] Select affected operands for instructions with 2 source operands.
+ neg_lo:[{0..1},{0..1},{0..1}] Select affected operands for instructions with 3 source operands. + ================================ ================================================================== + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + neg_lo:[0] + neg_lo:[0,1] + +.. _amdgpu_synid_neg_hi: + +neg_hi +~~~~~~ + +Specifies whether to change sign of operand values selected by +:ref:`op_sel_hi`. These values are then used +as input to the operation which results in the upper-half of the destination. + +The number of values specified by this modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on. + +The value 0 indicates that the corresponding operand value is used unmodified, +the value 1 indicates that negative value of the operand must be used. + +By default, operand values are used unmodified. + +This modifier is valid for floating point operands only. + + =============================== ================================================================== + Syntax Description + =============================== ================================================================== + neg_hi:[{0..1}] Select affected operands for instructions with 1 source operand. + neg_hi:[{0..1},{0..1}] Select affected operands for instructions with 2 source operands. + neg_hi:[{0..1},{0..1},{0..1}] Select affected operands for instructions with 3 source operands. + =============================== ================================================================== + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + neg_hi:[1,0] + neg_hi:[0,1,1] + +clamp +~~~~~ + +See a description :ref:`here`. + +.. 
_amdgpu_synid_mad_mix: + +VOP3P MAD_MIX/FMA_MIX Modifiers +------------------------------- + +*v_mad_mix\** and *v_fma_mix\** +instructions use *op_sel* and *op_sel_hi* modifiers +in a manner different from *regular* VOP3P instructions. + +See a description below. + +GFX9 and GFX10 only. + +.. _amdgpu_synid_mad_mix_op_sel: + +m_op_sel +~~~~~~~~ + +This operand has meaning only for 16-bit source operands as indicated by +:ref:`m_op_sel_hi`. +It specifies to select either the low [15:0] or high [31:16] operand bits +as input to the operation. + +The number of values specified by the *op_sel* modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on. + +The value 0 indicates the low bits, the value 1 indicates the high 16 bits. + +By default, low bits are used for all operands. + + =============================== ================================================ + Syntax Description + =============================== ================================================ + op_sel:[{0..1},{0..1},{0..1}] Select location of each 16-bit source operand. + =============================== ================================================ + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + op_sel:[0,1] + +.. _amdgpu_synid_mad_mix_op_sel_hi: + +m_op_sel_hi +~~~~~~~~~~~ + +Selects the size of source operands: either 32 bits or 16 bits. +By default, 32 bits are used for all source operands. + +The number of values specified by the *op_sel_hi* modifier must match the number of source +operands. First value controls src0, second value controls src1 and so on. + +The value 0 indicates 32 bits, the value 1 indicates 16 bits. + +The location of 16 bits in the operand may be specified by +:ref:`m_op_sel`. 
+ + ======================================== ==================================== + Syntax Description + ======================================== ==================================== + op_sel_hi:[{0..1},{0..1},{0..1}] Select size of each source operand. + ======================================== ==================================== + +Note: numeric values may be specified as either +:ref:`integer numbers` or +:ref:`absolute expressions`. + +Examples: + +.. parsed-literal:: + + op_sel_hi:[1,1,1] + +abs +~~~ + +See a description :ref:`here`. + +neg +~~~ + +See a description :ref:`here`. + +clamp +~~~~~ + +See a description :ref:`here`. + +VOP3P MFMA Modifiers +-------------------- + +These modifiers may only be used with GFX908 and GFX90A. + +.. _amdgpu_synid_cbsz: + +cbsz +~~~~ + +Specifies a broadcast mode. + + =============================== ================================================================== + Syntax Description + =============================== ================================================================== + cbsz:[{0..7}] A broadcast mode. + =============================== ================================================================== + +Note: numeric value may be specified as either +an :ref:`integer number` or +an :ref:`absolute expression`. + +.. _amdgpu_synid_abid: + +abid +~~~~ + +Specifies matrix A group select. + + =============================== ================================================================== + Syntax Description + =============================== ================================================================== + abid:[{0..15}] Matrix A group select id. + =============================== ================================================================== + +Note: numeric value may be specified as either +an :ref:`integer number` or +an :ref:`absolute expression`. + +.. _amdgpu_synid_blgp: + +blgp +~~~~ + +Specifies matrix B lane group pattern. 
+ + =============================== ================================================================== + Syntax Description + =============================== ================================================================== + blgp:[{0..7}] Matrix B lane group pattern. + =============================== ================================================================== + +Note: numeric value may be specified as either +an :ref:`integer number<amdgpu_synid_integer_number>` or +an :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUOperandSyntax.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUOperandSyntax.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUOperandSyntax.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUOperandSyntax.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,1144 @@ +===================================== +Syntax of AMDGPU Instruction Operands +===================================== + +.. contents:: + :local: + +Conventions +=========== + +The following notation is used throughout this document: + + =================== ============================================================================= + Notation Description + =================== ============================================================================= + {0..N} Any integer value in the range from 0 to N (inclusive). + <x> Syntax and meaning of *x* is explained elsewhere. + =================== ============================================================================= + +.. _amdgpu_syn_operands: + +Operands +======== + +.. _amdgpu_synid_v: + +v +- + +Vector registers. There are 256 32-bit vector registers. + +A sequence of *vector* registers may be used to operate with more than 32 bits of data. + +Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8, 16 and 32 *vector* registers.
+ + =================================================== ==================================================================== + Syntax Description + =================================================== ==================================================================== + **v**\ <N> A single 32-bit *vector* register. + + *N* must be a decimal + :ref:`integer number<amdgpu_synid_integer_number>`. + **v[**\ <N>\ **]** A single 32-bit *vector* register. + + *N* may be specified as an + :ref:`integer number<amdgpu_synid_integer_number>` + or an :ref:`absolute expression<amdgpu_synid_absolute_expression>`. + **v[**\ <N>:<K>\ **]** A sequence of (\ *K-N+1*\ ) *vector* registers. + + *N* and *K* may be specified as + :ref:`integer numbers<amdgpu_synid_integer_number>` + or :ref:`absolute expressions<amdgpu_synid_absolute_expression>`. + **[v**\ <N>, \ **v**\ <N+1>, ... **v**\ <K>\ **]** A sequence of (\ *K-N+1*\ ) *vector* registers. + + Register indices must be specified as decimal + :ref:`integer numbers<amdgpu_synid_integer_number>`. + =================================================== ==================================================================== + +Note: *N* and *K* must satisfy the following conditions: + +* *N* <= *K*. +* 0 <= *N* <= 255. +* 0 <= *K* <= 255. +* *K-N+1* must be equal to 1, 2, 3, 4, 5, 6, 7, 8, 16 or 32. + +GFX90A has an additional alignment requirement: pairs of *vector* registers must be even-aligned +(first register must be even). + +Examples: + +.. parsed-literal:: + + v255 + v[0] + v[0:1] + v[1:1] + v[0:3] + v[2*2] + v[1-1:2-1] + [v252] + [v252,v253,v254,v255] + +.. _amdgpu_synid_nsa: + +GFX10 *Image* instructions may use special *NSA* (Non-Sequential Address) syntax for *image addresses*: + + ===================================== ================================================= + Syntax Description + ===================================== ================================================= + **[Vm**, \ **Vn**, ... **Vk**\ **]** A sequence of 32-bit *vector* registers. + Each register may be specified using syntax + defined :ref:`above<amdgpu_synid_v>`.
+ + In contrast with standard syntax, registers + in *NSA* sequence are not required to have + consecutive indices. Moreover, the same register + may appear in the list more than once. + ===================================== ================================================= + +Examples: + +.. parsed-literal:: + + [v32,v1,v[2]] + [v[32],v[1:1],[v2]] + [v4,v4,v4,v4] + +.. _amdgpu_synid_a: + +a +- + +Accumulator registers. There are 256 32-bit accumulator registers. + +A sequence of *accumulator* registers may be used to operate with more than 32 bits of data. + +Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8, 16 and 32 *accumulator* registers. + + =================================================== ========================================================= ==================================================================== + Syntax An Alternative Syntax (SP3) Description + =================================================== ========================================================= ==================================================================== + **a**\ **acc**\ A single 32-bit *accumulator* register. + + *N* must be a decimal + :ref:`integer number`. + **a[**\ \ **]** **acc[**\ \ **]** A single 32-bit *accumulator* register. + + *N* may be specified as an + :ref:`integer number` + or an :ref:`absolute expression`. + **a[**\ :\ **]** **acc[**\ :\ **]** A sequence of (\ *K-N+1*\ ) *accumulator* registers. + + *N* and *K* may be specified as + :ref:`integer numbers` + or :ref:`absolute expressions`. + **[a**\ , \ **a**\ , ... **a**\ \ **]** **[acc**\ , \ **acc**\ , ... **acc**\ \ **]** A sequence of (\ *K-N+1*\ ) *accumulator* registers. + + Register indices must be specified as decimal + :ref:`integer numbers`. 
+ =================================================== ========================================================= ==================================================================== + +Note: *N* and *K* must satisfy the following conditions: + +* *N* <= *K*. +* 0 <= *N* <= 255. +* 0 <= *K* <= 255. +* *K-N+1* must be equal to 1, 2, 3, 4, 5, 6, 7, 8, 16 or 32. + +GFX90A has an additional alignment requirement: pairs of *accumulator* registers must be even-aligned +(first register must be even). + +Examples: + +.. parsed-literal:: + + a255 + a[0] + a[0:1] + a[1:1] + a[0:3] + a[2*2] + a[1-1:2-1] + [a252] + [a252,a253,a254,a255] + + acc0 + acc[1] + [acc250] + [acc2,acc3] + +.. _amdgpu_synid_s: + +s +- + +Scalar 32-bit registers. The number of available *scalar* registers depends on GPU: + + ======= ============================ + GPU Number of *scalar* registers + ======= ============================ + GFX7 104 + GFX8 102 + GFX9 102 + GFX10 106 + ======= ============================ + +A sequence of *scalar* registers may be used to operate with more than 32 bits of data. +Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8, 16 and 32 *scalar* registers. + +Pairs of *scalar* registers must be even-aligned (first register must be even). +Sequences of 4 and more *scalar* registers must be quad-aligned. + + ======================================================== ==================================================================== + Syntax Description + ======================================================== ==================================================================== + **s**\<N> A single 32-bit *scalar* register. + + *N* must be a decimal + :ref:`integer number`. + + **s[**\ <N>\ **]** A single 32-bit *scalar* register. + + *N* may be specified as an + :ref:`integer number` + or an :ref:`absolute expression`. + **s[**\ <N>:<K>\ **]** A sequence of (\ *K-N+1*\ ) *scalar* registers.
+ + *N* and *K* may be specified as + :ref:`integer numbers` + or :ref:`absolute expressions`. + + **[s**\ <N>, \ **s**\ <N+1>, ... **s**\ <K>\ **]** A sequence of (\ *K-N+1*\ ) *scalar* registers. + + Register indices must be specified as decimal + :ref:`integer numbers`. + ======================================================== ==================================================================== + +Note: *N* and *K* must satisfy the following conditions: + +* *N* must be properly aligned based on sequence size. +* *N* <= *K*. +* 0 <= *N* < *SMAX*\ , where *SMAX* is the number of available *scalar* registers. +* 0 <= *K* < *SMAX*\ , where *SMAX* is the number of available *scalar* registers. +* *K-N+1* must be equal to 1, 2, 3, 4, 5, 6, 7, 8, 16 or 32. + +Examples: + +.. parsed-literal:: + + s0 + s[0] + s[0:1] + s[1:1] + s[0:3] + s[2*2] + s[1-1:2-1] + [s4] + [s4,s5,s6,s7] + +Examples of *scalar* registers with an invalid alignment: + +.. parsed-literal:: + + s[1:2] + s[2:5] + +.. _amdgpu_synid_trap: + +trap +---- + +A set of trap handler registers: + +* :ref:`ttmp` +* :ref:`tba` +* :ref:`tma` + +.. _amdgpu_synid_ttmp: + +ttmp +---- + +Trap handler temporary scalar registers, 32-bits wide. +The number of available *ttmp* registers depends on GPU: + + ======= =========================== + GPU Number of *ttmp* registers + ======= =========================== + GFX7 12 + GFX8 12 + GFX9 16 + GFX10 16 + ======= =========================== + +A sequence of *ttmp* registers may be used to operate with more than 32 bits of data. +Assembler currently supports sequences of 1, 2, 3, 4, 5, 6, 7, 8 and 16 *ttmp* registers. + +Pairs of *ttmp* registers must be even-aligned (first register must be even). +Sequences of 4 and more *ttmp* registers must be quad-aligned.
+ + ============================================================= ==================================================================== + Syntax Description + ============================================================= ==================================================================== + **ttmp**\<N> A single 32-bit *ttmp* register. + + *N* must be a decimal + :ref:`integer number`. + **ttmp[**\ <N>\ **]** A single 32-bit *ttmp* register. + + *N* may be specified as an + :ref:`integer number` + or an :ref:`absolute expression`. + **ttmp[**\ <N>:<K>\ **]** A sequence of (\ *K-N+1*\ ) *ttmp* registers. + + *N* and *K* may be specified as + :ref:`integer numbers` + or :ref:`absolute expressions`. + **[ttmp**\ <N>, \ **ttmp**\ <N+1>, ... **ttmp**\ <K>\ **]** A sequence of (\ *K-N+1*\ ) *ttmp* registers. + + Register indices must be specified as decimal + :ref:`integer numbers`. + ============================================================= ==================================================================== + +Note: *N* and *K* must satisfy the following conditions: + +* *N* must be properly aligned based on sequence size. +* *N* <= *K*. +* 0 <= *N* < *TMAX*, where *TMAX* is the number of available *ttmp* registers. +* 0 <= *K* < *TMAX*, where *TMAX* is the number of available *ttmp* registers. +* *K-N+1* must be equal to 1, 2, 3, 4, 5, 6, 7, 8 or 16. + +Examples: + +.. parsed-literal:: + + ttmp0 + ttmp[0] + ttmp[0:1] + ttmp[1:1] + ttmp[0:3] + ttmp[2*2] + ttmp[1-1:2-1] + [ttmp4] + [ttmp4,ttmp5,ttmp6,ttmp7] + +Examples of *ttmp* registers with an invalid alignment: + +.. parsed-literal:: + + ttmp[1:2] + ttmp[2:5] + +.. _amdgpu_synid_tba: + +tba +--- + +Trap base address, 64-bits wide. Holds the pointer to the current trap handler program.
+ + ================== ======================================================================= ============= + Syntax Description Availability + ================== ======================================================================= ============= + tba 64-bit *trap base address* register. GFX7, GFX8 + [tba] 64-bit *trap base address* register (an SP3 syntax). GFX7, GFX8 + [tba_lo,tba_hi] 64-bit *trap base address* register (an SP3 syntax). GFX7, GFX8 + ================== ======================================================================= ============= + +High and low 32 bits of *trap base address* may be accessed as separate registers: + + ================== ======================================================================= ============= + Syntax Description Availability + ================== ======================================================================= ============= + tba_lo Low 32 bits of *trap base address* register. GFX7, GFX8 + tba_hi High 32 bits of *trap base address* register. GFX7, GFX8 + [tba_lo] Low 32 bits of *trap base address* register (an SP3 syntax). GFX7, GFX8 + [tba_hi] High 32 bits of *trap base address* register (an SP3 syntax). GFX7, GFX8 + ================== ======================================================================= ============= + +Note that *tba*, *tba_lo* and *tba_hi* are not accessible as assembler registers in GFX9 and GFX10, +but *tba* is readable/writable with the help of *s_get_reg* and *s_set_reg* instructions. + +.. _amdgpu_synid_tma: + +tma +--- + +Trap memory address, 64-bits wide. + + ================= ======================================================================= ================== + Syntax Description Availability + ================= ======================================================================= ================== + tma 64-bit *trap memory address* register. GFX7, GFX8 + [tma] 64-bit *trap memory address* register (an SP3 syntax). 
GFX7, GFX8 + [tma_lo,tma_hi] 64-bit *trap memory address* register (an SP3 syntax). GFX7, GFX8 + ================= ======================================================================= ================== + +High and low 32 bits of *trap memory address* may be accessed as separate registers: + + ================= ======================================================================= ================== + Syntax Description Availability + ================= ======================================================================= ================== + tma_lo Low 32 bits of *trap memory address* register. GFX7, GFX8 + tma_hi High 32 bits of *trap memory address* register. GFX7, GFX8 + [tma_lo] Low 32 bits of *trap memory address* register (an SP3 syntax). GFX7, GFX8 + [tma_hi] High 32 bits of *trap memory address* register (an SP3 syntax). GFX7, GFX8 + ================= ======================================================================= ================== + +Note that *tma*, *tma_lo* and *tma_hi* are not accessible as assembler registers in GFX9 and GFX10, +but *tma* is readable/writable with the help of *s_get_reg* and *s_set_reg* instructions. + +.. _amdgpu_synid_flat_scratch: + +flat_scratch +------------ + +Flat scratch address, 64-bits wide. Holds the base address of scratch memory. + + ================================== ================================================================ + Syntax Description + ================================== ================================================================ + flat_scratch 64-bit *flat scratch* address register. + [flat_scratch] 64-bit *flat scratch* address register (an SP3 syntax). + [flat_scratch_lo,flat_scratch_hi] 64-bit *flat scratch* address register (an SP3 syntax). 
+ ================================== ================================================================ + +High and low 32 bits of *flat scratch* address may be accessed as separate registers: + + ========================= ========================================================================= + Syntax Description + ========================= ========================================================================= + flat_scratch_lo Low 32 bits of *flat scratch* address register. + flat_scratch_hi High 32 bits of *flat scratch* address register. + [flat_scratch_lo] Low 32 bits of *flat scratch* address register (an SP3 syntax). + [flat_scratch_hi] High 32 bits of *flat scratch* address register (an SP3 syntax). + ========================= ========================================================================= + +Note that *flat_scratch*, *flat_scratch_lo* and *flat_scratch_hi* are not accessible as assembler +registers in GFX10, but *flat_scratch* is readable/writable with the help of +*s_get_reg* and *s_set_reg* instructions. + +.. _amdgpu_synid_xnack: +.. _amdgpu_synid_xnack_mask: + +xnack_mask +---------- + +Xnack mask, 64-bits wide. Holds a 64-bit mask of which threads +received an *XNACK* due to a vector memory operation. + +.. WARNING:: GFX7 does not support the *xnack* feature. For availability of this feature in other GPUs, refer to :ref:`this table`. + +\ + + ============================== ===================================================== + Syntax Description + ============================== ===================================================== + xnack_mask 64-bit *xnack mask* register. + [xnack_mask] 64-bit *xnack mask* register (an SP3 syntax). + [xnack_mask_lo,xnack_mask_hi] 64-bit *xnack mask* register (an SP3 syntax).
+ ============================== ===================================================== + +High and low 32 bits of *xnack mask* may be accessed as separate registers: + + ===================== ============================================================== + Syntax Description + ===================== ============================================================== + xnack_mask_lo Low 32 bits of *xnack mask* register. + xnack_mask_hi High 32 bits of *xnack mask* register. + [xnack_mask_lo] Low 32 bits of *xnack mask* register (an SP3 syntax). + [xnack_mask_hi] High 32 bits of *xnack mask* register (an SP3 syntax). + ===================== ============================================================== + +Note that *xnack_mask*, *xnack_mask_lo* and *xnack_mask_hi* are not accessible as assembler +registers in GFX10, but *xnack_mask* is readable/writable with the help of +*s_get_reg* and *s_set_reg* instructions. + +.. _amdgpu_synid_vcc: +.. _amdgpu_synid_vcc_lo: + +vcc +--- + +Vector condition code, 64-bits wide. A bit mask with one bit per thread; +it holds the result of a vector compare operation. + +Note that GFX10 H/W does not use high 32 bits of *vcc* in *wave32* mode. + + ================ ========================================================================= + Syntax Description + ================ ========================================================================= + vcc 64-bit *vector condition code* register. + [vcc] 64-bit *vector condition code* register (an SP3 syntax). + [vcc_lo,vcc_hi] 64-bit *vector condition code* register (an SP3 syntax). 
+ ================ ========================================================================= + +High and low 32 bits of *vector condition code* may be accessed as separate registers: + + ================ ========================================================================= + Syntax Description + ================ ========================================================================= + vcc_lo Low 32 bits of *vector condition code* register. + vcc_hi High 32 bits of *vector condition code* register. + [vcc_lo] Low 32 bits of *vector condition code* register (an SP3 syntax). + [vcc_hi] High 32 bits of *vector condition code* register (an SP3 syntax). + ================ ========================================================================= + +.. _amdgpu_synid_m0: + +m0 +-- + +A 32-bit memory register. It has various uses, +including register indexing and bounds checking. + + =========== =================================================== + Syntax Description + =========== =================================================== + m0 A 32-bit *memory* register. + [m0] A 32-bit *memory* register (an SP3 syntax). + =========== =================================================== + +.. _amdgpu_synid_exec: + +exec +---- + +Execute mask, 64-bits wide. A bit mask with one bit per thread, +which is applied to vector instructions and controls which threads execute +and which ignore the instruction. + +Note that GFX10 H/W does not use high 32 bits of *exec* in *wave32* mode. + + ===================== ================================================================= + Syntax Description + ===================== ================================================================= + exec 64-bit *execute mask* register. + [exec] 64-bit *execute mask* register (an SP3 syntax). + [exec_lo,exec_hi] 64-bit *execute mask* register (an SP3 syntax). 
+ ===================== ================================================================= + +High and low 32 bits of *execute mask* may be accessed as separate registers: + + ===================== ================================================================= + Syntax Description + ===================== ================================================================= + exec_lo Low 32 bits of *execute mask* register. + exec_hi High 32 bits of *execute mask* register. + [exec_lo] Low 32 bits of *execute mask* register (an SP3 syntax). + [exec_hi] High 32 bits of *execute mask* register (an SP3 syntax). + ===================== ================================================================= + +.. _amdgpu_synid_vccz: + +vccz +---- + +A single bit flag indicating that the :ref:`vcc` is all zeros. + +Note: when GFX10 operates in *wave32* mode, this register reflects state of :ref:`vcc_lo`. + +.. _amdgpu_synid_execz: + +execz +----- + +A single bit flag indicating that the :ref:`exec` is all zeros. + +Note: when GFX10 operates in *wave32* mode, this register reflects state of :ref:`exec_lo`. + +.. _amdgpu_synid_scc: + +scc +--- + +A single bit flag indicating the result of a scalar compare operation. + +.. _amdgpu_synid_lds_direct: + +lds_direct +---------- + +A special operand which supplies a 32-bit value +fetched from *LDS* memory using :ref:`m0` as an address. + +.. _amdgpu_synid_null: + +null +---- + +This is a special operand which may be used as a source or a destination. + +When used as a destination, the result of the operation is discarded. + +When used as a source, it supplies zero value. + +GFX10 only. + +.. WARNING:: Due to a H/W bug, this operand cannot be used with VALU instructions in first generation of GFX10. + +.. _amdgpu_synid_constant: + +inline constant +--------------- + +An *inline constant* is an integer or a floating-point value encoded as a part of an instruction. +Compare *inline constants* with :ref:`literals`. 
+ +Inline constants include: + +* :ref:`iconst` +* :ref:`fconst` +* :ref:`ival` + +If a number may be encoded as either +a :ref:`literal` or +a :ref:`constant`, +assembler selects the latter encoding as more efficient. + +.. _amdgpu_synid_iconst: + +iconst +~~~~~~ + +An :ref:`integer number` or +an :ref:`absolute expression` +encoded as an *inline constant*. + +Only a small fraction of integer numbers may be encoded as *inline constants*. +They are enumerated in the table below. +Other integer numbers have to be encoded as :ref:`literals`. + + ================================== ==================================== + Value Note + ================================== ==================================== + {0..64} Positive integer inline constants. + {-16..-1} Negative integer inline constants. + ================================== ==================================== + +.. WARNING:: GFX7 does not support inline constants for *f16* operands. + +.. _amdgpu_synid_fconst: + +fconst +~~~~~~ + +A :ref:`floating-point number` +encoded as an *inline constant*. + +Only a small fraction of floating-point numbers may be encoded as *inline constants*. +They are enumerated in the table below. +Other floating-point numbers have to be encoded as :ref:`literals`. + + ===================== ===================================================== ================== + Value Note Availability + ===================== ===================================================== ================== + 0.0 The same as integer constant 0. All GPUs + 0.5 Floating-point constant 0.5 All GPUs + 1.0 Floating-point constant 1.0 All GPUs + 2.0 Floating-point constant 2.0 All GPUs + 4.0 Floating-point constant 4.0 All GPUs + -0.5 Floating-point constant -0.5 All GPUs + -1.0 Floating-point constant -1.0 All GPUs + -2.0 Floating-point constant -2.0 All GPUs + -4.0 Floating-point constant -4.0 All GPUs + 0.1592 1.0/(2.0*pi). Use only for 16-bit operands. GFX8, GFX9, GFX10 + 0.15915494 1.0/(2.0*pi). 
Use only for 16- and 32-bit operands. GFX8, GFX9, GFX10 + 0.15915494309189532 1.0/(2.0*pi). GFX8, GFX9, GFX10 + ===================== ===================================================== ================== + +.. WARNING:: Floating-point inline constants cannot be used with *16-bit integer* operands. \ + Assembler will attempt to encode these values as literals. + +.. WARNING:: GFX7 does not support inline constants for *f16* operands. + +.. _amdgpu_synid_ival: + +ival +~~~~ + +A symbolic operand encoded as an *inline constant*. +These operands provide read-only access to H/W registers. + + ======================== ================================================ ============= + Syntax Note Availability + ======================== ================================================ ============= + shared_base Base address of shared memory region. GFX9, GFX10 + shared_limit Address of the end of shared memory region. GFX9, GFX10 + private_base Base address of private memory region. GFX9, GFX10 + private_limit Address of the end of private memory region. GFX9, GFX10 + pops_exiting_wave_id A dedicated counter for POPS. GFX9, GFX10 + ======================== ================================================ ============= + +.. _amdgpu_synid_literal: + +literal +------- + +A *literal* is a 64-bit value encoded as a separate 32-bit dword in the instruction stream. +Compare *literals* with :ref:`inline constants`. + +If a number may be encoded as either +a :ref:`literal` or +an :ref:`inline constant`, +assembler selects the latter encoding as more efficient. + +Literals may be specified as :ref:`integer numbers`, +:ref:`floating-point numbers`, +:ref:`absolute expressions` or +:ref:`relocatable expressions`. + +An instruction may use only one literal but several operands may refer to the same literal. + +.. _amdgpu_synid_uimm8: + +uimm8 +----- + +An 8-bit :ref:`integer number` +or an :ref:`absolute expression`. +The value must be in the range 0..0xFF. + +..
_amdgpu_synid_uimm32: + +uimm32 +------ + +A 32-bit :ref:`integer number` +or an :ref:`absolute expression`. +The value must be in the range 0..0xFFFFFFFF. + +.. _amdgpu_synid_uimm20: + +uimm20 +------ + +A 20-bit :ref:`integer number` +or an :ref:`absolute expression`. + +The value must be in the range 0..0xFFFFF. + +.. _amdgpu_synid_simm21: + +simm21 +------ + +A 21-bit :ref:`integer number` +or an :ref:`absolute expression`. + +The value must be in the range -0x100000..0x0FFFFF. + +.. _amdgpu_synid_off: + +off +--- + +A special entity which indicates that the value of this operand is not used. + + ================================== =================================================== + Syntax Description + ================================== =================================================== + off Indicates an unused operand. + ================================== =================================================== + + +.. _amdgpu_synid_number: + +Numbers +======= + +.. _amdgpu_synid_integer_number: + +Integer Numbers +--------------- + +Integer numbers are 64 bits wide. +They are converted to :ref:`expected operand type` +as described :ref:`here`. + +Integer numbers may be specified in binary, octal, hexadecimal and decimal formats: + + ============ =============================== ======== + Format Syntax Example + ============ =============================== ======== + Decimal [-]?[1-9][0-9]* -1234 + Binary [-]?0b[01]+ 0b1010 + Octal [-]?0[0-7]+ 010 + Hexadecimal [-]?0x[0-9a-fA-F]+ 0xff + \ [-]?[0x]?[0-9][0-9a-fA-F]*[hH] 0ffh + ============ =============================== ======== + +.. _amdgpu_synid_floating-point_number: + +Floating-Point Numbers +---------------------- + +All floating-point numbers are handled as double (64 bits wide). +They are converted to +:ref:`expected operand type` +as described :ref:`here`. 
+ +Floating-point numbers may be specified in hexadecimal and decimal formats: + + ============ ======================================================== ====================== ==================== + Format Syntax Examples Note + ============ ======================================================== ====================== ==================== + Decimal [-]?[0-9]*[.][0-9]*([eE][+-]?[0-9]*)? -1.234, 234e2 Must include either + a decimal separator + or an exponent. + Hexadecimal [-]?0x[0-9a-fA-F]*(.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ -0x1afp-10, 0x.1afp10 + ============ ======================================================== ====================== ==================== + +.. _amdgpu_synid_expression: + +Expressions +=========== + +An expression is evaluated to a 64-bit integer. +Note that floating-point expressions are not supported. + +There are two kinds of expressions: + +* :ref:`Absolute`. +* :ref:`Relocatable`. + +.. _amdgpu_synid_absolute_expression: + +Absolute Expressions +-------------------- + +The value of an absolute expression does not change after program relocation. +Absolute expressions must not include unassigned and relocatable values +such as labels. + +Absolute expressions are evaluated to 64-bit integer values and converted to +:ref:`expected operand type` +as described :ref:`here`. + +Examples: + +.. parsed-literal:: + + x = -1 + y = x + 10 + +.. _amdgpu_synid_relocatable_expression: + +Relocatable Expressions +----------------------- + +The value of a relocatable expression depends on program relocation. + +Note that use of relocatable expressions is limited to branch targets +and 32-bit integer operands. + +A relocatable expression is evaluated to a 64-bit integer value +which depends on operand kind and :ref:`relocation type` +of symbol(s) used in the expression. For example, if an instruction refers to a label, +this reference is evaluated to an offset from the address after the instruction +to the label address: + +..
parsed-literal:: + + label: + v_add_co_u32_e32 v0, vcc, label, v1 // 'label' operand is evaluated to -4 + +Note that values of relocatable expressions are usually unknown at assembly time; +they are resolved later by a linker and converted to +:ref:`expected operand type` +as described :ref:`here`. + +Operands and Operations +----------------------- + +Expressions are composed of 64-bit integer operands and operations. +Operands include :ref:`integer numbers` +and :ref:`symbols`. + +Expressions may also use "." which is a reference to the current PC (program counter). + +:ref:`Unary` and :ref:`binary` +operations produce 64-bit integer results. + +Syntax of Expressions +--------------------- + +Syntax of expressions is shown below:: + + expr ::= expr binop expr | primaryexpr ; + + primaryexpr ::= '(' expr ')' | symbol | number | '.' | unop primaryexpr ; + + binop ::= '&&' + | '||' + | '|' + | '^' + | '&' + | '!' + | '==' + | '!=' + | '<>' + | '<' + | '<=' + | '>' + | '>=' + | '<<' + | '>>' + | '+' + | '-' + | '*' + | '/' + | '%' ; + + unop ::= '~' + | '+' + | '-' + | '!' ; + +.. _amdgpu_synid_expression_bin_op: + +Binary Operators +---------------- + +Binary operators are described in the following table. +They operate on and produce 64-bit integers. +Operators with higher priority are performed first. + + ========== ========= =============================================== + Operator Priority Meaning + ========== ========= =============================================== + \* 5 Integer multiplication. + / 5 Integer division. + % 5 Integer signed remainder. + \+ 4 Integer addition. + \- 4 Integer subtraction. + << 3 Integer shift left. + >> 3 Logical shift right. + == 2 Equality comparison. + != 2 Inequality comparison. + <> 2 Inequality comparison. + < 2 Signed less than comparison. + <= 2 Signed less than or equal comparison. + > 2 Signed greater than comparison. + >= 2 Signed greater than or equal comparison. + \| 1 Bitwise or. + ^ 1 Bitwise xor. 
+ & 1 Bitwise and. + && 0 Logical and. + || 0 Logical or. + ========== ========= =============================================== + +.. _amdgpu_synid_expression_un_op: + +Unary Operators +--------------- + +Unary operators are described in the following table. +They operate on and produce 64-bit integers. + + ========== =============================================== + Operator Meaning + ========== =============================================== + ! Logical negation. + ~ Bitwise negation. + \+ Integer unary plus. + \- Integer unary minus. + ========== =============================================== + +.. _amdgpu_synid_symbol: + +Symbols +------- + +A symbol is a named 64-bit integer value, representing a relocatable +address or an absolute (non-relocatable) number. + +Symbol names have the following syntax: + ``[a-zA-Z_.][a-zA-Z0-9_$.@]*`` + +The table below provides several examples of syntax used for symbol definition. + + ================ ========================================================== + Syntax Meaning + ================ ========================================================== + .globl <S> Declares a global symbol S without assigning it a value. + .set <S>, <E> Assigns the value of an expression E to a symbol S. + <S> = <E> Assigns the value of an expression E to a symbol S. + <S>: Declares a label S and assigns it the current PC value. + ================ ========================================================== + +A symbol may be used before it is declared or assigned; +unassigned symbols are assumed to be PC-relative. + +Additional information about symbols may be found :ref:`here`. + +.. _amdgpu_synid_conv: + +Type and Size Conversion +======================== + +This section describes what happens when a 64-bit +:ref:`integer number`, a +:ref:`floating-point number` or an +:ref:`expression` +is used for an operand which has a different type or size. + +..
_amdgpu_synid_int_conv: + +Conversion of Integer Values +---------------------------- + +Instruction operands may be specified as 64-bit :ref:`integer numbers` or +:ref:`absolute expressions`. These values are converted to +the :ref:`expected operand type` using the following steps: + +1. *Validation*. Assembler checks if the input value may be truncated without loss to the required *truncation width* +(see the table below). There are two cases when this operation is enabled: + + * The truncated bits are all 0. + * The truncated bits are all 1 and the value after truncation has its MSB bit set. + +In all other cases assembler triggers an error. + +2. *Conversion*. The input value is converted to the expected type as described in the table below. +Depending on operand kind, this conversion is performed by either assembler or AMDGPU H/W (or both). + + ============== ================= =============== ==================================================================== + Expected type Truncation Width Conversion Description + ============== ================= =============== ==================================================================== + i16, u16, b16 16 num.u16 Truncate to 16 bits. + i32, u32, b32 32 num.u32 Truncate to 32 bits. + i64 32 {-1,num.i32} Truncate to 32 bits and then sign-extend the result to 64 bits. + u64, b64 32 {0,num.u32} Truncate to 32 bits and then zero-extend the result to 64 bits. + f16 16 num.u16 Use low 16 bits as an f16 value. + f32 32 num.u32 Use low 32 bits as an f32 value. + f64 32 {num.u32,0} Use low 32 bits of the number as high 32 bits + of the result; low 32 bits of the result are zeroed. + ============== ================= =============== ==================================================================== + +Examples of enabled conversions: + +.. 
parsed-literal:: + + // GFX9 + + v_add_u16 v0, -1, 0 // src0 = 0xFFFF + v_add_f16 v0, -1, 0 // src0 = 0xFFFF (NaN) + // + v_add_u32 v0, -1, 0 // src0 = 0xFFFFFFFF + v_add_f32 v0, -1, 0 // src0 = 0xFFFFFFFF (NaN) + // + v_add_u16 v0, 0xff00, v0 // src0 = 0xff00 + v_add_u16 v0, 0xffffffffffffff00, v0 // src0 = 0xff00 + v_add_u16 v0, -256, v0 // src0 = 0xff00 + // + s_bfe_i64 s[0:1], 0xffefffff, s3 // src0 = 0xffffffffffefffff + s_bfe_u64 s[0:1], 0xffefffff, s3 // src0 = 0x00000000ffefffff + v_ceil_f64_e32 v[0:1], 0xffefffff // src0 = 0xffefffff00000000 (-1.7976922776554302e308) + // + x = 0xffefffff // + s_bfe_i64 s[0:1], x, s3 // src0 = 0xffffffffffefffff + s_bfe_u64 s[0:1], x, s3 // src0 = 0x00000000ffefffff + v_ceil_f64_e32 v[0:1], x // src0 = 0xffefffff00000000 (-1.7976922776554302e308) + +Examples of disabled conversions: + +.. parsed-literal:: + + // GFX9 + + v_add_u16 v0, 0x1ff00, v0 // truncated bits are not all 0 or 1 + v_add_u16 v0, 0xffffffffffff00ff, v0 // truncated bits do not match MSB of the result + +.. _amdgpu_synid_fp_conv: + +Conversion of Floating-Point Values +----------------------------------- + +Instruction operands may be specified as 64-bit :ref:`floating-point numbers`. +These values are converted to the :ref:`expected operand type` using the following steps: + +1. *Validation*. Assembler checks if the input f64 number can be converted +to the *required floating-point type* (see the table below) without overflow or underflow. +Loss of precision is allowed. If this conversion is not possible, assembler triggers an error. + +2. *Conversion*. The input value is converted to the expected type as described in the table below. +Depending on operand kind, this is performed by either assembler or AMDGPU H/W (or both).
+ + ============== ================ ================= ================================================================= + Expected type Required FP Type Conversion Description + ============== ================ ================= ================================================================= + i16, u16, b16 f16 f16(num) Convert to f16 and use bits of the result as an integer value. + The value has to be encoded as a literal or an error occurs. + Note that the value cannot be encoded as an inline constant. + i32, u32, b32 f32 f32(num) Convert to f32 and use bits of the result as an integer value. + i64, u64, b64 \- \- Conversion disabled. + f16 f16 f16(num) Convert to f16. + f32 f32 f32(num) Convert to f32. + f64 f64 {num.u32.hi,0} Use high 32 bits of the number as high 32 bits of the result; + zero-fill low 32 bits of the result. + + Note that the result may differ from the original number. + ============== ================ ================= ================================================================= + +Examples of enabled conversions: + +.. parsed-literal:: + + // GFX9 + + v_add_f16 v0, 1.0, 0 // src0 = 0x3C00 (1.0) + v_add_u16 v0, 1.0, 0 // src0 = 0x3C00 + // + v_add_f32 v0, 1.0, 0 // src0 = 0x3F800000 (1.0) + v_add_u32 v0, 1.0, 0 // src0 = 0x3F800000 + + // src0 before conversion: + // 1.7976931348623157e308 = 0x7fefffffffffffff + // src0 after conversion: + // 1.7976922776554302e308 = 0x7fefffff00000000 + v_ceil_f64 v[0:1], 1.7976931348623157e308 + + v_add_f16 v1, 65500.0, v2 // ok for f16. + v_add_f32 v1, 65600.0, v2 // ok for f32, but would result in overflow for f16. + +Examples of disabled conversions: + +.. parsed-literal:: + + // GFX9 + + v_add_f16 v1, 65600.0, v2 // overflow + +.. _amdgpu_synid_rl_conv: + +Conversion of Relocatable Values +-------------------------------- + +:ref:`Relocatable expressions` +may be used with 32-bit integer operands and jump targets. 
+ +When the value of a relocatable expression is resolved by a linker, it is +converted as needed and truncated to the operand size. The conversion depends +on :ref:`relocation type` and operand kind. + +For example, when a 32-bit operand of an instruction refers to a relocatable expression *expr*, +this reference is evaluated to a 64-bit offset from the address after the +instruction to the address being referenced, *counted in bytes*. +Then the value is truncated to 32 bits and encoded as a literal: + +.. parsed-literal:: + + expr = . + v_add_co_u32_e32 v0, vcc, expr, v1 // 'expr' operand is evaluated to -4 + // and then truncated to 0xFFFFFFFC + +As another example, when a branch instruction refers to a label, +this reference is evaluated to an offset from the address after the +instruction to the label address, *counted in dwords*. +Then the value is truncated to 16 bits: + +.. parsed-literal:: + + label: + s_branch label // 'label' operand is evaluated to -1 and truncated to 0xFFFF diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUUsage.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUUsage.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/AMDGPUUsage.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/AMDGPUUsage.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,12382 @@ +============================= +User Guide for AMDGPU Backend +============================= + +.. contents:: + :local: + +.. 
toctree:: + :hidden: + + AMDGPU/AMDGPUAsmGFX7 + AMDGPU/AMDGPUAsmGFX8 + AMDGPU/AMDGPUAsmGFX9 + AMDGPU/AMDGPUAsmGFX900 + AMDGPU/AMDGPUAsmGFX904 + AMDGPU/AMDGPUAsmGFX906 + AMDGPU/AMDGPUAsmGFX908 + AMDGPU/AMDGPUAsmGFX90a + AMDGPU/AMDGPUAsmGFX10 + AMDGPU/AMDGPUAsmGFX1011 + AMDGPUModifierSyntax + AMDGPUOperandSyntax + AMDGPUInstructionSyntax + AMDGPUInstructionNotation + AMDGPUDwarfExtensionsForHeterogeneousDebugging + +Introduction +============ + +The AMDGPU backend provides ISA code generation for AMD GPUs, starting with the +R600 family up until the current GCN families. It lives in the +``llvm/lib/Target/AMDGPU`` directory. + +LLVM +==== + +.. _amdgpu-target-triples: + +Target Triples +-------------- + +Use the Clang option ``-target <Architecture>-<Vendor>-<OS>-<Environment>`` +to specify the target triple: + + .. table:: AMDGPU Architectures + :name: amdgpu-architecture-table + + ============ ============================================================== + Architecture Description + ============ ============================================================== + ``r600`` AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders. + ``amdgcn`` AMD GPUs GCN GFX6 onwards for graphics and compute shaders. + ============ ============================================================== + + .. table:: AMDGPU Vendors + :name: amdgpu-vendor-table + + ============ ============================================================== + Vendor Description + ============ ============================================================== + ``amd`` Can be used for all AMD GPU usage. + ``mesa3d`` Can be used if the OS is ``mesa3d``. + ============ ============================================================== + + .. table:: AMDGPU Operating Systems + :name: amdgpu-os + + ============== ============================================================ + OS Description + ============== ============================================================ + *<empty>* Defaults to the *unknown* OS. 
+ ``amdhsa`` Compute kernels executed on HSA [HSA]_ compatible runtimes + such as: + + - AMD's ROCm™ runtime [AMD-ROCm]_ using the *rocm-amdhsa* + loader on Linux. See *AMD ROCm Platform Release Notes* + [AMD-ROCm-Release-Notes]_ for supported hardware and + software. + - AMD's PAL runtime using the *pal-amdhsa* loader on + Windows. + + ``amdpal`` Graphic shaders and compute kernels executed on AMD's PAL + runtime using the *pal-amdpal* loader on Windows and Linux + Pro. + ``mesa3d`` Graphic shaders and compute kernels executed on AMD's Mesa + 3D runtime using the *mesa-mesa3d* loader on Linux. + ============== ============================================================ + + .. table:: AMDGPU Environments + :name: amdgpu-environment-table + + ============ ============================================================== + Environment Description + ============ ============================================================== + *<empty>* Default. + ============ ============================================================== + +.. _amdgpu-processors: + +Processors +---------- + +Use the Clang options ``-mcpu=<target-id>`` or ``--offload-arch=<target-id>`` to +specify the AMDGPU processor together with optional target features. See +:ref:`amdgpu-target-id` and :ref:`amdgpu-target-features` for AMD GPU target +specific information. + +Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following exceptions: + +* ``amdhsa`` is not supported in ``r600`` architecture (see :ref:`amdgpu-architecture-table`). + + + .. 
table:: AMDGPU Processors + :name: amdgpu-processor-table + + =========== =============== ============ ===== ================= =============== =============== ====================== + Processor Alternative Target dGPU/ Target Target OS Support Example + Processor Triple APU Features Properties *(see* Products + Architecture Supported `amdgpu-os`_ + *and + corresponding + runtime release + notes for + current + information and + level of + support)* + =========== =============== ============ ===== ================= =============== =============== ====================== + **Radeon HD 2000/3000 Series (R600)** [AMD-RADEON-HD-2000-3000]_ + ----------------------------------------------------------------------------------------------------------------------- + ``r600`` ``r600`` dGPU - Does not + support + generic + address + space + ``r630`` ``r600`` dGPU - Does not + support + generic + address + space + ``rs880`` ``r600`` dGPU - Does not + support + generic + address + space + ``rv670`` ``r600`` dGPU - Does not + support + generic + address + space + **Radeon HD 4000 Series (R700)** [AMD-RADEON-HD-4000]_ + ----------------------------------------------------------------------------------------------------------------------- + ``rv710`` ``r600`` dGPU - Does not + support + generic + address + space + ``rv730`` ``r600`` dGPU - Does not + support + generic + address + space + ``rv770`` ``r600`` dGPU - Does not + support + generic + address + space + **Radeon HD 5000 Series (Evergreen)** [AMD-RADEON-HD-5000]_ + ----------------------------------------------------------------------------------------------------------------------- + ``cedar`` ``r600`` dGPU - Does not + support + generic + address + space + ``cypress`` ``r600`` dGPU - Does not + support + generic + address + space + ``juniper`` ``r600`` dGPU - Does not + support + generic + address + space + ``redwood`` ``r600`` dGPU - Does not + support + generic + address + space + ``sumo`` ``r600`` dGPU - Does not + 
support + generic + address + space + **Radeon HD 6000 Series (Northern Islands)** [AMD-RADEON-HD-6000]_ + ----------------------------------------------------------------------------------------------------------------------- + ``barts`` ``r600`` dGPU - Does not + support + generic + address + space + ``caicos`` ``r600`` dGPU - Does not + support + generic + address + space + ``cayman`` ``r600`` dGPU - Does not + support + generic + address + space + ``turks`` ``r600`` dGPU - Does not + support + generic + address + space + **GCN GFX6 (Southern Islands (SI))** [AMD-GCN-GFX6]_ + ----------------------------------------------------------------------------------------------------------------------- + ``gfx600`` - ``tahiti`` ``amdgcn`` dGPU - Does not - *pal-amdpal* + support + generic + address + space + ``gfx601`` - ``pitcairn`` ``amdgcn`` dGPU - Does not - *pal-amdpal* + - ``verde`` support + generic + address + space + ``gfx602`` - ``hainan`` ``amdgcn`` dGPU - Does not - *pal-amdpal* + - ``oland`` support + generic + address + space + **GCN GFX7 (Sea Islands (CI))** [AMD-GCN-GFX7]_ + ----------------------------------------------------------------------------------------------------------------------- + ``gfx700`` - ``kaveri`` ``amdgcn`` APU - Offset - *rocm-amdhsa* - A6-7000 + flat - *pal-amdhsa* - A6 Pro-7050B + scratch - *pal-amdpal* - A8-7100 + - A8 Pro-7150B + - A10-7300 + - A10 Pro-7350B + - FX-7500 + - A8-7200P + - A10-7400P + - FX-7600P + ``gfx701`` - ``hawaii`` ``amdgcn`` dGPU - Offset - *rocm-amdhsa* - FirePro W8100 + flat - *pal-amdhsa* - FirePro W9100 + scratch - *pal-amdpal* - FirePro S9150 + - FirePro S9170 + ``gfx702`` ``amdgcn`` dGPU - Offset - *rocm-amdhsa* - Radeon R9 290 + flat - *pal-amdhsa* - Radeon R9 290x + scratch - *pal-amdpal* - Radeon R390 + - Radeon R390x + ``gfx703`` - ``kabini`` ``amdgcn`` APU - Offset - *pal-amdhsa* - E1-2100 + - ``mullins`` flat - *pal-amdpal* - E1-2200 + scratch - E1-2500 + - E2-3000 + - E2-3800 + - A4-5000 + - 
A4-5100 + - A6-5200 + - A4 Pro-3340B + ``gfx704`` - ``bonaire`` ``amdgcn`` dGPU - Offset - *pal-amdhsa* - Radeon HD 7790 + flat - *pal-amdpal* - Radeon HD 8770 + scratch - R7 260 + - R7 260X + ``gfx705`` ``amdgcn`` APU - Offset - *pal-amdhsa* *TBA* + flat - *pal-amdpal* + scratch .. TODO:: + + Add product + names. + + **GCN GFX8 (Volcanic Islands (VI))** [AMD-GCN-GFX8]_ + ----------------------------------------------------------------------------------------------------------------------- + ``gfx801`` - ``carrizo`` ``amdgcn`` APU - xnack - Offset - *rocm-amdhsa* - A6-8500P + flat - *pal-amdhsa* - Pro A6-8500B + scratch - *pal-amdpal* - A8-8600P + - Pro A8-8600B + - FX-8800P + - Pro A12-8800B + - A10-8700P + - Pro A10-8700B + - A10-8780P + - A10-9600P + - A10-9630P + - A12-9700P + - A12-9730P + - FX-9800P + - FX-9830P + - E2-9010 + - A6-9210 + - A9-9410 + ``gfx802`` - ``iceland`` ``amdgcn`` dGPU - Offset - *rocm-amdhsa* - Radeon R9 285 + - ``tonga`` flat - *pal-amdhsa* - Radeon R9 380 + scratch - *pal-amdpal* - Radeon R9 385 + ``gfx803`` - ``fiji`` ``amdgcn`` dGPU - *rocm-amdhsa* - Radeon R9 Nano + - *pal-amdhsa* - Radeon R9 Fury + - *pal-amdpal* - Radeon R9 FuryX + - Radeon Pro Duo + - FirePro S9300x2 + - Radeon Instinct MI8 + \ - ``polaris10`` ``amdgcn`` dGPU - Offset - *rocm-amdhsa* - Radeon RX 470 + flat - *pal-amdhsa* - Radeon RX 480 + scratch - *pal-amdpal* - Radeon Instinct MI6 + \ - ``polaris11`` ``amdgcn`` dGPU - Offset - *rocm-amdhsa* - Radeon RX 460 + flat - *pal-amdhsa* + scratch - *pal-amdpal* + ``gfx805`` - ``tongapro`` ``amdgcn`` dGPU - Offset - *rocm-amdhsa* - FirePro S7150 + flat - *pal-amdhsa* - FirePro S7100 + scratch - *pal-amdpal* - FirePro W7100 + - Mobile FirePro + M7170 + ``gfx810`` - ``stoney`` ``amdgcn`` APU - xnack - Offset - *rocm-amdhsa* *TBA* + flat - *pal-amdhsa* + scratch - *pal-amdpal* .. TODO:: + + Add product + names. 
+ + **GCN GFX9 (Vega)** [AMD-GCN-GFX900-GFX904-VEGA]_ [AMD-GCN-GFX906-VEGA7NM]_ [AMD-GCN-GFX908-CDNA1]_ + ----------------------------------------------------------------------------------------------------------------------- + ``gfx900`` ``amdgcn`` dGPU - xnack - Absolute - *rocm-amdhsa* - Radeon Vega + flat - *pal-amdhsa* Frontier Edition + scratch - *pal-amdpal* - Radeon RX Vega 56 + - Radeon RX Vega 64 + - Radeon RX Vega 64 + Liquid + - Radeon Instinct MI25 + ``gfx902`` ``amdgcn`` APU - xnack - Absolute - *rocm-amdhsa* - Ryzen 3 2200G + flat - *pal-amdhsa* - Ryzen 5 2400G + scratch - *pal-amdpal* + ``gfx904`` ``amdgcn`` dGPU - xnack - *rocm-amdhsa* *TBA* + - *pal-amdhsa* + - *pal-amdpal* .. TODO:: + + Add product + names. + + ``gfx906`` ``amdgcn`` dGPU - sramecc - Absolute - *rocm-amdhsa* - Radeon Instinct MI50 + - xnack flat - *pal-amdhsa* - Radeon Instinct MI60 + scratch - *pal-amdpal* - Radeon VII + - Radeon Pro VII + ``gfx908`` ``amdgcn`` dGPU - sramecc - *rocm-amdhsa* - AMD Instinct MI100 Accelerator + - xnack - Absolute + flat + scratch + ``gfx909`` ``amdgcn`` APU - xnack - Absolute - *pal-amdpal* *TBA* + flat + scratch .. TODO:: + + Add product + names. + + ``gfx90a`` ``amdgcn`` dGPU - sramecc - Absolute - *rocm-amdhsa* *TBA* + - tgsplit flat + - xnack scratch .. TODO:: + - Packed + work-item Add product + IDs names. 
+ + ``gfx90c`` ``amdgcn`` APU - xnack - Absolute - *pal-amdpal* - Ryzen 7 4700G + flat - Ryzen 7 4700GE + scratch - Ryzen 5 4600G + - Ryzen 5 4600GE + - Ryzen 3 4300G + - Ryzen 3 4300GE + - Ryzen Pro 4000G + - Ryzen 7 Pro 4700G + - Ryzen 7 Pro 4750GE + - Ryzen 5 Pro 4650G + - Ryzen 5 Pro 4650GE + - Ryzen 3 Pro 4350G + - Ryzen 3 Pro 4350GE + + **GCN GFX10 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_ + ----------------------------------------------------------------------------------------------------------------------- + ``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700 + - wavefrontsize64 flat - *pal-amdhsa* - Radeon RX 5700 XT + - xnack scratch - *pal-amdpal* - Radeon Pro 5600 XT + - Radeon Pro 5600M + ``gfx1011`` ``amdgcn`` dGPU - cumode - *rocm-amdhsa* - Radeon Pro V520 + - wavefrontsize64 - Absolute - *pal-amdhsa* + - xnack flat - *pal-amdpal* + scratch + ``gfx1012`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5500 + - wavefrontsize64 flat - *pal-amdhsa* - Radeon RX 5500 XT + - xnack scratch - *pal-amdpal* + ``gfx1013`` ``amdgcn`` APU - cumode - Absolute - *rocm-amdhsa* *TBA* + - wavefrontsize64 flat - *pal-amdhsa* + - xnack scratch - *pal-amdpal* .. TODO:: + + Add product + names. + + **GCN GFX10 (RDNA 2)** [AMD-GCN-GFX10-RDNA2]_ + ----------------------------------------------------------------------------------------------------------------------- + ``gfx1030`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 6800 + - wavefrontsize64 flat - *pal-amdhsa* - Radeon RX 6800 XT + scratch - *pal-amdpal* - Radeon RX 6900 XT + ``gfx1031`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 6700 XT + - wavefrontsize64 flat - *pal-amdhsa* + scratch - *pal-amdpal* + ``gfx1032`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* *TBA* + - wavefrontsize64 flat - *pal-amdhsa* + scratch - *pal-amdpal* .. TODO:: + + Add product + names. 
+ + ``gfx1033`` ``amdgcn`` APU - cumode - Absolute - *pal-amdpal* *TBA* + - wavefrontsize64 flat + scratch .. TODO:: + + Add product + names. + ``gfx1034`` ``amdgcn`` dGPU - cumode - Absolute - *pal-amdpal* *TBA* + - wavefrontsize64 flat + scratch .. TODO:: + + Add product + names. + + ``gfx1035`` ``amdgcn`` APU - cumode - Absolute - *pal-amdpal* *TBA* + - wavefrontsize64 flat + scratch .. TODO:: + Add product + names. + + =========== =============== ============ ===== ================= =============== =============== ====================== + +.. _amdgpu-target-features: + +Target Features +--------------- + +Target features control how code is generated to support certain +processor specific features. Not all target features are supported by +all processors. The runtime must ensure that the features supported by +the device used to execute the code match the features enabled when +generating the code. A mismatch of features may result in incorrect +execution, or a reduction in performance. + +The target features supported by each processor are listed in +:ref:`amdgpu-processor-table`. + +Target features are controlled by exactly one of the following Clang +options: + +``-mcpu=<target-id>`` or ``--offload-arch=<target-id>`` + + The ``-mcpu`` and ``--offload-arch`` can specify the target feature as + optional components of the target ID. If omitted, the target feature has the + ``any`` value. See :ref:`amdgpu-target-id`. + +``-m[no-]<target-feature>`` + + Target features not specified by the target ID are specified using a + separate option. These target features can have an ``on`` or ``off`` + value. ``on`` is specified by omitting the ``no-`` prefix, and + ``off`` is specified by including the ``no-`` prefix. The default + if not specified is ``off``. + +For example: + +``-mcpu=gfx908:xnack+`` + Enable the ``xnack`` feature. +``-mcpu=gfx908:xnack-`` + Disable the ``xnack`` feature. +``-mcumode`` + Enable the ``cumode`` feature. +``-mno-cumode`` + Disable the ``cumode`` feature. + + .. 
table:: AMDGPU Target Features + :name: amdgpu-target-features-table + + =============== ============================ ================================================== + Target Feature Clang Option to Control Description + Name + =============== ============================ ================================================== + cumode - ``-m[no-]cumode`` Control the wavefront execution mode used + when generating code for kernels. When disabled + native WGP wavefront execution mode is used, + when enabled CU wavefront execution mode is used + (see :ref:`amdgpu-amdhsa-memory-model`). + + sramecc - ``-mcpu`` If specified, generate code that can only be + - ``--offload-arch`` loaded and executed in a process that has a + matching setting for SRAMECC. + + If not specified for code object V2 to V3, generate + code that can be loaded and executed in a process + with SRAMECC enabled. + + If not specified for code object V4, generate + code that can be loaded and executed in a process + with either setting of SRAMECC. + + tgsplit ``-m[no-]tgsplit`` Enable/disable generating code that assumes + work-groups are launched in threadgroup split mode. + When enabled the waves of a work-group may be + launched in different CUs. + + wavefrontsize64 - ``-m[no-]wavefrontsize64`` Control the wavefront size used when + generating code for kernels. When disabled + native wavefront size 32 is used, when enabled + wavefront size 64 is used. + + xnack - ``-mcpu`` If specified, generate code that can only be + - ``--offload-arch`` loaded and executed in a process that has a + matching setting for XNACK replay. + + If not specified for code object V2 to V3, generate + code that can be loaded and executed in a process + with XNACK replay enabled. + + If not specified for code object V4, generate + code that can be loaded and executed in a process + with either setting of XNACK replay. + + XNACK replay can be used for demand paging and + page migration. 
If enabled in the device, then if + a page fault occurs the code may execute + incorrectly unless generated with XNACK replay + enabled, or generated for code object V4 without + specifying XNACK replay. Executing code that was + generated with XNACK replay enabled, or generated + for code object V4 without specifying XNACK replay, + on a device that does not have XNACK replay + enabled will execute correctly but may be less + performant than code generated for XNACK replay + disabled. + =============== ============================ ================================================== + +.. _amdgpu-target-id: + +Target ID +--------- + +AMDGPU supports target IDs. See `Clang Offload Bundler +<https://clang.llvm.org/docs/ClangOffloadBundler.html>`_ for a general +description. The AMDGPU target specific information is: + +**processor** + Is an AMDGPU processor or alternative processor name specified in + :ref:`amdgpu-processor-table`. The non-canonical form target ID allows both + the primary processor and alternative processor names. The canonical form + target ID only allows the primary processor name. + +**target-feature** + Is a target feature name specified in :ref:`amdgpu-target-features-table` that + is supported by the processor. The target features supported by each processor + are specified in :ref:`amdgpu-processor-table`. Those that can be specified in + a target ID are marked as being controlled by ``-mcpu`` and + ``--offload-arch``. Each target feature must appear at most once in a target + ID. The non-canonical form target ID allows the target features to be + specified in any order. The canonical form target ID requires the target + features to be specified in alphabetic order. + +.. _amdgpu-target-id-v2-v3: + +Code Object V2 to V3 Target ID +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The target ID syntax for code object V2 to V3 is the same as defined in `Clang +Offload Bundler <https://clang.llvm.org/docs/ClangOffloadBundler.html>`_ except +when used in the :ref:`amdgpu-assembler-directive-amdgcn-target` assembler +directive and the bundle entry ID. 
In those cases it has the following BNF +syntax: + +.. code:: + + <target-id> ::== <processor> ( "+" <target-feature> )* + +Where a target feature is omitted if *Off* and present if *On* or *Any*. + +.. note:: + + The code object V2 to V3 cannot represent *Any* and treats it the same as + *On*. + +.. _amdgpu-embedding-bundled-objects: + +Embedding Bundled Code Objects +------------------------------ + +AMDGPU supports the HIP and OpenMP languages that perform code object embedding +as described in `Clang Offload Bundler +<https://clang.llvm.org/docs/ClangOffloadBundler.html>`_. + +.. note:: + + The target ID syntax used for code object V2 to V3 for a bundle entry ID + differs from that used elsewhere. See :ref:`amdgpu-target-id-v2-v3`. + +.. _amdgpu-address-spaces: + +Address Spaces +-------------- + +The AMDGPU architecture supports a number of memory address spaces. The address +space names use the OpenCL standard names, with some additions. + +The AMDGPU address spaces correspond to target architecture specific LLVM +address space numbers used in LLVM IR. + +The AMDGPU address spaces are described in +:ref:`amdgpu-address-spaces-table`. Only 64-bit process address spaces are +supported for the ``amdgcn`` target. + + .. table:: AMDGPU Address Spaces + :name: amdgpu-address-spaces-table + + ================================= =============== =========== ================ ======= ============================ + .. 
64-Bit Process Address Space + --------------------------------- --------------- ----------- ---------------- ------------------------------------ + Address Space Name LLVM IR Address HSA Segment Hardware Address NULL Value + Space Number Name Name Size + ================================= =============== =========== ================ ======= ============================ + Generic 0 flat flat 64 0x0000000000000000 + Global 1 global global 64 0x0000000000000000 + Region 2 N/A GDS 32 *not implemented for AMDHSA* + Local 3 group LDS 32 0xFFFFFFFF + Constant 4 constant *same as global* 64 0x0000000000000000 + Private 5 private scratch 32 0xFFFFFFFF + Constant 32-bit 6 *TODO* 0x00000000 + Buffer Fat Pointer (experimental) 7 *TODO* + ================================= =============== =========== ================ ======= ============================ + +**Generic** + The generic address space is supported unless the *Target Properties* column + of :ref:`amdgpu-processor-table` specifies *Does not support generic address + space*. + + The generic address space uses the hardware flat address support for two fixed + ranges of virtual addresses (the private and local apertures), that are + outside the range of addressable global memory, to map from a flat address to + a private or local address. This uses FLAT instructions that can take a flat + address and access global, private (scratch), and group (LDS) memory depending + on if the address is within one of the aperture ranges. + + Flat access to scratch requires hardware aperture setup and setup in the + kernel prologue (see :ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`). Flat + access to LDS requires hardware aperture setup and M0 (GFX7-GFX8) register + setup (see :ref:`amdgpu-amdhsa-kernel-prolog-m0`). + + To convert between a private or group address space address (termed a segment + address) and a flat address the base address of the corresponding aperture + can be used. 
For GFX7-GFX8 these are available in the + :ref:`amdgpu-amdhsa-hsa-aql-queue` the address of which can be obtained with + Queue Ptr SGPR (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). For + GFX9-GFX10 the aperture base addresses are directly available as inline + constant registers ``SRC_SHARED_BASE/LIMIT`` and ``SRC_PRIVATE_BASE/LIMIT``. + In 64-bit address mode the aperture sizes are 2^32 bytes and the base is + aligned to 2^32 which makes it easier to convert from flat to segment or + segment to flat. + + A global address space address has the same value when used as a flat address + so no conversion is needed. + +**Global and Constant** + The global and constant address spaces both use global virtual addresses, + which are the same virtual address space used by the CPU. However, some + virtual addresses may only be accessible to the CPU, some only accessible + by the GPU, and some by both. + + Using the constant address space indicates that the data will not change + during the execution of the kernel. This allows scalar read instructions to + be used. As the constant address space could only be modified on the host + side, a generic pointer loaded from the constant address space is safe to be + assumed as a global pointer since only the device global memory is visible + and managed on the host side. The vector and scalar L1 caches are invalidated + of volatile data before each kernel dispatch execution to allow constant + memory to change values between kernel dispatches. + +**Region** + The region address space uses the hardware Global Data Store (GDS). All + wavefronts executing on the same device will access the same memory for any + given region address. However, the same region address accessed by wavefronts + executing on different devices will access different memory. It is higher + performance than global memory. It is allocated by the runtime. The data + store (DS) instructions can be used to access it. 
+ +**Local** + The local address space uses the hardware Local Data Store (LDS) which is + automatically allocated when the hardware creates the wavefronts of a + work-group, and freed when all the wavefronts of a work-group have + terminated. All wavefronts belonging to the same work-group will access the + same memory for any given local address. However, the same local address + accessed by wavefronts belonging to different work-groups will access + different memory. It is higher performance than global memory. The data store + (DS) instructions can be used to access it. + +**Private** + The private address space uses the hardware scratch memory support which + automatically allocates memory when it creates a wavefront and frees it when + a wavefront terminates. The memory accessed by a lane of a wavefront for any + given private address will be different to the memory accessed by another lane + of the same or different wavefront for the same private address. + + If a kernel dispatch uses scratch, then the hardware allocates memory from a + pool of backing memory allocated by the runtime for each wavefront. The lanes + of the wavefront access this using dword (4 byte) interleaving. The mapping + used from private address to backing memory address is: + + ``wavefront-scratch-base + + ((private-address / 4) * wavefront-size * 4) + + (wavefront-lane-id * 4) + (private-address % 4)`` + + If each lane of a wavefront accesses the same private address, the + interleaving results in adjacent dwords being accessed and hence requires + fewer cache lines to be fetched. + + There are different ways that the wavefront scratch base address is + determined by a wavefront (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Scratch memory can be accessed in an interleaved manner using buffer + instructions with the scratch buffer descriptor and per wavefront scratch + offset, by the scratch instructions, or by flat instructions. 
Multi-dword + access is not supported except by flat and scratch instructions in + GFX9-GFX10. + +**Constant 32-bit** + *TODO* + +**Buffer Fat Pointer** + The buffer fat pointer is an experimental address space that is currently + unsupported in the backend. It exposes a non-integral pointer that is in + the future intended to support the modelling of 128-bit buffer descriptors + plus a 32-bit offset into the buffer (in total encapsulating a 160-bit + *pointer*), allowing normal LLVM load/store/atomic operations to be used to + model the buffer descriptors used heavily in graphics workloads targeting + the backend. + +.. _amdgpu-memory-scopes: + +Memory Scopes +------------- + +This section provides LLVM memory synchronization scopes supported by the AMDGPU +backend memory model when the target triple OS is ``amdhsa`` (see +:ref:`amdgpu-amdhsa-memory-model` and :ref:`amdgpu-target-triples`). + +The memory model supported is based on the HSA memory model [HSA]_ which is +based in turn on HRF-indirect with scope inclusion [HRF]_. The happens-before +relation is transitive over the synchronizes-with relation independent of scope +and synchronizes-with allows the memory scope instances to be inclusive (see +table :ref:`amdgpu-amdhsa-llvm-sync-scopes-table`). + +This is different to the OpenCL [OpenCL]_ memory model which does not have scope +inclusion and requires the memory scopes to exactly match. However, this +is conservatively correct for OpenCL. + + .. table:: AMDHSA LLVM Sync Scopes + :name: amdgpu-amdhsa-llvm-sync-scopes-table + + ======================= =================================================== + LLVM Sync Scope Description + ======================= =================================================== + *none* The default: ``system``. 
+ + Synchronizes with, and participates in modification + and seq_cst total orderings with, other operations + (except image operations) for all address spaces + (except private, or generic that accesses private) + provided the other operation's sync scope is: + + - ``system``. + - ``agent`` and executed by a thread on the same + agent. + - ``workgroup`` and executed by a thread in the + same work-group. + - ``wavefront`` and executed by a thread in the + same wavefront. + + ``agent`` Synchronizes with, and participates in modification + and seq_cst total orderings with, other operations + (except image operations) for all address spaces + (except private, or generic that accesses private) + provided the other operation's sync scope is: + + - ``system`` or ``agent`` and executed by a thread + on the same agent. + - ``workgroup`` and executed by a thread in the + same work-group. + - ``wavefront`` and executed by a thread in the + same wavefront. + + ``workgroup`` Synchronizes with, and participates in modification + and seq_cst total orderings with, other operations + (except image operations) for all address spaces + (except private, or generic that accesses private) + provided the other operation's sync scope is: + + - ``system``, ``agent`` or ``workgroup`` and + executed by a thread in the same work-group. + - ``wavefront`` and executed by a thread in the + same wavefront. + + ``wavefront`` Synchronizes with, and participates in modification + and seq_cst total orderings with, other operations + (except image operations) for all address spaces + (except private, or generic that accesses private) + provided the other operation's sync scope is: + + - ``system``, ``agent``, ``workgroup`` or + ``wavefront`` and executed by a thread in the + same wavefront. 
+ + ``singlethread`` Only synchronizes with and participates in + modification and seq_cst total orderings with, + other operations (except image operations) running + in the same thread for all address spaces (for + example, in signal handlers). + + ``one-as`` Same as ``system`` but only synchronizes with other + operations within the same address space. + + ``agent-one-as`` Same as ``agent`` but only synchronizes with other + operations within the same address space. + + ``workgroup-one-as`` Same as ``workgroup`` but only synchronizes with + other operations within the same address space. + + ``wavefront-one-as`` Same as ``wavefront`` but only synchronizes with + other operations within the same address space. + + ``singlethread-one-as`` Same as ``singlethread`` but only synchronizes with + other operations within the same address space. + ======================= =================================================== + +LLVM IR Intrinsics +------------------ + +The AMDGPU backend implements the following LLVM IR intrinsics. + +*This section is WIP.* + +.. TODO:: + + List AMDGPU intrinsics. + +LLVM IR Attributes +------------------ + +The AMDGPU backend supports the following LLVM IR attributes. + + .. table:: AMDGPU LLVM IR Attributes + :name: amdgpu-llvm-ir-attributes-table + + ======================================= ========================================================== + LLVM Attribute Description + ======================================= ========================================================== + "amdgpu-flat-work-group-size"="min,max" Specify the minimum and maximum flat work group sizes that + will be specified when the kernel is dispatched. Generated + by the ``amdgpu_flat_work_group_size`` CLANG attribute [CLANG-ATTR]_. + "amdgpu-implicitarg-num-bytes"="n" Number of kernel argument bytes to add to the kernel + argument block size for the implicit arguments. 
This + varies by OS and language (for OpenCL see + :ref:`opencl-kernel-implicit-arguments-appended-for-amdhsa-os-table`). + "amdgpu-num-sgpr"="n" Specifies the number of SGPRs to use. Generated by + the ``amdgpu_num_sgpr`` CLANG attribute [CLANG-ATTR]_. + "amdgpu-num-vgpr"="n" Specifies the number of VGPRs to use. Generated by the + ``amdgpu_num_vgpr`` CLANG attribute [CLANG-ATTR]_. + "amdgpu-waves-per-eu"="m,n" Specify the minimum and maximum number of waves per + execution unit. Generated by the ``amdgpu_waves_per_eu`` + CLANG attribute [CLANG-ATTR]_. + "amdgpu-ieee" true/false. Specify whether the function expects the IEEE field of the + mode register to be set on entry. Overrides the default for + the calling convention. + "amdgpu-dx10-clamp" true/false. Specify whether the function expects the DX10_CLAMP field of + the mode register to be set on entry. Overrides the default + for the calling convention. + + "amdgpu-no-workitem-id-x" Indicates the function does not depend on the value of the + llvm.amdgcn.workitem.id.x intrinsic. If a function is marked with this + attribute, or reached through a call site marked with this attribute, + the value returned by the intrinsic is undefined. The backend can + generally infer this during code generation, so typically there is no + benefit to frontends marking functions with this. + + "amdgpu-no-workitem-id-y" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.workitem.id.y intrinsic. + + "amdgpu-no-workitem-id-z" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.workitem.id.z intrinsic. + + "amdgpu-no-workgroup-id-x" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.workgroup.id.x intrinsic. + + "amdgpu-no-workgroup-id-y" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.workgroup.id.y intrinsic. + + "amdgpu-no-workgroup-id-z" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.workgroup.id.z intrinsic. 
+ + "amdgpu-no-dispatch-ptr" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.dispatch.ptr intrinsic. + + "amdgpu-no-implicitarg-ptr" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.implicitarg.ptr intrinsic. + + "amdgpu-no-dispatch-id" The same as amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.dispatch.id intrinsic. + + "amdgpu-no-queue-ptr" Similar to amdgpu-no-workitem-id-x, except for the + llvm.amdgcn.queue.ptr intrinsic. Note that unlike the other ABI hint + attributes, the queue pointer may be required in situations where the + intrinsic call does not directly appear in the program. Some subtargets + require the queue pointer to handle some addrspacecasts, as well + as the llvm.amdgcn.is.shared, llvm.amdgcn.is.private, llvm.trap, and + llvm.debugtrap intrinsics. + + ======================================= ========================================================== + +.. _amdgpu-elf-code-object: + +ELF Code Object +=============== + +The AMDGPU backend generates a standard ELF [ELF]_ relocatable code object that +can be linked by ``lld`` to produce a standard ELF shared code object which can +be loaded and executed on an AMDGPU target. + +.. _amdgpu-elf-header: + +Header +------ + +The AMDGPU backend uses the following ELF header: + + ..
table:: AMDGPU ELF Header + :name: amdgpu-elf-header-table + + ========================== =============================== + Field Value + ========================== =============================== + ``e_ident[EI_CLASS]`` ``ELFCLASS64`` + ``e_ident[EI_DATA]`` ``ELFDATA2LSB`` + ``e_ident[EI_OSABI]`` - ``ELFOSABI_NONE`` + - ``ELFOSABI_AMDGPU_HSA`` + - ``ELFOSABI_AMDGPU_PAL`` + - ``ELFOSABI_AMDGPU_MESA3D`` + ``e_ident[EI_ABIVERSION]`` - ``ELFABIVERSION_AMDGPU_HSA_V2`` + - ``ELFABIVERSION_AMDGPU_HSA_V3`` + - ``ELFABIVERSION_AMDGPU_HSA_V4`` + - ``ELFABIVERSION_AMDGPU_PAL`` + - ``ELFABIVERSION_AMDGPU_MESA3D`` + ``e_type`` - ``ET_REL`` + - ``ET_DYN`` + ``e_machine`` ``EM_AMDGPU`` + ``e_entry`` 0 + ``e_flags`` See :ref:`amdgpu-elf-header-e_flags-v2-table`, + :ref:`amdgpu-elf-header-e_flags-table-v3`, + and :ref:`amdgpu-elf-header-e_flags-table-v4` + ========================== =============================== + +.. + + .. table:: AMDGPU ELF Header Enumeration Values + :name: amdgpu-elf-header-enumeration-values-table + + =============================== ===== + Name Value + =============================== ===== + ``EM_AMDGPU`` 224 + ``ELFOSABI_NONE`` 0 + ``ELFOSABI_AMDGPU_HSA`` 64 + ``ELFOSABI_AMDGPU_PAL`` 65 + ``ELFOSABI_AMDGPU_MESA3D`` 66 + ``ELFABIVERSION_AMDGPU_HSA_V2`` 0 + ``ELFABIVERSION_AMDGPU_HSA_V3`` 1 + ``ELFABIVERSION_AMDGPU_HSA_V4`` 2 + ``ELFABIVERSION_AMDGPU_PAL`` 0 + ``ELFABIVERSION_AMDGPU_MESA3D`` 0 + =============================== ===== + +``e_ident[EI_CLASS]`` + The ELF class is: + + * ``ELFCLASS32`` for ``r600`` architecture. + + * ``ELFCLASS64`` for ``amdgcn`` architecture which only supports 64-bit + process address space applications. + +``e_ident[EI_DATA]`` + All AMDGPU targets use ``ELFDATA2LSB`` for little-endian byte ordering. + +``e_ident[EI_OSABI]`` + One of the following AMDGPU target architecture specific OS ABIs + (see :ref:`amdgpu-os`): + + * ``ELFOSABI_NONE`` for *unknown* OS. + + * ``ELFOSABI_AMDGPU_HSA`` for ``amdhsa`` OS. 
+ + * ``ELFOSABI_AMDGPU_PAL`` for ``amdpal`` OS. + + * ``ELFOSABI_AMDGPU_MESA3D`` for ``mesa3d`` OS. + +``e_ident[EI_ABIVERSION]`` + The ABI version of the AMDGPU target architecture specific OS ABI to which the code + object conforms: + + * ``ELFABIVERSION_AMDGPU_HSA_V2`` is used to specify the version of AMD HSA + runtime ABI for code object V2. Specify using the Clang option + ``-mcode-object-version=2``. + + * ``ELFABIVERSION_AMDGPU_HSA_V3`` is used to specify the version of AMD HSA + runtime ABI for code object V3. Specify using the Clang option + ``-mcode-object-version=3``. This is the default code object + version if not specified. + + * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA + runtime ABI for code object V4. Specify using the Clang option + ``-mcode-object-version=4``. + + * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL + runtime ABI. + + * ``ELFABIVERSION_AMDGPU_MESA3D`` is used to specify the version of AMD MESA + 3D runtime ABI. + +``e_type`` + Can be one of the following values: + + + ``ET_REL`` + The type produced by the AMDGPU backend compiler as it is a relocatable code + object. + + ``ET_DYN`` + The type produced by the linker as it is a shared code object. + + The AMD HSA runtime loader requires an ``ET_DYN`` code object. + +``e_machine`` + The value ``EM_AMDGPU`` is used for the machine for all processors supported + by the ``r600`` and ``amdgcn`` architectures (see + :ref:`amdgpu-processor-table`). The specific processor is specified in the + ``NT_AMD_HSA_ISA_VERSION`` note record for code object V2 (see + :ref:`amdgpu-note-records-v2`) and in the ``EF_AMDGPU_MACH`` bit field of the + ``e_flags`` for code object V3 to V4 (see + :ref:`amdgpu-elf-header-e_flags-table-v3` and + :ref:`amdgpu-elf-header-e_flags-table-v4`). + +``e_entry`` + The entry point is 0 as the entry points for individual kernels must be + selected in order to invoke them through AQL packets.
+ +``e_flags`` + The AMDGPU backend uses the following ELF header flags: + + .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V2 + :name: amdgpu-elf-header-e_flags-v2-table + + ===================================== ===== ============================= + Name Value Description + ===================================== ===== ============================= + ``EF_AMDGPU_FEATURE_XNACK_V2`` 0x01 Indicates if the ``xnack`` + target feature is + enabled for all code + contained in the code object. + If the processor + does not support the + ``xnack`` target + feature then must + be 0. + See + :ref:`amdgpu-target-features`. + ``EF_AMDGPU_FEATURE_TRAP_HANDLER_V2`` 0x02 Indicates if the trap + handler is enabled for all + code contained in the code + object. If the processor + does not support a trap + handler then must be 0. + See + :ref:`amdgpu-target-features`. + ===================================== ===== ============================= + + .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V3 + :name: amdgpu-elf-header-e_flags-table-v3 + + ================================= ===== ============================= + Name Value Description + ================================= ===== ============================= + ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection + mask for + ``EF_AMDGPU_MACH_xxx`` values + defined in + :ref:`amdgpu-ef-amdgpu-mach-table`. + ``EF_AMDGPU_FEATURE_XNACK_V3`` 0x100 Indicates if the ``xnack`` + target feature is + enabled for all code + contained in the code object. + If the processor + does not support the + ``xnack`` target + feature then must + be 0. + See + :ref:`amdgpu-target-features`. + ``EF_AMDGPU_FEATURE_SRAMECC_V3`` 0x200 Indicates if the ``sramecc`` + target feature is + enabled for all code + contained in the code object. + If the processor + does not support the + ``sramecc`` target + feature then must + be 0. + See + :ref:`amdgpu-target-features`. + ================================= ===== ============================= + + .. 
table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 + :name: amdgpu-elf-header-e_flags-table-v4 + + ============================================ ===== =================================== + Name Value Description + ============================================ ===== =================================== + ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection + mask for + ``EF_AMDGPU_MACH_xxx`` values + defined in + :ref:`amdgpu-ef-amdgpu-mach-table`. + ``EF_AMDGPU_FEATURE_XNACK_V4`` 0x300 XNACK selection mask for + ``EF_AMDGPU_FEATURE_XNACK_*_V4`` + values. + ``EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4`` 0x000 XNACK unsupported. + ``EF_AMDGPU_FEATURE_XNACK_ANY_V4`` 0x100 XNACK can have any value. + ``EF_AMDGPU_FEATURE_XNACK_OFF_V4`` 0x200 XNACK disabled. + ``EF_AMDGPU_FEATURE_XNACK_ON_V4`` 0x300 XNACK enabled. + ``EF_AMDGPU_FEATURE_SRAMECC_V4`` 0xc00 SRAMECC selection mask for + ``EF_AMDGPU_FEATURE_SRAMECC_*_V4`` + values. + ``EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4`` 0x000 SRAMECC unsupported. + ``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value. + ``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled. + ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled. + ============================================ ===== =================================== + + ..
table:: AMDGPU ``EF_AMDGPU_MACH`` Values + :name: amdgpu-ef-amdgpu-mach-table + + ==================================== ========== ============================= + Name Value Description (see + :ref:`amdgpu-processor-table`) + ==================================== ========== ============================= + ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified* + ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600`` + ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630`` + ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880`` + ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670`` + ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710`` + ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730`` + ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770`` + ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar`` + ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress`` + ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper`` + ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood`` + ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo`` + ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts`` + ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos`` + ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman`` + ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks`` + *reserved* 0x011 - Reserved for ``r600`` + 0x01f architecture processors. + ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600`` + ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601`` + ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700`` + ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701`` + ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702`` + ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703`` + ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704`` + *reserved* 0x027 Reserved. 
+ ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801`` + ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802`` + ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803`` + ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810`` + ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900`` + ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902`` + ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904`` + ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906`` + ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908`` + ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909`` + ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033`` + ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602`` + ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705`` + ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035`` + ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034`` + ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a`` + *reserved* 0x040 Reserved. + *reserved* 0x041 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013`` + *reserved* 0x043 Reserved. + *reserved* 0x044 Reserved. + *reserved* 0x045 Reserved. + ==================================== ========== ============================= + +Sections +-------- + +An AMDGPU target ELF code object has the standard ELF sections which include: + + .. 
table:: AMDGPU ELF Sections + :name: amdgpu-elf-sections-table + + ================== ================ ================================= + Name Type Attributes + ================== ================ ================================= + ``.bss`` ``SHT_NOBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.data`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.debug_``\ *\** ``SHT_PROGBITS`` *none* + ``.dynamic`` ``SHT_DYNAMIC`` ``SHF_ALLOC`` + ``.dynstr`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.dynsym`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.got`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_WRITE`` + ``.hash`` ``SHT_HASH`` ``SHF_ALLOC`` + ``.note`` ``SHT_NOTE`` *none* + ``.rela``\ *name* ``SHT_RELA`` *none* + ``.rela.dyn`` ``SHT_RELA`` *none* + ``.rodata`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``.shstrtab`` ``SHT_STRTAB`` *none* + ``.strtab`` ``SHT_STRTAB`` *none* + ``.symtab`` ``SHT_SYMTAB`` *none* + ``.text`` ``SHT_PROGBITS`` ``SHF_ALLOC`` + ``SHF_EXECINSTR`` + ================== ================ ================================= + +These sections have their standard meanings (see [ELF]_) and are only generated +if needed. + +``.debug_``\ *\** + The standard DWARF sections. See :ref:`amdgpu-dwarf-debug-information` for + information on the DWARF produced by the AMDGPU backend. + +``.dynamic``, ``.dynstr``, ``.dynsym``, ``.hash`` + The standard sections used by a dynamic loader. + +``.note`` + See :ref:`amdgpu-note-records` for the note records supported by the AMDGPU + backend. + +``.rela``\ *name*, ``.rela.dyn`` + For relocatable code objects, *name* is the name of the section that the + relocation records apply to. For example, ``.rela.text`` is the section name for + relocation records associated with the ``.text`` section. + + For linked shared code objects, ``.rela.dyn`` contains all the relocation + records from each of the relocatable code object's ``.rela``\ *name* sections. + + See :ref:`amdgpu-relocation-records` for the relocation records supported by + the AMDGPU backend.
+ +``.text`` + The executable machine code for the kernels and functions they call. Generated + as position independent code. See :ref:`amdgpu-code-conventions` for + information on conventions used in the ISA generation. + +.. _amdgpu-note-records: + +Note Records +------------ + +The AMDGPU backend code object contains ELF note records in the ``.note`` +section. The set of generated notes and their semantics depend on the code +object version; see :ref:`amdgpu-note-records-v2` and +:ref:`amdgpu-note-records-v3-v4`. + +As required by ``ELFCLASS32`` and ``ELFCLASS64``, minimal zero-byte padding +must be generated after the ``name`` field to ensure the ``desc`` field is 4 +byte aligned. In addition, minimal zero-byte padding must be generated to +ensure the ``desc`` field size is a multiple of 4 bytes. The ``sh_addralign`` +field of the ``.note`` section must be at least 4 to indicate at least 4 byte +alignment. + +.. _amdgpu-note-records-v2: + +Code Object V2 Note Records +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + Code object V2 is not the default code object version emitted by + this version of LLVM. + +The AMDGPU backend code object uses the following ELF note record in the +``.note`` section when compiling for code object V2. + +The note record vendor field is "AMD". + +Additional note records may be present, but any which are not documented here +are deprecated and should not be used. + + .. table:: AMDGPU Code Object V2 ELF Note Records + :name: amdgpu-elf-note-records-v2-table + + ===== ===================================== ====================================== + Name Type Description + ===== ===================================== ====================================== + "AMD" ``NT_AMD_HSA_CODE_OBJECT_VERSION`` Code object version. + "AMD" ``NT_AMD_HSA_HSAIL`` HSAIL properties generated by the HSAIL + Finalizer and not the LLVM compiler. + "AMD" ``NT_AMD_HSA_ISA_VERSION`` Target ISA version.
+ "AMD" ``NT_AMD_HSA_METADATA`` Metadata null terminated string in + YAML [YAML]_ textual format. + "AMD" ``NT_AMD_HSA_ISA_NAME`` Target ISA name. + ===== ===================================== ====================================== + +.. + + .. table:: AMDGPU Code Object V2 ELF Note Record Enumeration Values + :name: amdgpu-elf-note-record-enumeration-values-v2-table + + ===================================== ===== + Name Value + ===================================== ===== + ``NT_AMD_HSA_CODE_OBJECT_VERSION`` 1 + ``NT_AMD_HSA_HSAIL`` 2 + ``NT_AMD_HSA_ISA_VERSION`` 3 + *reserved* 4-9 + ``NT_AMD_HSA_METADATA`` 10 + ``NT_AMD_HSA_ISA_NAME`` 11 + ===================================== ===== + +``NT_AMD_HSA_CODE_OBJECT_VERSION`` + Specifies the code object version number. The description field has the + following layout: + + .. code:: c + + struct amdgpu_hsa_note_code_object_version_s { + uint32_t major_version; + uint32_t minor_version; + }; + + The ``major_version`` has a value less than or equal to 2. + +``NT_AMD_HSA_HSAIL`` + Specifies the HSAIL properties used by the HSAIL Finalizer. The description + field has the following layout: + + .. code:: c + + struct amdgpu_hsa_note_hsail_s { + uint32_t hsail_major_version; + uint32_t hsail_minor_version; + uint8_t profile; + uint8_t machine_model; + uint8_t default_float_round; + }; + +``NT_AMD_HSA_ISA_VERSION`` + Specifies the target ISA version. The description field has the following layout: + + .. code:: c + + struct amdgpu_hsa_note_isa_s { + uint16_t vendor_name_size; + uint16_t architecture_name_size; + uint32_t major; + uint32_t minor; + uint32_t stepping; + char vendor_and_architecture_name[1]; + }; + + ``vendor_name_size`` and ``architecture_name_size`` are the length of the + vendor and architecture names respectively, including the NUL character. + + ``vendor_and_architecture_name`` contains the NUL terminated string for the + vendor, immediately followed by the NUL terminated string for the + architecture.
+ + This note record is used by the HSA runtime loader. + + Code object V2 only supports a limited number of processors and has fixed + settings for target features. See + :ref:`amdgpu-elf-note-record-supported_processors-v2-table` for a list of + processors and the corresponding target ID. In the table the note record ISA + name is a concatenation of the vendor name, architecture name, major, minor, + and stepping separated by a ":". + + The target ID column shows the processor name and fixed target features used + by the LLVM compiler. The LLVM compiler does not generate a + ``NT_AMD_HSA_HSAIL`` note record. + + A code object generated by the Finalizer also uses code object V2 and always + generates a ``NT_AMD_HSA_HSAIL`` note record. The processor name and + ``sramecc`` target feature are as shown in + :ref:`amdgpu-elf-note-record-supported_processors-v2-table` but the ``xnack`` + target feature is specified by the ``EF_AMDGPU_FEATURE_XNACK_V2`` ``e_flags`` + bit. + +``NT_AMD_HSA_ISA_NAME`` + Specifies the target ISA name as a non-NUL terminated string. + + This note record is not used by the HSA runtime loader. + + See the ``NT_AMD_HSA_ISA_VERSION`` note record description of the code object + V2's limited support of processors and fixed settings for target features. + + See :ref:`amdgpu-elf-note-record-supported_processors-v2-table` for a mapping + from the string to the corresponding target ID. If the ``xnack`` target + feature is supported and enabled, the string produced by the LLVM compiler + may have a ``+xnack`` appended. The Finalizer did not do the appending and + instead used the ``EF_AMDGPU_FEATURE_XNACK_V2`` ``e_flags`` bit. + +``NT_AMD_HSA_METADATA`` + Specifies extensible metadata associated with the code objects executed on HSA + [HSA]_ compatible runtimes (see :ref:`amdgpu-os`). It is required when the + target triple OS is ``amdhsa`` (see :ref:`amdgpu-target-triples`).
See + :ref:`amdgpu-amdhsa-code-object-metadata-v2` for the syntax of the code object + metadata string. + + .. table:: AMDGPU Code Object V2 Supported Processors and Fixed Target Feature Settings + :name: amdgpu-elf-note-record-supported_processors-v2-table + + ===================== ========================== + Note Record ISA Name Target ID + ===================== ========================== + ``AMD:AMDGPU:6:0:0`` ``gfx600`` + ``AMD:AMDGPU:6:0:1`` ``gfx601`` + ``AMD:AMDGPU:6:0:2`` ``gfx602`` + ``AMD:AMDGPU:7:0:0`` ``gfx700`` + ``AMD:AMDGPU:7:0:1`` ``gfx701`` + ``AMD:AMDGPU:7:0:2`` ``gfx702`` + ``AMD:AMDGPU:7:0:3`` ``gfx703`` + ``AMD:AMDGPU:7:0:4`` ``gfx704`` + ``AMD:AMDGPU:7:0:5`` ``gfx705`` + ``AMD:AMDGPU:8:0:0`` ``gfx802`` + ``AMD:AMDGPU:8:0:1`` ``gfx801:xnack+`` + ``AMD:AMDGPU:8:0:2`` ``gfx802`` + ``AMD:AMDGPU:8:0:3`` ``gfx803`` + ``AMD:AMDGPU:8:0:4`` ``gfx803`` + ``AMD:AMDGPU:8:0:5`` ``gfx805`` + ``AMD:AMDGPU:8:1:0`` ``gfx810:xnack+`` + ``AMD:AMDGPU:9:0:0`` ``gfx900:xnack-`` + ``AMD:AMDGPU:9:0:1`` ``gfx900:xnack+`` + ``AMD:AMDGPU:9:0:2`` ``gfx902:xnack-`` + ``AMD:AMDGPU:9:0:3`` ``gfx902:xnack+`` + ``AMD:AMDGPU:9:0:4`` ``gfx904:xnack-`` + ``AMD:AMDGPU:9:0:5`` ``gfx904:xnack+`` + ``AMD:AMDGPU:9:0:6`` ``gfx906:sramecc-:xnack-`` + ``AMD:AMDGPU:9:0:7`` ``gfx906:sramecc-:xnack+`` + ``AMD:AMDGPU:9:0:12`` ``gfx90c:xnack-`` + ===================== ========================== + +.. _amdgpu-note-records-v3-v4: + +Code Object V3 to V4 Note Records +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The AMDGPU backend code object uses the following ELF note record in the +``.note`` section when compiling for code object V3 to V4. + +The note record vendor field is "AMDGPU". + +Additional note records may be present, but any which are not documented here +are deprecated and should not be used. + + .. 
table:: AMDGPU Code Object V3 to V4 ELF Note Records + :name: amdgpu-elf-note-records-table-v3-v4 + + ======== ============================== ====================================== + Name Type Description + ======== ============================== ====================================== + "AMDGPU" ``NT_AMDGPU_METADATA`` Metadata in Message Pack [MsgPack]_ + binary format. + ======== ============================== ====================================== + +.. + + .. table:: AMDGPU Code Object V3 to V4 ELF Note Record Enumeration Values + :name: amdgpu-elf-note-record-enumeration-values-table-v3-v4 + + ============================== ===== + Name Value + ============================== ===== + *reserved* 0-31 + ``NT_AMDGPU_METADATA`` 32 + ============================== ===== + +``NT_AMDGPU_METADATA`` + Specifies extensible metadata associated with an AMDGPU code object. It is + encoded as a map in the Message Pack [MsgPack]_ binary data format. See + :ref:`amdgpu-amdhsa-code-object-metadata-v3` and + :ref:`amdgpu-amdhsa-code-object-metadata-v4` for the map keys defined for the + ``amdhsa`` OS. + +.. _amdgpu-symbols: + +Symbols +------- + +Symbols include the following: + + .. table:: AMDGPU ELF Symbols + :name: amdgpu-elf-symbols-table + + ===================== ================== ================ ================== + Name Type Section Description + ===================== ================== ================ ================== + *link-name* ``STT_OBJECT`` - ``.data`` Global variable + - ``.rodata`` + - ``.bss`` + *link-name*\ ``.kd`` ``STT_OBJECT`` - ``.rodata`` Kernel descriptor + *link-name* ``STT_FUNC`` - ``.text`` Kernel entry point + *link-name* ``STT_OBJECT`` - SHN_AMDGPU_LDS Global variable in LDS + ===================== ================== ================ ================== + +Global variable + Global variables both used and defined by the compilation unit. 
+ + If the symbol is defined in the compilation unit then it is allocated in the + appropriate section according to whether it has initialized data or is readonly. + + If the symbol is external then its section is ``SHN_UNDEF`` and the loader + will resolve relocations using the definition provided by another code object + or explicitly defined by the runtime. + + If the symbol resides in local/group memory (LDS) then its section is the + special processor specific section name ``SHN_AMDGPU_LDS``, and the + ``st_value`` field describes alignment requirements as it does for common + symbols. + + .. TODO:: + + Add description of linked shared object symbols. Seems undefined symbols + are marked as STT_NOTYPE. + +Kernel descriptor + Every HSA kernel has an associated kernel descriptor. It is the address of the + kernel descriptor that is used in the AQL dispatch packet used to invoke the + kernel, not the kernel entry point. The layout of the HSA kernel descriptor is + defined in :ref:`amdgpu-amdhsa-kernel-descriptor`. + +Kernel entry point + Every HSA kernel also has a symbol for its machine code entry point. + +.. _amdgpu-relocation-records: + +Relocation Records +------------------ + +AMDGPU backend generates ``Elf64_Rela`` relocation records. Supported +relocatable fields are: + +``word32`` + This specifies a 32-bit field occupying 4 bytes with arbitrary byte + alignment. These values use the same byte order as other word values in the + AMDGPU architecture. + +``word64`` + This specifies a 64-bit field occupying 8 bytes with arbitrary byte + alignment. These values use the same byte order as other word values in the + AMDGPU architecture. + +Following notations are used for specifying relocation calculations: + +**A** + Represents the addend used to compute the value of the relocatable field. + +**G** + Represents the offset into the global offset table at which the relocation + entry's symbol will reside during execution.
+ +**GOT** + Represents the address of the global offset table. + +**P** + Represents the place (section offset for ``et_rel`` or address for ``et_dyn``) + of the storage unit being relocated (computed using ``r_offset``). + +**S** + Represents the value of the symbol whose index resides in the relocation + entry. Relocations not using this must specify a symbol index of + ``STN_UNDEF``. + +**B** + Represents the base address of a loaded executable or shared object which is + the difference between the ELF address and the actual load address. + Relocations using this are only valid in executable or shared objects. + +The following relocation types are supported: + + .. table:: AMDGPU ELF Relocation Records + :name: amdgpu-elf-relocation-records-table + + ========================== ======= ===== ========== ============================== + Relocation Type Kind Value Field Calculation + ========================== ======= ===== ========== ============================== + ``R_AMDGPU_NONE`` 0 *none* *none* + ``R_AMDGPU_ABS32_LO`` Static, 1 ``word32`` (S + A) & 0xFFFFFFFF + Dynamic + ``R_AMDGPU_ABS32_HI`` Static, 2 ``word32`` (S + A) >> 32 + Dynamic + ``R_AMDGPU_ABS64`` Static, 3 ``word64`` S + A + Dynamic + ``R_AMDGPU_REL32`` Static 4 ``word32`` S + A - P + ``R_AMDGPU_REL64`` Static 5 ``word64`` S + A - P + ``R_AMDGPU_ABS32`` Static, 6 ``word32`` S + A + Dynamic + ``R_AMDGPU_GOTPCREL`` Static 7 ``word32`` G + GOT + A - P + ``R_AMDGPU_GOTPCREL32_LO`` Static 8 ``word32`` (G + GOT + A - P) & 0xFFFFFFFF + ``R_AMDGPU_GOTPCREL32_HI`` Static 9 ``word32`` (G + GOT + A - P) >> 32 + ``R_AMDGPU_REL32_LO`` Static 10 ``word32`` (S + A - P) & 0xFFFFFFFF + ``R_AMDGPU_REL32_HI`` Static 11 ``word32`` (S + A - P) >> 32 + *reserved* 12 + ``R_AMDGPU_RELATIVE64`` Dynamic 13 ``word64`` B + A + ``R_AMDGPU_REL16`` Static 14 ``word16`` ((S + A - P) - 4) / 4 + ========================== ======= ===== ========== ============================== + +``R_AMDGPU_ABS32_LO`` and ``R_AMDGPU_ABS32_HI`` are 
only supported by +the ``mesa3d`` OS, which does not support ``R_AMDGPU_ABS64``. + +There is no current OS loader support for 32-bit programs and so +``R_AMDGPU_ABS32`` is not used. + +.. _amdgpu-loaded-code-object-path-uniform-resource-identifier: + +Loaded Code Object Path Uniform Resource Identifier (URI) +--------------------------------------------------------- + +The AMD GPU code object loader represents the path of the ELF shared object from +which the code object was loaded as a textual Uniform Resource Identifier (URI). +Note that the code object is the in memory loaded relocated form of the ELF +shared object. Multiple code objects may be loaded at different memory +addresses in the same process from the same ELF shared object. + +The loaded code object path URI syntax is defined by the following BNF syntax: + +.. code:: + + code_object_uri ::== file_uri | memory_uri + file_uri ::== "file://" file_path [ range_specifier ] + memory_uri ::== "memory://" process_id range_specifier + range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number + file_path ::== URI_ENCODED_OS_FILE_PATH + process_id ::== DECIMAL_NUMBER + number ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER + +**number** + Is a C integral literal where hexadecimal values are prefixed by "0x" or "0X", + and octal values by "0". + +**file_path** + Is the file's path specified as a URI encoded UTF-8 string. In URI encoding, + every character that is not in the regular expression ``[a-zA-Z0-9/_.~-]`` is + encoded as two uppercase hexadecimal digits preceded by "%". Directories in + the path are separated by "/". + +**offset** + Is a 0-based byte offset to the start of the code object. For a file URI, it + is from the start of the file specified by the ``file_path``, and if omitted + defaults to 0. For a memory URI, it is the memory address and is required. + +**size** + Is the number of bytes in the code object. For a file URI, if omitted it + defaults to the size of the file.
It is required for a memory URI. + +**process_id** + Is the identity of the process owning the memory. For Linux it is the C + unsigned integral decimal literal for the process ID (PID). + +For example: + +.. code:: + + file:///dir1/dir2/file1 + file:///dir3/dir4/file2#offset=0x2000&size=3000 + memory://1234#offset=0x20000&size=3000 + +.. _amdgpu-dwarf-debug-information: + +DWARF Debug Information +======================= + +.. warning:: + + This section describes **provisional support** for AMDGPU DWARF [DWARF]_ that + is not currently fully implemented and is subject to change. + +AMDGPU generates DWARF [DWARF]_ debugging information ELF sections (see +:ref:`amdgpu-elf-code-object`) which contain information that maps the code +object executable code and data to the source language constructs. It can be +used by tools such as debuggers and profilers. It uses features defined in +:doc:`AMDGPUDwarfExtensionsForHeterogeneousDebugging` that are made available in +DWARF Version 4 and DWARF Version 5 as an LLVM vendor extension. + +This section defines the AMDGPU target architecture specific DWARF mappings. + +.. _amdgpu-dwarf-register-identifier: + +Register Identifier +------------------- + +This section defines the AMDGPU target architecture register numbers used in +DWARF operation expressions (see DWARF Version 5 section 2.5 and +:ref:`amdgpu-dwarf-operation-expressions`) and Call Frame Information +instructions (see DWARF Version 5 section 6.4 and +:ref:`amdgpu-dwarf-call-frame-information`). + +A single code object can contain code for kernels that have different wavefront +sizes. The vector registers and some scalar registers are based on the wavefront +size. AMDGPU defines distinct DWARF registers for each wavefront size. This +simplifies the consumer of the DWARF so that each register has a fixed size, +rather than being dynamic according to the wavefront size mode. 
Similarly, +distinct DWARF registers are defined for those registers that vary in size +according to the process address size. This allows a consumer to treat a +specific AMDGPU processor as a single architecture regardless of how it is +configured at run time. The compiler explicitly specifies the DWARF registers +that match the mode in which the code it is generating will be executed. + +DWARF registers are encoded as numbers, which are mapped to architecture +registers. The mapping for AMDGPU is defined in +:ref:`amdgpu-dwarf-register-mapping-table`. All AMDGPU targets use the same +mapping. + +.. table:: AMDGPU DWARF Register Mapping + :name: amdgpu-dwarf-register-mapping-table + + ============== ================= ======== ================================== + DWARF Register AMDGPU Register Bit Size Description + ============== ================= ======== ================================== + 0 PC_32 32 Program Counter (PC) when + executing in a 32-bit process + address space. Used in the CFI to + describe the PC of the calling + frame. + 1 EXEC_MASK_32 32 Execution Mask Register when + executing in wavefront 32 mode. + 2-15 *Reserved* *Reserved for highly accessed + registers using DWARF shortcut.* + 16 PC_64 64 Program Counter (PC) when + executing in a 64-bit process + address space. Used in the CFI to + describe the PC of the calling + frame. + 17 EXEC_MASK_64 64 Execution Mask Register when + executing in wavefront 64 mode. + 18-31 *Reserved* *Reserved for highly accessed + registers using DWARF shortcut.* + 32-95 SGPR0-SGPR63 32 Scalar General Purpose + Registers. + 96-127 *Reserved* *Reserved for frequently accessed + registers using DWARF 1-byte ULEB.* + 128 STATUS 32 Status Register. + 129-511 *Reserved* *Reserved for future Scalar + Architectural Registers.* + 512 VCC_32 32 Vector Condition Code Register + when executing in wavefront 32 + mode. 
+ 513-767 *Reserved* *Reserved for future Vector + Architectural Registers when + executing in wavefront 32 mode.* + 768 VCC_64 64 Vector Condition Code Register + when executing in wavefront 64 + mode. + 769-1023 *Reserved* *Reserved for future Vector + Architectural Registers when + executing in wavefront 64 mode.* + 1024-1087 *Reserved* *Reserved for padding.* + 1088-1129 SGPR64-SGPR105 32 Scalar General Purpose Registers. + 1130-1535 *Reserved* *Reserved for future Scalar + General Purpose Registers.* + 1536-1791 VGPR0-VGPR255 32*32 Vector General Purpose Registers + when executing in wavefront 32 + mode. + 1792-2047 *Reserved* *Reserved for future Vector + General Purpose Registers when + executing in wavefront 32 mode.* + 2048-2303 AGPR0-AGPR255 32*32 Vector Accumulation Registers + when executing in wavefront 32 + mode. + 2304-2559 *Reserved* *Reserved for future Vector + Accumulation Registers when + executing in wavefront 32 mode.* + 2560-2815 VGPR0-VGPR255 64*32 Vector General Purpose Registers + when executing in wavefront 64 + mode. + 2816-3071 *Reserved* *Reserved for future Vector + General Purpose Registers when + executing in wavefront 64 mode.* + 3072-3327 AGPR0-AGPR255 64*32 Vector Accumulation Registers + when executing in wavefront 64 + mode. + 3328-3583 *Reserved* *Reserved for future Vector + Accumulation Registers when + executing in wavefront 64 mode.* + ============== ================= ======== ================================== + +The vector registers are represented as the full size for the wavefront. They +are organized as consecutive dwords (32-bits), one per lane, with the dword at +the least significant bit position corresponding to lane 0 and so forth. 
DWARF +location expressions involving the ``DW_OP_LLVM_offset`` and +``DW_OP_LLVM_push_lane`` operations are used to select the part of the vector +register corresponding to the lane that is executing the current thread of +execution in languages that are implemented using a SIMD or SIMT execution +model. + +If the wavefront size is 32 lanes then the wavefront 32 mode register +definitions are used. If the wavefront size is 64 lanes then the wavefront 64 +mode register definitions are used. Some AMDGPU targets support executing in +both wavefront 32 and wavefront 64 mode. The register definitions corresponding +to the wavefront mode of the generated code will be used. + +If code is generated to execute in a 32-bit process address space, then the +32-bit process address space register definitions are used. If code is generated +to execute in a 64-bit process address space, then the 64-bit process address +space register definitions are used. The ``amdgcn`` target only supports the +64-bit process address space. + +.. _amdgpu-dwarf-address-class-identifier: + +Address Class Identifier +------------------------ + +The DWARF address class represents the source language memory space. See DWARF +Version 5 section 2.12 which is updated by the *DWARF Extensions For +Heterogeneous Debugging* section :ref:`amdgpu-dwarf-segment_addresses`. + +The DWARF address class mapping used for AMDGPU is defined in +:ref:`amdgpu-dwarf-address-class-mapping-table`. + +.. 
table:: AMDGPU DWARF Address Class Mapping + :name: amdgpu-dwarf-address-class-mapping-table + + ========================= ====== ================= + DWARF AMDGPU + -------------------------------- ----------------- + Address Class Name Value Address Space + ========================= ====== ================= + ``DW_ADDR_none`` 0x0000 Generic (Flat) + ``DW_ADDR_LLVM_global`` 0x0001 Global + ``DW_ADDR_LLVM_constant`` 0x0002 Global + ``DW_ADDR_LLVM_group`` 0x0003 Local (group/LDS) + ``DW_ADDR_LLVM_private`` 0x0004 Private (Scratch) + ``DW_ADDR_AMDGPU_region`` 0x8000 Region (GDS) + ========================= ====== ================= + +The DWARF address class values defined in the *DWARF Extensions For +Heterogeneous Debugging* section :ref:`amdgpu-dwarf-segment_addresses` are used. + +In addition, ``DW_ADDR_AMDGPU_region`` is encoded as a vendor extension. This is +available for use for the AMD extension for access to the hardware GDS memory +which is scratchpad memory allocated per device. + +For AMDGPU if no ``DW_AT_address_class`` attribute is present, then the default +address class of ``DW_ADDR_none`` is used. + +See :ref:`amdgpu-dwarf-address-space-identifier` for information on the AMDGPU +mapping of DWARF address classes to DWARF address spaces, including address size +and NULL value. + +.. _amdgpu-dwarf-address-space-identifier: + +Address Space Identifier +------------------------ + +DWARF address spaces correspond to target architecture specific linear +addressable memory areas. See DWARF Version 5 section 2.12 and *DWARF Extensions +For Heterogeneous Debugging* section :ref:`amdgpu-dwarf-segment_addresses`. + +The DWARF address space mapping used for AMDGPU is defined in +:ref:`amdgpu-dwarf-address-space-mapping-table`. + +.. 
table:: AMDGPU DWARF Address Space Mapping + :name: amdgpu-dwarf-address-space-mapping-table + + ======================================= ===== ======= ======== ================= ======================= + DWARF AMDGPU Notes + --------------------------------------- ----- ---------------- ----------------- ----------------------- + Address Space Name Value Address Bit Size Address Space + --------------------------------------- ----- ------- -------- ----------------- ----------------------- + .. 64-bit 32-bit + process process + address address + space space + ======================================= ===== ======= ======== ================= ======================= + ``DW_ASPACE_none`` 0x00 64 32 Global *default address space* + ``DW_ASPACE_AMDGPU_generic`` 0x01 64 32 Generic (Flat) + ``DW_ASPACE_AMDGPU_region`` 0x02 32 32 Region (GDS) + ``DW_ASPACE_AMDGPU_local`` 0x03 32 32 Local (group/LDS) + *Reserved* 0x04 + ``DW_ASPACE_AMDGPU_private_lane`` 0x05 32 32 Private (Scratch) *focused lane* + ``DW_ASPACE_AMDGPU_private_wave`` 0x06 32 32 Private (Scratch) *unswizzled wavefront* + ======================================= ===== ======= ======== ================= ======================= + +See :ref:`amdgpu-address-spaces` for information on the AMDGPU address spaces +including address size and NULL value. + +The ``DW_ASPACE_none`` address space is the default target architecture address +space used in DWARF operations that do not specify an address space. It +therefore has to map to the global address space so that the ``DW_OP_addr*`` and +related operations can refer to addresses in the program code. + +The ``DW_ASPACE_AMDGPU_generic`` address space allows location expressions to +specify the flat address space. If the address corresponds to an address in the +local address space, then it corresponds to the wavefront that is executing the +focused thread of execution. 
If the address corresponds to an address in the +private address space, then it corresponds to the lane that is executing the +focused thread of execution for languages that are implemented using a SIMD or +SIMT execution model. + +.. note:: + + CUDA-like languages such as HIP that do not have address spaces in the + language type system, but do allow variables to be allocated in different + address spaces, need to explicitly specify the ``DW_ASPACE_AMDGPU_generic`` + address space in the DWARF expression operations as the default address space + is the global address space. + +The ``DW_ASPACE_AMDGPU_local`` address space allows location expressions to +specify the local address space corresponding to the wavefront that is executing +the focused thread of execution. + +The ``DW_ASPACE_AMDGPU_private_lane`` address space allows location expressions +to specify the private address space corresponding to the lane that is executing +the focused thread of execution for languages that are implemented using a SIMD +or SIMT execution model. + +The ``DW_ASPACE_AMDGPU_private_wave`` address space allows location expressions +to specify the unswizzled private address space corresponding to the wavefront +that is executing the focused thread of execution. The wavefront view of private +memory is the per wavefront unswizzled backing memory layout defined in +:ref:`amdgpu-address-spaces`, such that address 0 corresponds to the first +location for the backing memory of the wavefront (namely the address is not +offset by ``wavefront-scratch-base``). 
The following formula can be used to +convert from a ``DW_ASPACE_AMDGPU_private_lane`` address to a +``DW_ASPACE_AMDGPU_private_wave`` address: + +:: + + private-address-wavefront = + ((private-address-lane / 4) * wavefront-size * 4) + + (wavefront-lane-id * 4) + (private-address-lane % 4) + +If the ``DW_ASPACE_AMDGPU_private_lane`` address is dword aligned, and the start +of the dwords for each lane starting with lane 0 is required, then this +simplifies to: + +:: + + private-address-wavefront = + private-address-lane * wavefront-size + +A compiler can use the ``DW_ASPACE_AMDGPU_private_wave`` address space to read a +complete spilled vector register back into a complete vector register in the +CFI. The frame pointer can be a private lane address which is dword aligned, +which can be shifted to multiply by the wavefront size, and then used to form a +private wavefront address that gives a location for a contiguous set of dwords, +one per lane, where the vector register dwords are spilled. The compiler knows +the wavefront size since it generates the code. Note that the type of the +address may have to be converted as the size of a +``DW_ASPACE_AMDGPU_private_lane`` address may be smaller than the size of a +``DW_ASPACE_AMDGPU_private_wave`` address. + +.. _amdgpu-dwarf-lane-identifier: + +Lane identifier +--------------- + +DWARF lane identifiers specify a target architecture lane position for hardware +that executes in a SIMD or SIMT manner, and on which a source language maps its +threads of execution onto those lanes. The DWARF lane identifier is pushed by +the ``DW_OP_LLVM_push_lane`` DWARF expression operation. See DWARF Version 5 +section 2.5 which is updated by *DWARF Extensions For Heterogeneous Debugging* +section :ref:`amdgpu-dwarf-operation-expressions`. + +For AMDGPU, the lane identifier corresponds to the hardware lane ID of a +wavefront. It is numbered from 0 to the wavefront size minus 1. 
+ +Operation Expressions +--------------------- + +DWARF expressions are used to compute program values and the locations of +program objects. See DWARF Version 5 section 2.5 and +:ref:`amdgpu-dwarf-operation-expressions`. + +DWARF location descriptions describe how to access storage which includes memory +and registers. When accessing storage on AMDGPU, bytes are ordered with least +significant bytes first, and bits are ordered within bytes with least +significant bits first. + +For AMDGPU CFI expressions, ``DW_OP_LLVM_select_bit_piece`` is used to describe +unwinding vector registers that are spilled under the execution mask to memory: +the zero-single location description is the vector register, and the one-single +location description is the spilled memory location description. The +``DW_OP_LLVM_form_aspace_address`` is used to specify the address space of the +memory location description. + +In AMDGPU expressions, ``DW_OP_LLVM_select_bit_piece`` is used by the +``DW_AT_LLVM_lane_pc`` attribute expression where divergent control flow is +controlled by the execution mask. An undefined location description together +with ``DW_OP_LLVM_extend`` is used to indicate the lane was not active on entry +to the subprogram. See :ref:`amdgpu-dwarf-dw-at-llvm-lane-pc` for an example. + +Debugger Information Entry Attributes +------------------------------------- + +This section describes how certain debugger information entry attributes are +used by AMDGPU. See the sections in DWARF Version 5 section 2 which are updated +by *DWARF Extensions For Heterogeneous Debugging* section +:ref:`amdgpu-dwarf-debugging-information-entry-attributes`. + +.. _amdgpu-dwarf-dw-at-llvm-lane-pc: + +``DW_AT_LLVM_lane_pc`` +~~~~~~~~~~~~~~~~~~~~~~ + +For AMDGPU, the ``DW_AT_LLVM_lane_pc`` attribute is used to specify the program +location of the separate lanes of a SIMT thread. + +If the lane is an active lane then this will be the same as the current program +location. 
+ +If the lane is inactive, but was active on entry to the subprogram, then this is +the program location in the subprogram at which execution of the lane is +conceptually positioned. + +If the lane was not active on entry to the subprogram, then this will be the +undefined location. A client debugger can check if the lane is part of a valid +work-group by checking that the lane is in the range of the associated +work-group within the grid, accounting for partial work-groups. If it is not, +then the debugger can omit any information for the lane. Otherwise, the debugger +may repeatedly unwind the stack and inspect the ``DW_AT_LLVM_lane_pc`` of the +calling subprogram until it finds a non-undefined location. Conceptually the +lane only has the call frames for which it has a non-undefined +``DW_AT_LLVM_lane_pc``. + +The following example illustrates how the AMDGPU backend can generate a DWARF +location list expression for the nested ``IF/THEN/ELSE`` structures of the +following subprogram pseudo code for a target with 64 lanes per wavefront. + +.. code:: + :number-lines: + + SUBPROGRAM X + BEGIN + a; + IF (c1) THEN + b; + IF (c2) THEN + c; + ELSE + d; + ENDIF + e; + ELSE + f; + ENDIF + g; + END + +The AMDGPU backend may generate the following pseudo LLVM MIR to manipulate the +execution mask (``EXEC``) to linearize the control flow. The condition is +evaluated to make a mask of the lanes for which the condition evaluates to true. +First the ``THEN`` region is executed by setting the ``EXEC`` mask to the +logical ``AND`` of the current ``EXEC`` mask with the condition mask. Then the +``ELSE`` region is executed by negating the ``EXEC`` mask and logical ``AND`` of +the saved ``EXEC`` mask at the start of the region. After the ``IF/THEN/ELSE`` +region the ``EXEC`` mask is restored to the value it had at the beginning of the +region. This is shown below. Other approaches are possible, but the basic +concept is the same. + +.. 
code:: + :number-lines: + + $lex_start: + a; + %1 = EXEC + %2 = c1 + $lex_1_start: + EXEC = %1 & %2 + $lex_1_then: + b; + %3 = EXEC + %4 = c2 + $lex_1_1_start: + EXEC = %3 & %4 + $lex_1_1_then: + c; + EXEC = ~EXEC & %3 + $lex_1_1_else: + d; + EXEC = %3 + $lex_1_1_end: + e; + EXEC = ~EXEC & %1 + $lex_1_else: + f; + EXEC = %1 + $lex_1_end: + g; + $lex_end: + +To create the DWARF location list expression that defines the location +description of a vector of lane program locations, the LLVM MIR ``DBG_VALUE`` +pseudo instruction can be used to annotate the linearized control flow. This can +be done by defining an artificial variable for the lane PC. The DWARF location +list expression created for it is used as the value of the +``DW_AT_LLVM_lane_pc`` attribute on the subprogram's debugger information entry. + +A DWARF procedure is defined for each well nested structured control flow region +which provides the conceptual lane program location for a lane if it is not +active (namely it is divergent). The DWARF operation expression for each region +conceptually inherits the value of the immediately enclosing region and modifies +it according to the semantics of the region. + +For an ``IF/THEN/ELSE`` region the divergent program location is at the start of +the region for the ``THEN`` region since it is executed first. For the ``ELSE`` +region the divergent program location is at the end of the ``IF/THEN/ELSE`` +region since the ``THEN`` region has completed. + +The lane PC artificial variable is assigned at each region transition. It uses +the immediately enclosing region's DWARF procedure to compute the program +location for each lane assuming they are divergent, and then modifies the result +by inserting the current program location for each lane that the ``EXEC`` mask +indicates is active. + +By having separate DWARF procedures for each region, they can be reused to +define the value for any nested region. This reduces the total size of the DWARF +operation expressions. 
+ +The following provides an example using pseudo LLVM MIR. + +.. code:: + :number-lines: + + $lex_start: + DEFINE_DWARF %__uint_64 = DW_TAG_base_type[ + DW_AT_name = "__uint64"; + DW_AT_byte_size = 8; + DW_AT_encoding = DW_ATE_unsigned; + ]; + DEFINE_DWARF %__active_lane_pc = DW_TAG_dwarf_procedure[ + DW_AT_name = "__active_lane_pc"; + DW_AT_location = [ + DW_OP_regx PC; + DW_OP_LLVM_extend 64, 64; + DW_OP_regval_type EXEC, %__uint_64; + DW_OP_LLVM_select_bit_piece 64, 64; + ]; + ]; + DEFINE_DWARF %__divergent_lane_pc = DW_TAG_dwarf_procedure[ + DW_AT_name = "__divergent_lane_pc"; + DW_AT_location = [ + DW_OP_LLVM_undefined; + DW_OP_LLVM_extend 64, 64; + ]; + ]; + DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[ + DW_OP_call_ref %__divergent_lane_pc; + DW_OP_call_ref %__active_lane_pc; + ]; + a; + %1 = EXEC; + DBG_VALUE %1, $noreg, %__lex_1_save_exec; + %2 = c1; + $lex_1_start: + EXEC = %1 & %2; + $lex_1_then: + DEFINE_DWARF %__divergent_lane_pc_1_then = DW_TAG_dwarf_procedure[ + DW_AT_name = "__divergent_lane_pc_1_then"; + DW_AT_location = DIExpression[ + DW_OP_call_ref %__divergent_lane_pc; + DW_OP_addrx &lex_1_start; + DW_OP_stack_value; + DW_OP_LLVM_extend 64, 64; + DW_OP_call_ref %__lex_1_save_exec; + DW_OP_deref_type 64, %__uint_64; + DW_OP_LLVM_select_bit_piece 64, 64; + ]; + ]; + DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[ + DW_OP_call_ref %__divergent_lane_pc_1_then; + DW_OP_call_ref %__active_lane_pc; + ]; + b; + %3 = EXEC; + DBG_VALUE %3, $noreg, %__lex_1_1_save_exec; + %4 = c2; + $lex_1_1_start: + EXEC = %3 & %4; + $lex_1_1_then: + DEFINE_DWARF %__divergent_lane_pc_1_1_then = DW_TAG_dwarf_procedure[ + DW_AT_name = "__divergent_lane_pc_1_1_then"; + DW_AT_location = DIExpression[ + DW_OP_call_ref %__divergent_lane_pc_1_then; + DW_OP_addrx &lex_1_1_start; + DW_OP_stack_value; + DW_OP_LLVM_extend 64, 64; + DW_OP_call_ref %__lex_1_1_save_exec; + DW_OP_deref_type 64, %__uint_64; + DW_OP_LLVM_select_bit_piece 64, 64; + ]; + ]; + DBG_VALUE 
$noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[ + DW_OP_call_ref %__divergent_lane_pc_1_1_then; + DW_OP_call_ref %__active_lane_pc; + ]; + c; + EXEC = ~EXEC & %3; + $lex_1_1_else: + DEFINE_DWARF %__divergent_lane_pc_1_1_else = DW_TAG_dwarf_procedure[ + DW_AT_name = "__divergent_lane_pc_1_1_else"; + DW_AT_location = DIExpression[ + DW_OP_call_ref %__divergent_lane_pc_1_then; + DW_OP_addrx &lex_1_1_end; + DW_OP_stack_value; + DW_OP_LLVM_extend 64, 64; + DW_OP_call_ref %__lex_1_1_save_exec; + DW_OP_deref_type 64, %__uint_64; + DW_OP_LLVM_select_bit_piece 64, 64; + ]; + ]; + DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[ + DW_OP_call_ref %__divergent_lane_pc_1_1_else; + DW_OP_call_ref %__active_lane_pc; + ]; + d; + EXEC = %3; + $lex_1_1_end: + DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[ + DW_OP_call_ref %__divergent_lane_pc; + DW_OP_call_ref %__active_lane_pc; + ]; + e; + EXEC = ~EXEC & %1; + $lex_1_else: + DEFINE_DWARF %__divergent_lane_pc_1_else = DW_TAG_dwarf_procedure[ + DW_AT_name = "__divergent_lane_pc_1_else"; + DW_AT_location = DIExpression[ + DW_OP_call_ref %__divergent_lane_pc; + DW_OP_addrx &lex_1_end; + DW_OP_stack_value; + DW_OP_LLVM_extend 64, 64; + DW_OP_call_ref %__lex_1_save_exec; + DW_OP_deref_type 64, %__uint_64; + DW_OP_LLVM_select_bit_piece 64, 64; + ]; + ]; + DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[ + DW_OP_call_ref %__divergent_lane_pc_1_else; + DW_OP_call_ref %__active_lane_pc; + ]; + f; + EXEC = %1; + $lex_1_end: + DBG_VALUE $noreg, $noreg, %DW_AT_LLVM_lane_pc, DIExpression[ + DW_OP_call_ref %__divergent_lane_pc; + DW_OP_call_ref %__active_lane_pc; + ]; + g; + $lex_end: + +The DWARF procedure ``%__active_lane_pc`` is used to update the lane pc elements +that are active, with the current program location. + +Artificial variables ``%__lex_1_save_exec`` and ``%__lex_1_1_save_exec`` are created for +the execution masks saved on entry to a region. 
Using the ``DBG_VALUE`` pseudo +instruction, location list entries will be created that describe where the +artificial variables are allocated at any given program location. The compiler +may allocate them to registers or spill them to memory. + +The DWARF procedures for each region use the values of the saved execution mask +artificial variables to only update the lanes that are active on entry to the +region. All other lanes retain the value of the enclosing region where they were +last active. If they were not active on entry to the subprogram, then they will have +the undefined location description. + +Other structured control flow regions can be handled similarly. For example, +loops would set the divergent program location for the region at the end of the +loop. Any lanes active will be in the loop, and any lanes not active must have +exited the loop. + +An ``IF/THEN/ELSEIF/ELSEIF/...`` region can be treated as a nest of +``IF/THEN/ELSE`` regions. + +The DWARF procedures can use the active lane artificial variable described in +:ref:`amdgpu-dwarf-amdgpu-dw-at-llvm-active-lane` rather than the actual +``EXEC`` mask in order to support whole or quad wavefront mode. + +.. _amdgpu-dwarf-amdgpu-dw-at-llvm-active-lane: + +``DW_AT_LLVM_active_lane`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``DW_AT_LLVM_active_lane`` attribute on a subprogram debugger information +entry is used to specify the lanes that are conceptually active for a SIMT +thread. + +The execution mask may be modified to implement whole or quad wavefront mode +operations. For example, all lanes may need to temporarily be made active to +execute a whole wavefront operation. Such regions would save the ``EXEC`` mask, +update it to enable the necessary lanes, perform the operations, and then +restore the ``EXEC`` mask from the saved value. While executing the whole +wavefront region, the conceptual execution mask is the saved value, not the +``EXEC`` value. 
+ +This is handled by defining an artificial variable for the active lane mask. The +active lane mask artificial variable would be the actual ``EXEC`` mask for +normal regions, and the saved execution mask for regions where the mask is +temporarily updated. The location list expression created for this artificial +variable is used to define the value of the ``DW_AT_LLVM_active_lane`` +attribute. + +``DW_AT_LLVM_augmentation`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For AMDGPU, the ``DW_AT_LLVM_augmentation`` attribute of a compilation unit +debugger information entry has the following value for the augmentation string: + +:: + + [amdgpu:v0.0] + +The "vX.Y" specifies the major X and minor Y version number of the AMDGPU +extensions used in the DWARF of the compilation unit. The version number +conforms to [SEMVER]_. + +Call Frame Information +---------------------- + +DWARF Call Frame Information (CFI) describes how a consumer can virtually +*unwind* call frames in a running process or core dump. See DWARF Version 5 +section 6.4 and :ref:`amdgpu-dwarf-call-frame-information`. + +For AMDGPU, the Common Information Entry (CIE) fields have the following values: + +1. ``augmentation`` string contains the following null-terminated UTF-8 string: + + :: + + [amd:v0.0] + + The ``vX.Y`` specifies the major X and minor Y version number of the AMDGPU + extensions used in this CIE or to the FDEs that use it. The version number + conforms to [SEMVER]_. + +2. ``address_size`` for the ``Global`` address space is defined in + :ref:`amdgpu-dwarf-address-space-identifier`. + +3. ``segment_selector_size`` is 0 as AMDGPU does not use a segment selector. + +4. ``code_alignment_factor`` is 4 bytes. + + .. TODO:: + + Add to :ref:`amdgpu-processor-table` table. + +5. ``data_alignment_factor`` is 4 bytes. + + .. TODO:: + + Add to :ref:`amdgpu-processor-table` table. + +6. 
``return_address_register`` is ``PC_32`` for 32-bit processes and ``PC_64`` + for 64-bit processes defined in :ref:`amdgpu-dwarf-register-identifier`. + +7. ``initial_instructions`` Since a subprogram X with fewer registers can be + called from subprogram Y that has more allocated, X will not change any of + the extra registers as it cannot access them. Therefore, the default rule + for all columns is ``same value``. + +For AMDGPU the register number follows the numbering defined in +:ref:`amdgpu-dwarf-register-identifier`. + +For AMDGPU the instructions are variable size. A consumer can subtract 1 from +the return address to get the address of a byte within the call site +instructions. See DWARF Version 5 section 6.4.4. + +Accelerated Access +------------------ + +See DWARF Version 5 section 6.1. + +Lookup By Name Section Header +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See DWARF Version 5 section 6.1.1.4.1 and :ref:`amdgpu-dwarf-lookup-by-name`. + +For AMDGPU the lookup by name section header table: + +``augmentation_string_size`` (uword) + + Set to the length of the ``augmentation_string`` value which is always a + multiple of 4. + +``augmentation_string`` (sequence of UTF-8 characters) + + Contains the following UTF-8 string null padded to a multiple of 4 bytes: + + :: + + [amdgpu:v0.0] + + The "vX.Y" specifies the major X and minor Y version number of the AMDGPU + extensions used in the DWARF of this index. The version number conforms to + [SEMVER]_. + + .. note:: + + This is different to the DWARF Version 5 definition that requires the first + 4 characters to be the vendor ID. But this is consistent with the other + augmentation strings and does allow multiple vendor contributions. However, + backwards compatibility may be more desirable. + +Lookup By Address Section Header +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See DWARF Version 5 section 6.1.2. 
+ +For AMDGPU the lookup by address section header table: + +``address_size`` (ubyte) + + Matches the address size for the ``Global`` address space defined in + :ref:`amdgpu-dwarf-address-space-identifier`. + +``segment_selector_size`` (ubyte) + + AMDGPU does not use a segment selector so this is 0. The entries in the + ``.debug_aranges`` do not have a segment selector. + +Line Number Information +----------------------- + +See DWARF Version 5 section 6.2 and :ref:`amdgpu-dwarf-line-number-information`. + +AMDGPU does not use the ``isa`` state machine register and always sets it to 0. +The instruction set must be obtained from the ELF file header ``e_flags`` field +in the ``EF_AMDGPU_MACH`` bit position (see :ref:`ELF Header +<amdgpu-elf-header>`). See DWARF Version 5 section 6.2.2. + +.. TODO:: + + Should the ``isa`` state machine register be used to indicate if the code is + in wavefront32 or wavefront64 mode? Or used to specify the architecture ISA? + +For AMDGPU the line number program header fields have the following values (see +DWARF Version 5 section 6.2.4): + +``address_size`` (ubyte) + Matches the address size for the ``Global`` address space defined in + :ref:`amdgpu-dwarf-address-space-identifier`. + +``segment_selector_size`` (ubyte) + AMDGPU does not use a segment selector so this is 0. + +``minimum_instruction_length`` (ubyte) + For GFX9-GFX10 this is 4. + +``maximum_operations_per_instruction`` (ubyte) + For GFX9-GFX10 this is 1. + +Source text for online-compiled programs (for example, those compiled by the +OpenCL language runtime) may be embedded into the DWARF Version 5 line table. +See DWARF Version 5 section 6.2.4.1 which is updated by *DWARF Extensions For +Heterogeneous Debugging* section :ref:`DW_LNCT_LLVM_source +<amdgpu-dwarf-line-number-information-dw-lnct-llvm-source>`. + +The Clang option used to control source embedding in AMDGPU is defined in +:ref:`amdgpu-clang-debug-options-table`. + + .. 
table:: AMDGPU Clang Debug Options + :name: amdgpu-clang-debug-options-table + + ==================== ================================================== + Debug Flag Description + ==================== ================================================== + -g[no-]embed-source Enable/disable embedding source text in DWARF + debug sections. Useful for environments where + source cannot be written to disk, such as + when performing online compilation. + ==================== ================================================== + +For example: + +``-gembed-source`` + Enable the embedded source. + +``-gno-embed-source`` + Disable the embedded source. + +32-Bit and 64-Bit DWARF Formats +------------------------------- + +See DWARF Version 5 section 7.4 and +:ref:`amdgpu-dwarf-32-bit-and-64-bit-dwarf-formats`. + +For AMDGPU: + +* For the ``amdgcn`` target architecture only the 64-bit process address space + is supported. + +* The producer can generate either 32-bit or 64-bit DWARF format. LLVM generates + the 32-bit DWARF format. + +Unit Headers +------------ + +For AMDGPU the following values apply for each of the unit headers described in +DWARF Version 5 sections 7.5.1.1, 7.5.1.2, and 7.5.1.3: + +``address_size`` (ubyte) + Matches the address size for the ``Global`` address space defined in + :ref:`amdgpu-dwarf-address-space-identifier`. + +.. _amdgpu-code-conventions: + +Code Conventions +================ + +This section provides code conventions used for each supported target triple OS +(see :ref:`amdgpu-target-triples`). + +AMDHSA +------ + +This section provides code conventions used when the target triple OS is +``amdhsa`` (see :ref:`amdgpu-target-triples`). + +.. _amdgpu-amdhsa-code-object-metadata: + +Code Object Metadata +~~~~~~~~~~~~~~~~~~~~ + +The code object metadata specifies extensible metadata associated with the code +objects executed on HSA [HSA]_ compatible runtimes (see :ref:`amdgpu-os`). 
The +encoding and semantics of this metadata depends on the code object version; see +:ref:`amdgpu-amdhsa-code-object-metadata-v2`, +:ref:`amdgpu-amdhsa-code-object-metadata-v3`, and +:ref:`amdgpu-amdhsa-code-object-metadata-v4`. + +Code object metadata is specified in a note record (see +:ref:`amdgpu-note-records`) and is required when the target triple OS is +``amdhsa`` (see :ref:`amdgpu-target-triples`). It must contain the minimum +information necessary to support the HSA compatible runtime kernel queries. For +example, the segment sizes needed in a dispatch packet. In addition, a +high-level language runtime may require other information to be included. For +example, the AMD OpenCL runtime records kernel argument information. + +.. _amdgpu-amdhsa-code-object-metadata-v2: + +Code Object V2 Metadata ++++++++++++++++++++++++ + +.. warning:: + Code object V2 is not the default code object version emitted by this version + of LLVM. + +Code object V2 metadata is specified by the ``NT_AMD_HSA_METADATA`` note record +(see :ref:`amdgpu-note-records-v2`). + +The metadata is specified as a YAML formatted string (see [YAML]_ and +:doc:`YamlIO`). + +.. TODO:: + + Is the string null terminated? It probably should not if YAML allows it to + contain null characters, otherwise it should be. + +The metadata is represented as a single YAML document comprised of the mapping +defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-v2-table` and +referenced tables. + +For boolean values, the string values of ``false`` and ``true`` are used for +false and true respectively. + +Additional information can be added to the mappings. To avoid conflicts, any +non-AMD key names should be prefixed by "*vendor-name*.". + + .. table:: AMDHSA Code Object V2 Metadata Map + :name: amdgpu-amdhsa-code-object-metadata-map-v2-table + + ========== ============== ========= ======================================= + String Key Value Type Required? 
Description + ========== ============== ========= ======================================= + "Version" sequence of Required - The first integer is the major + 2 integers version. Currently 1. + - The second integer is the minor + version. Currently 0. + "Printf" sequence of Each string is encoded information + strings about a printf function call. The + encoded information is organized as + fields separated by colon (':'): + + ``ID:N:S[0]:S[1]:...:S[N-1]:FormatString`` + + where: + + ``ID`` + A 32-bit integer as a unique id for + each printf function call + + ``N`` + A 32-bit integer equal to the number + of arguments of printf function call + minus 1 + + ``S[i]`` (where i = 0, 1, ... , N-1) + 32-bit integers for the size in bytes + of the i-th FormatString argument of + the printf function call + + FormatString + The format string passed to the + printf function call. + "Kernels" sequence of Required Sequence of the mappings for each + mapping kernel in the code object. See + :ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-v2-table` + for the definition of the mapping. + ========== ============== ========= ======================================= + +.. + + .. table:: AMDHSA Code Object V2 Kernel Metadata Map + :name: amdgpu-amdhsa-code-object-kernel-metadata-map-v2-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Required Source name of the kernel. + "SymbolName" string Required Name of the kernel + descriptor ELF symbol. + "Language" string Source language of the kernel. + Values include: + + - "OpenCL C" + - "OpenCL C++" + - "HCC" + - "OpenMP" + + "LanguageVersion" sequence of - The first integer is the major + 2 integers version. + - The second integer is the + minor version. + "Attrs" mapping Mapping of kernel attributes. 
+ See + :ref:`amdgpu-amdhsa-code-object-kernel-attribute-metadata-map-v2-table` + for the mapping definition. + "Args" sequence of Sequence of mappings of the + mapping kernel arguments. See + :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-map-v2-table` + for the definition of the mapping. + "CodeProps" mapping Mapping of properties related to + the kernel code. See + :ref:`amdgpu-amdhsa-code-object-kernel-code-properties-metadata-map-v2-table` + for the mapping definition. + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object V2 Kernel Attribute Metadata Map + :name: amdgpu-amdhsa-code-object-kernel-attribute-metadata-map-v2-table + + =================== ============== ========= ============================== + String Key Value Type Required? Description + =================== ============== ========= ============================== + "ReqdWorkGroupSize" sequence of If not 0, 0, 0 then all values + 3 integers must be >=1 and the dispatch + work-group size X, Y, Z must + correspond to the specified + values. Defaults to 0, 0, 0. + + Corresponds to the OpenCL + ``reqd_work_group_size`` + attribute. + "WorkGroupSizeHint" sequence of The dispatch work-group size + 3 integers X, Y, Z is likely to be the + specified values. + + Corresponds to the OpenCL + ``work_group_size_hint`` + attribute. + "VecTypeHint" string The name of a scalar or vector + type. + + Corresponds to the OpenCL + ``vec_type_hint`` attribute. + + "RuntimeHandle" string The external symbol name + associated with a kernel. + OpenCL runtime allocates a + global buffer for the symbol + and saves the kernel's address + to it, which is used for + device side enqueueing. Only + available for device side + enqueued kernels. + =================== ============== ========= ============================== + +.. + + .. 
table:: AMDHSA Code Object V2 Kernel Argument Metadata Map + :name: amdgpu-amdhsa-code-object-kernel-argument-metadata-map-v2-table + + ================= ============== ========= ================================ + String Key Value Type Required? Description + ================= ============== ========= ================================ + "Name" string Kernel argument name. + "TypeName" string Kernel argument type name. + "Size" integer Required Kernel argument size in bytes. + "Align" integer Required Kernel argument alignment in + bytes. Must be a power of two. + "ValueKind" string Required Kernel argument kind that + specifies how to set up the + corresponding argument. + Values include: + + "ByValue" + The argument is copied + directly into the kernarg. + + "GlobalBuffer" + A global address space pointer + to the buffer data is passed + in the kernarg. + + "DynamicSharedPointer" + A group address space pointer + to dynamically allocated LDS + is passed in the kernarg. + + "Sampler" + A global address space + pointer to a S# is passed in + the kernarg. + + "Image" + A global address space + pointer to a T# is passed in + the kernarg. + + "Pipe" + A global address space pointer + to an OpenCL pipe is passed in + the kernarg. + + "Queue" + A global address space pointer + to an OpenCL device enqueue + queue is passed in the + kernarg. + + "HiddenGlobalOffsetX" + The OpenCL grid dispatch + global offset for the X + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetY" + The OpenCL grid dispatch + global offset for the Y + dimension is passed in the + kernarg. + + "HiddenGlobalOffsetZ" + The OpenCL grid dispatch + global offset for the Z + dimension is passed in the + kernarg. + + "HiddenNone" + An argument that is not used + by the kernel. Space needs to + be left for it, but it does + not need to be set up. + + "HiddenPrintfBuffer" + A global address space pointer + to the runtime printf buffer + is passed in kernarg. 
+ + "HiddenHostcallBuffer" + A global address space pointer + to the runtime hostcall buffer + is passed in kernarg. + + "HiddenDefaultQueue" + A global address space pointer + to the OpenCL device enqueue + queue that should be used by + the kernel by default is + passed in the kernarg. + + "HiddenCompletionAction" + A global address space pointer + to help link enqueued kernels into + the ancestor tree for determining + when the parent kernel has finished. + + "HiddenMultiGridSyncArg" + A global address space pointer for + multi-grid synchronization is + passed in the kernarg. + + "ValueType" string Unused and deprecated. This should no longer + be emitted, but is accepted for compatibility. + + + "PointeeAlign" integer Alignment in bytes of pointee + type for pointer type kernel + argument. Must be a power + of 2. Only present if + "ValueKind" is + "DynamicSharedPointer". + "AddrSpaceQual" string Kernel argument address space + qualifier. Only present if + "ValueKind" is "GlobalBuffer" or + "DynamicSharedPointer". Values + are: + + - "Private" + - "Global" + - "Constant" + - "Local" + - "Generic" + - "Region" + + .. TODO:: + + Is GlobalBuffer only Global + or Constant? Is + DynamicSharedPointer always + Local? Can HCC allow Generic? + How can Private or Region + ever happen? + + "AccQual" string Kernel argument access + qualifier. Only present if + "ValueKind" is "Image" or + "Pipe". Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + .. TODO:: + + Does this apply to + GlobalBuffer? + + "ActualAccQual" string The actual memory accesses + performed by the kernel on the + kernel argument. Only present if + "ValueKind" is "GlobalBuffer", + "Image", or "Pipe". This may be + more restrictive than indicated + by "AccQual" to reflect what the + kernel actual does. If not + present then the runtime must + assume what is implied by + "AccQual" and "IsConst". 
Values + are: + + - "ReadOnly" + - "WriteOnly" + - "ReadWrite" + + "IsConst" boolean Indicates if the kernel argument + is const qualified. Only present + if "ValueKind" is + "GlobalBuffer". + + "IsRestrict" boolean Indicates if the kernel argument + is restrict qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsVolatile" boolean Indicates if the kernel argument + is volatile qualified. Only + present if "ValueKind" is + "GlobalBuffer". + + "IsPipe" boolean Indicates if the kernel argument + is pipe qualified. Only present + if "ValueKind" is "Pipe". + + .. TODO:: + + Can GlobalBuffer be pipe + qualified? + + ================= ============== ========= ================================ + +.. + + .. table:: AMDHSA Code Object V2 Kernel Code Properties Metadata Map + :name: amdgpu-amdhsa-code-object-kernel-code-properties-metadata-map-v2-table + + ============================ ============== ========= ===================== + String Key Value Type Required? Description + ============================ ============== ========= ===================== + "KernargSegmentSize" integer Required The size in bytes of + the kernarg segment + that holds the values + of the arguments to + the kernel. + "GroupSegmentFixedSize" integer Required The amount of group + segment memory + required by a + work-group in + bytes. This does not + include any + dynamically allocated + group segment memory + that may be added + when the kernel is + dispatched. + "PrivateSegmentFixedSize" integer Required The amount of fixed + private address space + memory required for a + work-item in + bytes. If the kernel + uses a dynamic call + stack then additional + space must be added + to this value for the + call stack. + "KernargSegmentAlign" integer Required The maximum byte + alignment of + arguments in the + kernarg segment. Must + be a power of 2. + "WavefrontSize" integer Required Wavefront size. Must + be a power of 2. 
+ "NumSGPRs" integer Required Number of scalar + registers used by a + wavefront for + GFX6-GFX10. This + includes the special + SGPRs for VCC, Flat + Scratch (GFX7-GFX10) + and XNACK (for + GFX8-GFX10). It does + not include the 16 + SGPR added if a trap + handler is + enabled. It is not + rounded up to the + allocation + granularity. + "NumVGPRs" integer Required Number of vector + registers used by + each work-item for + GFX6-GFX10 + "MaxFlatWorkGroupSize" integer Required Maximum flat + work-group size + supported by the + kernel in work-items. + Must be >=1 and + consistent with + ReqdWorkGroupSize if + not 0, 0, 0. + "NumSpilledSGPRs" integer Number of stores from + a scalar register to + a register allocator + created spill + location. + "NumSpilledVGPRs" integer Number of stores from + a vector register to + a register allocator + created spill + location. + ============================ ============== ========= ===================== + +.. _amdgpu-amdhsa-code-object-metadata-v3: + +Code Object V3 Metadata ++++++++++++++++++++++++ + +Code object V3 to V4 metadata is specified by the ``NT_AMDGPU_METADATA`` note +record (see :ref:`amdgpu-note-records-v3-v4`). + +The metadata is represented as Message Pack formatted binary data (see +[MsgPack]_). The top level is a Message Pack map that includes the +keys defined in table +:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v3` and referenced +tables. + +Additional information can be added to the maps. To avoid conflicts, +any key names should be prefixed by "*vendor-name*." where +``vendor-name`` can be the name of the vendor and specific vendor +tool that generates the information. The prefix is abbreviated to +simply "." when it appears within a map that has been added by the +same *vendor-name*. + + .. 
table:: AMDHSA Code Object V3 Metadata Map + :name: amdgpu-amdhsa-code-object-metadata-map-table-v3 + + ================= ============== ========= ======================================= + String Key Value Type Required? Description + ================= ============== ========= ======================================= + "amdhsa.version" sequence of Required - The first integer is the major + 2 integers version. Currently 1. + - The second integer is the minor + version. Currently 0. + "amdhsa.printf" sequence of Each string is encoded information + strings about a printf function call. The + encoded information is organized as + fields separated by colon (':'): + + ``ID:N:S[0]:S[1]:...:S[N-1]:FormatString`` + + where: + + ``ID`` + A 32-bit integer as a unique id for + each printf function call + + ``N`` + A 32-bit integer equal to the number + of arguments of printf function call + minus 1 + + ``S[i]`` (where i = 0, 1, ... , N-1) + 32-bit integers for the size in bytes + of the i-th FormatString argument of + the printf function call + + FormatString + The format string passed to the + printf function call. + "amdhsa.kernels" sequence of Required Sequence of the maps for each + map kernel in the code object. See + :ref:`amdgpu-amdhsa-code-object-kernel-metadata-map-table-v3` + for the definition of the keys included + in that map. + ================= ============== ========= ======================================= + +.. + + .. table:: AMDHSA Code Object V3 Kernel Metadata Map + :name: amdgpu-amdhsa-code-object-kernel-metadata-map-table-v3 + + =================================== ============== ========= ================================ + String Key Value Type Required? Description + =================================== ============== ========= ================================ + ".name" string Required Source name of the kernel. + ".symbol" string Required Name of the kernel + descriptor ELF symbol. + ".language" string Source language of the kernel. 
+ Values include: + + - "OpenCL C" + - "OpenCL C++" + - "HCC" + - "HIP" + - "OpenMP" + - "Assembler" + + ".language_version" sequence of - The first integer is the major + 2 integers version. + - The second integer is the + minor version. + ".args" sequence of Sequence of maps of the + map kernel arguments. See + :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-map-table-v3` + for the definition of the keys + included in that map. + ".reqd_workgroup_size" sequence of If not 0, 0, 0 then all values + 3 integers must be >=1 and the dispatch + work-group size X, Y, Z must + correspond to the specified + values. Defaults to 0, 0, 0. + + Corresponds to the OpenCL + ``reqd_work_group_size`` + attribute. + ".workgroup_size_hint" sequence of The dispatch work-group size + 3 integers X, Y, Z is likely to be the + specified values. + + Corresponds to the OpenCL + ``work_group_size_hint`` + attribute. + ".vec_type_hint" string The name of a scalar or vector + type. + + Corresponds to the OpenCL + ``vec_type_hint`` attribute. + + ".device_enqueue_symbol" string The external symbol name + associated with a kernel. + OpenCL runtime allocates a + global buffer for the symbol + and saves the kernel's address + to it, which is used for + device side enqueueing. Only + available for device side + enqueued kernels. + ".kernarg_segment_size" integer Required The size in bytes of + the kernarg segment + that holds the values + of the arguments to + the kernel. + ".group_segment_fixed_size" integer Required The amount of group + segment memory + required by a + work-group in + bytes. This does not + include any + dynamically allocated + group segment memory + that may be added + when the kernel is + dispatched. + ".private_segment_fixed_size" integer Required The amount of fixed + private address space + memory required for a + work-item in + bytes. If the kernel + uses a dynamic call + stack then additional + space must be added + to this value for the + call stack. 
+ ".kernarg_segment_align" integer Required The maximum byte + alignment of + arguments in the + kernarg segment. Must + be a power of 2. + ".wavefront_size" integer Required Wavefront size. Must + be a power of 2. + ".sgpr_count" integer Required Number of scalar + registers required by a + wavefront for + GFX6-GFX9. A register + is required if it is + used explicitly, or + if a higher numbered + register is used + explicitly. This + includes the special + SGPRs for VCC, Flat + Scratch (GFX7-GFX9) + and XNACK (for + GFX8-GFX9). It does + not include the 16 + SGPR added if a trap + handler is + enabled. It is not + rounded up to the + allocation + granularity. + ".vgpr_count" integer Required Number of vector + registers required by + each work-item for + GFX6-GFX9. A register + is required if it is + used explicitly, or + if a higher numbered + register is used + explicitly. + ".max_flat_workgroup_size" integer Required Maximum flat + work-group size + supported by the + kernel in work-items. + Must be >=1 and + consistent with + ReqdWorkGroupSize if + not 0, 0, 0. + ".sgpr_spill_count" integer Number of stores from + a scalar register to + a register allocator + created spill + location. + ".vgpr_spill_count" integer Number of stores from + a vector register to + a register allocator + created spill + location. + ".kind" string The kind of the kernel + with the following + values: + + "normal" + Regular kernels. + + "init" + These kernels must be + invoked after loading + the containing code + object and must + complete before any + normal and fini + kernels in the same + code object are + invoked. + + "fini" + These kernels must be + invoked before + unloading the + containing code object + and after all init and + normal kernels in the + same code object have + been invoked and + completed. + + If omitted, "normal" is + assumed. + =================================== ============== ========= ================================ + +.. + + .. 
table:: AMDHSA Code Object V3 Kernel Argument Metadata Map + :name: amdgpu-amdhsa-code-object-kernel-argument-metadata-map-table-v3 + + ====================== ============== ========= ================================ + String Key Value Type Required? Description + ====================== ============== ========= ================================ + ".name" string Kernel argument name. + ".type_name" string Kernel argument type name. + ".size" integer Required Kernel argument size in bytes. + ".offset" integer Required Kernel argument offset in + bytes. The offset must be a + multiple of the alignment + required by the argument. + ".value_kind" string Required Kernel argument kind that + specifies how to set up the + corresponding argument. + Values include: + + "by_value" + The argument is copied + directly into the kernarg. + + "global_buffer" + A global address space pointer + to the buffer data is passed + in the kernarg. + + "dynamic_shared_pointer" + A group address space pointer + to dynamically allocated LDS + is passed in the kernarg. + + "sampler" + A global address space + pointer to a S# is passed in + the kernarg. + + "image" + A global address space + pointer to a T# is passed in + the kernarg. + + "pipe" + A global address space pointer + to an OpenCL pipe is passed in + the kernarg. + + "queue" + A global address space pointer + to an OpenCL device enqueue + queue is passed in the + kernarg. + + "hidden_global_offset_x" + The OpenCL grid dispatch + global offset for the X + dimension is passed in the + kernarg. + + "hidden_global_offset_y" + The OpenCL grid dispatch + global offset for the Y + dimension is passed in the + kernarg. + + "hidden_global_offset_z" + The OpenCL grid dispatch + global offset for the Z + dimension is passed in the + kernarg. + + "hidden_none" + An argument that is not used + by the kernel. Space needs to + be left for it, but it does + not need to be set up. 
+ + "hidden_printf_buffer" + A global address space pointer + to the runtime printf buffer + is passed in kernarg. + + "hidden_hostcall_buffer" + A global address space pointer + to the runtime hostcall buffer + is passed in kernarg. + + "hidden_default_queue" + A global address space pointer + to the OpenCL device enqueue + queue that should be used by + the kernel by default is + passed in the kernarg. + + "hidden_completion_action" + A global address space pointer + to help link enqueued kernels into + the ancestor tree for determining + when the parent kernel has finished. + + "hidden_multigrid_sync_arg" + A global address space pointer for + multi-grid synchronization is + passed in the kernarg. + + ".value_type" string Unused and deprecated. This should no longer + be emitted, but is accepted for compatibility. + + ".pointee_align" integer Alignment in bytes of pointee + type for pointer type kernel + argument. Must be a power + of 2. Only present if + ".value_kind" is + "dynamic_shared_pointer". + ".address_space" string Kernel argument address space + qualifier. Only present if + ".value_kind" is "global_buffer" or + "dynamic_shared_pointer". Values + are: + + - "private" + - "global" + - "constant" + - "local" + - "generic" + - "region" + + .. TODO:: + + Is "global_buffer" only "global" + or "constant"? Is + "dynamic_shared_pointer" always + "local"? Can HCC allow "generic"? + How can "private" or "region" + ever happen? + + ".access" string Kernel argument access + qualifier. Only present if + ".value_kind" is "image" or + "pipe". Values + are: + + - "read_only" + - "write_only" + - "read_write" + + .. TODO:: + + Does this apply to + "global_buffer"? + + ".actual_access" string The actual memory accesses + performed by the kernel on the + kernel argument. Only present if + ".value_kind" is "global_buffer", + "image", or "pipe". This may be + more restrictive than indicated + by ".access" to reflect what the + kernel actual does. 
If not + present then the runtime must + assume what is implied by + ".access" and ".is_const". Values + are: + + - "read_only" + - "write_only" + - "read_write" + + ".is_const" boolean Indicates if the kernel argument + is const qualified. Only present + if ".value_kind" is + "global_buffer". + + ".is_restrict" boolean Indicates if the kernel argument + is restrict qualified. Only + present if ".value_kind" is + "global_buffer". + + ".is_volatile" boolean Indicates if the kernel argument + is volatile qualified. Only + present if ".value_kind" is + "global_buffer". + + ".is_pipe" boolean Indicates if the kernel argument + is pipe qualified. Only present + if ".value_kind" is "pipe". + + .. TODO:: + + Can "global_buffer" be pipe + qualified? + + ====================== ============== ========= ================================ + +.. _amdgpu-amdhsa-code-object-metadata-v4: + +Code Object V4 Metadata ++++++++++++++++++++++++ + +.. warning:: + Code object V4 is not the default code object version emitted by this version + of LLVM. + +Code object V4 metadata is the same as +:ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions +defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v4`. + + .. table:: AMDHSA Code Object V4 Metadata Map Changes from :ref:`amdgpu-amdhsa-code-object-metadata-v3` + :name: amdgpu-amdhsa-code-object-metadata-map-table-v4 + + ================= ============== ========= ======================================= + String Key Value Type Required? Description + ================= ============== ========= ======================================= + "amdhsa.version" sequence of Required - The first integer is the major + 2 integers version. Currently 1. + - The second integer is the minor + version. Currently 1. + "amdhsa.target" string Required The target name of the code using the syntax: + + .. code:: + + <target-triple> [ "-" <target-id> ] + + A canonical target ID must be + used. See :ref:`amdgpu-target-triples` + and :ref:`amdgpu-target-id`.
+ ================= ============== ========= ======================================= + +.. + +Kernel Dispatch +~~~~~~~~~~~~~~~ + +The HSA architected queuing language (AQL) defines a user space memory interface +that can be used to control the dispatch of kernels, in an agent independent +way. An agent can have zero or more AQL queues created for it using an HSA +compatible runtime (see :ref:`amdgpu-os`), in which AQL packets (all of which +are 64 bytes) can be placed. See the *HSA Platform System Architecture +Specification* [HSA]_ for the AQL queue mechanics and packet layouts. + +The packet processor of a kernel agent is responsible for detecting and +dispatching HSA kernels from the AQL queues associated with it. For AMD GPUs the +packet processor is implemented by the hardware command processor (CP), +asynchronous dispatch controller (ADC) and shader processor input controller +(SPI). + +An HSA compatible runtime can be used to allocate an AQL queue object. It uses +the kernel mode driver to initialize and register the AQL queue with CP. + +To dispatch a kernel the following actions are performed. This can occur in the +CPU host program, or from an HSA kernel executing on a GPU. + +1. A pointer to an AQL queue for the kernel agent on which the kernel is to be + executed is obtained. +2. A pointer to the kernel descriptor (see + :ref:`amdgpu-amdhsa-kernel-descriptor`) of the kernel to execute is obtained. + It must be for a kernel that is contained in a code object that was + loaded by an HSA compatible runtime on the kernel agent with which the AQL + queue is associated. +3. Space is allocated for the kernel arguments using the HSA compatible runtime + allocator for a memory region with the kernarg property for the kernel agent + that will execute the kernel. It must be at least 16-byte aligned. +4. Kernel argument values are assigned to the kernel argument memory + allocation. The layout is defined in the *HSA Programmer's Language + Reference* [HSA]_.
For AMDGPU the kernel execution directly accesses the + kernel argument memory in the same way constant memory is accessed. (Note + that the HSA specification allows an implementation to copy the kernel + argument contents to another location that is accessed by the kernel.) +5. An AQL kernel dispatch packet is created on the AQL queue. The HSA compatible + runtime API uses 64-bit atomic operations to reserve space in the AQL queue + for the packet. The packet must be set up, and the final write must use an + atomic store release to set the packet kind to ensure the packet contents are + visible to the kernel agent. AQL defines a doorbell signal mechanism to + notify the kernel agent that the AQL queue has been updated. These rules, and + the layout of the AQL queue and kernel dispatch packet, are defined in the *HSA + System Architecture Specification* [HSA]_. +6. A kernel dispatch packet includes information about the actual dispatch, + such as grid and work-group size, together with information from the code + object about the kernel, such as segment sizes. The HSA compatible runtime + queries on the kernel symbol can be used to obtain the code object values + which are recorded in the :ref:`amdgpu-amdhsa-code-object-metadata`. +7. CP executes micro-code and is responsible for detecting and setting up the + GPU to execute the wavefronts of a kernel dispatch. +8. CP ensures that when a wavefront starts executing the kernel machine + code, the scalar general purpose registers (SGPR) and vector general purpose + registers (VGPR) are set up as required by the machine code. The required + setup is defined in the :ref:`amdgpu-amdhsa-kernel-descriptor`. The initial + register state is defined in + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`. +9. The prolog of the kernel machine code (see + :ref:`amdgpu-amdhsa-kernel-prolog`) sets up the machine state as necessary + before continuing executing the machine code that corresponds to the kernel. +10.
When the kernel dispatch has completed execution, CP signals the completion + signal specified in the kernel dispatch packet if not 0. + +.. _amdgpu-amdhsa-memory-spaces: + +Memory Spaces +~~~~~~~~~~~~~ + +The memory space properties are: + + .. table:: AMDHSA Memory Spaces + :name: amdgpu-amdhsa-memory-spaces-table + + ================= =========== ======== ======= ================== + Memory Space Name HSA Segment Hardware Address NULL Value + Name Name Size + ================= =========== ======== ======= ================== + Private private scratch 32 0x00000000 + Local group LDS 32 0xFFFFFFFF + Global global global 64 0x0000000000000000 + Constant constant *same as 64 0x0000000000000000 + global* + Generic flat flat 64 0x0000000000000000 + Region N/A GDS 32 *not implemented + for AMDHSA* + ================= =========== ======== ======= ================== + +The global and constant memory spaces both use global virtual addresses, which +are the same virtual address space used by the CPU. However, some virtual +addresses may only be accessible to the CPU, some only accessible by the GPU, +and some by both. + +Using the constant memory space indicates that the data will not change during +the execution of the kernel. This allows scalar read instructions to be +used. The vector and scalar L1 caches are invalidated of volatile data before +each kernel dispatch execution to allow constant memory to change values between +kernel dispatches. + +The local memory space uses the hardware Local Data Store (LDS) which is +automatically allocated when the hardware creates work-groups of wavefronts, and +freed when all the wavefronts of a work-group have terminated. The data store +(DS) instructions can be used to access it. + +The private memory space uses the hardware scratch memory support. If the kernel +uses scratch, then the hardware allocates memory that is accessed using +wavefront lane dword (4 byte) interleaving. 
The mapping used from private +address to physical address is: + + ``wavefront-scratch-base + + (private-address * wavefront-size * 4) + + (wavefront-lane-id * 4)`` + +There are different ways that the wavefront scratch base address is determined +by a wavefront (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). This +memory can be accessed in an interleaved manner using buffer instructions with +the scratch buffer descriptor and per wavefront scratch offset, by the scratch +instructions, or by flat instructions. If each lane of a wavefront accesses the +same private address, the interleaving results in adjacent dwords being accessed +and hence requires fewer cache lines to be fetched. Multi-dword access is not +supported except by flat and scratch instructions in GFX9-GFX10. + +The generic address space uses the hardware flat address support available in +GFX7-GFX10. This uses two fixed ranges of virtual addresses (the private and +local apertures), that are outside the range of addressable global memory, to +map from a flat address to a private or local address. + +FLAT instructions can take a flat address and access global, private (scratch) +and group (LDS) memory depending on whether the address is within one of the +aperture ranges. Flat access to scratch requires hardware aperture setup and +setup in the kernel prologue (see +:ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`). Flat access to LDS requires +hardware aperture setup and M0 (GFX7-GFX8) register setup (see +:ref:`amdgpu-amdhsa-kernel-prolog-m0`). + +To convert between a segment address and a flat address the base addresses of the +apertures can be used. For GFX7-GFX8 these are available in the +:ref:`amdgpu-amdhsa-hsa-aql-queue` the address of which can be obtained with +Queue Ptr SGPR (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
For +GFX9-GFX10 the aperture base addresses are directly available as inline constant +registers ``SRC_SHARED_BASE/LIMIT`` and ``SRC_PRIVATE_BASE/LIMIT``. In 64-bit +address mode the aperture sizes are 2^32 bytes and the base is aligned to 2^32 +which makes it easier to convert from flat to segment or segment to flat. + +Image and Samplers +~~~~~~~~~~~~~~~~~~ + +Image and sampler handles created by an HSA compatible runtime (see +:ref:`amdgpu-os`) are 64-bit addresses of a hardware 32-byte V# and 48-byte S# +object respectively. In order to support the HSA ``query_sampler`` operations +two extra dwords are used to store the HSA BRIG enumeration values for the +queries that are not trivially deducible from the S# representation. + +HSA Signals +~~~~~~~~~~~ + +HSA signal handles created by an HSA compatible runtime (see :ref:`amdgpu-os`) +are 64-bit addresses of a structure allocated in memory accessible from both the +CPU and GPU. The structure is defined by the runtime and subject to change +between releases. For example, see [AMD-ROCm-github]_. + +.. _amdgpu-amdhsa-hsa-aql-queue: + +HSA AQL Queue +~~~~~~~~~~~~~ + +The HSA AQL queue structure is defined by an HSA compatible runtime (see +:ref:`amdgpu-os`) and subject to change between releases. For example, see +[AMD-ROCm-github]_. For some processors it contains fields needed to implement +certain language features such as the flat address aperture bases. It also +contains fields used by CP such as managing the allocation of scratch memory. + +.. _amdgpu-amdhsa-kernel-descriptor: + +Kernel Descriptor +~~~~~~~~~~~~~~~~~ + +A kernel descriptor consists of the information needed by CP to initiate the +execution of a kernel, including the entry point address of the machine code +that implements the kernel. + +Code Object V3 Kernel Descriptor +++++++++++++++++++++++++++++++++ + +CP microcode requires the Kernel descriptor to be allocated on 64-byte +alignment.
+ +The fields used by CP for code objects before V3 also match those specified in +:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + + .. table:: Code Object V3 Kernel Descriptor + :name: amdgpu-amdhsa-kernel-descriptor-v3-table + + ======= ======= =============================== ============================ + Bits Size Field Name Description + ======= ======= =============================== ============================ + 31:0 4 bytes GROUP_SEGMENT_FIXED_SIZE The amount of fixed local + address space memory + required for a work-group + in bytes. This does not + include any dynamically + allocated local address + space memory that may be + added when the kernel is + dispatched. + 63:32 4 bytes PRIVATE_SEGMENT_FIXED_SIZE The amount of fixed + private address space + memory required for a + work-item in bytes. + Additional space may need to + be added to this value if + the call stack has + non-inlined function calls. + 95:64 4 bytes KERNARG_SIZE The size of the kernarg + memory pointed to by the + AQL dispatch packet. The + kernarg memory is used to + pass arguments to the + kernel. + + * If the kernarg pointer in + the dispatch packet is NULL + then there are no kernel + arguments. + * If the kernarg pointer in + the dispatch packet is + not NULL and this value + is 0 then the kernarg + memory size is + unspecified. + * If the kernarg pointer in + the dispatch packet is + not NULL and this value + is not 0 then the value + specifies the kernarg + memory size in bytes. It + is recommended to provide + a value as it may be used + by CP to optimize making + the kernarg memory + visible to the kernel + code. + + 127:96 4 bytes Reserved, must be 0. + 191:128 8 bytes KERNEL_CODE_ENTRY_BYTE_OFFSET Byte offset (possibly + negative) from base + address of kernel + descriptor to kernel's + entry point instruction + which must be 256 byte + aligned. + 351:192 20 Reserved, must be 0. + bytes + 383:352 4 bytes COMPUTE_PGM_RSRC3 GFX6-GFX9 + Reserved, must be 0.
+ GFX90A + Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC3`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`. + GFX10 + Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC3`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table`. + 415:384 4 bytes COMPUTE_PGM_RSRC1 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC1`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + 447:416 4 bytes COMPUTE_PGM_RSRC2 Compute Shader (CS) + program settings used by + CP to set up + ``COMPUTE_PGM_RSRC2`` + configuration + register. See + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + 454:448 7 bits *See separate bits below.* Enable the setup of the + SGPR user data registers + (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + The total number of SGPR + user data registers + requested must not exceed + 16 and match value in + ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``. + Any requests beyond 16 + will be ignored. + >448 1 bit ENABLE_SGPR_PRIVATE_SEGMENT If the *Target Properties* + _BUFFER column of + :ref:`amdgpu-processor-table` + specifies *Architected flat + scratch* then not supported + and must be 0. + >449 1 bit ENABLE_SGPR_DISPATCH_PTR + >450 1 bit ENABLE_SGPR_QUEUE_PTR + >451 1 bit ENABLE_SGPR_KERNARG_SEGMENT_PTR + >452 1 bit ENABLE_SGPR_DISPATCH_ID + >453 1 bit ENABLE_SGPR_FLAT_SCRATCH_INIT If the *Target Properties* + column of + :ref:`amdgpu-processor-table` + specifies *Architected flat + scratch* then not supported + and must be 0. + >454 1 bit ENABLE_SGPR_PRIVATE_SEGMENT + _SIZE + 457:455 3 bits Reserved, must be 0. + 458 1 bit ENABLE_WAVEFRONT_SIZE32 GFX6-GFX9 + Reserved, must be 0. + GFX10 + - If 0 execute in + wavefront size 64 mode. + - If 1 execute in + native wavefront size + 32 mode. + 463:459 5 bits Reserved, must be 0.
+ 464 1 bit RESERVED_464 Deprecated, must be 0. + 467:465 3 bits Reserved, must be 0. + 468 1 bit RESERVED_468 Deprecated, must be 0. + 471:469 3 bits Reserved, must be 0. + 511:472 5 bytes Reserved, must be 0. + 512 **Total size 64 bytes.** + ======= ==================================================================== + +.. + + .. table:: compute_pgm_rsrc1 for GFX6-GFX10 + :name: amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 5:0 6 bits GRANULATED_WORKITEM_VGPR_COUNT Number of vector register + blocks used by each work-item; + granularity is device + specific: + + GFX6-GFX9 + - vgprs_used 0..256 + - max(0, ceil(vgprs_used / 4) - 1) + GFX90A + - vgprs_used 0..512 + - vgprs_used = align(arch_vgprs, 4) + + acc_vgprs + - max(0, ceil(vgprs_used / 8) - 1) + GFX10 (wavefront size 64) + - max_vgpr 1..256 + - max(0, ceil(vgprs_used / 4) - 1) + GFX10 (wavefront size 32) + - max_vgpr 1..256 + - max(0, ceil(vgprs_used / 8) - 1) + + Where vgprs_used is defined + as the highest VGPR number + explicitly referenced plus + one. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.VGPRS``. + + The + :ref:`amdgpu-assembler` + calculates this + automatically for the + selected processor from + values provided to the + `.amdhsa_kernel` directive + by the + `.amdhsa_next_free_vgpr` + nested directive (see + :ref:`amdhsa-kernel-directives-table`). + 9:6 4 bits GRANULATED_WAVEFRONT_SGPR_COUNT Number of scalar register + blocks used by a wavefront; + granularity is device + specific: + + GFX6-GFX8 + - sgprs_used 0..112 + - max(0, ceil(sgprs_used / 8) - 1) + GFX9 + - sgprs_used 0..112 + - 2 * max(0, ceil(sgprs_used / 16) - 1) + GFX10 + Reserved, must be 0. + (128 SGPRs always + allocated.)
+ + Where sgprs_used is + defined as the highest + SGPR number explicitly + referenced plus one, plus + a target specific number + of additional special + SGPRs for VCC, + FLAT_SCRATCH (GFX7+) and + XNACK_MASK (GFX8+), and + any additional + target specific + limitations. It does not + include the 16 SGPRs added + if a trap handler is + enabled. + + The target specific + limitations and special + SGPR layout are defined in + the hardware + documentation, which can + be found in the + :ref:`amdgpu-processors` + table. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.SGPRS``. + + The + :ref:`amdgpu-assembler` + calculates this + automatically for the + selected processor from + values provided to the + `.amdhsa_kernel` directive + by the + `.amdhsa_next_free_sgpr` + and `.amdhsa_reserve_*` + nested directives (see + :ref:`amdhsa-kernel-directives-table`). + 11:10 2 bits PRIORITY Must be 0. + + Start executing wavefront + at the specified priority. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIORITY``. + 13:12 2 bits FLOAT_ROUND_MODE_32 Wavefront starts execution + with specified rounding + mode for single (32 + bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 15:14 2 bits FLOAT_ROUND_MODE_16_64 Wavefront starts execution + with specified rounding + mode for half/double (16 + and 64-bit) floating point + precision floating point + operations. + + Floating point rounding + mode values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 17:16 2 bits FLOAT_DENORM_MODE_32 Wavefront starts execution + with specified denorm mode + for single (32 + bit) floating point + precision floating point + operations.
+ + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 19:18 2 bits FLOAT_DENORM_MODE_16_64 Wavefront starts execution + with specified denorm mode + for half/double (16 + and 64-bit) floating point + precision floating point + operations. + + Floating point denorm mode + values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. + 20 1 bit PRIV Must be 0. + + Start executing wavefront + in privilege trap handler + mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.PRIV``. + 21 1 bit ENABLE_DX10_CLAMP Wavefront starts execution + with DX10 clamp mode + enabled. Used by the vector + ALU to force DX10 style + treatment of NaN's (when + set, clamp NaN to zero, + otherwise pass NaN + through). + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.DX10_CLAMP``. + 22 1 bit DEBUG_MODE Must be 0. + + Start executing wavefront + in single step mode. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.DEBUG_MODE``. + 23 1 bit ENABLE_IEEE_MODE Wavefront starts execution + with IEEE mode + enabled. Floating point + opcodes that support + exception flag gathering + will quiet and propagate + signaling-NaN inputs per + IEEE 754-2008. Min_dx10 and + max_dx10 become IEEE + 754-2008 compliant due to + signaling-NaN propagation + and quieting. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.IEEE_MODE``. + 24 1 bit BULKY Must be 0. + + Only one work-group allowed + to execute on a compute + unit. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.BULKY``. + 25 1 bit CDBG_USER Must be 0. + + Flag that can be used to + control debugging code. + + CP is responsible for + filling in + ``COMPUTE_PGM_RSRC1.CDBG_USER``. + 26 1 bit FP16_OVFL GFX6-GFX8 + Reserved, must be 0. 
+ GFX9-GFX10 + Wavefront starts execution + with specified fp16 overflow + mode. + + - If 0, fp16 overflow generates + +/-INF values. + - If 1, fp16 overflow that is the + result of an +/-INF input value + or divide by 0 produces a +/-INF, + otherwise clamps computed + overflow to +/-MAX_FP16 as + appropriate. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FP16_OVFL``. + 28:27 2 bits Reserved, must be 0. + 29 1 bit WGP_MODE GFX6-GFX9 + Reserved, must be 0. + GFX10 + - If 0 execute work-groups in + CU wavefront execution mode. + - If 1 execute work-groups + in WGP wavefront execution mode. + + See :ref:`amdgpu-amdhsa-memory-model`. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.WGP_MODE``. + 30 1 bit MEM_ORDERED GFX6-GFX9 + Reserved, must be 0. + GFX10 + Controls the behavior of the + s_waitcnt's vmcnt and vscnt + counters. + + - If 0 vmcnt reports completion + of load and atomic with return + out of order with sample + instructions, and the vscnt + reports the completion of + store and atomic without + return in order. + - If 1 vmcnt reports completion + of load, atomic with return + and sample instructions in + order, and the vscnt reports + the completion of store and + atomic without return in order. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.MEM_ORDERED``. + 31 1 bit FWD_PROGRESS GFX6-GFX9 + Reserved, must be 0. + GFX10 + - If 0 execute SIMD wavefronts + using oldest first policy. + - If 1 execute SIMD wavefronts to + ensure wavefronts will make some + forward progress. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FWD_PROGRESS``. + 32 **Total size 4 bytes** + ======= =================================================================================================================== + +.. + + ..
table:: compute_pgm_rsrc2 for GFX6-GFX10 + :name: amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 0 1 bit ENABLE_PRIVATE_SEGMENT * Enable the setup of the + private segment. + * If the *Target Properties* + column of + :ref:`amdgpu-processor-table` + does not specify + *Architected flat + scratch* then enable the + setup of the SGPR + wavefront scratch offset + system register (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + * If the *Target Properties* + column of + :ref:`amdgpu-processor-table` + specifies *Architected + flat scratch* then enable + the setup of the + FLAT_SCRATCH register + pair (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.SCRATCH_EN``. + 5:1 5 bits USER_SGPR_COUNT The total number of SGPR + user data registers + requested. This number must + match the number of user + data registers enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.USER_SGPR``. + 6 1 bit ENABLE_TRAP_HANDLER Must be 0. + + This bit represents + ``COMPUTE_PGM_RSRC2.TRAP_PRESENT``, + which is set by the CP if + the runtime has installed a + trap handler. + 7 1 bit ENABLE_SGPR_WORKGROUP_ID_X Enable the setup of the + system SGPR register for + the work-group id in the X + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_X_EN``. + 8 1 bit ENABLE_SGPR_WORKGROUP_ID_Y Enable the setup of the + system SGPR register for + the work-group id in the Y + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Y_EN``. 
+ 9 1 bit ENABLE_SGPR_WORKGROUP_ID_Z Enable the setup of the + system SGPR register for + the work-group id in the Z + dimension (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_Z_EN``. + 10 1 bit ENABLE_SGPR_WORKGROUP_INFO Enable the setup of the + system SGPR register for + work-group information (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``. + 12:11 2 bits ENABLE_VGPR_WORKITEM_ID Enable the setup of the + VGPR system registers used + for the work-item ID. + :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table` + defines the values. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``. + 13 1 bit ENABLE_EXCEPTION_ADDRESS_WATCH Must be 0. + + Wavefront starts execution + with address watch + exceptions enabled which + are generated when L1 has + witnessed a thread access + an *address of + interest*. + + CP is responsible for + filling in the address + watch bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 14 1 bit ENABLE_EXCEPTION_MEMORY Must be 0. + + Wavefront starts execution + with memory violation + exceptions + enabled which are generated + when a memory violation has + occurred for this wavefront from + L1 or LDS + (write-to-read-only-memory, + mis-aligned atomic, LDS + address out of range, + illegal address, etc.). + + CP sets the memory + violation bit in + ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` + according to what the + runtime requests. + 23:15 9 bits GRANULATED_LDS_SIZE Must be 0. + + CP uses the rounded value + from the dispatch packet, + not this value, as the + dispatch may contain + dynamically allocated group + segment memory. CP writes + directly to + ``COMPUTE_PGM_RSRC2.LDS_SIZE``. + + Amount of group segment + (LDS) to allocate for each + work-group.
Granularity is + device specific: + + GFX6 + roundup(lds-size / (64 * 4)) + GFX7-GFX10 + roundup(lds-size / (128 * 4)) + + 24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution + _INVALID_OPERATION with specified exceptions + enabled. + + Used by CP to set up + ``COMPUTE_PGM_RSRC2.EXCP_EN`` + (set from bits 0..6). + + IEEE 754 FP Invalid + Operation + 25 1 bit ENABLE_EXCEPTION_FP_DENORMAL FP Denormal one or more + _SOURCE input operands is a + denormal number + 26 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Division by + _DIVISION_BY_ZERO Zero + 27 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Overflow + _OVERFLOW + 28 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Underflow + _UNDERFLOW + 29 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Inexact + _INEXACT + 30 1 bit ENABLE_EXCEPTION_INT_DIVIDE_BY Integer Division by Zero + _ZERO (rcp_iflag_f32 instruction + only) + 31 1 bit Reserved, must be 0. + 32 **Total size 4 bytes.** + ======= =================================================================================================================== + +.. + + .. table:: compute_pgm_rsrc3 for GFX90A + :name: amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 5:0 6 bits ACCUM_OFFSET Offset of the first AccVGPR in the unified register file. Granularity 4. + Value 0-63. 0 - accum-offset = 4, 1 - accum-offset = 8, ..., + 63 - accum-offset = 256. + 15:6 10 Reserved, must be 0. + bits + 16 1 bit TG_SPLIT - If 0 the waves of a work-group are + launched in the same CU. + - If 1 the waves of a work-group can be + launched in different CUs. The waves + cannot use S_BARRIER or LDS. + 31:17 15 Reserved, must be 0.
+ bits + 32 **Total size 4 bytes.** + ======= =================================================================================================================== + +.. + + .. table:: compute_pgm_rsrc3 for GFX10 + :name: amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table + + ======= ======= =============================== =========================================================================== + Bits Size Field Name Description + ======= ======= =============================== =========================================================================== + 3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPRs for wavefront size 64. Granularity 8. Value 0-120. + compute_pgm_rsrc1.vgprs + shared_vgpr_cnt cannot exceed 64. + 31:4 28 Reserved, must be 0. + bits + 32 **Total size 4 bytes.** + ======= =================================================================================================================== + +.. + + .. table:: Floating Point Rounding Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table + + ====================================== ===== ============================== + Enumeration Name Value Description + ====================================== ===== ============================== + FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even + FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity + FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity + FLOAT_ROUND_MODE_ZERO 3 Round Toward 0 + ====================================== ===== ============================== + +.. + + .. 
table:: Floating Point Denorm Mode Enumeration Values + :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table + + ====================================== ===== ============================== + Enumeration Name Value Description + ====================================== ===== ============================== + FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination + Denorms + FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms + FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms + FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush + ====================================== ===== ============================== + +.. + + .. table:: System VGPR Work-Item ID Enumeration Values + :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table + + ======================================== ===== ============================ + Enumeration Name Value Description + ======================================== ===== ============================ + SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension + ID. + SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y + dimensions ID. + SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z + dimensions ID. + SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined. + ======================================== ===== ============================ + +.. _amdgpu-amdhsa-initial-kernel-execution-state: + +Initial Kernel Execution State +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section defines the register state that will be set up by the packet +processor prior to the start of execution of every wavefront. This is limited by +the constraints of the hardware controllers of CP/ADC/SPI. + +The order of the SGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_sgpr_*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). 
The register numbers used +for enabled registers are dense starting at SGPR0: the first enabled register is +SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have +an SGPR number. + +The initial SGPRs comprise up to 16 User SGPRs that are set by CP and apply to +all wavefronts of the grid. It is possible to specify more than 16 User SGPRs +using the ``enable_sgpr_*`` bit fields, in which case only the first 16 are +actually initialized. These are then immediately followed by the System SGPRs +that are set up by ADC/SPI and can have different values for each wavefront of +the grid dispatch. + +SGPR register initial state is defined in +:ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + + .. table:: SGPR Register Set Up Order + :name: amdgpu-amdhsa-sgpr-register-set-up-order-table + + ========== ========================== ====== ============================== + SGPR Order Name Number Description + (kernel descriptor enable of + field) SGPRs + ========== ========================== ====== ============================== + First Private Segment Buffer 4 See + (enable_sgpr_private :ref:`amdgpu-amdhsa-kernel-prolog-private-segment-buffer`. + _segment_buffer) + then Dispatch Ptr 2 64-bit address of AQL dispatch + (enable_sgpr_dispatch_ptr) packet for kernel dispatch + actually executing. + then Queue Ptr 2 64-bit address of amd_queue_t + (enable_sgpr_queue_ptr) object for AQL queue on which + the dispatch packet was + queued. + then Kernarg Segment Ptr 2 64-bit address of Kernarg + (enable_sgpr_kernarg segment. This is directly + _segment_ptr) copied from the + kernarg_address in the kernel + dispatch packet. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + then Dispatch Id 2 64-bit Dispatch ID of the + (enable_sgpr_dispatch_id) dispatch packet being + executed. + then Flat Scratch Init 2 See + (enable_sgpr_flat_scratch :ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`.
+ _init) + then Private Segment Size 1 The 32-bit byte size of a + (enable_sgpr_private single work-item's memory + _segment_size) allocation. This is the + value from the kernel + dispatch packet Private + Segment Byte Size rounded up + by CP to a multiple of + DWORD. + + Having CP load it once avoids + loading it at the beginning of + every wavefront. + + This is not used for + GFX7-GFX8 since it is the same + value as the second SGPR of + Flat Scratch Init. However, it + may be needed for GFX9-GFX10 which + changes the meaning of the + Flat Scratch Init value. + then Work-Group Id X 1 32-bit work-group id in X + (enable_sgpr_workgroup_id dimension of grid for + _X) wavefront. + then Work-Group Id Y 1 32-bit work-group id in Y + (enable_sgpr_workgroup_id dimension of grid for + _Y) wavefront. + then Work-Group Id Z 1 32-bit work-group id in Z + (enable_sgpr_workgroup_id dimension of grid for + _Z) wavefront. + then Work-Group Info 1 {first_wavefront, 14'b0000, + (enable_sgpr_workgroup ordered_append_term[10:0], + _info) threadgroup_size_in_wavefronts[5:0]} + then Scratch Wavefront Offset 1 See + (enable_sgpr_private :ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`. + _segment_wavefront_offset) and + :ref:`amdgpu-amdhsa-kernel-prolog-private-segment-buffer`. + ========== ========================== ====== ============================== + +The order of the VGPR registers is defined, but the compiler can specify which +ones are actually setup in the kernel descriptor using the ``enable_vgpr*`` bit +fields (see :ref:`amdgpu-amdhsa-kernel-descriptor`). The register numbers used +for enabled registers are dense starting at VGPR0: the first enabled register is +VGPR0, the next enabled register is VGPR1 etc.; disabled registers do not have a +VGPR number. + +There are different methods used for the VGPR initial state: + +* Unless the *Target Properties* column of :ref:`amdgpu-processor-table` + specifies otherwise, a separate VGPR register is used per work-item ID. 
The + VGPR register initial state for this method is defined in + :ref:`amdgpu-amdhsa-vgpr-register-set-up-order-for-unpacked-work-item-id-method-table`. +* If *Target Properties* column of :ref:`amdgpu-processor-table` + specifies *Packed work-item IDs*, the initial value of VGPR0 register is used + for all work-item IDs. The register layout for this method is defined in + :ref:`amdgpu-amdhsa-register-layout-for-packed-work-item-id-method-table`. + + .. table:: VGPR Register Set Up Order for Unpacked Work-Item ID Method + :name: amdgpu-amdhsa-vgpr-register-set-up-order-for-unpacked-work-item-id-method-table + + ========== ========================== ====== ============================== + VGPR Order Name Number Description + (kernel descriptor enable of + field) VGPRs + ========== ========================== ====== ============================== + First Work-Item Id X 1 32-bit work-item id in X + (Always initialized) dimension of work-group for + wavefront lane. + then Work-Item Id Y 1 32-bit work-item id in Y + (enable_vgpr_workitem_id dimension of work-group for + > 0) wavefront lane. + then Work-Item Id Z 1 32-bit work-item id in Z + (enable_vgpr_workitem_id dimension of work-group for + > 1) wavefront lane. + ========== ========================== ====== ============================== + +.. + + .. table:: Register Layout for Packed Work-Item ID Method + :name: amdgpu-amdhsa-register-layout-for-packed-work-item-id-method-table + + ======= ======= ================ ========================================= + Bits Size Field Name Description + ======= ======= ================ ========================================= + 0:9 10 bits Work-Item Id X Work-item id in X + dimension of work-group for + wavefront lane. + + Always initialized. + + 10:19 10 bits Work-Item Id Y Work-item id in Y + dimension of work-group for + wavefront lane. + + Initialized if enable_vgpr_workitem_id > + 0, otherwise set to 0. 
+ 20:29 10 bits Work-Item Id Z Work-item id in Z + dimension of work-group for + wavefront lane. + + Initialized if enable_vgpr_workitem_id > + 1, otherwise set to 0. + 30:31 2 bits Reserved, set to 0. + ======= ======= ================ ========================================= + +The setting of registers is done by GPU CP/ADC/SPI hardware as follows: + +1. SGPRs before the Work-Group Ids are set by CP using the 16 User Data + registers. +2. Work-group Id registers X, Y, Z are set by ADC which supports any + combination including none. +3. Scratch Wavefront Offset is set by SPI on a per wavefront basis which is why + its value cannot be included with the flat scratch init value which is per + queue (see :ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`). +4. The VGPRs are set by SPI which only supports specifying either (X), (X, Y) + or (X, Y, Z). +5. Flat Scratch register pair initialization is described in + :ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`. + +The global segment can be accessed either using buffer instructions (GFX6 which +has V# 64-bit address support), flat instructions (GFX7-GFX10), or global +instructions (GFX9-GFX10). + +If buffer operations are used, then the compiler can generate a V# with the +following properties: + +* base address of 0 +* no swizzle +* ATC: 1 if IOMMU present (such as APU) +* ptr64: 1 +* MTYPE set to support memory coherence that matches the runtime (such as CC for + APU and NC for dGPU). + +.. _amdgpu-amdhsa-kernel-prolog: + +Kernel Prolog +~~~~~~~~~~~~~ + +The compiler performs initialization in the kernel prologue depending on the +target and information about things like stack usage in the kernel and called +functions. Some of this initialization requires the compiler to request certain +User and System SGPRs be present in the +:ref:`amdgpu-amdhsa-initial-kernel-execution-state` via the +:ref:`amdgpu-amdhsa-kernel-descriptor`. + +.. _amdgpu-amdhsa-kernel-prolog-cfi: + +CFI ++++ + +1.
The CFI return address is undefined. + +2. The CFI CFA is defined using an expression which evaluates to a location + description that comprises one memory location description for the + ``DW_ASPACE_AMDGPU_private_lane`` address space address ``0``. + +.. _amdgpu-amdhsa-kernel-prolog-m0: + +M0 +++ + +GFX6-GFX8 + The M0 register must be initialized with a value at least the total LDS size + if the kernel may access LDS via DS or flat operations. Total LDS size is + available in dispatch packet. For M0, it is also possible to use maximum + possible value of LDS for given target (0x7FFF for GFX6 and 0xFFFF for + GFX7-GFX8). +GFX9-GFX10 + The M0 register is not used for range checking LDS accesses and so does not + need to be initialized in the prolog. + +.. _amdgpu-amdhsa-kernel-prolog-stack-pointer: + +Stack Pointer ++++++++++++++ + +If the kernel has function calls it must set up the ABI stack pointer described +in :ref:`amdgpu-amdhsa-function-call-convention-non-kernel-functions` by setting +SGPR32 to the unswizzled scratch offset of the address past the last local +allocation. + +.. _amdgpu-amdhsa-kernel-prolog-frame-pointer: + +Frame Pointer ++++++++++++++ + +If the kernel needs a frame pointer for the reasons defined in +``SIFrameLowering`` then SGPR33 is used and is always set to ``0`` in the +kernel prolog. If a frame pointer is not required then all uses of the frame +pointer are replaced with immediate ``0`` offsets. + +.. _amdgpu-amdhsa-kernel-prolog-flat-scratch: + +Flat Scratch +++++++++++++ + +There are different methods used for initializing flat scratch: + +* If the *Target Properties* column of :ref:`amdgpu-processor-table` + specifies *Does not support generic address space*: + + Flat scratch is not supported and there is no flat scratch register pair. 
+ +* If the *Target Properties* column of :ref:`amdgpu-processor-table` + specifies *Offset flat scratch*: + + If the kernel or any function it calls may use flat operations to access + scratch memory, the prolog code must set up the FLAT_SCRATCH register pair + (FLAT_SCRATCH_LO/FLAT_SCRATCH_HI). Initialization uses Flat Scratch Init and + Scratch Wavefront Offset SGPR registers (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`): + + 1. The low word of Flat Scratch Init is the 32-bit byte offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID`` to the base of scratch backing memory + being managed by SPI for the queue executing the kernel dispatch. This is + the same value used in the Scratch Segment Buffer V# base address. + + CP obtains this from the runtime. (The Scratch Segment Buffer base address + is ``SH_HIDDEN_PRIVATE_BASE_VIMID`` plus this offset.) + + The prolog must add the value of Scratch Wavefront Offset to get the + wavefront's byte scratch backing memory offset from + ``SH_HIDDEN_PRIVATE_BASE_VIMID``. + + The Scratch Wavefront Offset must also be used as an offset with Private + segment address when using the Scratch Segment Buffer. + + Since FLAT_SCRATCH_HI is in units of 256 bytes, the offset must be right + shifted by 8 before moving into FLAT_SCRATCH_HI. + + FLAT_SCRATCH_HI corresponds to SGPRn-4 on GFX7, and SGPRn-6 on GFX8 (where + SGPRn is the highest numbered SGPR allocated to the wavefront). + FLAT_SCRATCH_HI is multiplied by 256 (as it is in units of 256 bytes) and + added to ``SH_HIDDEN_PRIVATE_BASE_VIMID`` to calculate the per wavefront + FLAT SCRATCH BASE in flat memory instructions that access the scratch + aperture. + 2. The second word of Flat Scratch Init is the 32-bit byte size of a single + work-item's scratch memory usage. + + CP obtains this from the runtime, and it is always a multiple of DWORD.
CP + checks that the value in the kernel dispatch packet Private Segment Byte + Size is not larger and requests the runtime to increase the queue's scratch + size if necessary. + + CP directly loads from the kernel dispatch packet Private Segment Byte Size + field and rounds up to a multiple of DWORD. Having CP load it once avoids + loading it at the beginning of every wavefront. + + The kernel prolog code must move it to FLAT_SCRATCH_LO which is SGPRn-3 on + GFX7 and SGPRn-5 on GFX8. FLAT_SCRATCH_LO is used as the FLAT SCRATCH SIZE + in flat memory instructions. + +* If the *Target Properties* column of :ref:`amdgpu-processor-table` + specifies *Absolute flat scratch*: + + If the kernel or any function it calls may use flat operations to access + scratch memory, the prolog code must set up the FLAT_SCRATCH register pair + (FLAT_SCRATCH_LO/FLAT_SCRATCH_HI which are in SGPRn-4/SGPRn-3). Initialization + uses Flat Scratch Init and Scratch Wavefront Offset SGPR registers (see + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`): + + The Flat Scratch Init is the 64-bit address of the base of scratch backing + memory being managed by SPI for the queue executing the kernel dispatch. + + CP obtains this from the runtime. + + The kernel prolog must add the value of the wave's Scratch Wavefront Offset + and move the result as a 64-bit value to the FLAT_SCRATCH SGPR register pair + which is SGPRn-6 and SGPRn-5. It is used as the FLAT SCRATCH BASE in flat + memory instructions. + + The Scratch Wavefront Offset must also be used as an offset with Private + segment address when using the Scratch Segment Buffer (see + :ref:`amdgpu-amdhsa-kernel-prolog-private-segment-buffer`). 
+ +* If the *Target Properties* column of :ref:`amdgpu-processor-table` + specifies *Architected flat scratch*: + + If ENABLE_PRIVATE_SEGMENT is enabled in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table` then the FLAT_SCRATCH + register pair will be initialized to the 64-bit address of the base of scratch + backing memory being managed by SPI for the queue executing the kernel + dispatch plus the value of the wave's Scratch Wavefront Offset for use as the + flat scratch base in flat memory instructions. + +.. _amdgpu-amdhsa-kernel-prolog-private-segment-buffer: + +Private Segment Buffer +++++++++++++++++++++++ + +If the *Target Properties* column of :ref:`amdgpu-processor-table` specifies +*Architected flat scratch* then a Private Segment Buffer is not supported. +Instead the flat SCRATCH instructions are used. + +Otherwise, Private Segment Buffer SGPR register is used to initialize 4 SGPRs +that are used as a V# to access scratch. CP uses the value provided by the +runtime. It is used, together with Scratch Wavefront Offset as an offset, to +access the private memory space using a segment address. See +:ref:`amdgpu-amdhsa-initial-kernel-execution-state`. + +The scratch V# is a four-aligned SGPR and always selected for the kernel as +follows: + + - If it is known during instruction selection that there is stack usage, + SGPR0-3 is reserved for use as the scratch V#. Stack usage is assumed if + optimizations are disabled (``-O0``), if stack objects already exist (for + locals, etc.), or if there are any function calls. + + - Otherwise, four high numbered SGPRs beginning at a four-aligned SGPR index + are reserved for the tentative scratch V#. These will be used if it is + determined that spilling is needed. + + - If no use is made of the tentative scratch V#, then it is unreserved, + and the register count is determined ignoring it. 
+ - If use is made of the tentative scratch V#, then its register numbers + are shifted to the first four-aligned SGPR index after the highest one + allocated by the register allocator, and all uses are updated. The + register count includes them in the shifted location. + - In either case, if the processor has the SGPR allocation bug, the + tentative allocation is not shifted or unreserved in order to ensure + the register count is higher to work around the bug. + + .. note:: + + This approach of using a tentative scratch V# and shifting the register + numbers if used avoids having to perform register allocation a second + time if the tentative V# is eliminated. This is more efficient and + avoids the problem that the second register allocation may perform + spilling which will fail as there is no longer a scratch V#. + +When the kernel prolog code is being emitted it is known whether the scratch V# +described above is actually used. If it is, the prolog code must set it up by +copying the Private Segment Buffer to the scratch V# registers and then adding +the Private Segment Wavefront Offset to the queue base address in the V#. The +result is a V# with a base address pointing to the beginning of the wavefront +scratch backing memory. + +The Private Segment Buffer is always requested, but the Private Segment +Wavefront Offset is only requested if it is used (see +:ref:`amdgpu-amdhsa-initial-kernel-execution-state`). + +.. _amdgpu-amdhsa-memory-model: + +Memory Model +~~~~~~~~~~~~ + +This section describes the mapping of the LLVM memory model onto AMDGPU machine +code (see :ref:`memmodel`). + +The AMDGPU backend supports the memory synchronization scopes specified in +:ref:`amdgpu-memory-scopes`. + +The code sequences used to implement the memory model specify the order of +instructions that a single thread must execute.
The ``s_waitcnt`` and cache +management instructions such as ``buffer_wbinvl1_vol`` are defined with respect +to other memory instructions executed by the same thread. This allows them to be +moved earlier or later which can allow them to be combined with other instances +of the same instruction, or hoisted/sunk out of loops to improve performance. +Only the instructions related to the memory model are given; additional +``s_waitcnt`` instructions are required to ensure registers are defined before +being used. These may be able to be combined with the memory model ``s_waitcnt`` +instructions as described above. + +The AMDGPU backend supports the following memory models: + + HSA Memory Model [HSA]_ + The HSA memory model uses a single happens-before relation for all address + spaces (see :ref:`amdgpu-address-spaces`). + OpenCL Memory Model [OpenCL]_ + The OpenCL memory model which has separate happens-before relations for the + global and local address spaces. Only a fence specifying both global and + local address space, and seq_cst instructions join the relationships. Since + the LLVM ``fence`` instruction does not allow an address space to be + specified the OpenCL fence has to conservatively assume both local and + global address space was specified. However, optimizations can often be + done to eliminate the additional ``s_waitcnt`` instructions when there are + no intervening memory instructions which access the corresponding address + space. The code sequences in the table indicate what can be omitted for the + OpenCL memory model. The target triple environment is used to determine if the + source language is OpenCL (see :ref:`amdgpu-opencl`). + +``ds/flat_load/store/atomic`` instructions to local memory are termed LDS +operations. + +``buffer/global/flat_load/store/atomic`` instructions to global memory are +termed vector memory operations.
+ +Private address space uses ``buffer_load/store`` using the scratch V# +(GFX6-GFX8), or ``scratch_load/store`` (GFX9-GFX10). Since only a single thread +is accessing the memory, atomic memory orderings are not meaningful, and all +accesses are treated as non-atomic. + +Constant address space uses ``buffer/global_load`` instructions (or equivalent +scalar memory instructions). Since the constant address space contents do not +change during the execution of a kernel dispatch it is not legal to perform +stores, and atomic memory orderings are not meaningful, and all accesses are +treated as non-atomic. + +A memory synchronization scope wider than work-group is not meaningful for the +group (LDS) address space and is treated as work-group. + +The memory model does not support the region address space which is treated as +non-atomic. + +Acquire memory ordering is not meaningful on store atomic instructions and is +treated as non-atomic. + +Release memory ordering is not meaningful on load atomic instructions and is +treated as non-atomic. + +Acquire-release memory ordering is not meaningful on load or store atomic +instructions and is treated as acquire and release respectively. + +The memory order also adds the single thread optimization constraints defined in +table +:ref:`amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-table`. + + .. table:: AMDHSA Memory Model Single Thread Optimization Constraints + :name: amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-table + + ============ ============================================================== + LLVM Memory Optimization Constraints + Ordering + ============ ============================================================== + unordered *none* + monotonic *none* + acquire - If a load atomic/atomicrmw then no following load/load + atomic/store/store atomic/atomicrmw/fence instruction can be + moved before the acquire.
+ - If a fence then same as load atomic, plus no preceding + associated fence-paired-atomic can be moved after the fence. + release - If a store atomic/atomicrmw then no preceding load/load + atomic/store/store atomic/atomicrmw/fence instruction can be + moved after the release. + - If a fence then same as store atomic, plus no following + associated fence-paired-atomic can be moved before the + fence. + acq_rel Same constraints as both acquire and release. + seq_cst - If a load atomic then same constraints as acquire, plus no + preceding sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved after the + seq_cst. + - If a store atomic then the same constraints as release, plus + no following sequentially consistent load atomic/store + atomic/atomicrmw/fence instruction can be moved before the + seq_cst. + - If an atomicrmw/fence then same constraints as acq_rel. + ============ ============================================================== + +The code sequences used to implement the memory model are defined in the +following sections: + +* :ref:`amdgpu-amdhsa-memory-model-gfx6-gfx9` +* :ref:`amdgpu-amdhsa-memory-model-gfx90a` +* :ref:`amdgpu-amdhsa-memory-model-gfx10` + +.. _amdgpu-amdhsa-memory-model-gfx6-gfx9: + +Memory Model GFX6-GFX9 +++++++++++++++++++++++ + +For GFX6-GFX9: + +* Each agent has multiple shader arrays (SA). +* Each SA has multiple compute units (CU). +* Each CU has multiple SIMDs that execute wavefronts. +* The wavefronts for a single work-group are executed in the same CU but may be + executed by different SIMDs. +* Each CU has a single LDS memory shared by the wavefronts of the work-groups + executing on it. +* All LDS operations of a CU are performed as wavefront wide operations in a + global order and involve no caching. Completion is reported to a wavefront in + execution order. +* The LDS memory has multiple request queues shared by the SIMDs of a + CU. 
Therefore, the LDS operations performed by different wavefronts of a + work-group can be reordered relative to each other, which can result in + reordering the visibility of vector memory operations with respect to LDS + operations of other wavefronts in the same work-group. A ``s_waitcnt + lgkmcnt(0)`` is required to ensure synchronization between LDS operations and + vector memory operations between wavefronts of a work-group, but not between + operations performed by the same wavefront. +* The vector memory operations are performed as wavefront wide operations and + completion is reported to a wavefront in execution order. The exception is + that for GFX7-GFX9 ``flat_load/store/atomic`` instructions can report out of + vector memory order if they access LDS memory, and out of LDS operation order + if they access global memory. +* The vector memory operations access a single vector L1 cache shared by all + SIMDs of a CU. Therefore, no special action is required for coherence between the + lanes of a single wavefront, or for coherence between wavefronts in the same + work-group. A ``buffer_wbinvl1_vol`` is required for coherence between + wavefronts executing in different work-groups as they may be executing on + different CUs. +* The scalar memory operations access a scalar L1 cache shared by all wavefronts + on a group of CUs. The scalar and vector L1 caches are not coherent. However, + scalar operations are used in a restricted way so do not impact the memory + model. See :ref:`amdgpu-amdhsa-memory-spaces`. +* The vector and scalar memory operations use an L2 cache shared by all CUs on + the same agent. +* The L2 cache has independent channels to service disjoint ranges of virtual + addresses. +* Each CU has a separate request queue per channel. Therefore, the vector and + scalar memory operations performed by wavefronts executing in different + work-groups (which may be executing on different CUs) of an agent can be + reordered relative to each other.
A ``s_waitcnt vmcnt(0)`` is required to + ensure synchronization between vector memory operations of different CUs. It + ensures a previous vector memory operation has completed before executing a + subsequent vector memory or LDS operation and so can be used to meet the + requirements of acquire and release. +* The L2 cache can be kept coherent with other agents on some targets, or ranges + of virtual addresses can be set up to bypass it to ensure system coherence. + +Scalar memory operations are only used to access memory that is proven to not +change during the execution of the kernel dispatch. This includes constant +address space and global address space for program scope ``const`` variables. +Therefore, the kernel machine code does not have to maintain the scalar cache to +ensure it is coherent with the vector caches. The scalar and vector caches are +invalidated between kernel dispatches by CP since constant address space data +may change between kernel dispatch executions. See +:ref:`amdgpu-amdhsa-memory-spaces`. + +The one exception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a ``s_dcache_wb`` is inserted before the ``s_endpgm`` and before a function +return since the locations may be used for vector memory instructions by a +future wavefront that uses the same scratch area, or a function call that +creates a frame at the same address, respectively. There is no need for a +``s_dcache_inv`` as all scalar writes are write-before-read in the same thread. + +For kernarg backing memory: + +* CP invalidates the L1 cache at the start of each kernel dispatch. +* On dGPU the kernarg backing memory is allocated in host memory accessed as + MTYPE UC (uncached) to avoid needing to invalidate the L2 cache. 
This also + causes it to be treated as non-volatile and so is not invalidated by + ``*_vol``. +* On APU the kernarg backing memory is accessed as MTYPE CC (cache coherent) + and so the L2 cache will be coherent with the CPU and other agents. + +Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC_NV (non-coherent non-volatile). Since the private address space is +only accessed by a single thread, and is always write-before-read, there is +never a need to invalidate these entries from the L1 cache. Hence all cache +invalidates are done as ``*_vol`` to only invalidate the volatile cache lines. + +The code sequences used to implement the memory model for GFX6-GFX9 are defined +in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table`. + + .. table:: AMDHSA Memory Model Code Sequences GFX6-GFX9 + :name: amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table + + ============ ============ ============== ========== ================================ + LLVM Instr LLVM Memory LLVM Memory AMDGPU AMDGPU Machine Code + Ordering Sync Scope Address GFX6-GFX9 + Space + ============ ============ ============== ========== ================================ + **Non-Atomic** + ------------------------------------------------------------------------------------ + load *none* *none* - global - !volatile & !nontemporal + - generic + - private 1. buffer/global/flat_load + - constant + - !volatile & nontemporal + + 1. buffer/global/flat_load + glc=1 slc=1 + + - volatile + + 1. buffer/global/flat_load + glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + any following volatile + global/generic + load/store. + - Ensures that + volatile + operations to + different + addresses will not + be reordered by + hardware. + + load *none* *none* - local 1. ds_load + store *none* *none* - global - !volatile & !nontemporal + - generic + - private 1. buffer/global/flat_store + - constant + - !volatile & nontemporal + + 1.
buffer/global/flat_store + glc=1 slc=1 + + - volatile + + 1. buffer/global/flat_store + 2. s_waitcnt vmcnt(0) + + - Must happen before + any following volatile + global/generic + load/store. + - Ensures that + volatile + operations to + different + addresses will not + be reordered by + hardware. + + store *none* *none* - local 1. ds_store + **Unordered Atomic** + ------------------------------------------------------------------------------------ + load atomic unordered *any* *any* *Same as non-atomic*. + store atomic unordered *any* *any* *Same as non-atomic*. + atomicrmw unordered *any* *any* *Same as monotonic atomic*. + **Monotonic Atomic** + ------------------------------------------------------------------------------------ + load atomic monotonic - singlethread - global 1. buffer/global/ds/flat_load + - wavefront - local + - workgroup - generic + load atomic monotonic - agent - global 1. buffer/global/flat_load + - system - generic glc=1 + store atomic monotonic - singlethread - global 1. buffer/global/flat_store + - wavefront - generic + - workgroup + - agent + - system + store atomic monotonic - singlethread - local 1. ds_store + - wavefront + - workgroup + atomicrmw monotonic - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + - workgroup + - agent + - system + atomicrmw monotonic - singlethread - local 1. ds_atomic + - wavefront + - workgroup + **Acquire Atomic** + ------------------------------------------------------------------------------------ + load atomic acquire - singlethread - global 1. buffer/global/ds/flat_load + - wavefront - local + - generic + load atomic acquire - workgroup - global 1. buffer/global_load + load atomic acquire - workgroup - local 1. ds/flat_load + - generic 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. 
+ - Ensures any + following global + data read is no + older than a local load + atomic value being + acquired. + + load atomic acquire - agent - global 1. buffer/global_load + - system glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale global data. + + load atomic acquire - agent - generic 1. flat_load glc=1 + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the flat_load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acquire - workgroup - global 1. buffer/global_atomic + atomicrmw acquire - workgroup - local 1. ds/flat_atomic + - generic 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than a local + atomicrmw value + being acquired. + + atomicrmw acquire - agent - global 1. buffer/global_atomic + - system 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - agent - generic 1. 
flat_atomic + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acquire - singlethread *none* *none* + - wavefront + fence acquire - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit. + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + value read by the + fence-paired-atomic. + + fence acquire - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. 
+ - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + fence-paired atomic + has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_wbinvl1_vol + + - Must happen before any + following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + **Release Atomic** + ------------------------------------------------------------------------------------ + store atomic release - singlethread - global 1. buffer/global/ds/flat_store + - wavefront - local + - generic + store atomic release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to local have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + store atomic release - workgroup - local 1. ds_store + store atomic release - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. 
+ - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to memory have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + atomicrmw release - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw release - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - If OpenCL, omit. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global/flat_atomic + atomicrmw release - workgroup - local 1. ds_atomic + atomicrmw release - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and local + have completed + before performing + the atomicrmw that + is being released. + + 2. buffer/global/flat_atomic + fence release - singlethread *none* *none* + - wavefront + fence release - workgroup *none* 1. 
s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit. + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + to local have + completed before + performing the + following + fence-paired-atomic. + + fence release - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). 
+ - Ensures that all + memory operations + have + completed before + performing the + following + fence-paired-atomic. + + **Acquire-Release Atomic** + ------------------------------------------------------------------------------------ + atomicrmw acq_rel - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + + atomicrmw acq_rel - workgroup - local 1. ds_atomic + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the local load + atomic value being + acquired. + + atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than a local load + atomic value being + acquired. + + atomicrmw acq_rel - agent - global 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). 
+ - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acq_rel - agent - generic 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. 
buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acq_rel - singlethread *none* *none* + - wavefront + fence acq_rel - workgroup *none* 1. s_waitcnt lgkmcnt(0) + + - If OpenCL and + address space is + not generic, omit. + - However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing any + following global + memory operations. + - Ensures that the + preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + acquire-fence-paired-atomic) + has completed + before following + global memory + operations. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + local/generic store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + release-fence-paired-atomic). + This satisfies the + requirements of + release. + + fence acq_rel - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. 
+ - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + preceding + global/local/generic + load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + acquire-fence-paired-atomic) + has completed + before invalidating + the cache. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + global/local/generic + store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + release-fence-paired-atomic). + This satisfies the + requirements of + release. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. This + satisfies the + requirements of + acquire. + + **Sequential Consistent Atomic** + ------------------------------------------------------------------------------------ + load atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local load atomic acquire, + - generic except must generated + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + + - Must + happen after + preceding + local/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) 
+ - Ensures any + preceding + sequential + consistent local + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load. (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + s_waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order. The s_waitcnt + could be placed after + seq_store or before + the seq_load. We + choose the load to + make the s_waitcnt be + as late as possible + so that the store + may have already + completed.) + + 2. *Following + instructions same as + corresponding load + atomic acquire, + except must generate + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - local *Same as corresponding + load atomic acquire, + except must generate + all instructions even + for OpenCL.* + + load atomic seq_cst - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) + + - Could be split into + separate s_waitcnt + vmcnt(0) + and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt lgkmcnt(0) + must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) + - s_waitcnt vmcnt(0) + must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.)
+ - Ensures any + preceding + sequential + consistent global + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load. (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + s_waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order. The s_waitcnt + could be placed after + seq_store or before + the seq_load. We + choose the load to + make the s_waitcnt be + as late as possible + so that the store + may have already + completed.) + + 2. *Following + instructions same as + corresponding load + atomic acquire, + except must generate + all instructions even + for OpenCL.* + store atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local store atomic release, + - workgroup - generic except must generate + - agent all instructions even + - system for OpenCL.* + atomicrmw seq_cst - singlethread - global *Same as corresponding + - wavefront - local atomicrmw acq_rel, + - workgroup - generic except must generate + - agent all instructions even + - system for OpenCL.* + fence seq_cst - singlethread *none* *Same as corresponding + - wavefront fence acq_rel, + - workgroup except must generate + - agent all instructions even + - system for OpenCL.* + ============ ============ ============== ========== ================================ + +.. _amdgpu-amdhsa-memory-model-gfx90a: + +Memory Model GFX90A ++++++++++++++++++++ + +For GFX90A: + +* Each agent has multiple shader arrays (SA). +* Each SA has multiple compute units (CU). +* Each CU has multiple SIMDs that execute wavefronts. +* The wavefronts for a single work-group are executed in the same CU but may be + executed by different SIMDs.
The exception is when in tgsplit execution mode + when the wavefronts may be executed by different SIMDs in different CUs. +* Each CU has a single LDS memory shared by the wavefronts of the work-groups + executing on it. The exception is when in tgsplit execution mode when no LDS + is allocated as wavefronts of the same work-group can be in different CUs. +* All LDS operations of a CU are performed as wavefront wide operations in a + global order and involve no caching. Completion is reported to a wavefront in + execution order. +* The LDS memory has multiple request queues shared by the SIMDs of a + CU. Therefore, the LDS operations performed by different wavefronts of a + work-group can be reordered relative to each other, which can result in + reordering the visibility of vector memory operations with respect to LDS + operations of other wavefronts in the same work-group. A ``s_waitcnt + lgkmcnt(0)`` is required to ensure synchronization between LDS operations and + vector memory operations between wavefronts of a work-group, but not between + operations performed by the same wavefront. +* The vector memory operations are performed as wavefront wide operations and + completion is reported to a wavefront in execution order. The exception is + that ``flat_load/store/atomic`` instructions can report out of vector memory + order if they access LDS memory, and out of LDS operation order if they access + global memory. +* The vector memory operations access a single vector L1 cache shared by all + SIMDs of a CU. Therefore: + + * No special action is required for coherence between the lanes of a single + wavefront. + + * No special action is required for coherence between wavefronts in the same + work-group since they execute on the same CU. The exception is when in + tgsplit execution mode as wavefronts of the same work-group can be in + different CUs and so a ``buffer_wbinvl1_vol`` is required as described in + the following item.
+ + * A ``buffer_wbinvl1_vol`` is required for coherence between wavefronts + executing in different work-groups as they may be executing on different + CUs. + +* The scalar memory operations access a scalar L1 cache shared by all wavefronts + on a group of CUs. The scalar and vector L1 caches are not coherent. However, + scalar operations are used in a restricted way so do not impact the memory + model. See :ref:`amdgpu-amdhsa-memory-spaces`. +* The vector and scalar memory operations use an L2 cache shared by all CUs on + the same agent. + + * The L2 cache has independent channels to service disjoint ranges of virtual + addresses. + * Each CU has a separate request queue per channel. Therefore, the vector and + scalar memory operations performed by wavefronts executing in different + work-groups (which may be executing on different CUs), or the same + work-group if executing in tgsplit mode, of an agent can be reordered + relative to each other. A ``s_waitcnt vmcnt(0)`` is required to ensure + synchronization between vector memory operations of different CUs. It + ensures a previous vector memory operation has completed before executing a + subsequent vector memory or LDS operation and so can be used to meet the + requirements of acquire and release. + * The L2 cache of one agent can be kept coherent with other agents by: + using the MTYPE RW (read-write) or MTYPE CC (cache-coherent) with the PTE + C-bit for memory local to the L2; and using the MTYPE NC (non-coherent) with + the PTE C-bit set or MTYPE UC (uncached) for memory not local to the L2. + + * Any local memory cache lines will be automatically invalidated by writes + from CUs associated with other L2 caches, or writes from the CPU, due to + the cache probe caused by coherent requests. Coherent requests are caused + by GPU accesses to pages with the PTE C-bit set, by CPU accesses over + XGMI, and by PCIe requests that are configured to be coherent requests. 
+ * XGMI accesses from the CPU to local memory may be cached on the CPU. + Subsequent access from the GPU will automatically invalidate or writeback + the CPU cache due to the L2 probe filter and the PTE C-bit being set. + * Since all work-groups on the same agent share the same L2, no L2 + invalidation or writeback is required for coherence. + * To ensure coherence of local and remote memory writes of work-groups in + different agents a ``buffer_wbl2`` is required. It will writeback dirty L2 + cache lines of MTYPE RW (used for local coarse grain memory) and MTYPE NC + (used for remote coarse grain memory). Note that MTYPE CC (used for local + fine grain memory) causes write through to DRAM, and MTYPE UC (used for + remote fine grain memory) bypasses the L2, so both will never result in + dirty L2 cache lines. + * To ensure coherence of local and remote memory reads of work-groups in + different agents a ``buffer_invl2`` is required. It will invalidate L2 + cache lines with MTYPE NC (used for remote coarse grain memory). Note that + MTYPE CC (used for local fine grain memory) and MTYPE RW (used for local + coarse grain memory) cause local reads to be invalidated by remote writes + with the PTE C-bit so these cache lines are not invalidated. Note that + MTYPE UC (used for remote fine grain memory) bypasses the L2, so will + never result in L2 cache lines that need to be invalidated. + + * PCIe access from the GPU to the CPU memory is kept coherent by using the + MTYPE UC (uncached) which bypasses the L2. + +Scalar memory operations are only used to access memory that is proven to not +change during the execution of the kernel dispatch. This includes constant +address space and global address space for program scope ``const`` variables. +Therefore, the kernel machine code does not have to maintain the scalar cache to +ensure it is coherent with the vector caches.
The scalar and vector caches are +invalidated between kernel dispatches by CP since constant address space data +may change between kernel dispatch executions. See +:ref:`amdgpu-amdhsa-memory-spaces`. + +The one exception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a ``s_dcache_wb`` is inserted before the ``s_endpgm`` and before a function +return since the locations may be used for vector memory instructions by a +future wavefront that uses the same scratch area, or a function call that +creates a frame at the same address, respectively. There is no need for a +``s_dcache_inv`` as all scalar writes are write-before-read in the same thread. + +For kernarg backing memory: + +* CP invalidates the L1 cache at the start of each kernel dispatch. +* On dGPU over XGMI or PCIe the kernarg backing memory is allocated in host + memory accessed as MTYPE UC (uncached) to avoid needing to invalidate the L2 + cache. This also causes it to be treated as non-volatile and so is not + invalidated by ``*_vol``. +* On APU the kernarg backing memory is accessed as MTYPE CC (cache coherent) and + so the L2 cache will be coherent with the CPU and other agents. + +Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC_NV (non-coherent non-volatile). Since the private address space is +only accessed by a single thread, and is always write-before-read, there is +never a need to invalidate these entries from the L1 cache. Hence all cache +invalidates are done as ``*_vol`` to only invalidate the volatile cache lines. + +The code sequences used to implement the memory model for GFX90A are defined +in table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table`. + + .. 
table:: AMDHSA Memory Model Code Sequences GFX90A + :name: amdgpu-amdhsa-memory-model-code-sequences-gfx90a-table + + ============ ============ ============== ========== ================================ + LLVM Instr LLVM Memory LLVM Memory AMDGPU AMDGPU Machine Code + Ordering Sync Scope Address GFX90A + Space + ============ ============ ============== ========== ================================ + **Non-Atomic** + ------------------------------------------------------------------------------------ + load *none* *none* - global - !volatile & !nontemporal + - generic + - private 1. buffer/global/flat_load + - constant + - !volatile & nontemporal + + 1. buffer/global/flat_load + glc=1 slc=1 + + - volatile + + 1. buffer/global/flat_load + glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + any following volatile + global/generic + load/store. + - Ensures that + volatile + operations to + different + addresses will not + be reordered by + hardware. + + load *none* *none* - local 1. ds_load + store *none* *none* - global - !volatile & !nontemporal + - generic + - private 1. buffer/global/flat_store + - constant + - !volatile & nontemporal + + 1. buffer/global/flat_store + glc=1 slc=1 + + - volatile + + 1. buffer/global/flat_store + 2. s_waitcnt vmcnt(0) + + - Must happen before + any following volatile + global/generic + load/store. + - Ensures that + volatile + operations to + different + addresses will not + be reordered by + hardware. + + store *none* *none* - local 1. ds_store + **Unordered Atomic** + ------------------------------------------------------------------------------------ + load atomic unordered *any* *any* *Same as non-atomic*. + store atomic unordered *any* *any* *Same as non-atomic*. + atomicrmw unordered *any* *any* *Same as monotonic atomic*. + **Monotonic Atomic** + ------------------------------------------------------------------------------------ + load atomic monotonic - singlethread - global 1. 
buffer/global/flat_load + - wavefront - generic + load atomic monotonic - workgroup - global 1. buffer/global/flat_load + - generic glc=1 + + - If not TgSplit execution + mode, omit glc=1. + + load atomic monotonic - singlethread - local *If TgSplit execution mode, + - wavefront local address space cannot + - workgroup be used.* + + 1. ds_load + load atomic monotonic - agent - global 1. buffer/global/flat_load + - generic glc=1 + load atomic monotonic - system - global 1. buffer/global/flat_load + - generic glc=1 + store atomic monotonic - singlethread - global 1. buffer/global/flat_store + - wavefront - generic + - workgroup + - agent + store atomic monotonic - system - global 1. buffer/global/flat_store + - generic + store atomic monotonic - singlethread - local *If TgSplit execution mode, + - wavefront local address space cannot + - workgroup be used.* + + 1. ds_store + atomicrmw monotonic - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + - workgroup + - agent + atomicrmw monotonic - system - global 1. buffer/global/flat_atomic + - generic + atomicrmw monotonic - singlethread - local *If TgSplit execution mode, + - wavefront local address space cannot + - workgroup be used.* + + 1. ds_atomic + **Acquire Atomic** + ------------------------------------------------------------------------------------ + load atomic acquire - singlethread - global 1. buffer/global/ds/flat_load + - wavefront - local + - generic + load atomic acquire - workgroup - global 1. buffer/global_load glc=1 + + - If not TgSplit execution + mode, omit glc=1. + + 2. s_waitcnt vmcnt(0) + + - If not TgSplit execution + mode, omit. + - Must happen before the + following buffer_wbinvl1_vol. + + 3. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale data. 
+ + load atomic acquire - workgroup - local *If TgSplit execution mode, + local address space cannot + be used.* + + 1. ds_load + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the local load + atomic value being + acquired. + + load atomic acquire - workgroup - generic 1. flat_load glc=1 + + - If not TgSplit execution + mode, omit glc=1. + + 2. s_waitcnt lgkm/vmcnt(0) + + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL, omit lgkmcnt(0). + - Must happen before + the following + buffer_wbinvl1_vol and any + following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than a local load + atomic value being + acquired. + + 3. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + load atomic acquire - agent - global 1. buffer/global_load + glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale global data. + + load atomic acquire - system - global 1. buffer/global/flat_load + glc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + following buffer_invl2 and + buffer_wbinvl1_vol. + - Ensures the load + has completed + before invalidating + the cache. + + 3. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. 
+ MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. + + load atomic acquire - agent - generic 1. flat_load glc=1 + 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the flat_load + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + load atomic acquire - system - generic 1. flat_load glc=1 + 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL omit + lgkmcnt(0). + - Must happen before + following + buffer_invl2 and + buffer_wbinvl1_vol. + - Ensures the flat_load + has completed + before invalidating + the caches. + + 3. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. + MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. + + atomicrmw acquire - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + atomicrmw acquire - singlethread - local *If TgSplit execution mode, + - wavefront local address space cannot + be used.* + + 1. ds_atomic + atomicrmw acquire - workgroup - global 1. buffer/global_atomic + 2. s_waitcnt vmcnt(0) + + - If not TgSplit execution + mode, omit. + - Must happen before the + following buffer_wbinvl1_vol. + - Ensures the atomicrmw + has completed + before invalidating + the cache. + + 3. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. 
+ + atomicrmw acquire - workgroup - local *If TgSplit execution mode, + local address space cannot + be used.* + + 1. ds_atomic + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the local + atomicrmw value + being acquired. + + atomicrmw acquire - workgroup - generic 1. flat_atomic + 2. s_waitcnt lgkm/vmcnt(0) + + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL, omit lgkmcnt(0). + - Must happen before + the following + buffer_wbinvl1_vol and + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than a local + atomicrmw value + being acquired. + + 3. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acquire - agent - global 1. buffer/global_atomic + 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - system - global 1. buffer/global_atomic + 2. s_waitcnt vmcnt(0) + + - Must happen before + following buffer_invl2 and + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 3. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. + MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. 
+ + atomicrmw acquire - agent - generic 1. flat_atomic + 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 3. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - system - generic 1. flat_atomic + 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_invl2 and + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 3. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. + MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. + + fence acquire - singlethread *none* *none* + - wavefront + fence acquire - workgroup *none* 1. s_waitcnt lgkm/vmcnt(0) + + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. 
+ - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/ + atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_wbinvl1_vol and + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + value read by the + fence-paired-atomic. + + 2. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + fence acquire - agent *none* 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_wbinvl1_vol. 
+ - Ensures that the + fence-paired atomic + has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_wbinvl1_vol + + - Must happen before any + following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acquire - system *none* 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following buffer_invl2 and + buffer_wbinvl1_vol. + - Ensures that the + fence-paired atomic + has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before any + following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. 
+ MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. + **Release Atomic** + ------------------------------------------------------------------------------------ + store atomic release - singlethread - global 1. buffer/global/flat_store + - wavefront - generic + store atomic release - singlethread - local *If TgSplit execution mode, + - wavefront local address space cannot + be used.* + + 1. ds_store + store atomic release - workgroup - global 1. s_waitcnt lgkm/vmcnt(0) + - generic + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL, omit lgkmcnt(0). + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/store/ + load atomic/store atomic/ + atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + store atomic release - workgroup - local *If TgSplit execution mode, + local address space cannot + be used.* + + 1. ds_store + store atomic release - agent - global 1. s_waitcnt lgkmcnt(0) & + - generic vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. 
+ - Ensures that all + memory operations + to memory have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + store atomic release - system - global 1. buffer_wbl2 + - generic + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after any + preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after any + preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to memory and the L2 + writeback have + completed before + performing the + store that is being + released. + + 3. buffer/global/flat_store + atomicrmw release - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + atomicrmw release - singlethread - local *If TgSplit execution mode, + - wavefront local address space cannot + be used.* + + 1. ds_atomic + atomicrmw release - workgroup - global 1. s_waitcnt lgkm/vmcnt(0) + - generic + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL, omit + lgkmcnt(0). + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/store/ + load atomic/store atomic/ + atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. 
+ - Ensures that all + memory operations + have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global/flat_atomic + atomicrmw release - workgroup - local *If TgSplit execution mode, + local address space cannot + be used.* + + 1. ds_atomic + atomicrmw release - agent - global 1. s_waitcnt lgkmcnt(0) & + - generic vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and local + have completed + before performing + the atomicrmw that + is being released. + + 2. buffer/global/flat_atomic + atomicrmw release - system - global 1. buffer_wbl2 + - generic + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. 
+ - Ensures that all + memory operations + to memory and the L2 + writeback have + completed before + performing the + atomicrmw that is being + released. + + 3. buffer/global/flat_atomic + fence release - singlethread *none* *none* + - wavefront + fence release - workgroup *none* 1. s_waitcnt lgkm/vmcnt(0) + + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/ + load atomic/store atomic/ + atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + have + completed before + performing the + following + fence-paired-atomic. + + fence release - agent *none* 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified.
+ - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + have + completed before + performing the + following + fence-paired-atomic. + + fence release - system *none* 1. buffer_wbl2 + + - If OpenCL and + address space is + local, omit. + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. 
+ - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + have + completed before + performing the + following + fence-paired-atomic. + + **Acquire-Release Atomic** + ------------------------------------------------------------------------------------ + atomicrmw acq_rel - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + atomicrmw acq_rel - singlethread - local *If TgSplit execution mode, + - wavefront local address space cannot + be used.* + + 1. ds_atomic + atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkm/vmcnt(0) + + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL, omit + lgkmcnt(0). + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/store/ + load atomic/store atomic/ + atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vmcnt(0) + + - If not TgSplit execution + mode, omit. + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures any + following global + data read is no + older than the + atomicrmw value + being acquired. + + 4. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acq_rel - workgroup - local *If TgSplit execution mode, + local address space cannot + be used.* + + 1. ds_atomic + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. 
+ - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the local load + atomic value being + acquired. + + atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkm/vmcnt(0) + + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL, omit + lgkmcnt(0). + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/store/ + load atomic/store atomic/ + atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If not TgSplit execution + mode, omit vmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + the following + buffer_wbinvl1_vol and + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than a local load + atomic value being + acquired. + + 3. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acq_rel - agent - global 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. 
+ - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acq_rel - system - global 1. buffer_wbl2 + + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and L2 writeback + have completed before + performing the + atomicrmw that is + being released. + + 3. buffer/global_atomic + 4. s_waitcnt vmcnt(0) + + - Must happen before + following buffer_invl2 and + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 5. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. 
+ MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. + + atomicrmw acq_rel - agent - generic 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + cache. + + 4. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acq_rel - system - generic 1. buffer_wbl2 + + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. 
+ - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and L2 writeback + have completed before + performing the + atomicrmw that is + being released. + + 3. flat_atomic + 4. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + following buffer_invl2 and + buffer_wbinvl1_vol. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 5. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. + MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. + + fence acq_rel - singlethread *none* *none* + - wavefront + fence acq_rel - workgroup *none* 1. s_waitcnt lgkm/vmcnt(0) + + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0). + - However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/ + load atomic/store atomic/ + atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that all + memory operations + have + completed before + performing any + following global + memory operations. 
+ - Ensures that the + preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + acquire-fence-paired-atomic) + has completed + before following + global memory + operations. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + local/generic store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + release-fence-paired-atomic). + This satisfies the + requirements of + release. + - Must happen before + the following + buffer_wbinvl1_vol. + - Ensures that the + acquire-fence-paired + atomic has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + acquire-fence-paired-atomic. + + 2. buffer_wbinvl1_vol + + - If not TgSplit execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + fence acq_rel - agent *none* 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + buffer_wbinvl1_vol. 
+ - Ensures that the + preceding + global/local/generic + load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + acquire-fence-paired-atomic) + has completed + before invalidating + the cache. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + global/local/generic + store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + release-fence-paired-atomic). + This satisfies the + requirements of + release. + + 2. buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. This + satisfies the + requirements of + acquire. + + fence acq_rel - system *none* 1. buffer_wbl2 + + - If OpenCL and + address space is + local, omit. + - Must happen before + following s_waitcnt. + - Performs L2 writeback to + ensure previous + global/generic + store/atomicrmw are + visible at system scope. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0) and + s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/store/load + atomic/store + atomic/atomicrmw. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following buffer_invl2 and + buffer_wbinvl1_vol. 
+ - Ensures that the + preceding + global/local/generic + load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + acquire-fence-paired-atomic) + has completed + before invalidating + the cache. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + global/local/generic + store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + release-fence-paired-atomic). + This satisfies the + requirements of + release. + + 3. buffer_invl2; + buffer_wbinvl1_vol + + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale L1 global data, + nor see stale L2 MTYPE + NC global data. + MTYPE RW and CC memory will + never be stale in L2 due to + the memory probes. + + **Sequential Consistent Atomic** + ------------------------------------------------------------------------------------ + load atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local load atomic acquire, + - generic except must generated + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - global 1. s_waitcnt lgkm/vmcnt(0) + - generic + - Use lgkmcnt(0) if not + TgSplit execution mode + and vmcnt(0) if TgSplit + execution mode. + - s_waitcnt lgkmcnt(0) must + happen after + preceding + local/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) + - s_waitcnt vmcnt(0) + must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. 
+ (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequential + consistent global/local + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load. (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + s_waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order. The s_waitcnt + could be placed after + seq_store or before + the seq_load. We + choose the load to + make the s_waitcnt be + as late as possible + so that the store + may have already + completed.) + + 2. *Following + instructions same as + corresponding load + atomic acquire, + except must generated + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - local *If TgSplit execution mode, + local address space cannot + be used.* + + *Same as corresponding + load atomic acquire, + except must generated + all instructions even + for OpenCL.* + + load atomic seq_cst - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) + + - If TgSplit execution mode, + omit lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0) + and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt lgkmcnt(0) + must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) 
+ - s_waitcnt vmcnt(0) + must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequential + consistent global + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load. (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + s_waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order. The s_waitcnt + could be placed after + seq_store or before + the seq_load. We + choose the load to + make the s_waitcnt be + as late as possible + so that the store + may have already + completed.) + + 2. *Following + instructions same as + corresponding load + atomic acquire, + except must generate + all instructions even + for OpenCL.* + store atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local store atomic release, + - workgroup - generic except must generate + - agent all instructions even + - system for OpenCL.* + atomicrmw seq_cst - singlethread - global *Same as corresponding + - wavefront - local atomicrmw acq_rel, + - workgroup - generic except must generate + - agent all instructions even + - system for OpenCL.* + fence seq_cst - singlethread *none* *Same as corresponding + - wavefront fence acq_rel, + - workgroup except must generate + - agent all instructions even + - system for OpenCL.* + ============ ============ ============== ========== ================================ + +..
_amdgpu-amdhsa-memory-model-gfx10: + +Memory Model GFX10 +++++++++++++++++++ + +For GFX10: + +* Each agent has multiple shader arrays (SA). +* Each SA has multiple work-group processors (WGP). +* Each WGP has multiple compute units (CU). +* Each CU has multiple SIMDs that execute wavefronts. +* The wavefronts for a single work-group are executed in the same + WGP. In CU wavefront execution mode the wavefronts may be executed by + different SIMDs in the same CU. In WGP wavefront execution mode the + wavefronts may be executed by different SIMDs in different CUs in the same + WGP. +* Each WGP has a single LDS memory shared by the wavefronts of the work-groups + executing on it. +* All LDS operations of a WGP are performed as wavefront wide operations in a + global order and involve no caching. Completion is reported to a wavefront in + execution order. +* The LDS memory has multiple request queues shared by the SIMDs of a + WGP. Therefore, the LDS operations performed by different wavefronts of a + work-group can be reordered relative to each other, which can result in + reordering the visibility of vector memory operations with respect to LDS + operations of other wavefronts in the same work-group. A ``s_waitcnt + lgkmcnt(0)`` is required to ensure synchronization between LDS operations and + vector memory operations between wavefronts of a work-group, but not between + operations performed by the same wavefront. +* The vector memory operations are performed as wavefront wide operations. + Completion of load/store/sample operations is reported to a wavefront in + execution order of other load/store/sample operations performed by that + wavefront. +* The vector memory operations access a vector L0 cache. There is a single L0 + cache per CU. Each SIMD of a CU accesses the same L0 cache. Therefore, no + special action is required for coherence between the lanes of a single + wavefront.
However, a ``buffer_gl0_inv`` is required for coherence between + wavefronts executing in the same work-group as they may be executing on SIMDs + of different CUs that access different L0s. A ``buffer_gl0_inv`` is also + required for coherence between wavefronts executing in different work-groups + as they may be executing on different WGPs. +* The scalar memory operations access a scalar L0 cache shared by all wavefronts + on a WGP. The scalar and vector L0 caches are not coherent. However, scalar + operations are used in a restricted way so do not impact the memory model. See + :ref:`amdgpu-amdhsa-memory-spaces`. +* The vector and scalar memory L0 caches use an L1 cache shared by all WGPs on + the same SA. Therefore, no special action is required for coherence between + the wavefronts of a single work-group. However, a ``buffer_gl1_inv`` is + required for coherence between wavefronts executing in different work-groups + as they may be executing on different SAs that access different L1s. +* The L1 caches have independent quadrants to service disjoint ranges of virtual + addresses. +* Each L0 cache has a separate request queue per L1 quadrant. Therefore, the + vector and scalar memory operations performed by different wavefronts, whether + executing in the same or different work-groups (which may be executing on + different CUs accessing different L0s), can be reordered relative to each + other. A ``s_waitcnt vmcnt(0) & vscnt(0)`` is required to ensure + synchronization between vector memory operations of different wavefronts. It + ensures a previous vector memory operation has completed before executing a + subsequent vector memory or LDS operation and so can be used to meet the + requirements of acquire, release and sequential consistency. +* The L1 caches use an L2 cache shared by all SAs on the same agent. +* The L2 cache has independent channels to service disjoint ranges of virtual + addresses. 
+* Each L1 quadrant of a single SA accesses a different L2 channel. Each L1 + quadrant has a separate request queue per L2 channel. Therefore, the vector + and scalar memory operations performed by wavefronts executing in different + work-groups (which may be executing on different SAs) of an agent can be + reordered relative to each other. A ``s_waitcnt vmcnt(0) & vscnt(0)`` is + required to ensure synchronization between vector memory operations of + different SAs. It ensures a previous vector memory operation has completed + before executing a subsequent vector memory operation and so can be used to meet the + requirements of acquire, release and sequential consistency. +* The L2 cache can be kept coherent with other agents on some targets, or ranges + of virtual addresses can be set up to bypass it to ensure system coherence. + +Scalar memory operations are only used to access memory that is proven to not +change during the execution of the kernel dispatch. This includes constant +address space and global address space for program scope ``const`` variables. +Therefore, the kernel machine code does not have to maintain the scalar cache to +ensure it is coherent with the vector caches. The scalar and vector caches are +invalidated between kernel dispatches by CP since constant address space data +may change between kernel dispatch executions. See +:ref:`amdgpu-amdhsa-memory-spaces`. + +The one exception is if scalar writes are used to spill SGPR registers. In this +case the AMDGPU backend ensures the memory location used to spill is never +accessed by vector memory operations at the same time. If scalar writes are used +then a ``s_dcache_wb`` is inserted before the ``s_endpgm`` and before a function +return since the locations may be used for vector memory instructions by a +future wavefront that uses the same scratch area, or a function call that +creates a frame at the same address, respectively.
There is no need for a +``s_dcache_inv`` as all scalar writes are write-before-read in the same thread. + +For kernarg backing memory: + +* CP invalidates the L0 and L1 caches at the start of each kernel dispatch. +* On dGPU the kernarg backing memory is accessed as MTYPE UC (uncached) to avoid + needing to invalidate the L2 cache. +* On APU the kernarg backing memory is accessed as MTYPE CC (cache coherent) and + so the L2 cache will be coherent with the CPU and other agents. + +Scratch backing memory (which is used for the private address space) is accessed +with MTYPE NC (non-coherent). Since the private address space is only accessed +by a single thread, and is always write-before-read, there is never a need to +invalidate these entries from the L0 or L1 caches. + +Wavefronts are executed in native mode with in-order reporting of loads and +sample instructions. In this mode vmcnt reports completion of load, atomic with +return and sample instructions in order, and the vscnt reports the completion of +store and atomic without return in order. See ``MEM_ORDERED`` field in +:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + +Wavefronts can be executed in WGP or CU wavefront execution mode: + +* In WGP wavefront execution mode the wavefronts of a work-group are executed + on the SIMDs of both CUs of the WGP. Therefore, explicit management of the per + CU L0 caches is required for work-group synchronization. Also accesses to L1 + at work-group scope need to be explicitly ordered as the accesses from + different CUs are not ordered. +* In CU wavefront execution mode the wavefronts of a work-group are executed on + the SIMDs of a single CU of the WGP. Therefore, all global memory accesses by + the work-group access the same L0 which in turn ensures L1 accesses are + ordered and so do not require explicit management of the caches for + work-group synchronization.
+ +See ``WGP_MODE`` field in +:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table` and +:ref:`amdgpu-target-features`. + +The code sequences used to implement the memory model for GFX10 are defined in +table :ref:`amdgpu-amdhsa-memory-model-code-sequences-gfx10-table`. + + .. table:: AMDHSA Memory Model Code Sequences GFX10 + :name: amdgpu-amdhsa-memory-model-code-sequences-gfx10-table + + ============ ============ ============== ========== ================================ + LLVM Instr LLVM Memory LLVM Memory AMDGPU AMDGPU Machine Code + Ordering Sync Scope Address GFX10 + Space + ============ ============ ============== ========== ================================ + **Non-Atomic** + ------------------------------------------------------------------------------------ + load *none* *none* - global - !volatile & !nontemporal + - generic + - private 1. buffer/global/flat_load + - constant + - !volatile & nontemporal + + 1. buffer/global/flat_load + slc=1 + + - volatile + + 1. buffer/global/flat_load + glc=1 dlc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + any following volatile + global/generic + load/store. + - Ensures that + volatile + operations to + different + addresses will not + be reordered by + hardware. + + load *none* *none* - local 1. ds_load + store *none* *none* - global - !volatile & !nontemporal + - generic + - private 1. buffer/global/flat_store + - constant + - !volatile & nontemporal + + 1. buffer/global/flat_store + slc=1 + + - volatile + + 1. buffer/global/flat_store + 2. s_waitcnt vscnt(0) + + - Must happen before + any following volatile + global/generic + load/store. + - Ensures that + volatile + operations to + different + addresses will not + be reordered by + hardware. + + store *none* *none* - local 1. ds_store + **Unordered Atomic** + ------------------------------------------------------------------------------------ + load atomic unordered *any* *any* *Same as non-atomic*. 
+ store atomic unordered *any* *any* *Same as non-atomic*. + atomicrmw unordered *any* *any* *Same as monotonic atomic*. + **Monotonic Atomic** + ------------------------------------------------------------------------------------ + load atomic monotonic - singlethread - global 1. buffer/global/flat_load + - wavefront - generic + load atomic monotonic - workgroup - global 1. buffer/global/flat_load + - generic glc=1 + + - If CU wavefront execution + mode, omit glc=1. + + load atomic monotonic - singlethread - local 1. ds_load + - wavefront + - workgroup + load atomic monotonic - agent - global 1. buffer/global/flat_load + - system - generic glc=1 dlc=1 + store atomic monotonic - singlethread - global 1. buffer/global/flat_store + - wavefront - generic + - workgroup + - agent + - system + store atomic monotonic - singlethread - local 1. ds_store + - wavefront + - workgroup + atomicrmw monotonic - singlethread - global 1. buffer/global/flat_atomic + - wavefront - generic + - workgroup + - agent + - system + atomicrmw monotonic - singlethread - local 1. ds_atomic + - wavefront + - workgroup + **Acquire Atomic** + ------------------------------------------------------------------------------------ + load atomic acquire - singlethread - global 1. buffer/global/ds/flat_load + - wavefront - local + - generic + load atomic acquire - workgroup - global 1. buffer/global_load glc=1 + + - If CU wavefront execution + mode, omit glc=1. + + 2. s_waitcnt vmcnt(0) + + - If CU wavefront execution + mode, omit. + - Must happen before + the following buffer_gl0_inv + and before any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + + 3. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + load atomic acquire - workgroup - local 1. ds_load + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. 
+ - Must happen before + the following buffer_gl0_inv + and before any following + global/generic load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the local load + atomic value being + acquired. + + 3. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - If OpenCL, omit. + - Ensures that + following + loads will not see + stale data. + + load atomic acquire - workgroup - generic 1. flat_load glc=1 + + - If CU wavefront execution + mode, omit glc=1. + + 2. s_waitcnt lgkmcnt(0) & + vmcnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Must happen before + the following + buffer_gl0_inv and any + following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than a local load + atomic value being + acquired. + + 3. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + load atomic acquire - agent - global 1. buffer/global_load + - system glc=1 dlc=1 + 2. s_waitcnt vmcnt(0) + + - Must happen before + following + buffer_gl*_inv. + - Ensures the load + has completed + before invalidating + the caches. + + 3. buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following + loads will not see + stale global data. + + load atomic acquire - agent - generic 1. flat_load glc=1 dlc=1 + - system 2. s_waitcnt vmcnt(0) & + lgkmcnt(0) + + - If OpenCL omit + lgkmcnt(0). + - Must happen before + following + buffer_gl*_inv. + - Ensures the flat_load + has completed + before invalidating + the caches. + + 3. buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data.
+ + atomicrmw acquire - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acquire - workgroup - global 1. buffer/global_atomic + 2. s_waitcnt vm/vscnt(0) + + - If CU wavefront execution + mode, omit. + - Use vmcnt(0) if atomic with + return and vscnt(0) if + atomic with no-return. + - Must happen before + the following buffer_gl0_inv + and before any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + + 3. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acquire - workgroup - local 1. ds_atomic + 2. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + the following + buffer_gl0_inv. + - Ensures any + following global + data read is no + older than the local + atomicrmw value + being acquired. + + 3. buffer_gl0_inv + + - If OpenCL omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acquire - workgroup - generic 1. flat_atomic + 2. s_waitcnt lgkmcnt(0) & + vm/vscnt(0) + + - If CU wavefront execution + mode, omit vm/vscnt(0). + - If OpenCL, omit lgkmcnt(0). + - Use vmcnt(0) if atomic with + return and vscnt(0) if + atomic with no-return. + - Must happen before + the following + buffer_gl0_inv. + - Ensures any + following global + data read is no + older than a local + atomicrmw value + being acquired. + + 3. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acquire - agent - global 1. buffer/global_atomic + - system 2. s_waitcnt vm/vscnt(0) + + - Use vmcnt(0) if atomic with + return and vscnt(0) if + atomic with no-return. + - Must happen before + following + buffer_gl*_inv. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 3. 
buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acquire - agent - generic 1. flat_atomic + - system 2. s_waitcnt vm/vscnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Use vmcnt(0) if atomic with + return and vscnt(0) if + atomic with no-return. + - Must happen before + following + buffer_gl*_inv. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 3. buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acquire - singlethread *none* *none* + - wavefront + fence acquire - workgroup *none* 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0) and vscnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/ + atomicrmw-with-return-value + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). 
+ - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + atomicrmw-no-return-value + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_gl0_inv. + - Ensures that the + fence-paired atomic + has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + fence acquire - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) & vscnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0) and vscnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load + atomic/ + atomicrmw-with-return-value + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + atomicrmw-no-return-value + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic).
+ - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Must happen before + the following + buffer_gl*_inv. + - Ensures that the + fence-paired atomic + has completed + before invalidating + the + caches. Therefore + any following + locations read must + be no older than + the value read by + the + fence-paired-atomic. + + 2. buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before any + following global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + **Release Atomic** + ------------------------------------------------------------------------------------ + store atomic release - singlethread - global 1. buffer/global/ds/flat_store + - wavefront - local + - generic + store atomic release - workgroup - global 1. s_waitcnt lgkmcnt(0) & + - generic vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store + atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + store atomic release - workgroup - local 1. 
s_waitcnt vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit. + - If OpenCL, omit. + - Could be split into + separate s_waitcnt + vmcnt(0) and s_waitcnt + vscnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - Must happen before + the following + store. + - Ensures that all + global memory + operations have + completed before + performing the + store that is being + released. + + 2. ds_store + store atomic release - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) & vscnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt vscnt(0) + and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + have + completed before + performing the + store that is being + released. + + 2. buffer/global/flat_store + atomicrmw release - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw release - workgroup - global 1. s_waitcnt lgkmcnt(0) & + - generic vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL, omit lgkmcnt(0). 
+ - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store + atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global/flat_atomic + atomicrmw release - workgroup - local 1. s_waitcnt vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit. + - If OpenCL, omit. + - Could be split into + separate s_waitcnt + vmcnt(0) and s_waitcnt + vscnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - Must happen before + the following + atomicrmw. + - Ensures that all + global memory + operations have + completed before + performing the + atomicrmw that is being + released. + + 2. ds_atomic + atomicrmw release - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) & vscnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load atomic/ + atomicrmw-with-return-value.
+ - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global and local + have completed + before performing + the atomicrmw that + is being released. + + 2. buffer/global/flat_atomic + fence release - singlethread *none* *none* + - wavefront + fence release - workgroup *none* 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0) and vscnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store atomic/ + atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). 
+ - Ensures that all + memory operations + have + completed before + performing the + following + fence-paired-atomic. + + fence release - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) & vscnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0) and vscnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + any following store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + fence-paired-atomic). + - Ensures that all + memory operations + have + completed before + performing the + following + fence-paired-atomic. + + **Acquire-Release Atomic** + ------------------------------------------------------------------------------------ + atomicrmw acq_rel - singlethread - global 1. buffer/global/ds/flat_atomic + - wavefront - local + - generic + atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL, omit + lgkmcnt(0). 
+ - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0), and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store + atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vm/vscnt(0) + + - If CU wavefront execution + mode, omit. + - Use vmcnt(0) if atomic with + return and vscnt(0) if + atomic with no-return. + - Must happen before + the following + buffer_gl0_inv. + - Ensures any + following global + data read is no + older than the + atomicrmw value + being acquired. + + 4. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acq_rel - workgroup - local 1. s_waitcnt vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit. + - If OpenCL, omit. + - Could be split into + separate s_waitcnt + vmcnt(0) and s_waitcnt + vscnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - Must happen before + the following + store. 
+ - Ensures that all + global memory + operations have + completed before + performing the + store that is being + released. + + 2. ds_atomic + 3. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen before + the following + buffer_gl0_inv. + - Ensures any + following global + data read is no + older than the local load + atomic value being + acquired. + + 4. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - If OpenCL omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL, omit lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store + atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt lgkmcnt(0) & + vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL, omit lgkmcnt(0). + - Must happen before + the following + buffer_gl0_inv. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + + 4. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + atomicrmw acq_rel - agent - global 1.
s_waitcnt lgkmcnt(0) & + - system vmcnt(0) & vscnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to global have + completed before + performing the + atomicrmw that is + being released. + + 2. buffer/global_atomic + 3. s_waitcnt vm/vscnt(0) + + - Use vmcnt(0) if atomic with + return and vscnt(0) if + atomic with no-return. + - Must happen before + following + buffer_gl*_inv. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 4. buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + atomicrmw acq_rel - agent - generic 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) & vscnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0), and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value.
+ - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + 3. s_waitcnt vm/vscnt(0) & + lgkmcnt(0) + + - If OpenCL, omit + lgkmcnt(0). + - Use vmcnt(0) if atomic with + return and vscnt(0) if + atomic with no-return. + - Must happen before + following + buffer_gl*_inv. + - Ensures the + atomicrmw has + completed before + invalidating the + caches. + + 4. buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before + any following + global/generic + load/load + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. + + fence acq_rel - singlethread *none* *none* + - wavefront + fence acq_rel - workgroup *none* 1. s_waitcnt lgkmcnt(0) & + vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0). + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0) and vscnt(0). + - However, + since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store atomic/ + atomicrmw. 
+ - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that all + memory operations + have + completed before + performing any + following global + memory operations. + - Ensures that the + preceding + local/generic load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + acquire-fence-paired-atomic) + has completed + before following + global memory + operations. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + local/generic store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + release-fence-paired-atomic). + This satisfies the + requirements of + release. + - Must happen before + the following + buffer_gl0_inv. + - Ensures that the + acquire-fence-paired + atomic has completed + before invalidating + the + cache. Therefore + any following + locations read must + be no older than + the value read by + the + acquire-fence-paired-atomic. + + 2. buffer_gl0_inv + + - If CU wavefront execution + mode, omit. + - Ensures that + following + loads will not see + stale data. + + fence acq_rel - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) & vscnt(0) + + - If OpenCL and + address space is + not generic, omit + lgkmcnt(0). + - If OpenCL and + address space is + local, omit + vmcnt(0) and vscnt(0). + - However, since LLVM + currently has no + address space on + the fence need to + conservatively + always generate + (see comment for + previous fence). + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules.
+ - s_waitcnt vmcnt(0) + must happen after + any preceding + global/generic + load/load + atomic/ + atomicrmw-with-return-value. + - s_waitcnt vscnt(0) + must happen after + any preceding + global/generic + store/store atomic/ + atomicrmw-no-return-value. + - s_waitcnt lgkmcnt(0) + must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + buffer_gl*_inv. + - Ensures that the + preceding + global/local/generic + load + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + acquire-fence-paired-atomic) + has completed + before invalidating + the caches. This + satisfies the + requirements of + acquire. + - Ensures that all + previous memory + operations have + completed before a + following + global/local/generic + store + atomic/atomicrmw + with an equal or + wider sync scope + and memory ordering + stronger than + unordered (this is + termed the + release-fence-paired-atomic). + This satisfies the + requirements of + release. + + 2. buffer_gl0_inv; + buffer_gl1_inv + + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures that + following loads + will not see stale + global data. This + satisfies the + requirements of + acquire. + + **Sequential Consistent Atomic** + ------------------------------------------------------------------------------------ + load atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local load atomic acquire, + - generic except must generate + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - global 1. s_waitcnt lgkmcnt(0) & + - generic vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit vmcnt(0) and + vscnt(0).
+ - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0), and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt lgkmcnt(0) must + happen after + preceding + local/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) + - s_waitcnt vmcnt(0) + must happen after + preceding + global/generic load + atomic/ + atomicrmw-with-return-value + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - s_waitcnt vscnt(0) + Must happen after + preceding + global/generic store + atomic/ + atomicrmw-no-return-value + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vscnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequential + consistent global/local + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load. (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + s_waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order. The s_waitcnt + could be placed after + seq_store or before + the seq_load. We + choose the load to + make the s_waitcnt be + as late as possible + so that the store + may have already + completed.) + + 2. 
*Following + instructions same as + corresponding load + atomic acquire, + except must generate + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - local + + 1. s_waitcnt vmcnt(0) & vscnt(0) + + - If CU wavefront execution + mode, omit. + - Could be split into + separate s_waitcnt + vmcnt(0) and s_waitcnt + vscnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt vmcnt(0) + must happen after + preceding + global/generic load + atomic/ + atomicrmw-with-return-value + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - s_waitcnt vscnt(0) + must happen after + preceding + global/generic store + atomic/ + atomicrmw-no-return-value + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vscnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequentially + consistent global + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load. (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + s_waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order. The s_waitcnt + could be placed after + seq_store or before + the seq_load. We + choose the load to + make the s_waitcnt be + as late as possible + so that the store + may have already + completed.) + + 2. *Following + instructions same as + corresponding load + atomic acquire, + except must generate + all instructions even + for OpenCL.* + + load atomic seq_cst - agent - global 1.
s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) & vscnt(0) + + - Could be split into + separate s_waitcnt + vmcnt(0), s_waitcnt + vscnt(0) and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - s_waitcnt lgkmcnt(0) + must happen after + preceding + local load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) + - s_waitcnt vmcnt(0) + must happen after + preceding + global/generic load + atomic/ + atomicrmw-with-return-value + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vmcnt(0) and so do + not need to be + considered.) + - s_waitcnt vscnt(0) + Must happen after + preceding + global/generic store + atomic/ + atomicrmw-no-return-value + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + vscnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequential + consistent global + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load. (Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + s_waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + completing out of + order. The s_waitcnt + could be placed after + seq_store or before + the seq_load. We + choose the load to + make the s_waitcnt be + as late as possible + so that the store + may have already + completed.) + + 2. 
*Following + instructions same as + corresponding load + atomic acquire, + except must generate + all instructions even + for OpenCL.* + store atomic seq_cst - singlethread - global *Same as corresponding + - wavefront - local store atomic release, + - workgroup - generic except must generate + - agent all instructions even + - system for OpenCL.* + atomicrmw seq_cst - singlethread - global *Same as corresponding + - wavefront - local atomicrmw acq_rel, + - workgroup - generic except must generate + - agent all instructions even + - system for OpenCL.* + fence seq_cst - singlethread *none* *Same as corresponding + - wavefront fence acq_rel, + - workgroup except must generate + - agent all instructions even + - system for OpenCL.* + ============ ============ ============== ========== ================================ + +Trap Handler ABI +~~~~~~~~~~~~~~~~ + +For code objects generated by the AMDGPU backend for HSA [HSA]_ compatible +runtimes (see :ref:`amdgpu-os`), the runtime installs a trap handler that +supports the ``s_trap`` instruction. For usage see: + +- :ref:`amdgpu-trap-handler-for-amdhsa-os-v2-table` +- :ref:`amdgpu-trap-handler-for-amdhsa-os-v3-table` +- :ref:`amdgpu-trap-handler-for-amdhsa-os-v4-table` + + .. table:: AMDGPU Trap Handler for AMDHSA OS Code Object V2 + :name: amdgpu-trap-handler-for-amdhsa-os-v2-table + + =================== =============== =============== ======================================= + Usage Code Sequence Trap Handler Description + Inputs + =================== =============== =============== ======================================= + reserved ``s_trap 0x00`` Reserved by hardware. + ``debugtrap(arg)`` ``s_trap 0x01`` ``SGPR0-1``: Reserved for Finalizer HSA ``debugtrap`` + ``queue_ptr`` intrinsic (not implemented). + ``VGPR0``: + ``arg`` + ``llvm.trap`` ``s_trap 0x02`` ``SGPR0-1``: Causes wave to be halted with the PC at + ``queue_ptr`` the trap instruction. The associated + queue is signalled to put it into the + error state.
When the queue is put in + the error state, the waves executing + dispatches on the queue will be + terminated. + ``llvm.debugtrap`` ``s_trap 0x03`` *none* - If debugger not enabled then behaves + as a no-operation. The trap handler + is entered and immediately returns to + continue execution of the wavefront. + - If the debugger is enabled, causes + the debug trap to be reported by the + debugger and the wavefront is put in + the halt state with the PC at the + instruction. The debugger must + increment the PC and resume the wave. + reserved ``s_trap 0x04`` Reserved. + reserved ``s_trap 0x05`` Reserved. + reserved ``s_trap 0x06`` Reserved. + reserved ``s_trap 0x07`` Reserved. + reserved ``s_trap 0x08`` Reserved. + reserved ``s_trap 0xfe`` Reserved. + reserved ``s_trap 0xff`` Reserved. + =================== =============== =============== ======================================= + +.. + + .. table:: AMDGPU Trap Handler for AMDHSA OS Code Object V3 + :name: amdgpu-trap-handler-for-amdhsa-os-v3-table + + =================== =============== =============== ======================================= + Usage Code Sequence Trap Handler Description + Inputs + =================== =============== =============== ======================================= + reserved ``s_trap 0x00`` Reserved by hardware. + debugger breakpoint ``s_trap 0x01`` *none* Reserved for debugger to use for + breakpoints. Causes wave to be halted + with the PC at the trap instruction. + The debugger is responsible to resume + the wave, including the instruction + that the breakpoint overwrote. + ``llvm.trap`` ``s_trap 0x02`` ``SGPR0-1``: Causes wave to be halted with the PC at + ``queue_ptr`` the trap instruction. The associated + queue is signalled to put it into the + error state. When the queue is put in + the error state, the waves executing + dispatches on the queue will be + terminated. + ``llvm.debugtrap`` ``s_trap 0x03`` *none* - If debugger not enabled then behaves + as a no-operation. 
The trap handler + is entered and immediately returns to + continue execution of the wavefront. + - If the debugger is enabled, causes + the debug trap to be reported by the + debugger and the wavefront is put in + the halt state with the PC at the + instruction. The debugger must + increment the PC and resume the wave. + reserved ``s_trap 0x04`` Reserved. + reserved ``s_trap 0x05`` Reserved. + reserved ``s_trap 0x06`` Reserved. + reserved ``s_trap 0x07`` Reserved. + reserved ``s_trap 0x08`` Reserved. + reserved ``s_trap 0xfe`` Reserved. + reserved ``s_trap 0xff`` Reserved. + =================== =============== =============== ======================================= + +.. + + .. table:: AMDGPU Trap Handler for AMDHSA OS Code Object V4 + :name: amdgpu-trap-handler-for-amdhsa-os-v4-table + + =================== =============== ================ ================= ======================================= + Usage Code Sequence GFX6-GFX8 Inputs GFX9-GFX10 Inputs Description + =================== =============== ================ ================= ======================================= + reserved ``s_trap 0x00`` Reserved by hardware. + debugger breakpoint ``s_trap 0x01`` *none* *none* Reserved for debugger to use for + breakpoints. Causes wave to be halted + with the PC at the trap instruction. + The debugger is responsible to resume + the wave, including the instruction + that the breakpoint overwrote. + ``llvm.trap`` ``s_trap 0x02`` ``SGPR0-1``: *none* Causes wave to be halted with the PC at + ``queue_ptr`` the trap instruction. The associated + queue is signalled to put it into the + error state. When the queue is put in + the error state, the waves executing + dispatches on the queue will be + terminated. + ``llvm.debugtrap`` ``s_trap 0x03`` *none* *none* - If debugger not enabled then behaves + as a no-operation. The trap handler + is entered and immediately returns to + continue execution of the wavefront. 
+ - If the debugger is enabled, causes + the debug trap to be reported by the + debugger and the wavefront is put in + the halt state with the PC at the + instruction. The debugger must + increment the PC and resume the wave. + reserved ``s_trap 0x04`` Reserved. + reserved ``s_trap 0x05`` Reserved. + reserved ``s_trap 0x06`` Reserved. + reserved ``s_trap 0x07`` Reserved. + reserved ``s_trap 0x08`` Reserved. + reserved ``s_trap 0xfe`` Reserved. + reserved ``s_trap 0xff`` Reserved. + =================== =============== ================ ================= ======================================= + +.. _amdgpu-amdhsa-function-call-convention: + +Call Convention +~~~~~~~~~~~~~~~ + +.. note:: + + This section is currently incomplete and has inaccuracies. It is WIP that will + be updated as information is determined. + +See :ref:`amdgpu-dwarf-address-space-identifier` for information on swizzled +addresses. Unswizzled addresses are normal linear addresses. + +.. _amdgpu-amdhsa-function-call-convention-kernel-functions: + +Kernel Functions +++++++++++++++++ + +This section describes the call convention ABI for the outer kernel function. + +See :ref:`amdgpu-amdhsa-initial-kernel-execution-state` for the kernel call +convention. + +The following is not part of the AMDGPU kernel calling convention but describes +how the AMDGPU implements function calls: + +1. Clang decides the kernarg layout to match the *HSA Programmer's Language + Reference* [HSA]_. + + - All structs are passed directly. + - Lambda values are passed *TBA*. + + .. TODO:: + + - Does this really follow HSA rules? Or are structs >16 bytes passed + as by-value struct? + - What is the ABI for lambda values? + +2. The kernel performs certain setup in its prolog, as described in + :ref:`amdgpu-amdhsa-kernel-prolog`. + +.. 
_amdgpu-amdhsa-function-call-convention-non-kernel-functions: + +Non-Kernel Functions +++++++++++++++++++++ + +This section describes the call convention ABI for functions other than the +outer kernel function. + +If a kernel has function calls then scratch is always allocated and used for +the call stack which grows from low address to high address using the swizzled +scratch address space. + +On entry to a function: + +1. SGPR0-3 contain a V# with the following properties (see + :ref:`amdgpu-amdhsa-kernel-prolog-private-segment-buffer`): + + * Base address pointing to the beginning of the wavefront scratch backing + memory. + * Swizzled with dword element size and stride of wavefront size elements. + +2. The FLAT_SCRATCH register pair is setup. See + :ref:`amdgpu-amdhsa-kernel-prolog-flat-scratch`. +3. GFX6-GFX8: M0 register set to the size of LDS in bytes. See + :ref:`amdgpu-amdhsa-kernel-prolog-m0`. +4. The EXEC register is set to the lanes active on entry to the function. +5. MODE register: *TBD* +6. VGPR0-31 and SGPR4-29 are used to pass function input arguments as described + below. +7. SGPR30-31 return address (RA). The code address that the function must + return to when it completes. The value is undefined if the function is *no + return*. +8. SGPR32 is used for the stack pointer (SP). It is an unswizzled scratch + offset relative to the beginning of the wavefront scratch backing memory. + + The unswizzled SP can be used with buffer instructions as an unswizzled SGPR + offset with the scratch V# in SGPR0-3 to access the stack in a swizzled + manner. + + The unswizzled SP value can be converted into the swizzled SP value by: + + | swizzled SP = unswizzled SP / wavefront size + + This may be used to obtain the private address space address of stack + objects and to convert this address to a flat address by adding the flat + scratch aperture base address. 
+ + The swizzled SP value is always 4 byte aligned for the ``r600`` + architecture and 16 byte aligned for the ``amdgcn`` architecture. + + .. note:: + + The ``amdgcn`` value is selected to avoid dynamic stack alignment for the + OpenCL language which has the largest base type defined as 16 bytes. + + On entry, the swizzled SP value is the address of the first function + argument passed on the stack. Other stack passed arguments are positive + offsets from the entry swizzled SP value. + + The function may use positive offsets beyond the last stack passed argument + for stack allocated local variables and register spill slots. If necessary, + the function may align these to a greater alignment than 16 bytes. After these + the function may dynamically allocate space for such things as runtime sized + ``alloca`` local allocations. + + If the function calls another function, it will place any stack allocated + arguments after the last local allocation and adjust SGPR32 to the address + after the last local allocation. + +9. All other registers are unspecified. +10. Any necessary ``s_waitcnt`` has been performed to ensure memory is available + to the function. + +On exit from a function: + +1. VGPR0-31 and SGPR4-29 are used to pass function result arguments as + described below. Any registers used are considered clobbered registers. +2. The following registers are preserved and have the same value as on entry: + + * FLAT_SCRATCH + * EXEC + * GFX6-GFX8: M0 + * All SGPR registers except the clobbered registers of SGPR4-31. + * VGPR40-47 + * VGPR56-63 + * VGPR72-79 + * VGPR88-95 + * VGPR104-111 + * VGPR120-127 + * VGPR136-143 + * VGPR152-159 + * VGPR168-175 + * VGPR184-191 + * VGPR200-207 + * VGPR216-223 + * VGPR232-239 + * VGPR248-255 + + .. note:: + + Except the argument registers, the VGPRs clobbered and the preserved + registers are intermixed at regular intervals in order to keep a + similar ratio independent of the number of allocated VGPRs.
+ + * GFX90A: All AGPR registers except the clobbered registers AGPR0-31. + * Lanes of all VGPRs that are inactive at the call site. + + For the AMDGPU backend, an inter-procedural register allocation (IPRA) + optimization may mark some of the clobbered SGPR and VGPR registers as + preserved if it can be determined that the called function does not change + their value. + +3. The PC is set to the RA provided on entry. +4. MODE register: *TBD*. +5. All other registers are clobbered. +6. Any necessary ``s_waitcnt`` has been performed to ensure memory accessed by + the function is available to the caller. + +.. TODO:: + + - How are function results returned? The address of structured types is passed + by reference, but what about other types? + +The function input arguments are made up of the formal arguments explicitly +declared by the source language function plus the implicit input arguments used +by the implementation. + +The source language input arguments are: + +1. Any source language implicit ``this`` or ``self`` argument comes first as a + pointer type. +2. Followed by the function formal arguments in left to right source order. + +The source language result arguments are: + +1. The function result argument. + +The source language input or result struct type arguments that are less than or +equal to 16 bytes, are decomposed recursively into their base type fields, and +each field is passed as if a separate argument. For input arguments, if the +called function requires the struct to be in memory, for example because its +address is taken, then the function body is responsible for allocating a stack +location and copying the field arguments into it. Clang terms this *direct +struct*. + +The source language input struct type arguments that are greater than 16 bytes, +are passed by reference. The caller is responsible for allocating a stack +location to make a copy of the struct value and pass the address as the input +argument.
The called function is responsible to perform the dereference when +accessing the input argument. Clang terms this *by-value struct*. + +A source language result struct type argument that is greater than 16 bytes, is +returned by reference. The caller is responsible for allocating a stack location +to hold the result value and passes the address as the last input argument +(before the implicit input arguments). In this case there are no result +arguments. The called function is responsible to perform the dereference when +storing the result value. Clang terms this *structured return (sret)*. + +*TODO: correct the ``sret`` definition.* + +.. TODO:: + + Is this definition correct? Or is ``sret`` only used if passing in registers, and + pass as non-decomposed struct as stack argument? Or something else? Is the + memory location in the caller stack frame, or a stack memory argument and so + no address is passed as the caller can directly write to the argument stack + location? But then the stack location is still live after return. If an + argument stack location, is it the first stack argument or the last one? + +Lambda argument types are treated as struct types with an implementation defined +set of fields. + +.. TODO:: + + Need to specify the ABI for lambda types for AMDGPU. + +For AMDGPU backend all source language arguments (including the decomposed +struct type arguments) are passed in VGPRs unless marked ``inreg`` in which case +they are passed in SGPRs. + +The AMDGPU backend walks the function call graph from the leaves to determine +which implicit input arguments are used, propagating to each caller of the +function. The used implicit arguments are appended to the function arguments +after the source language arguments in the following order: + +.. TODO:: + + Are recursion or external functions supported? + +1. Work-Item ID (1 VGPR) + + The X, Y and Z work-item ID are packed into a single VGPR with the following + layout.
Only fields actually used by the function are set. The other bits + are undefined. + + The values come from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-initial-kernel-execution-state`. + + .. table:: Work-item implicit argument layout + :name: amdgpu-amdhsa-workitem-implicit-argument-layout-table + + ======= ======= ============== + Bits Size Field Name + ======= ======= ============== + 9:0 10 bits X Work-Item ID + 19:10 10 bits Y Work-Item ID + 29:20 10 bits Z Work-Item ID + 31:30 2 bits Unused + ======= ======= ============== + +2. Dispatch Ptr (2 SGPRs) + + The value comes from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + +3. Queue Ptr (2 SGPRs) + + The value comes from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + +4. Kernarg Segment Ptr (2 SGPRs) + + The value comes from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + +5. Dispatch id (2 SGPRs) + + The value comes from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + +6. Work-Group ID X (1 SGPR) + + The value comes from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + +7. Work-Group ID Y (1 SGPR) + + The value comes from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + +8. Work-Group ID Z (1 SGPR) + + The value comes from the initial kernel execution state. See + :ref:`amdgpu-amdhsa-sgpr-register-set-up-order-table`. + +9. Implicit Argument Ptr (2 SGPRs) + + The value is computed by adding an offset to Kernarg Segment Ptr to get the + global address space pointer to the first kernarg implicit argument. + +The input and result arguments are assigned in order in the following manner: + +.. 
note:: + + There are likely some errors and omissions in the following description that + need correction. + + .. TODO:: + + Check the Clang source code to decipher how function arguments and return + results are handled. Also see the AMDGPU specific values used. + +* VGPR arguments are assigned to consecutive VGPRs starting at VGPR0 up to + VGPR31. + + If there are more arguments than will fit in these registers, the remaining + arguments are allocated on the stack in order on naturally aligned + addresses. + + .. TODO:: + + How are overly aligned structures allocated on the stack? + +* SGPR arguments are assigned to consecutive SGPRs starting at SGPR0 up to + SGPR29. + + If there are more arguments than will fit in these registers, the remaining + arguments are allocated on the stack in order on naturally aligned + addresses. + +Note that decomposed struct type arguments may have some fields passed in +registers and some in memory. + +.. TODO:: + + So, a struct which can pass some fields as decomposed register arguments, will + pass the rest as decomposed stack elements? But an argument that will not start + in registers will not be decomposed and will be passed as a non-decomposed + stack value? + +The following is not part of the AMDGPU function calling convention but +describes how the AMDGPU implements function calls: + +1. SGPR33 is used as a frame pointer (FP) if necessary. Like the SP it is an + unswizzled scratch address. It is only needed if runtime sized ``alloca`` + are used, or for the reasons defined in ``SIFrameLowering``. +2. Runtime stack alignment is supported. SGPR34 is used as a base pointer (BP) + to access the incoming stack arguments in the function. The BP is needed + only when the function requires the runtime stack alignment. + +3. Allocating SGPR arguments on the stack is not supported. + +4. No CFI is currently generated. See + :ref:`amdgpu-dwarf-call-frame-information`. + + .. 
note:: + + CFI will be generated that defines the CFA as the unswizzled address + relative to the wave scratch base in the unswizzled private address space + of the lowest address stack allocated local variable. + + ``DW_AT_frame_base`` will be defined as the swizzled address in the + swizzled private address space by dividing the CFA by the wavefront size + (since CFA is always at least dword aligned which matches the scratch + swizzle element size). + + If no dynamic stack alignment was performed, the stack allocated arguments + are accessed as negative offsets relative to ``DW_AT_frame_base``, and the + local variables and register spill slots are accessed as positive offsets + relative to ``DW_AT_frame_base``. + +5. Function argument passing is implemented by copying the input physical + registers to virtual registers on entry. The register allocator can spill if + necessary. These are copied back to physical registers at call sites. The + net effect is that each function call can have these values in entirely + distinct locations. The IPRA can help avoid shuffling argument registers. +6. Call sites are implemented by setting up the arguments at positive offsets + from SP. Then SP is incremented to account for the known frame size before + the call and decremented after the call. + + .. note:: + + The CFI will reflect the changed calculation needed to compute the CFA + from SP. + +7. 4 byte spill slots are used in the stack frame. One slot is allocated for an + emergency spill slot. Buffer instructions are used for stack accesses and + not the ``flat_scratch`` instruction. + + .. TODO:: + + Explain when the emergency spill slot is used. + +.. TODO:: + + Possible broken issues: + + - Stack arguments must be aligned to required alignment. + - Stack is aligned to max(16, max formal argument alignment) + - Direct argument < 64 bits should check register budget. + - Register budget calculation should respect ``inreg`` for SGPR. + - SGPR overflow is not handled. 
+ - struct with 1 member unpeeling is not checking size of member. + - ``sret`` is after ``this`` pointer. + - Caller is not implementing stack realignment: need an extra pointer. + - Should say AMDGPU passes FP rather than SP. + - Should CFI define CFA as address of locals or arguments. Difference is + apparent when have implemented dynamic alignment. + - If ``SCRATCH`` instruction could allow negative offsets, then can make FP be + highest address of stack frame and use negative offset for locals. Would + allow SP to be the same as FP and could support signal-handler-like as now + have a real SP for the top of the stack. + - How is ``sret`` passed on the stack? In argument stack area? Can it overlay + arguments? + +AMDPAL +------ + +This section provides code conventions used when the target triple OS is +``amdpal`` (see :ref:`amdgpu-target-triples`). + +.. _amdgpu-amdpal-code-object-metadata-section: + +Code Object Metadata +~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + The metadata is currently in development and is subject to major + changes. Only the current version is supported. *When this document + was generated the version was 2.6.* + +Code object metadata is specified by the ``NT_AMDGPU_METADATA`` note +record (see :ref:`amdgpu-note-records-v3-v4`). + +The metadata is represented as Message Pack formatted binary data (see +[MsgPack]_). The top level is a Message Pack map that includes the keys +defined in table :ref:`amdgpu-amdpal-code-object-metadata-map-table` +and referenced tables. + +Additional information can be added to the maps. To avoid conflicts, any +key names should be prefixed by "*vendor-name*." where ``vendor-name`` +can be the name of the vendor and specific vendor tool that generates the +information. The prefix is abbreviated to simply "." when it appears +within a map that has been added by the same *vendor-name*. + + .. 
table:: AMDPAL Code Object Metadata Map + :name: amdgpu-amdpal-code-object-metadata-map-table + + =================== ============== ========= ====================================================================== + String Key Value Type Required? Description + =================== ============== ========= ====================================================================== + "amdpal.version" sequence of Required PAL code object metadata (major, minor) version. The current values + 2 integers are defined by *Util::Abi::PipelineMetadata(Major|Minor)Version*. + "amdpal.pipelines" sequence of Required Per-pipeline metadata. See + map :ref:`amdgpu-amdpal-code-object-pipeline-metadata-map-table` for the + definition of the keys included in that map. + =================== ============== ========= ====================================================================== + +.. + + .. table:: AMDPAL Code Object Pipeline Metadata Map + :name: amdgpu-amdpal-code-object-pipeline-metadata-map-table + + ====================================== ============== ========= =================================================== + String Key Value Type Required? Description + ====================================== ============== ========= =================================================== + ".name" string Source name of the pipeline. + ".type" string Pipeline type, e.g. VsPs. Values include: + + - "VsPs" + - "Gs" + - "Cs" + - "Ngg" + - "Tess" + - "GsTess" + - "NggTess" + + ".internal_pipeline_hash" sequence of Required Internal compiler hash for this pipeline. Lower + 2 integers 64 bits is the "stable" portion of the hash, used + for e.g. shader replacement lookup. Upper 64 bits + is the "unique" portion of the hash, used for + e.g. pipeline cache lookup. The value is + implementation defined, and can not be relied on + between different builds of the compiler. + ".shaders" map Per-API shader metadata. 
See + :ref:`amdgpu-amdpal-code-object-shader-map-table` + for the definition of the keys included in that + map. + ".hardware_stages" map Per-hardware stage metadata. See + :ref:`amdgpu-amdpal-code-object-hardware-stage-map-table` + for the definition of the keys included in that + map. + ".shader_functions" map Per-shader function metadata. See + :ref:`amdgpu-amdpal-code-object-shader-function-map-table` + for the definition of the keys included in that + map. + ".registers" map Required Hardware register configuration. See + :ref:`amdgpu-amdpal-code-object-register-map-table` + for the definition of the keys included in that + map. + ".user_data_limit" integer Number of user data entries accessed by this + pipeline. + ".spill_threshold" integer The user data spill threshold. 0xFFFF for + NoUserDataSpilling. + ".uses_viewport_array_index" boolean Indicates whether or not the pipeline uses the + viewport array index feature. Pipelines which use + this feature can render into all 16 viewports, + whereas pipelines which do not use it are + restricted to viewport #0. + ".es_gs_lds_size" integer Size in bytes of LDS space used internally for + handling data-passing between the ES and GS + shader stages. This can be zero if the data is + passed using off-chip buffers. This value should + be used to program all user-SGPRs which have been + marked with "UserDataMapping::EsGsLdsSize" + (typically only the GS and VS HW stages will ever + have a user-SGPR so marked). + ".nggSubgroupSize" integer Explicit maximum subgroup size for NGG shaders + (maximum number of threads in a subgroup). + ".num_interpolants" integer Graphics only. Number of PS interpolants. + ".mesh_scratch_memory_size" integer Max mesh shader scratch memory used. + ".api" string Name of the client graphics API. + ".api_create_info" binary Graphics API shader create info binary blob. 
Can + be defined by the driver using the compiler if + they want to be able to correlate API-specific + information used during creation at a later time. + ====================================== ============== ========= =================================================== + +.. + + .. table:: AMDPAL Code Object Shader Map + :name: amdgpu-amdpal-code-object-shader-map-table + + + +-------------+--------------+-------------------------------------------------------------------+ + |String Key |Value Type |Description | + +=============+==============+===================================================================+ + |- ".compute" |map |See :ref:`amdgpu-amdpal-code-object-api-shader-metadata-map-table` | + |- ".vertex" | |for the definition of the keys included in that map. | + |- ".hull" | | | + |- ".domain" | | | + |- ".geometry"| | | + |- ".pixel" | | | + +-------------+--------------+-------------------------------------------------------------------+ + +.. + + .. table:: AMDPAL Code Object API Shader Metadata Map + :name: amdgpu-amdpal-code-object-api-shader-metadata-map-table + + ==================== ============== ========= ===================================================================== + String Key Value Type Required? Description + ==================== ============== ========= ===================================================================== + ".api_shader_hash" sequence of Required Input shader hash, typically passed in from the client. The value + 2 integers is implementation defined, and can not be relied on between + different builds of the compiler. + ".hardware_mapping" sequence of Required Flags indicating the HW stages this API shader maps to. Values + string include: + + - ".ls" + - ".hs" + - ".es" + - ".gs" + - ".vs" + - ".ps" + - ".cs" + + ==================== ============== ========= ===================================================================== + +.. + + .. 
table:: AMDPAL Code Object Hardware Stage Map + :name: amdgpu-amdpal-code-object-hardware-stage-map-table + + +-------------+--------------+-----------------------------------------------------------------------+ + |String Key |Value Type |Description | + +=============+==============+=======================================================================+ + |- ".ls" |map |See :ref:`amdgpu-amdpal-code-object-hardware-stage-metadata-map-table` | + |- ".hs" | |for the definition of the keys included in that map. | + |- ".es" | | | + |- ".gs" | | | + |- ".vs" | | | + |- ".ps" | | | + |- ".cs" | | | + +-------------+--------------+-----------------------------------------------------------------------+ + +.. + + .. table:: AMDPAL Code Object Hardware Stage Metadata Map + :name: amdgpu-amdpal-code-object-hardware-stage-metadata-map-table + + ========================== ============== ========= =============================================================== + String Key Value Type Required? Description + ========================== ============== ========= =============================================================== + ".entry_point" string The ELF symbol pointing to this pipeline's stage entry point. + ".scratch_memory_size" integer Scratch memory size in bytes. + ".lds_size" integer Local Data Share size in bytes. + ".perf_data_buffer_size" integer Performance data buffer size in bytes. + ".vgpr_count" integer Number of VGPRs used. + ".sgpr_count" integer Number of SGPRs used. + ".vgpr_limit" integer If non-zero, indicates the shader was compiled with a + directive to instruct the compiler to limit the VGPR usage to + be less than or equal to the specified value (only set if + different from HW default). + ".sgpr_limit" integer SGPR count upper limit (only set if different from HW + default). + ".threadgroup_dimensions" sequence of Thread-group X/Y/Z dimensions (Compute only). 
+ 3 integers + ".wavefront_size" integer Wavefront size (only set if different from HW default). + ".uses_uavs" boolean The shader reads or writes UAVs. + ".uses_rovs" boolean The shader reads or writes ROVs. + ".writes_uavs" boolean The shader writes to one or more UAVs. + ".writes_depth" boolean The shader writes out a depth value. + ".uses_append_consume" boolean The shader uses append and/or consume operations, either + memory or GDS. + ".uses_prim_id" boolean The shader uses PrimID. + ========================== ============== ========= =============================================================== + +.. + + .. table:: AMDPAL Code Object Shader Function Map + :name: amdgpu-amdpal-code-object-shader-function-map-table + + =============== ============== ==================================================================== + String Key Value Type Description + =============== ============== ==================================================================== + *symbol name* map *symbol name* is the ELF symbol name of the shader function code + entry address. The value is the function's metadata. See + :ref:`amdgpu-amdpal-code-object-shader-function-metadata-map-table`. + =============== ============== ==================================================================== + +.. + + .. table:: AMDPAL Code Object Shader Function Metadata Map + :name: amdgpu-amdpal-code-object-shader-function-metadata-map-table + + ============================= ============== ================================================================= + String Key Value Type Description + ============================= ============== ================================================================= + ".api_shader_hash" sequence of Input shader hash, typically passed in from the client. The value + 2 integers is implementation defined, and can not be relied on between + different builds of the compiler. + ".scratch_memory_size" integer Size in bytes of scratch memory used by the shader. 
+ ".lds_size" integer Size in bytes of LDS memory. + ".vgpr_count" integer Number of VGPRs used by the shader. + ".sgpr_count" integer Number of SGPRs used by the shader. + ".stack_frame_size_in_bytes" integer Amount of stack size used by the shader. + ".shader_subtype" string Shader subtype/kind. Values include: + + - "Unknown" + + ============================= ============== ================================================================= + +.. + + .. table:: AMDPAL Code Object Register Map + :name: amdgpu-amdpal-code-object-register-map-table + + ========================== ============== ==================================================================== + 32-bit Integer Key Value Type Description + ========================== ============== ==================================================================== + ``reg offset`` 32-bit integer ``reg offset`` is the dword offset into the GFXIP register space of + a GRBM register (i.e., driver accessible GPU register number, not + shader GPR register number). The driver is required to program each + specified register to the corresponding specified value when + executing this pipeline. Typically, the ``reg offsets`` are the + ``uint16_t`` offsets to each register as defined by the hardware + chip headers. The register is set to the provided value. However, a + ``reg offset`` that specifies a user data register (e.g., + COMPUTE_USER_DATA_0) needs special treatment. See + :ref:`amdgpu-amdpal-code-object-user-data-section` section for more + information. + ========================== ============== ==================================================================== + +.. _amdgpu-amdpal-code-object-user-data-section: + +User Data ++++++++++ + +Each hardware stage has a set of 32-bit physical SPI *user data registers* +(either 16 or 32 based on graphics IP and the stage) which can be +written from a command buffer and then loaded into SGPRs when waves are +launched via a subsequent dispatch or draw operation. 
This is the way +most arguments are passed from the application/runtime to a hardware +shader. + +PAL abstracts this functionality by exposing a set of 128 *user data +entries* per pipeline a client can use to pass arguments from a command +buffer to one or more shaders in that pipeline. The ELF code object must +specify a mapping from virtualized *user data entries* to physical *user +data registers*, and PAL is responsible for implementing that mapping, +including spilling overflow *user data entries* to memory if needed. + +Since the *user data registers* are GRBM-accessible SPI registers, this +mapping is actually embedded in the ``.registers`` metadata entry. For +most registers, the value in that map is a literal 32-bit value that +should be written to the register by the driver. However, when the +register is a *user data register* (any USER_DATA register e.g., +SPI_SHADER_USER_DATA_PS_5), the value is instead an encoding that tells +the driver to write either a *user data entry* value or one of several +driver-internal values to the register. This encoding is described in +the following table: + +.. note:: + + Currently, *user data registers* 0 and 1 (e.g., SPI_SHADER_USER_DATA_PS_0, + and SPI_SHADER_USER_DATA_PS_1) are reserved. *User data register* 0 must + always be programmed to the address of the GlobalTable, and *user data + register* 1 must always be programmed to the address of the PerShaderTable. + +.. + + .. 
table:: AMDPAL User Data Mapping + :name: amdgpu-amdpal-code-object-metadata-user-data-mapping-table + + ========== ================= =============================================================================== + Value Name Description + ========== ================= =============================================================================== + 0..127 *User Data Entry* 32-bit value of user_data_entry[N] as specified via *CmdSetUserData()* + 0x10000000 GlobalTable 32-bit pointer to GPU memory containing the global internal table (should + always point to *user data register* 0). + 0x10000001 PerShaderTable 32-bit pointer to GPU memory containing the per-shader internal table. See + :ref:`amdgpu-amdpal-code-object-metadata-user-data-per-shader-table-section` + for more detail (should always point to *user data register* 1). + 0x10000002 SpillTable 32-bit pointer to GPU memory containing the user data spill table. See + :ref:`amdgpu-amdpal-code-object-metadata-user-data-spill-table-section` for + more detail. + 0x10000003 BaseVertex Vertex offset (32-bit unsigned integer). Not needed if the pipeline doesn't + reference the draw index in the vertex shader. Only supported by the first + stage in a graphics pipeline. + 0x10000004 BaseInstance Instance offset (32-bit unsigned integer). Only supported by the first stage in + a graphics pipeline. + 0x10000005 DrawIndex Draw index (32-bit unsigned integer). Only supported by the first stage in a + graphics pipeline. + 0x10000006 Workgroup Thread group count (32-bit unsigned integer). Low half of a 64-bit address of + a buffer containing the grid dimensions for a Compute dispatch operation. The + high half of the address is stored in the next sequential user-SGPR. Only + supported by compute pipelines. + 0x1000000A EsGsLdsSize Indicates that PAL will program this user-SGPR to contain the amount of LDS + space used for the ES/GS pseudo-ring-buffer for passing data between shader + stages. 
+ 0x1000000B ViewId View id (32-bit unsigned integer) identifies a view of graphic + pipeline instancing. + 0x1000000C StreamOutTable 32-bit pointer to GPU memory containing the stream out target SRD table. This + can only appear for one shader stage per pipeline. + 0x1000000D PerShaderPerfData 32-bit pointer to GPU memory containing the per-shader performance data buffer. + 0x1000000F VertexBufferTable 32-bit pointer to GPU memory containing the vertex buffer SRD table. This can + only appear for one shader stage per pipeline. + 0x10000010 UavExportTable 32-bit pointer to GPU memory containing the UAV export SRD table. This can + only appear for one shader stage per pipeline (PS). These replace color targets + and are completely separate from any UAVs used by the shader. This is optional, + and only used by the PS when UAV exports are used to replace color-target + exports to optimize specific shaders. + 0x10000011 NggCullingData 64-bit pointer to GPU memory containing the hardware register data needed by + some NGG pipelines to perform culling. This value contains the address of the + first of two consecutive registers which provide the full GPU address. + 0x10000015 FetchShaderPtr 64-bit pointer to GPU memory containing the fetch shader subroutine. + ========== ================= =============================================================================== + +.. _amdgpu-amdpal-code-object-metadata-user-data-per-shader-table-section: + +Per-Shader Table +################ + +Low 32 bits of the GPU address for an optional buffer in the ``.data`` +section of the ELF. The high 32 bits of the address match the high 32 bits +of the shader's program counter. + +The buffer can be anything the shader compiler needs it for, and +allows each shader to have its own region of the ``.data`` section. +Typically, this could be a table of buffer SRD's and the data pointed to +by the buffer SRD's, but it could be a flat-address region of memory as +well. 
Its layout and usage are defined by the shader compiler. + +Each shader's table in the ``.data`` section is referenced by the symbol +``_amdgpu_``\ *xs*\ ``_shdr_intrl_data`` where *xs* corresponds with the +hardware shader stage the data is for. E.g., +``_amdgpu_cs_shdr_intrl_data`` for the compute shader hardware stage. + +.. _amdgpu-amdpal-code-object-metadata-user-data-spill-table-section: + +Spill Table +########### + +It is possible for a hardware shader to need access to more *user data +entries* than there are slots available in user data registers for one +or more hardware shader stages. In that case, the PAL runtime expects +the necessary *user data entries* to be spilled to GPU memory and use +one user data register to point to the spilled user data memory. The +value of the *user data entry* must then represent the location where +a shader expects to read the low 32-bits of the table's GPU virtual +address. The *spill table* itself represents a set of 32-bit values +managed by the PAL runtime in GPU-accessible memory that can be made +indirectly accessible to a hardware shader. + +Unspecified OS +-------------- + +This section provides code conventions used when the target triple OS is +empty (see :ref:`amdgpu-target-triples`). + +Trap Handler ABI +~~~~~~~~~~~~~~~~ + +For code objects generated by AMDGPU backend for non-amdhsa OS, the runtime does +not install a trap handler. The ``llvm.trap`` and ``llvm.debugtrap`` +instructions are handled as follows: + + .. table:: AMDGPU Trap Handler for Non-AMDHSA OS + :name: amdgpu-trap-handler-for-non-amdhsa-os-table + + =============== =============== =========================================== + Usage Code Sequence Description + =============== =============== =========================================== + llvm.trap s_endpgm Causes wavefront to be terminated. + llvm.debugtrap *none* Compiler warning given that there is no + trap handler installed. 
+ =============== =============== =========================================== + +Source Languages +================ + +.. _amdgpu-opencl: + +OpenCL +------ + +When the language is OpenCL the following differences occur: + +1. The OpenCL memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). +2. The AMDGPU backend appends additional arguments to the kernel's explicit + arguments for the AMDHSA OS (see + :ref:`opencl-kernel-implicit-arguments-appended-for-amdhsa-os-table`). +3. Additional metadata is generated + (see :ref:`amdgpu-amdhsa-code-object-metadata`). + + .. table:: OpenCL kernel implicit arguments appended for AMDHSA OS + :name: opencl-kernel-implicit-arguments-appended-for-amdhsa-os-table + + ======== ==== ========= =========================================== + Position Byte Byte Description + Size Alignment + ======== ==== ========= =========================================== + 1 8 8 OpenCL Global Offset X + 2 8 8 OpenCL Global Offset Y + 3 8 8 OpenCL Global Offset Z + 4 8 8 OpenCL address of printf buffer + 5 8 8 OpenCL address of virtual queue used by + enqueue_kernel. + 6 8 8 OpenCL address of AqlWrap struct used by + enqueue_kernel. + 7 8 8 Pointer argument used for Multi-grid + synchronization. + ======== ==== ========= =========================================== + +.. _amdgpu-hcc: + +HCC +--- + +When the language is HCC the following differences occur: + +1. The HSA memory model is used (see :ref:`amdgpu-amdhsa-memory-model`). + +.. _amdgpu-assembler: + +Assembler +--------- + +AMDGPU backend has LLVM-MC based assembler which is currently in development. +It supports AMDGCN GFX6-GFX10. + +This section describes general syntax for instructions and operands. + +Instructions +~~~~~~~~~~~~ + +An instruction has the following :doc:`syntax`: + + | ``<``\ *opcode*\ ``> <``\ *operand0*\ ``>, <``\ *operand1*\ ``>,... + <``\ *modifier0*\ ``> <``\ *modifier1*\ ``>...`` + +:doc:`Operands` are comma-separated while +:doc:`modifiers` are space-separated.
+ +The order of operands and modifiers is fixed. +Most modifiers are optional and may be omitted. + +Links to detailed instruction syntax description may be found in the following +table. Note that features under development are not included +in this description. + + =================================== ======================================= + Core ISA ISA Extensions + =================================== ======================================= + :doc:`GFX7` \- + :doc:`GFX8` \- + :doc:`GFX9` :doc:`gfx900` + + :doc:`gfx902` + + :doc:`gfx904` + + :doc:`gfx906` + + :doc:`gfx908` + + :doc:`gfx909` + + :doc:`gfx90a` + + :doc:`GFX10` :doc:`gfx1011` + + :doc:`gfx1012` + =================================== ======================================= + +For more information about instructions, their semantics and supported +combinations of operands, refer to one of the instruction set architecture manuals +[AMD-GCN-GFX6]_, [AMD-GCN-GFX7]_, [AMD-GCN-GFX8]_, +[AMD-GCN-GFX900-GFX904-VEGA]_, [AMD-GCN-GFX906-VEGA7NM]_, +[AMD-GCN-GFX908-CDNA1]_, [AMD-GCN-GFX10-RDNA1]_ and [AMD-GCN-GFX10-RDNA2]_. + +Operands +~~~~~~~~ + +Detailed description of operands may be found :doc:`here`. + +Modifiers +~~~~~~~~~ + +Detailed description of modifiers may be found +:doc:`here`. + +Instruction Examples +~~~~~~~~~~~~~~~~~~~~ + +DS +++ + +.. code-block:: nasm + + ds_add_u32 v2, v4 offset:16 + ds_write_src2_b64 v2 offset0:4 offset1:8 + ds_cmpst_f32 v2, v4, v6 + ds_min_rtn_f64 v[8:9], v2, v[4:5] + +For full list of supported instructions, refer to "LDS/GDS instructions" in ISA +Manual. + +FLAT +++++ + +.. code-block:: nasm + + flat_load_dword v1, v[3:4] + flat_store_dwordx3 v[3:4], v[5:7] + flat_atomic_swap v1, v[3:4], v5 glc + flat_atomic_cmpswap v1, v[3:4], v[5:6] glc slc + flat_atomic_fmax_x2 v[1:2], v[3:4], v[5:6] glc + +For full list of supported instructions, refer to "FLAT instructions" in ISA +Manual. + +MUBUF ++++++ + +..
code-block:: nasm + + buffer_load_dword v1, off, s[4:7], s1 + buffer_store_dwordx4 v[1:4], v2, ttmp[4:7], s1 offen offset:4 glc tfe + buffer_store_format_xy v[1:2], off, s[4:7], s1 + buffer_wbinvl1 + buffer_atomic_inc v1, v2, s[8:11], s4 idxen offset:4 slc + +For full list of supported instructions, refer to "MUBUF Instructions" in ISA +Manual. + +SMRD/SMEM ++++++++++ + +.. code-block:: nasm + + s_load_dword s1, s[2:3], 0xfc + s_load_dwordx8 s[8:15], s[2:3], s4 + s_load_dwordx16 s[88:103], s[2:3], s4 + s_dcache_inv_vol + s_memtime s[4:5] + +For full list of supported instructions, refer to "Scalar Memory Operations" in +ISA Manual. + +SOP1 +++++ + +.. code-block:: nasm + + s_mov_b32 s1, s2 + s_mov_b64 s[0:1], 0x80000000 + s_cmov_b32 s1, 200 + s_wqm_b64 s[2:3], s[4:5] + s_bcnt0_i32_b64 s1, s[2:3] + s_swappc_b64 s[2:3], s[4:5] + s_cbranch_join s[4:5] + +For full list of supported instructions, refer to "SOP1 Instructions" in ISA +Manual. + +SOP2 +++++ + +.. code-block:: nasm + + s_add_u32 s1, s2, s3 + s_and_b64 s[2:3], s[4:5], s[6:7] + s_cselect_b32 s1, s2, s3 + s_andn2_b32 s2, s4, s6 + s_lshr_b64 s[2:3], s[4:5], s6 + s_ashr_i32 s2, s4, s6 + s_bfm_b64 s[2:3], s4, s6 + s_bfe_i64 s[2:3], s[4:5], s6 + s_cbranch_g_fork s[4:5], s[6:7] + +For full list of supported instructions, refer to "SOP2 Instructions" in ISA +Manual. + +SOPC +++++ + +.. code-block:: nasm + + s_cmp_eq_i32 s1, s2 + s_bitcmp1_b32 s1, s2 + s_bitcmp0_b64 s[2:3], s4 + s_setvskip s3, s5 + +For full list of supported instructions, refer to "SOPC Instructions" in ISA +Manual. + +SOPP +++++ + +.. code-block:: nasm + + s_barrier + s_nop 2 + s_endpgm + s_waitcnt 0 ; Wait for all counters to be 0 + s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0) ; Equivalent to above + s_waitcnt vmcnt(1) ; Wait for vmcnt counter to be 1. + s_sethalt 9 + s_sleep 10 + s_sendmsg 0x1 + s_sendmsg sendmsg(MSG_INTERRUPT) + s_trap 1 + +For full list of supported instructions, refer to "SOPP Instructions" in ISA +Manual. 
+ +Unless otherwise mentioned, little verification is performed on the operands +of SOPP Instructions, so it is up to the programmer to be familiar with the +range of acceptable values. + +VALU +++++ + +For vector ALU instruction opcodes (VOP1, VOP2, VOP3, VOPC, VOP_DPP, VOP_SDWA), +the assembler will automatically use optimal encoding based on its operands. To +force specific encoding, one can add a suffix to the opcode of the instruction: + +* _e32 for 32-bit VOP1/VOP2/VOPC +* _e64 for 64-bit VOP3 +* _dpp for VOP_DPP +* _sdwa for VOP_SDWA + +VOP1/VOP2/VOP3/VOPC examples: + +.. code-block:: nasm + + v_mov_b32 v1, v2 + v_mov_b32_e32 v1, v2 + v_nop + v_cvt_f64_i32_e32 v[1:2], v2 + v_floor_f32_e32 v1, v2 + v_bfrev_b32_e32 v1, v2 + v_add_f32_e32 v1, v2, v3 + v_mul_i32_i24_e64 v1, v2, 3 + v_mul_i32_i24_e32 v1, -3, v3 + v_mul_i32_i24_e32 v1, -100, v3 + v_addc_u32 v1, s[0:1], v2, v3, s[2:3] + v_max_f16_e32 v1, v2, v3 + +VOP_DPP examples: + +.. code-block:: nasm + + v_mov_b32 v0, v0 quad_perm:[0,2,1,1] + v_sin_f32 v0, v0 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + v_mov_b32 v0, v0 wave_shl:1 + v_mov_b32 v0, v0 row_mirror + v_mov_b32 v0, v0 row_bcast:31 + v_mov_b32 v0, v0 quad_perm:[1,3,0,1] row_mask:0xa bank_mask:0x1 bound_ctrl:0 + v_add_f32 v0, v0, |v0| row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + v_max_f16 v1, v2, v3 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 + +VOP_SDWA examples: + +.. code-block:: nasm + + v_mov_b32 v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD + v_min_u32 v200, v200, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD + v_sin_f32 v0, v0 dst_unused:UNUSED_PAD src0_sel:WORD_1 + v_fract_f32 v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 + v_cmpx_le_u32 vcc, v1, v2 src0_sel:BYTE_2 src1_sel:WORD_0 + +For full list of supported instructions, refer to "Vector ALU instructions". + +..
_amdgpu-amdhsa-assembler-predefined-symbols-v2: + +Code Object V2 Predefined Symbols +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + Code object V2 is not the default code object version emitted by + this version of LLVM. + +The AMDGPU assembler defines and updates some symbols automatically. These +symbols do not affect code generation. + +.option.machine_version_major ++++++++++++++++++++++++++++++ + +Set to the GFX major generation number of the target being assembled for. For +example, when assembling for a "GFX9" target this will be set to the integer +value "9". The possible GFX major generation numbers are presented in +:ref:`amdgpu-processors`. + +.option.machine_version_minor ++++++++++++++++++++++++++++++ + +Set to the GFX minor generation number of the target being assembled for. For +example, when assembling for a "GFX810" target this will be set to the integer +value "1". The possible GFX minor generation numbers are presented in +:ref:`amdgpu-processors`. + +.option.machine_version_stepping +++++++++++++++++++++++++++++++++ + +Set to the GFX stepping generation number of the target being assembled for. +For example, when assembling for a "GFX704" target this will be set to the +integer value "4". The possible GFX stepping generation numbers are presented +in :ref:`amdgpu-processors`. + +.kernel.vgpr_count +++++++++++++++++++ + +Set to zero each time a +:ref:`amdgpu-amdhsa-assembler-directive-amdgpu_hsa_kernel` directive is +encountered. At each instruction, if the current value of this symbol is less +than or equal to the maximum VGPR number explicitly referenced within that +instruction then the symbol value is updated to equal that VGPR number plus +one. + +.kernel.sgpr_count +++++++++++++++++++ + +Set to zero each time a +:ref:`amdgpu-amdhsa-assembler-directive-amdgpu_hsa_kernel` directive is +encountered. 
At each instruction, if the current value of this symbol is less +than or equal to the maximum SGPR number explicitly referenced within that +instruction then the symbol value is updated to equal that SGPR number plus +one. + +.. _amdgpu-amdhsa-assembler-directives-v2: + +Code Object V2 Directives +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + Code object V2 is not the default code object version emitted by + this version of LLVM. + +AMDGPU ABI defines auxiliary data in output code object. In assembly source, +one can specify them with assembler directives. + +.hsa_code_object_version major, minor ++++++++++++++++++++++++++++++++++++++ + +*major* and *minor* are integers that specify the version of the HSA code +object that will be generated by the assembler. + +.hsa_code_object_isa [major, minor, stepping, vendor, arch] ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + +*major*, *minor*, and *stepping* are all integers that describe the instruction +set architecture (ISA) version of the assembly program. + +*vendor* and *arch* are quoted strings. *vendor* should always be equal to +"AMD" and *arch* should always be equal to "AMDGPU". + +By default, the assembler will derive the ISA version, *vendor*, and *arch* +from the value of the -mcpu option that is passed to the assembler. + +.. _amdgpu-amdhsa-assembler-directive-amdgpu_hsa_kernel: + +.amdgpu_hsa_kernel (name) ++++++++++++++++++++++++++ + +This directive specifies that the symbol with given name is a kernel entry +point (label) and the object should contain corresponding symbol of type +STT_AMDGPU_HSA_KERNEL. + +.amd_kernel_code_t +++++++++++++++++++ + +This directive marks the beginning of a list of key / value pairs that are used +to specify the amd_kernel_code_t object that will be emitted by the assembler. +The list must be terminated by the *.end_amd_kernel_code_t* directive. For any +amd_kernel_code_t values that are unspecified a default value will be used.
The +default value for all keys is 0, with the following exceptions: + +- *amd_code_version_major* defaults to 1. +- *amd_kernel_code_version_minor* defaults to 2. +- *amd_machine_kind* defaults to 1. +- *amd_machine_version_major*, *machine_version_minor*, and + *amd_machine_version_stepping* are derived from the value of the -mcpu option + that is passed to the assembler. +- *kernel_code_entry_byte_offset* defaults to 256. +- *wavefront_size* defaults to 6 for all targets before GFX10. For GFX10 onwards + defaults to 6 if target feature ``wavefrontsize64`` is enabled, otherwise 5. + Note that wavefront size is specified as a power of two, so a value of **n** + means a size of 2^ **n**. +- *call_convention* defaults to -1. +- *kernarg_segment_alignment*, *group_segment_alignment*, and + *private_segment_alignment* default to 4. Note that alignments are specified + as a power of 2, so a value of **n** means an alignment of 2^ **n**. +- *enable_tg_split* defaults to 1 if target feature ``tgsplit`` is enabled for + GFX90A onwards. +- *enable_wgp_mode* defaults to 1 if target feature ``cumode`` is disabled for + GFX10 onwards. +- *enable_mem_ordered* defaults to 1 for GFX10 onwards. + +The *.amd_kernel_code_t* directive must be placed immediately after the +function label and before any instructions. + +For a full list of amd_kernel_code_t keys, refer to AMDGPU ABI document, +comments in lib/Target/AMDGPU/AmdKernelCodeT.h and test/CodeGen/AMDGPU/hsa.s. + +.. _amdgpu-amdhsa-assembler-example-v2: + +Code Object V2 Example Source Code +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + Code Object V2 is not the default code object version emitted by + this version of LLVM. + +Here is an example of a minimal assembly source file, defining one HSA kernel: + +..
code:: + :number-lines: + + .hsa_code_object_version 1,0 + .hsa_code_object_isa + + .hsatext + .globl hello_world + .p2align 8 + .amdgpu_hsa_kernel hello_world + + hello_world: + + .amd_kernel_code_t + enable_sgpr_kernarg_segment_ptr = 1 + is_ptr64 = 1 + compute_pgm_rsrc1_vgprs = 0 + compute_pgm_rsrc1_sgprs = 0 + compute_pgm_rsrc2_user_sgpr = 2 + compute_pgm_rsrc1_wgp_mode = 0 + compute_pgm_rsrc1_mem_ordered = 0 + compute_pgm_rsrc1_fwd_progress = 1 + .end_amd_kernel_code_t + + s_load_dwordx2 s[0:1], s[0:1] 0x0 + v_mov_b32 v0, 3.14159 + s_waitcnt lgkmcnt(0) + v_mov_b32 v1, s0 + v_mov_b32 v2, s1 + flat_store_dword v[1:2], v0 + s_endpgm + .Lfunc_end0: + .size hello_world, .Lfunc_end0-hello_world + +.. _amdgpu-amdhsa-assembler-predefined-symbols-v3-v4: + +Code Object V3 to V4 Predefined Symbols +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The AMDGPU assembler defines and updates some symbols automatically. These +symbols do not affect code generation. + +.amdgcn.gfx_generation_number ++++++++++++++++++++++++++++++ + +Set to the GFX major generation number of the target being assembled for. For +example, when assembling for a "GFX9" target this will be set to the integer +value "9". The possible GFX major generation numbers are presented in +:ref:`amdgpu-processors`. + +.amdgcn.gfx_generation_minor +++++++++++++++++++++++++++++ + +Set to the GFX minor generation number of the target being assembled for. For +example, when assembling for a "GFX810" target this will be set to the integer +value "1". The possible GFX minor generation numbers are presented in +:ref:`amdgpu-processors`. + +.amdgcn.gfx_generation_stepping ++++++++++++++++++++++++++++++++ + +Set to the GFX stepping generation number of the target being assembled for. +For example, when assembling for a "GFX704" target this will be set to the +integer value "4". The possible GFX stepping generation numbers are presented +in :ref:`amdgpu-processors`. + +.. 
_amdgpu-amdhsa-assembler-symbol-next_free_vgpr: + +.amdgcn.next_free_vgpr +++++++++++++++++++++++ + +Set to zero before assembly begins. At each instruction, if the current value +of this symbol is less than or equal to the maximum VGPR number explicitly +referenced within that instruction then the symbol value is updated to equal +that VGPR number plus one. + +May be used to set the `.amdhsa_next_free_vgpr` directive in +:ref:`amdhsa-kernel-directives-table`. + +May be set at any time, e.g. manually set to zero at the start of each kernel. + +.. _amdgpu-amdhsa-assembler-symbol-next_free_sgpr: + +.amdgcn.next_free_sgpr +++++++++++++++++++++++ + +Set to zero before assembly begins. At each instruction, if the current value +of this symbol is less than or equal to the maximum SGPR number explicitly +referenced within that instruction then the symbol value is updated to equal +that SGPR number plus one. + +May be used to set the `.amdhsa_next_free_sgpr` directive in +:ref:`amdhsa-kernel-directives-table`. + +May be set at any time, e.g. manually set to zero at the start of each kernel. + +.. _amdgpu-amdhsa-assembler-directives-v3-v4: + +Code Object V3 to V4 Directives +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Directives which begin with ``.amdgcn`` are valid for all ``amdgcn`` +architecture processors, and are not OS-specific. Directives which begin with +``.amdhsa`` are specific to ``amdgcn`` architecture processors when the +``amdhsa`` OS is specified. See :ref:`amdgpu-target-triples` and +:ref:`amdgpu-processors`. + +.. _amdgpu-assembler-directive-amdgcn-target: + +.amdgcn_target <target-triple> "-" <target-id> +++++++++++++++++++++++++++++++++++++++++++++++ + +Optional directive which declares the ``<target-triple>-<target-id>`` supported +by the containing assembler source file. Used by the assembler to validate +command-line options such as ``-triple``, ``-mcpu``, and +``--offload-arch=<target-id>``. A non-canonical target ID is allowed. See +:ref:`amdgpu-target-triples` and :ref:`amdgpu-target-id`. + +.. 
note:: + + The target ID syntax used for code object V2 to V3 for this directive differs + from that used elsewhere. See :ref:`amdgpu-target-id-v2-v3`. + +.amdhsa_kernel ++++++++++++++++++++++ + +Creates a correctly aligned AMDHSA kernel descriptor and a symbol, +``.kd``, in the current location of the current section. Only valid when +the OS is ``amdhsa``. ```` must be a symbol that labels the first +instruction to execute, and does not need to be previously defined. + +Marks the beginning of a list of directives used to generate the bytes of a +kernel descriptor, as described in :ref:`amdgpu-amdhsa-kernel-descriptor`. +Directives which may appear in this list are described in +:ref:`amdhsa-kernel-directives-table`. Directives may appear in any order, must +be valid for the target being assembled for, and cannot be repeated. Directives +support the range of values specified by the field they reference in +:ref:`amdgpu-amdhsa-kernel-descriptor`. If a directive is not specified, it is +assumed to have its default value, unless it is marked as "Required", in which +case it is an error to omit the directive. This list of directives is +terminated by an ``.end_amdhsa_kernel`` directive. + + .. table:: AMDHSA Kernel Assembler Directives + :name: amdhsa-kernel-directives-table + + ======================================================== =================== ============ =================== + Directive Default Supported On Description + ======================================================== =================== ============ =================== + ``.amdhsa_group_segment_fixed_size`` 0 GFX6-GFX10 Controls GROUP_SEGMENT_FIXED_SIZE in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_private_segment_fixed_size`` 0 GFX6-GFX10 Controls PRIVATE_SEGMENT_FIXED_SIZE in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_kernarg_size`` 0 GFX6-GFX10 Controls KERNARG_SIZE in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. 
+ ``.amdhsa_user_sgpr_private_segment_buffer`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_user_sgpr_dispatch_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_PTR in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_user_sgpr_queue_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_QUEUE_PTR in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_user_sgpr_kernarg_segment_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_KERNARG_SEGMENT_PTR in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_user_sgpr_dispatch_id`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_ID in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_user_sgpr_flat_scratch_init`` 0 GFX6-GFX10 Controls ENABLE_SGPR_FLAT_SCRATCH_INIT in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_user_sgpr_private_segment_size`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_SIZE in + :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + ``.amdhsa_wavefront_size32`` Target GFX10 Controls ENABLE_WAVEFRONT_SIZE32 in + Feature :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + Specific + (wavefrontsize64) + ``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_system_sgpr_workgroup_id_x`` 1 GFX6-GFX10 Controls ENABLE_SGPR_WORKGROUP_ID_X in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_system_sgpr_workgroup_id_y`` 0 GFX6-GFX10 Controls ENABLE_SGPR_WORKGROUP_ID_Y in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_system_sgpr_workgroup_id_z`` 0 GFX6-GFX10 Controls ENABLE_SGPR_WORKGROUP_ID_Z in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_system_sgpr_workgroup_info`` 0 GFX6-GFX10 Controls ENABLE_SGPR_WORKGROUP_INFO in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. 
+ ``.amdhsa_system_vgpr_workitem_id`` 0 GFX6-GFX10 Controls ENABLE_VGPR_WORKITEM_ID in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + Possible values are defined in + :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table`. + ``.amdhsa_next_free_vgpr`` Required GFX6-GFX10 Maximum VGPR number explicitly referenced, plus one. + Used to calculate GRANULATED_WORKITEM_VGPR_COUNT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_next_free_sgpr`` Required GFX6-GFX10 Maximum SGPR number explicitly referenced, plus one. + Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_accum_offset`` Required GFX90A Offset of a first AccVGPR in the unified register file. + Used to calculate ACCUM_OFFSET in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`. + ``.amdhsa_reserve_vcc`` 1 GFX6-GFX10 Whether the kernel may use the special VCC SGPR. + Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_reserve_flat_scratch`` 1 GFX7-GFX10 Whether the kernel may use flat instructions to access + scratch memory. Used to calculate + GRANULATED_WAVEFRONT_SGPR_COUNT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_reserve_xnack_mask`` Target GFX8-GFX10 Whether the kernel may trigger XNACK replay. + Feature Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in + Specific :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + (xnack) + ``.amdhsa_float_round_mode_32`` 0 GFX6-GFX10 Controls FLOAT_ROUND_MODE_32 in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + Possible values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + ``.amdhsa_float_round_mode_16_64`` 0 GFX6-GFX10 Controls FLOAT_ROUND_MODE_16_64 in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. 
+ Possible values are defined in + :ref:`amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table`. + ``.amdhsa_float_denorm_mode_32`` 0 GFX6-GFX10 Controls FLOAT_DENORM_MODE_32 in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + Possible values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + ``.amdhsa_float_denorm_mode_16_64`` 3 GFX6-GFX10 Controls FLOAT_DENORM_MODE_16_64 in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + Possible values are defined in + :ref:`amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table`. + ``.amdhsa_dx10_clamp`` 1 GFX6-GFX10 Controls ENABLE_DX10_CLAMP in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_ieee_mode`` 1 GFX6-GFX10 Controls ENABLE_IEEE_MODE in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_fp16_overflow`` 0 GFX9-GFX10 Controls FP16_OVFL in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_tg_split`` Target GFX90A Controls TG_SPLIT in + Feature :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`. + Specific + (tgsplit) + ``.amdhsa_workgroup_processor_mode`` Target GFX10 Controls ENABLE_WGP_MODE in + Feature :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`. + Specific + (cumode) + ``.amdhsa_memory_ordered`` 1 GFX10 Controls MEM_ORDERED in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_forward_progress`` 0 GFX10 Controls FWD_PROGRESS in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. 
+ ``.amdhsa_exception_fp_ieee_div_zero`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_exception_fp_ieee_overflow`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_exception_fp_ieee_underflow`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_exception_fp_ieee_inexact`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_INEXACT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ``.amdhsa_exception_int_div_zero`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. + ======================================================== =================== ============ =================== + +.amdgpu_metadata +++++++++++++++++ + +Optional directive which declares the contents of the ``NT_AMDGPU_METADATA`` +note record (see :ref:`amdgpu-elf-note-records-table-v3-v4`). + +The contents must be in the [YAML]_ markup format, with the same structure and +semantics described in :ref:`amdgpu-amdhsa-code-object-metadata-v3` or +:ref:`amdgpu-amdhsa-code-object-metadata-v4`. + +This directive is terminated by an ``.end_amdgpu_metadata`` directive. + +.. _amdgpu-amdhsa-assembler-example-v3-v4: + +Code Object V3 to V4 Example Source Code +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here is an example of a minimal assembly source file, defining one HSA kernel: + +.. 
code:: + :number-lines: + + .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" // optional + + .text + .globl hello_world + .p2align 8 + .type hello_world,@function + hello_world: + s_load_dwordx2 s[0:1], s[0:1] 0x0 + v_mov_b32 v0, 3.14159 + s_waitcnt lgkmcnt(0) + v_mov_b32 v1, s0 + v_mov_b32 v2, s1 + flat_store_dword v[1:2], v0 + s_endpgm + .Lfunc_end0: + .size hello_world, .Lfunc_end0-hello_world + + .rodata + .p2align 6 + .amdhsa_kernel hello_world + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr + .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr + .end_amdhsa_kernel + + .amdgpu_metadata + --- + amdhsa.version: + - 1 + - 0 + amdhsa.kernels: + - .name: hello_world + .symbol: hello_world.kd + .kernarg_segment_size: 48 + .group_segment_fixed_size: 0 + .private_segment_fixed_size: 0 + .kernarg_segment_align: 4 + .wavefront_size: 64 + .sgpr_count: 2 + .vgpr_count: 3 + .max_flat_workgroup_size: 256 + .args: + - .size: 8 + .offset: 0 + .value_kind: global_buffer + .address_space: global + .actual_access: write_only + //... + .end_amdgpu_metadata + +This kernel is equivalent to the following HIP program: + +.. code:: + :number-lines: + + __global__ void hello_world(float *p) { + *p = 3.14159f; + } + +If an assembly source file contains multiple kernels and/or functions, the +:ref:`amdgpu-amdhsa-assembler-symbol-next_free_vgpr` and +:ref:`amdgpu-amdhsa-assembler-symbol-next_free_sgpr` symbols may be reset using +the ``.set , `` directive. For example, in the case of two +kernels, where ``function1`` is only called from ``kernel1`` it is sufficient +to group the function with the kernel that calls it and reset the symbols +between the two connected components: + +.. code:: + :number-lines: + + .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" // optional + + // gpr tracking symbols are implicitly set to zero + + .text + .globl kern0 + .p2align 8 + .type kern0,@function + kern0: + // ... 
+ s_endpgm + .Lkern0_end: + .size kern0, .Lkern0_end-kern0 + + .rodata + .p2align 6 + .amdhsa_kernel kern0 + // ... + .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr + .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr + .end_amdhsa_kernel + + // reset symbols to begin tracking usage in func1 and kern1 + .set .amdgcn.next_free_vgpr, 0 + .set .amdgcn.next_free_sgpr, 0 + + .text + .hidden func1 + .global func1 + .p2align 2 + .type func1,@function + func1: + // ... + s_setpc_b64 s[30:31] + .Lfunc1_end: + .size func1, .Lfunc1_end-func1 + + .globl kern1 + .p2align 8 + .type kern1,@function + kern1: + // ... + s_getpc_b64 s[4:5] + s_add_u32 s4, s4, func1@rel32@lo+4 + s_addc_u32 s5, s5, func1@rel32@lo+4 + s_swappc_b64 s[30:31], s[4:5] + // ... + s_endpgm + .Lkern1_end: + .size kern1, .Lkern1_end-kern1 + + .rodata + .p2align 6 + .amdhsa_kernel kern1 + // ... + .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr + .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr + .end_amdhsa_kernel + +These symbols cannot identify connected components in order to automatically +track the usage for each kernel. However, in some cases careful organization of +the kernels and functions in the source file means there is minimal additional +effort required to accurately calculate GPR usage. + +Additional Documentation +======================== + +.. [AMD-GCN-GFX6] `AMD Southern Islands Series ISA `__ +.. [AMD-GCN-GFX7] `AMD Sea Islands Series ISA `_ +.. [AMD-GCN-GFX8] `AMD GCN3 Instruction Set Architecture `__ +.. [AMD-GCN-GFX900-GFX904-VEGA] `AMD Vega Instruction Set Architecture `__ +.. [AMD-GCN-GFX906-VEGA7NM] `AMD Vega 7nm Instruction Set Architecture `__ +.. [AMD-GCN-GFX908-CDNA1] `AMD Instinct MI100 Instruction Set Architecture `__ +.. [AMD-GCN-GFX10-RDNA1] `AMD RDNA 1.0 Instruction Set Architecture `__ +.. [AMD-GCN-GFX10-RDNA2] `AMD RDNA 2 Instruction Set Architecture `__ +.. [AMD-RADEON-HD-2000-3000] `AMD R6xx shader ISA `__ +.. [AMD-RADEON-HD-4000] `AMD R7xx shader ISA `__ +.. 
[AMD-RADEON-HD-5000] `AMD Evergreen shader ISA `__ +.. [AMD-RADEON-HD-6000] `AMD Cayman/Trinity shader ISA `__ +.. [AMD-ROCm] `AMD ROCm™ Platform `__ +.. [AMD-ROCm-github] `AMD ROCm™ github `__ +.. [AMD-ROCm-Release-Notes] `AMD ROCm Release Notes `__ +.. [CLANG-ATTR] `Attributes in Clang `__ +.. [DWARF] `DWARF Debugging Information Format `__ +.. [ELF] `Executable and Linkable Format (ELF) `__ +.. [HRF] `Heterogeneous-race-free Memory Models `__ +.. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ +.. [MsgPack] `Message Pack `__ +.. [OpenCL] `The OpenCL Specification Version 2.0 `__ +.. [SEMVER] `Semantic Versioning `__ +.. [YAML] `YAML Ain't Markup Language (YAML™) Version 1.2 `__ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/Atomics.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/Atomics.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/Atomics.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/Atomics.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,643 @@ +============================================== +LLVM Atomic Instructions and Concurrency Guide +============================================== + +.. contents:: + :local: + +Introduction +============ + +LLVM supports instructions which are well-defined in the presence of threads and +asynchronous signals. + +The atomic instructions are designed specifically to provide readable IR and +optimized code generation for the following: + +* The C++11 ```` header. (`C++11 draft available here + `_.) (`C11 draft available here + `_.) + +* Proper semantics for Java-style memory, for both ``volatile`` and regular + shared variables. (`Java Specification + `_) + +* gcc-compatible ``__sync_*`` builtins. (`Description + `_) + +* Other scenarios with atomic semantics, including ``static`` variables with + non-trivial constructors in C++. 
+ +Atomic and volatile in the IR are orthogonal; "volatile" is the C/C++ volatile, +which ensures that every volatile load and store happens and is performed in the +stated order. A couple of examples: if a SequentiallyConsistent store is +immediately followed by another SequentiallyConsistent store to the same +address, the first store can be erased. This transformation is not allowed for a +pair of volatile stores. On the other hand, a non-volatile non-atomic load can +be moved across a volatile load freely, but not an Acquire load. + +This document is intended to provide a guide for anyone either writing a frontend +for LLVM or working on optimization passes for LLVM, explaining how to deal +with instructions with special semantics in the presence of concurrency. This +is not intended to be a precise guide to the semantics; the details can get +extremely complicated and unreadable, and are not usually necessary. + +.. _Optimization outside atomic: + +Optimization outside atomic +=========================== + +The basic ``'load'`` and ``'store'`` allow a variety of optimizations, but can +lead to undefined results in a concurrent environment; see `NotAtomic`_. This +section specifically goes into the one optimizer restriction which applies in +concurrent environments, which gets a bit more of an extended description +because any optimization dealing with stores needs to be aware of it. + +From the optimizer's point of view, the rule is that if there are not any +instructions with atomic ordering involved, concurrency does not matter, with +one exception: if a variable might be visible to another thread or signal +handler, a store cannot be inserted along a path where it might not execute +otherwise. Take the following example: + +.. 
code-block:: c + + /* C code, for readability; run through clang -O2 -S -emit-llvm to get + equivalent IR */ + int x; + void f(int* a) { + for (int i = 0; i < 100; i++) { + if (a[i]) + x += 1; + } + } + +The following is equivalent in non-concurrent situations: + +.. code-block:: c + + int x; + void f(int* a) { + int xtemp = x; + for (int i = 0; i < 100; i++) { + if (a[i]) + xtemp += 1; + } + x = xtemp; + } + +However, LLVM is not allowed to transform the former to the latter: it could +indirectly introduce undefined behavior if another thread can access ``x`` at +the same time. That thread would read `undef` instead of the value it was +expecting, which can lead to undefined behavior down the line. (This example is +particularly of interest because before the concurrency model was implemented, +LLVM would perform this transformation.) + +Note that speculative loads are allowed; a load which is part of a race returns +``undef``, but does not have undefined behavior. + +Atomic instructions +=================== + +For cases where simple loads and stores are not sufficient, LLVM provides +various atomic instructions. The exact guarantees provided depend on the +ordering; see `Atomic orderings`_. + +``load atomic`` and ``store atomic`` provide the same basic functionality as +non-atomic loads and stores, but provide additional guarantees in situations +where threads and signals are involved. + +``cmpxchg`` and ``atomicrmw`` are essentially like an atomic load followed by an +atomic store (where the store is conditional for ``cmpxchg``), but no other +memory operation can happen on any thread between the load and store. + +A ``fence`` provides Acquire and/or Release ordering which is not part of +another operation; it is normally used along with Monotonic memory operations. +A Monotonic load followed by an Acquire fence is roughly equivalent to an +Acquire load, and a Monotonic store following a Release fence is roughly +equivalent to a Release store. 
SequentiallyConsistent fences behave as both +an Acquire and a Release fence, and offer some additional complicated +guarantees, see the C++11 standard for details. + +Frontends generating atomic instructions generally need to be aware of the +target to some degree; atomic instructions are guaranteed to be lock-free, and +therefore an instruction which is wider than the target natively supports can be +impossible to generate. + +.. _Atomic orderings: + +Atomic orderings +================ + +In order to achieve a balance between performance and necessary guarantees, +there are six levels of atomicity. They are listed in order of strength; each +level includes all the guarantees of the previous level except for +Acquire/Release. (See also `LangRef Ordering `_.) + +.. _NotAtomic: + +NotAtomic +--------- + +NotAtomic is the obvious, a load or store which is not atomic. (This isn't +really a level of atomicity, but is listed here for comparison.) This is +essentially a regular load or store. If there is a race on a given memory +location, loads from that location return undef. + +Relevant standard + This is intended to match shared variables in C/C++, and to be used in any + other context where memory access is necessary, and a race is impossible. (The + precise definition is in `LangRef Memory Model `_.) + +Notes for frontends + The rule is essentially that all memory accessed with basic loads and stores + by multiple threads should be protected by a lock or other synchronization; + otherwise, you are likely to run into undefined behavior. If your frontend is + for a "safe" language like Java, use Unordered to load and store any shared + variable. Note that NotAtomic volatile loads and stores are not properly + atomic; do not try to use them as a substitute. (Per the C/C++ standards, + volatile does provide some limited guarantees around asynchronous signals, but + atomics are generally a better solution.) 
+ +Notes for optimizers + Introducing loads to shared variables along a codepath where they would not + otherwise exist is allowed; introducing stores to shared variables is not. See + `Optimization outside atomic`_. + +Notes for code generation + The one interesting restriction here is that it is not allowed to write to + bytes outside of the bytes relevant to a store. This is mostly relevant to + unaligned stores: it is not allowed in general to convert an unaligned store + into two aligned stores of the same width as the unaligned store. Backends are + also expected to generate an i8 store as an i8 store, and not an instruction + which writes to surrounding bytes. (If you are writing a backend for an + architecture which cannot satisfy these restrictions and cares about + concurrency, please send an email to llvm-dev.) + +Unordered +--------- + +Unordered is the lowest level of atomicity. It essentially guarantees that races +produce somewhat sane results instead of having undefined behavior. It also +guarantees the operation to be lock-free, so it does not depend on the data +being part of a special atomic structure or depend on a separate per-process +global lock. Note that code generation will fail for unsupported atomic +operations; if you need such an operation, use explicit locking. + +Relevant standard + This is intended to match the Java memory model for shared variables. + +Notes for frontends + This cannot be used for synchronization, but is useful for Java and other + "safe" languages which need to guarantee that the generated code never + exhibits undefined behavior. Note that this guarantee is cheap on common + platforms for loads of a native width, but can be expensive or unavailable for + wider loads, like a 64-bit store on ARM. (A frontend for Java or other "safe" + languages would normally split a 64-bit store on ARM into two 32-bit unordered + stores.) 
+ +Notes for optimizers + In terms of the optimizer, this prohibits any transformation that transforms a + single load into multiple loads, transforms a store into multiple stores, + narrows a store, or stores a value which would not be stored otherwise. Some + examples of unsafe optimizations are narrowing an assignment into a bitfield, + rematerializing a load, and turning loads and stores into a memcpy + call. Reordering unordered operations is safe, though, and optimizers should + take advantage of that because unordered operations are common in languages + that need them. + +Notes for code generation + These operations are required to be atomic in the sense that if you use + unordered loads and unordered stores, a load cannot see a value which was + never stored. A normal load or store instruction is usually sufficient, but + note that an unordered load or store cannot be split into multiple + instructions (or an instruction which does multiple memory operations, like + ``LDRD`` on ARM without LPAE, or not naturally-aligned ``LDRD`` on LPAE ARM). + +Monotonic +--------- + +Monotonic is the weakest level of atomicity that can be used in synchronization +primitives, although it does not provide any general synchronization. It +essentially guarantees that if you take all the operations affecting a specific +address, a consistent ordering exists. + +Relevant standard + This corresponds to the C++11/C11 ``memory_order_relaxed``; see those + standards for the exact definition. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. The + guarantees in terms of synchronization are very weak, so make sure these are + only used in a pattern which you know is correct. Generally, these would + either be used for atomic operations which do not protect other memory (like + an atomic counter), or along with a ``fence``. 
+ +Notes for optimizers + In terms of the optimizer, this can be treated as a read+write on the relevant + memory location (and alias analysis will take advantage of that). In addition, + it is legal to reorder non-atomic and Unordered loads around Monotonic + loads. CSE/DSE and a few other optimizations are allowed, but Monotonic + operations are unlikely to be used in ways which would make those + optimizations useful. + +Notes for code generation + Code generation is essentially the same as that for Unordered loads and + stores. No fences are required. ``cmpxchg`` and ``atomicrmw`` are required + to appear as a single operation. + +Acquire +------- + +Acquire provides a barrier of the sort necessary to acquire a lock to access +other memory with normal loads and stores. + +Relevant standard + This corresponds to the C++11/C11 ``memory_order_acquire``. It should also be + used for C++11/C11 ``memory_order_consume``. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. + Acquire only provides a semantic guarantee when paired with a Release + operation. + +Notes for optimizers + Optimizers not aware of atomics can treat this like a nothrow call. It is + also possible to move stores from before an Acquire load or read-modify-write + operation to after it, and move non-Acquire loads from before an Acquire + operation to after it. + +Notes for code generation + Architectures with weak memory ordering (essentially everything relevant today + except x86 and SPARC) require some sort of fence to maintain the Acquire + semantics. The precise fences required vary widely by architecture, but for + a simple implementation, most architectures provide a barrier which is strong + enough for everything (``dmb`` on ARM, ``sync`` on PowerPC, etc.). Putting + such a fence after the equivalent Monotonic operation is sufficient to + maintain Acquire semantics for a memory operation. 
+ +Release +------- + +Release is similar to Acquire, but with a barrier of the sort necessary to +release a lock. + +Relevant standard + This corresponds to the C++11/C11 ``memory_order_release``. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. + Release only provides a semantic guarantee when paired with an Acquire + operation. + +Notes for optimizers + Optimizers not aware of atomics can treat this like a nothrow call. It is + also possible to move loads from after a Release store or read-modify-write + operation to before it, and move non-Release stores from after a Release + operation to before it. + +Notes for code generation + See the section on Acquire; a fence before the relevant operation is usually + sufficient for Release. Note that a store-store fence is not sufficient to + implement Release semantics; store-store fences are generally not exposed to + IR because they are extremely difficult to use correctly. + +AcquireRelease +-------------- + +AcquireRelease (``acq_rel`` in IR) provides both an Acquire and a Release +barrier (for fences and operations which both read and write memory). + +Relevant standard + This corresponds to the C++11/C11 ``memory_order_acq_rel``. + +Notes for frontends + If you are writing a frontend which uses this directly, use with caution. + Acquire only provides a semantic guarantee when paired with a Release + operation, and vice versa. + +Notes for optimizers + In general, optimizers should treat this like a nothrow call; the possible + optimizations are usually not interesting. + +Notes for code generation + This operation has Acquire and Release semantics; see the sections on Acquire + and Release. + +SequentiallyConsistent +---------------------- + +SequentiallyConsistent (``seq_cst`` in IR) provides Acquire semantics for loads +and Release semantics for stores. Additionally, it guarantees that a total +ordering exists between all SequentiallyConsistent operations. 
+ +Relevant standard + This corresponds to the C++11/C11 ``memory_order_seq_cst``, Java volatile, and + the gcc-compatible ``__sync_*`` builtins which do not specify otherwise. + +Notes for frontends + If a frontend is exposing atomic operations, these are much easier to reason + about for the programmer than other kinds of operations, and using them is + generally a practical performance tradeoff. + +Notes for optimizers + Optimizers not aware of atomics can treat this like a nothrow call. For + SequentiallyConsistent loads and stores, the same reorderings are allowed as + for Acquire loads and Release stores, except that SequentiallyConsistent + operations may not be reordered. + +Notes for code generation + SequentiallyConsistent loads minimally require the same barriers as Acquire + operations and SequentiallyConsistent stores require Release + barriers. Additionally, the code generator must enforce ordering between + SequentiallyConsistent stores followed by SequentiallyConsistent loads. This + is usually done by emitting either a full fence before the loads or a full + fence after the stores; which is preferred varies by architecture. + +Atomics and IR optimization +=========================== + +Predicates for optimizer writers to query: + +* ``isSimple()``: A load or store which is not volatile or atomic. This is + what, for example, memcpyopt would check for operations it might transform. + +* ``isUnordered()``: A load or store which is not volatile and at most + Unordered. This would be checked, for example, by LICM before hoisting an + operation. + +* ``mayReadFromMemory()``/``mayWriteToMemory()``: Existing predicate, but note + that they return true for any operation which is volatile or at least + Monotonic. + +* ``isStrongerThan`` / ``isAtLeastOrStrongerThan``: These are predicates on + orderings. 
They can be useful for passes that are aware of atomics, for + example to do DSE across a single atomic access, but not across a + release-acquire pair (see MemoryDependencyAnalysis for an example of this). + +* Alias analysis: Note that AA will return ModRef for anything Acquire or + Release, and for the address accessed by any Monotonic operation. + +To support optimizing around atomic operations, make sure you are using the +right predicates; everything should work if that is done. If your pass should +optimize some atomic operations (Unordered operations in particular), make sure +it doesn't replace an atomic load or store with a non-atomic operation. + +Some examples of how optimizations interact with various kinds of atomic +operations: + +* ``memcpyopt``: An atomic operation cannot be optimized into part of a + memcpy/memset, including unordered loads/stores. It can pull operations + across some atomic operations. + +* LICM: Unordered loads/stores can be moved out of a loop. It just treats + monotonic operations like a read+write to a memory location, and anything + stricter than that like a nothrow call. + +* DSE: Unordered stores can be DSE'ed like normal stores. Monotonic stores can + be DSE'ed in some cases, but it's tricky to reason about, and not especially + important. It is possible in some cases for DSE to operate across a stronger + atomic operation, but it is fairly tricky. DSE delegates this reasoning to + MemoryDependencyAnalysis (which is also used by other passes like GVN). + +* Folding a load: Any atomic load from a constant global can be constant-folded, + because it cannot be observed. Similar reasoning allows sroa with + atomic loads and stores. + +Atomics and Codegen +=================== + +Atomic operations are represented in the SelectionDAG with ``ATOMIC_*`` opcodes.
+On architectures which use barrier instructions for all atomic ordering (like +ARM), appropriate fences can be emitted by the AtomicExpand Codegen pass if +``setInsertFencesForAtomic()`` was used. + +The MachineMemOperand for all atomic operations is currently marked as volatile; +this is not correct in the IR sense of volatile, but CodeGen handles anything +marked volatile very conservatively. This should get fixed at some point. + +One very important property of the atomic operations is that if your backend +supports any inline lock-free atomic operations of a given size, you should +support *ALL* operations of that size in a lock-free manner. + +When the target implements atomic ``cmpxchg`` or LL/SC instructions (as most do) +this is trivial: all the other operations can be implemented on top of those +primitives. However, on many older CPUs (e.g. ARMv5, SparcV8, Intel 80386) there +are atomic load and store instructions, but no ``cmpxchg`` or LL/SC. As it is +invalid to implement ``atomic load`` using the native instruction, but +``cmpxchg`` using a library call to a function that uses a mutex, ``atomic +load`` must *also* expand to a library call on such architectures, so that it +can remain atomic with regards to a simultaneous ``cmpxchg``, by using the same +mutex. + +AtomicExpandPass can help with that: it will expand all atomic operations to the +proper ``__atomic_*`` libcalls for any size above the maximum set by +``setMaxAtomicSizeInBitsSupported`` (which defaults to 0). + +On x86, all atomic loads generate a ``MOV``. SequentiallyConsistent stores +generate an ``XCHG``, other stores generate a ``MOV``. SequentiallyConsistent +fences generate an ``MFENCE``, other fences do not cause any code to be +generated. ``cmpxchg`` uses the ``LOCK CMPXCHG`` instruction. ``atomicrmw xchg`` +uses ``XCHG``, ``atomicrmw add`` and ``atomicrmw sub`` use ``XADD``, and all +other ``atomicrmw`` operations generate a loop with ``LOCK CMPXCHG``. 
Depending +on the users of the result, some ``atomicrmw`` operations can be translated into +operations like ``LOCK AND``, but that does not work in general. + +On ARM (before v8), MIPS, and many other RISC architectures, Acquire, Release, +and SequentiallyConsistent semantics require barrier instructions for every such +operation. Loads and stores generate normal instructions. ``cmpxchg`` and +``atomicrmw`` can be represented using a loop with LL/SC-style instructions +which take some sort of exclusive lock on a cache line (``LDREX`` and ``STREX`` +on ARM, etc.). + +It is often easiest for backends to use AtomicExpandPass to lower some of the +atomic constructs. Here are some lowerings it can do: + +* cmpxchg -> loop with load-linked/store-conditional + by overriding ``shouldExpandAtomicCmpXchgInIR()``, ``emitLoadLinked()``, + ``emitStoreConditional()`` +* large loads/stores -> ll-sc/cmpxchg + by overriding ``shouldExpandAtomicStoreInIR()``/``shouldExpandAtomicLoadInIR()`` +* strong atomic accesses -> monotonic accesses + fences by overriding + ``shouldInsertFencesForAtomic()``, ``emitLeadingFence()``, and + ``emitTrailingFence()`` +* atomic rmw -> loop with cmpxchg or load-linked/store-conditional + by overriding ``expandAtomicRMWInIR()`` +* expansion to __atomic_* libcalls for unsupported sizes. +* part-word atomicrmw/cmpxchg -> target-specific intrinsic by overriding + ``shouldExpandAtomicRMWInIR``, ``emitMaskedAtomicRMWIntrinsic``, + ``shouldExpandAtomicCmpXchgInIR``, and ``emitMaskedAtomicCmpXchgIntrinsic``. + +For an example of these look at the ARM (first five lowerings) or RISC-V (last +lowering) backend. + +AtomicExpandPass supports two strategies for lowering atomicrmw/cmpxchg to +load-linked/store-conditional (LL/SC) loops. The first expands the LL/SC loop +in IR, calling target lowering hooks to emit intrinsics for the LL and SC +operations. 
However, many architectures have strict requirements for LL/SC +loops to ensure forward progress, such as restrictions on the number and type +of instructions in the loop. It isn't possible to enforce these restrictions +when the loop is expanded in LLVM IR, and so affected targets may prefer to +expand to LL/SC loops at a very late stage (i.e. after register allocation). +AtomicExpandPass can help support lowering of part-word atomicrmw or cmpxchg +using this strategy by producing IR for any shifting and masking that can be +performed outside of the LL/SC loop. + +Libcalls: __atomic_* +==================== + +There are two kinds of atomic library calls that are generated by LLVM. Please +note that both sets of library functions somewhat confusingly share the names of +builtin functions defined by clang. Despite this, the library functions are +not directly related to the builtins: it is *not* the case that ``__atomic_*`` +builtins lower to ``__atomic_*`` library calls and ``__sync_*`` builtins lower +to ``__sync_*`` library calls. + +The first set of library functions are named ``__atomic_*``. This set has been +"standardized" by GCC, and is described below. (See also `GCC's documentation +<https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary>`_) + +LLVM's AtomicExpandPass will translate atomic operations on data sizes above +``MaxAtomicSizeInBitsSupported`` into calls to these functions. + +There are four generic functions, which can be called with data of any size or +alignment:: + + void __atomic_load(size_t size, void *ptr, void *ret, int ordering) + void __atomic_store(size_t size, void *ptr, void *val, int ordering) + void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int ordering) + bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void *desired, int success_order, int failure_order) + +There are also size-specialized versions of the above functions, which can only +be used with *naturally-aligned* pointers of the appropriate size.
In the +signatures below, "N" is one of 1, 2, 4, 8, and 16, and "iN" is the appropriate +integer type of that size; if no such integer type exists, the specialization +cannot be used:: + + iN __atomic_load_N(iN *ptr, int ordering) + void __atomic_store_N(iN *ptr, iN val, int ordering) + iN __atomic_exchange_N(iN *ptr, iN val, int ordering) + bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, int success_order, int failure_order) + +Finally there are some read-modify-write functions, which are only available in +the size-specific variants (any other sizes use a ``__atomic_compare_exchange`` +loop):: + + iN __atomic_fetch_add_N(iN *ptr, iN val, int ordering) + iN __atomic_fetch_sub_N(iN *ptr, iN val, int ordering) + iN __atomic_fetch_and_N(iN *ptr, iN val, int ordering) + iN __atomic_fetch_or_N(iN *ptr, iN val, int ordering) + iN __atomic_fetch_xor_N(iN *ptr, iN val, int ordering) + iN __atomic_fetch_nand_N(iN *ptr, iN val, int ordering) + +This set of library functions have some interesting implementation requirements +to take note of: + +- They support all sizes and alignments -- including those which cannot be + implemented natively on any existing hardware. Therefore, they will certainly + use mutexes for some sizes/alignments. + +- As a consequence, they cannot be shipped in a statically linked + compiler-support library, as they have state which must be shared amongst all + DSOs loaded in the program. They must be provided in a shared library used by + all objects. + +- The set of atomic sizes supported lock-free must be a superset of the sizes + any compiler can emit. That is: if a new compiler introduces support for + inline-lock-free atomics of size N, the ``__atomic_*`` functions must also have a + lock-free implementation for size N.
This is a requirement so that code + produced by an old compiler (which will have called the ``__atomic_*`` function) + interoperates with code produced by the new compiler (which will use the + native atomic instruction). + +Note that it's possible to write an entirely target-independent implementation +of these library functions by using the compiler atomic builtins themselves to +implement the operations on naturally-aligned pointers of supported sizes, and a +generic mutex implementation otherwise. + +Libcalls: __sync_* +================== + +Some targets or OS/target combinations can support lock-free atomics, but for +various reasons, it is not practical to emit the instructions inline. + +There are two typical examples of this. + +Some CPUs support multiple instruction sets which can be switched back and forth +on function-call boundaries. For example, MIPS supports the MIPS16 ISA, which +has a smaller instruction encoding than the usual MIPS32 ISA. ARM, similarly, +has the Thumb ISA. In MIPS16 and earlier versions of Thumb, the atomic +instructions are not encodable. However, those instructions are available via a +function call to a function with the longer encoding. + +Additionally, a few OS/target pairs provide kernel-supported lock-free +atomics. ARM/Linux is an example of this: the kernel `provides +<https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt>`_ a +function which on older CPUs contains a "magically-restartable" atomic sequence +(which looks atomic so long as there's only one CPU), and contains actual atomic +instructions on newer multicore models. This sort of functionality can typically +be provided on any architecture, if all CPUs which are missing atomic +compare-and-swap support are uniprocessor (no SMP). This is almost always the +case. The only common architecture without that property is SPARC -- SPARCV8 SMP +systems were common, yet it doesn't support any sort of compare-and-swap +operation.
+ +In either of these cases, the Target in LLVM can claim support for atomics of an +appropriate size, and then implement some subset of the operations via libcalls +to a ``__sync_*`` function. Such functions *must* not use locks in their +implementation, because unlike the ``__atomic_*`` routines used by +AtomicExpandPass, these may be mixed-and-matched with native instructions by the +target lowering. + +Further, these routines do not need to be shared, as they are stateless. So, +there is no issue with having multiple copies included in one binary. Thus, +typically these routines are implemented by the statically-linked compiler +runtime support library. + +LLVM will emit a call to an appropriate ``__sync_*`` routine if the target +ISelLowering code has set the corresponding ``ATOMIC_CMPXCHG``, ``ATOMIC_SWAP``, +or ``ATOMIC_LOAD_*`` operation to "Expand", and if it has opted-into the +availability of those library functions via a call to ``initSyncLibcalls()``. + +The full set of functions that may be called by LLVM is (for ``N`` being 1, 2, +4, 8, or 16):: + + iN __sync_val_compare_and_swap_N(iN *ptr, iN expected, iN desired) + iN __sync_lock_test_and_set_N(iN *ptr, iN val) + iN __sync_fetch_and_add_N(iN *ptr, iN val) + iN __sync_fetch_and_sub_N(iN *ptr, iN val) + iN __sync_fetch_and_and_N(iN *ptr, iN val) + iN __sync_fetch_and_or_N(iN *ptr, iN val) + iN __sync_fetch_and_xor_N(iN *ptr, iN val) + iN __sync_fetch_and_nand_N(iN *ptr, iN val) + iN __sync_fetch_and_max_N(iN *ptr, iN val) + iN __sync_fetch_and_umax_N(iN *ptr, iN val) + iN __sync_fetch_and_min_N(iN *ptr, iN val) + iN __sync_fetch_and_umin_N(iN *ptr, iN val) + +This list doesn't include any function for atomic load or store; all known +architectures support atomic loads and stores directly (possibly by emitting a +fence on either side of a normal load or store.) + +There's also, somewhat separately, the possibility to lower ``ATOMIC_FENCE`` to +``__sync_synchronize()``. 
This may happen or not happen independent of all the +above, controlled purely by ``setOperationAction(ISD::ATOMIC_FENCE, ...)``. + +On AArch64, a variant of the __sync_* routines is used which contain the memory +order as part of the function name. These routines may determine at runtime +whether the single-instruction atomic operations which were introduced as part +of AArch64 Large System Extensions "LSE" instruction set are available, or if +it needs to fall back to an LL/SC loop. The following helper functions are +implemented in both ``compiler-rt`` and ``libgcc`` libraries +(``N`` is one of 1, 2, 4, 8, and ``M`` is one of 1, 2, 4, 8 and 16, and +``ORDER`` is one of 'relax', 'acq', 'rel', 'acq_rel'):: + + iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr) + iN __aarch64_swpN_ORDER(iN val, iN *ptr) + iN __aarch64_ldaddN_ORDER(iN val, iN *ptr) + iN __aarch64_ldclrN_ORDER(iN val, iN *ptr) + iN __aarch64_ldeorN_ORDER(iN val, iN *ptr) + iN __aarch64_ldsetN_ORDER(iN val, iN *ptr) + +Please note, if LSE instruction set is specified for AArch64 target then +out-of-line atomics calls are not generated and single-instruction atomic +operations are used in place. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/Benchmarking.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/Benchmarking.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/Benchmarking.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/Benchmarking.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,87 @@ +================================== +Benchmarking tips +================================== + + +Introduction +============ + +For benchmarking a patch we want to reduce all possible sources of +noise as much as possible. How to do that is very OS dependent. + +Note that low noise is required, but not sufficient. It does not +exclude measurement bias. 
See +https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf for +example. + +General +================================ + +* Use a high resolution timer, e.g. perf under linux. + +* Run the benchmark multiple times to be able to recognize noise. + +* Disable as many processes or services as possible on the target system. + +* Disable frequency scaling, turbo boost and address space + randomization (see OS specific section). + +* Static link if the OS supports it. That avoids any variation that + might be introduced by loading dynamic libraries. This can be done + by passing ``-DLLVM_BUILD_STATIC=ON`` to cmake. + +* Try to avoid storage. On some systems you can use tmpfs. Putting the + program, inputs and outputs on tmpfs avoids touching a real storage + system, which can have a pretty big variability. + + To mount it (on linux and freebsd at least):: + + mount -t tmpfs -o size=<XX>g none dir_to_mount + +Linux +===== + +* Disable address space randomization:: + + echo 0 > /proc/sys/kernel/randomize_va_space + +* Set scaling_governor to performance:: + + for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + do + echo performance > "$i" + done + +* Use https://github.com/lpechacek/cpuset to reserve cpus for just the + program you are benchmarking. If using perf, leave at least 2 cores + so that perf runs in one and your program in another:: + + cset shield -c N1,N2 -k on + + This will move all threads out of N1 and N2. The ``-k on`` means + that even kernel threads are moved out. + +* Disable the SMT pair of the cpus you will use for the benchmark. The + pair of cpu N can be found in + ``/sys/devices/system/cpu/cpuN/topology/thread_siblings_list`` and + disabled with:: + + echo 0 > /sys/devices/system/cpu/cpuX/online + + +* Run the program with:: + + cset shield --exec -- perf stat -r 10 <cmd> + + This will run the command after ``--`` in the isolated cpus.
The + particular perf command runs the ``<cmd>`` 10 times and reports + statistics. + +With these in place you can expect perf variations of less than 0.1%. + +Linux Intel +----------- + +* Disable turbo mode:: + + echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BigEndianNEON.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BigEndianNEON.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BigEndianNEON.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BigEndianNEON.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,205 @@ +============================================== +Using ARM NEON instructions in big endian mode +============================================== + +.. contents:: + :local: + +Introduction +============ + +Generating code for big endian ARM processors is for the most part straightforward. NEON loads and stores however have some interesting properties that make code generation decisions less obvious in big endian mode. + +The aim of this document is to explain the problem with NEON loads and stores, and the solution that has been implemented in LLVM. + +In this document the term "vector" refers to what the ARM ABI calls a "short vector", which is a sequence of items that can fit in a NEON register. This sequence can be 64 or 128 bits in length, and can constitute 8, 16, 32 or 64 bit items. This document refers to A64 instructions throughout, but is almost applicable to the A32/ARMv7 instruction sets also. The ABI format for passing vectors in A32 is slightly different to A64. Apart from that, the same concepts apply. + +Example: C-level intrinsics -> assembly +--------------------------------------- + +It may be helpful first to illustrate how C-level ARM NEON intrinsics are lowered to instructions.
+ +This trivial C function takes a vector of four ints and sets the zero'th lane to the value "42":: + + #include <arm_neon.h> + int32x4_t f(int32x4_t p) { + return vsetq_lane_s32(42, p, 0); + } + +arm_neon.h intrinsics generate "generic" IR where possible (that is, normal IR instructions not ``llvm.arm.neon.*`` intrinsic calls). The above generates:: + + define <4 x i32> @f(<4 x i32> %p) { + %vset_lane = insertelement <4 x i32> %p, i32 42, i32 0 + ret <4 x i32> %vset_lane + } + +Which then becomes the following trivial assembly:: + + f: // @f + movz w8, #0x2a + ins v0.s[0], w8 + ret + +Problem +======= + +The main problem is how vectors are represented in memory and in registers. + +First, a recap. The "endianness" of an item affects its representation in memory only. In a register, a number is just a sequence of bits - 64 bits in the case of AArch64 general purpose registers. Memory, however, is a sequence of addressable units of 8 bits in size. Any number greater than 8 bits must therefore be split up into 8-bit chunks, and endianness describes the order in which these chunks are laid out in memory. + +A "little endian" layout has the least significant byte first (lowest in memory address). A "big endian" layout has the *most* significant byte first. This means that when loading an item from big endian memory, the lowest 8-bits in memory must go in the most significant 8-bits, and so forth. + +``LDR`` and ``LD1`` +=================== + +.. figure:: ARM-BE-ldr.png + :align: right + + Big endian vector load using ``LDR``. + + +A vector is a consecutive sequence of items that are operated on simultaneously. To load a 64-bit vector, 64 bits need to be read from memory. In little endian mode, we can do this by just performing a 64-bit load - ``LDR q0, [foo]``. However if we try this in big endian mode, because of the byte swapping the lane indices end up being swapped! The zero'th item as laid out in memory becomes the n'th lane in the vector. + +..
figure:: ARM-BE-ld1.png + :align: right + + Big endian vector load using ``LD1``. Note that the lanes retain the correct ordering. + + +Because of this, the instruction ``LD1`` performs a vector load but performs byte swapping not on the entire 64 bits, but on the individual items within the vector. This means that the register content is the same as it would have been on a little endian system. + +It may seem that ``LD1`` should suffice to perform vector loads on a big endian machine. However there are pros and cons to the two approaches that make it less than simple which register format to pick. + +There are two options: + + 1. The content of a vector register is the same *as if* it had been loaded with an ``LDR`` instruction. + 2. The content of a vector register is the same *as if* it had been loaded with an ``LD1`` instruction. + +Because ``LD1 == LDR + REV`` and similarly ``LDR == LD1 + REV`` (on a big endian system), we can simulate either type of load with the other type of load plus a ``REV`` instruction. So we're not deciding which instructions to use, but which format to use (which will then influence which instruction is best to use). + +.. The 'clearer' container is required to make the following section header come after the floated + images above. +.. container:: clearer + + Note that throughout this section we only mention loads. Stores have exactly the same problems as their associated loads, so have been skipped for brevity. + + +Considerations +============== + +LLVM IR Lane ordering +--------------------- + +LLVM IR has first class vector types. In LLVM IR, the zero'th element of a vector resides at the lowest memory address. The optimizer relies on this property in certain areas, for example when concatenating vectors together. The intention is for arrays and vectors to have identical memory layouts - ``[4 x i8]`` and ``<4 x i8>`` should be represented the same in memory.
Without this property there would be many special cases that the optimizer would have to cleverly handle. + +Use of ``LDR`` would break this lane ordering property. This doesn't preclude the use of ``LDR``, but we would have to do one of two things: + + 1. Insert a ``REV`` instruction to reverse the lane order after every ``LDR``. + 2. Disable all optimizations that rely on lane layout, and for every access to an individual lane (``insertelement``/``extractelement``/``shufflevector``) reverse the lane index. + +AAPCS +----- + +The ARM procedure call standard (AAPCS) defines the ABI for passing vectors between functions in registers. It states: + + When a short vector is transferred between registers and memory it is treated as an opaque object. That is a short vector is stored in memory as if it were stored with a single ``STR`` of the entire register; a short vector is loaded from memory using the corresponding ``LDR`` instruction. On a little-endian system this means that element 0 will always contain the lowest addressed element of a short vector; on a big-endian system element 0 will contain the highest-addressed element of a short vector. + + -- Procedure Call Standard for the ARM 64-bit Architecture (AArch64), 4.1.2 Short Vectors + +The use of ``LDR`` and ``STR`` as the ABI defines has at least one advantage over ``LD1`` and ``ST1``. ``LDR`` and ``STR`` are oblivious to the size of the individual lanes of a vector. ``LD1`` and ``ST1`` are not - the lane size is encoded within them. This is important across an ABI boundary, because it would become necessary to know the lane width the callee expects. Consider the following code: + +.. code-block:: c + + + void callee(uint32x2_t v) { + ... + } + + + extern void callee(uint32x2_t); + void caller() { + callee(...); + } + +If ``callee`` changed its signature to ``uint16x4_t``, which is equivalent in register content, if we passed as ``LD1`` we'd break this code until ``caller`` was updated and recompiled. 
+ +There is an argument that if the signatures of the two functions are different then the behaviour should be undefined. But there may be functions that are agnostic to the lane layout of the vector, and treating the vector as an opaque value (just loading it and storing it) would be impossible without a common format across ABI boundaries. + +So to preserve ABI compatibility, we need to use the ``LDR`` lane layout across function calls. + +Alignment +--------- + +In strict alignment mode, ``LDR qX`` requires its address to be 128-bit aligned, whereas ``LD1`` only requires it to be as aligned as the lane size. If we canonicalised on using ``LDR``, we'd still need to use ``LD1`` in some places to avoid alignment faults (the result of the ``LD1`` would then need to be reversed with ``REV``). + +Most operating systems however do not run with alignment faults enabled, so this is often not an issue. + +Summary +------- + +The following table summarises the instructions that are required to be emitted for each property mentioned above for each of the two solutions. + ++-------------------------------+-------------------------------+---------------------+ +| | ``LDR`` layout | ``LD1`` layout | ++===============================+===============================+=====================+ +| Lane ordering | ``LDR + REV`` | ``LD1`` | ++-------------------------------+-------------------------------+---------------------+ +| AAPCS | ``LDR`` | ``LD1 + REV`` | ++-------------------------------+-------------------------------+---------------------+ +| Alignment for strict mode | ``LDR`` / ``LD1 + REV`` | ``LD1`` | ++-------------------------------+-------------------------------+---------------------+ + +Neither approach is perfect, and choosing one boils down to choosing the lesser of two evils. The issue with lane ordering, it was decided, would have to change target-agnostic compiler passes and would result in a strange IR in which lane indices were reversed. 
It was decided that this was worse than the changes that would have to be made to support ``LD1``, so ``LD1`` was chosen as the canonical vector load instruction (and by inference, ``ST1`` for vector stores). + +Implementation +============== + +There are 3 parts to the implementation: + + 1. Predicate ``LDR`` and ``STR`` instructions so that they are never allowed to be selected to generate vector loads and stores. The exception is one-lane vectors [1]_ - these by definition cannot have lane ordering problems so are fine to use ``LDR``/``STR``. + + 2. Create code generation patterns for bitconverts that create ``REV`` instructions. + + 3. Make sure appropriate bitconverts are created so that vector values get passed over call boundaries as 1-element vectors (which is the same as if they were loaded with ``LDR``). + +Bitconverts +----------- + +.. image:: ARM-BE-bitcastfail.png + :align: right + +The main problem with the ``LD1`` solution is dealing with bitconverts (or bitcasts, or reinterpret casts). These are pseudo instructions that only change the compiler's interpretation of data, not the underlying data itself. A requirement is that if data is loaded and then saved again (called a "round trip"), the memory contents should be the same after the store as before the load. If a vector is loaded and is then bitconverted to a different vector type before storing, the round trip will currently be broken. + +Take for example this code sequence:: + + %0 = load <4 x i32> %x + %1 = bitcast <4 x i32> %0 to <2 x i64> + store <2 x i64> %1, <2 x i64>* %y + +This would produce a code sequence such as that in the figure on the right. The mismatched ``LD1`` and ``ST1`` cause the stored data to differ from the loaded data. + +.. container:: clearer + + When we see a bitcast from type ``X`` to type ``Y``, what we need to do is to change the in-register representation of the data to be *as if* it had just been loaded by a ``LD1`` of type ``Y``. + +.. 
image:: ARM-BE-bitcastsuccess.png + :align: right + +Conceptually this is simple - we can insert a ``REV`` undoing the ``LD1`` of type ``X`` (converting the in-register representation to the same as if it had been loaded by ``LDR``) and then insert another ``REV`` to change the representation to be as if it had been loaded by an ``LD1`` of type ``Y``. + +For the previous example, this would be:: + + LD1 v0.4s, [x] + + REV64 v0.4s, v0.4s // There is no REV128 instruction, so it must be synthesized + EXT v0.16b, v0.16b, v0.16b, #8 // with a REV64 then an EXT to swap the two 64-bit elements. + + REV64 v0.2d, v0.2d + EXT v0.16b, v0.16b, v0.16b, #8 + + ST1 v0.2d, [y] + +It turns out that these ``REV`` pairs can, in almost all cases, be squashed together into a single ``REV``. For the example above, a ``REV128 4s`` + ``REV128 2d`` is actually a ``REV64 4s``, as shown in the figure on the right. + +.. [1] One lane vectors may seem useless as a concept but they serve to distinguish between values held in general purpose registers and values held in NEON/VFP registers. For example, an ``i64`` would live in an ``x`` register, but ``<1 x i64>`` would live in a ``d`` register. + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BitCodeFormat.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BitCodeFormat.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BitCodeFormat.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BitCodeFormat.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,1395 @@ +.. role:: raw-html(raw) + :format: html + +======================== +LLVM Bitcode File Format +======================== + +.. contents:: + :local: + +Abstract +======== + +This document describes the LLVM bitstream file format and the encoding of the +LLVM IR into it.
+ +Overview +======== + +What is commonly known as the LLVM bitcode file format (also, sometimes +anachronistically known as bytecode) is actually two things: a `bitstream +container format`_ and an `encoding of LLVM IR`_ into the container format. + +The bitstream format is an abstract encoding of structured data, very similar to +XML in some ways. Like XML, bitstream files contain tags, and nested +structures, and you can parse the file without having to understand the tags. +Unlike XML, the bitstream format is a binary encoding, and unlike XML it +provides a mechanism for the file to self-describe "abbreviations", which are +effectively size optimizations for the content. + +LLVM IR files may be optionally embedded into a `wrapper`_ structure, or in a +`native object file`_. Both of these mechanisms make it easy to embed extra +data along with LLVM IR files. + +This document first describes the LLVM bitstream format, describes the wrapper +format, then describes the record structure used by LLVM IR files. + +.. _bitstream container format: + +Bitstream Format +================ + +The bitstream format is literally a stream of bits, with a very simple +structure. This structure consists of the following concepts: + +* A "`magic number`_" that identifies the contents of the stream. + +* Encoding `primitives`_ like variable bit-rate integers. + +* `Blocks`_, which define nested content. + +* `Data Records`_, which describe entities within the file. + +* Abbreviations, which specify compression optimizations for the file. + +Note that the :doc:`llvm-bcanalyzer <CommandGuide/llvm-bcanalyzer>` tool can be +used to dump and inspect arbitrary bitstreams, which is very useful for +understanding the encoding. + +.. _magic number: + +Magic Numbers +------------- + +The first four bytes of a bitstream are used as an application-specific magic +number. Generic bitcode tools may look at the first four bytes to determine +whether the stream is a known stream type.
However, these tools should *not* +determine whether a bitstream is valid based on its magic number alone. New +application-specific bitstream formats are being developed all the time; tools +should not reject them just because they have a hitherto unseen magic number. + +.. _primitives: + +Primitives +---------- + +A bitstream literally consists of a stream of bits, which are read in order +starting with the least significant bit of each byte. The stream is made up of +a number of primitive values that encode a stream of unsigned integer values. +These integers are encoded in two ways: either as `Fixed Width Integers`_ or as +`Variable Width Integers`_. + +.. _Fixed Width Integers: +.. _fixed-width value: + +Fixed Width Integers +^^^^^^^^^^^^^^^^^^^^ + +Fixed-width integer values have their low bits emitted directly to the file. +For example, a 3-bit integer value encodes 1 as 001. Fixed width integers are +used when there are a well-known number of options for a field. For example, +boolean values are usually encoded with a 1-bit wide integer. + +.. _Variable Width Integers: +.. _Variable Width Integer: +.. _variable-width value: + +Variable Width Integers +^^^^^^^^^^^^^^^^^^^^^^^ + +Variable-width integer (VBR) values encode values of arbitrary size, optimizing +for the case where the values are small. Given a 4-bit VBR field, any 3-bit +value (0 through 7) is encoded directly, with the high bit set to zero. Values +larger than N-1 bits emit their bits in a series of N-1 bit chunks, where all +but the last set the high bit. + +For example, the value 30 (0x1E) is encoded as 62 (0b0011'1110) when emitted as +a vbr4 value. The first set of four bits starting from the least significant +indicates the value 6 (110) with a continuation piece (indicated by a high bit +of 1). The next set of four bits indicates a value of 24 (011 << 3) with no +continuation. The sum (6+24) yields the value 30. + +.. 
_char6-encoded value: + +6-bit characters +^^^^^^^^^^^^^^^^ + +6-bit characters encode common characters into a fixed 6-bit field. They +represent the following characters with the following 6-bit values: + +:: + + 'a' .. 'z' --- 0 .. 25 + 'A' .. 'Z' --- 26 .. 51 + '0' .. '9' --- 52 .. 61 + '.' --- 62 + '_' --- 63 + +This encoding is only suitable for encoding characters and strings that consist +only of the above characters. It is completely incapable of encoding characters +not in the set. + +Word Alignment +^^^^^^^^^^^^^^ + +Occasionally, it is useful to emit zero bits until the bitstream is a multiple +of 32 bits. This ensures that the bit position in the stream can be represented +as a multiple of 32-bit words. + +Abbreviation IDs +---------------- + +A bitstream is a sequential series of `Blocks`_ and `Data Records`_. Both of +these start with an abbreviation ID encoded as a fixed-bitwidth field. The +width is specified by the current block, as described below. The value of the +abbreviation ID specifies either a builtin ID (which have special meanings, +defined below) or one of the abbreviation IDs defined for the current block by +the stream itself. + +The set of builtin abbrev IDs is: + +* 0 - `END_BLOCK`_ --- This abbrev ID marks the end of the current block. + +* 1 - `ENTER_SUBBLOCK`_ --- This abbrev ID marks the beginning of a new + block. + +* 2 - `DEFINE_ABBREV`_ --- This defines a new abbreviation. + +* 3 - `UNABBREV_RECORD`_ --- This ID specifies the definition of an + unabbreviated record. + +Abbreviation IDs 4 and above are defined by the stream itself, and specify an +`abbreviated record encoding`_. + +.. _Blocks: + +Blocks +------ + +Blocks in a bitstream denote nested regions of the stream, and are identified by +a content-specific id number (for example, LLVM IR uses an ID of 12 to represent +function bodies). 
Block IDs 0-7 are reserved for `standard blocks`_ whose +meaning is defined by Bitcode; block IDs 8 and greater are application +specific. Nested blocks capture the hierarchical structure of the data encoded +in it, and various properties are associated with blocks as the file is parsed. +Block definitions allow the reader to efficiently skip blocks in constant time +if the reader wants a summary of blocks, or if it wants to efficiently skip data +it does not understand. The LLVM IR reader uses this mechanism to skip function +bodies, lazily reading them on demand. + +When reading and encoding the stream, several properties are maintained for the +block. In particular, each block maintains: + +#. A current abbrev id width. This value starts at 2 at the beginning of the + stream, and is set every time a block record is entered. The block entry + specifies the abbrev id width for the body of the block. + +#. A set of abbreviations. Abbreviations may be defined within a block, in + which case they are only defined in that block (neither subblocks nor + enclosing blocks see the abbreviation). Abbreviations can also be defined + inside a `BLOCKINFO`_ block, in which case they are defined in all blocks + that match the ID that the ``BLOCKINFO`` block is describing. + +As sub blocks are entered, these properties are saved and the new sub-block has +its own set of abbreviations, and its own abbrev id width. When a sub-block is +popped, the saved values are restored. + +.. _ENTER_SUBBLOCK: + +ENTER_SUBBLOCK Encoding +^^^^^^^^^^^^^^^^^^^^^^^ + +:raw-html:`<tt>` +[ENTER_SUBBLOCK, blockid\ :sub:`vbr8`, newabbrevlen\ :sub:`vbr4`, <align32bits>, blocklen_32] +:raw-html:`</tt>` + +The ``ENTER_SUBBLOCK`` abbreviation ID specifies the start of a new block +record. The ``blockid`` value is encoded as an 8-bit VBR identifier, and +indicates the type of block being entered, which can be a `standard block`_ or +an application-specific block. 
The ``newabbrevlen`` value is a 4-bit VBR, which +specifies the abbrev id width for the sub-block. The ``blocklen`` value is a +32-bit aligned value that specifies the size of the subblock in 32-bit +words. This value allows the reader to skip over the entire block in one jump. + +.. _END_BLOCK: + +END_BLOCK Encoding +^^^^^^^^^^^^^^^^^^ + +``[END_BLOCK, <align32bits>]`` + +The ``END_BLOCK`` abbreviation ID specifies the end of the current block record. +Its end is aligned to 32-bits to ensure that the size of the block is an even +multiple of 32-bits. + +.. _Data Records: + +Data Records +------------ + +Data records consist of a record code and a number of (up to) 64-bit integer +values. The interpretation of the code and values is application specific and +may vary between different block types. Records can be encoded either using an +unabbrev record, or with an abbreviation. In the LLVM IR format, for example, +there is a record which encodes the target triple of a module. The code is +``MODULE_CODE_TRIPLE``, and the values of the record are the ASCII codes for the +characters in the string. + +.. _UNABBREV_RECORD: + +UNABBREV_RECORD Encoding +^^^^^^^^^^^^^^^^^^^^^^^^ + +:raw-html:`<tt>` +[UNABBREV_RECORD, code\ :sub:`vbr6`, numops\ :sub:`vbr6`, op0\ :sub:`vbr6`, op1\ :sub:`vbr6`, ...] +:raw-html:`</tt>` + +An ``UNABBREV_RECORD`` provides a default fallback encoding, which is both +completely general and extremely inefficient. It can describe an arbitrary +record by emitting the code and operands as VBRs. + +For example, emitting an LLVM IR target triple as an unabbreviated record +requires emitting the ``UNABBREV_RECORD`` abbrevid, a vbr6 for the +``MODULE_CODE_TRIPLE`` code, a vbr6 for the length of the string, which is equal +to the number of operands, and a vbr6 for each character. Because there are no +letters with values less than 32, each letter would need to be emitted as at +least a two-part VBR, which means that each letter would require at least 12 +bits. 
This is not an efficient encoding, but it is fully general. + +.. _abbreviated record encoding: + +Abbreviated Record Encoding +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[<abbrevid>, fields...]`` + +An abbreviated record is an abbreviation id followed by a set of fields that are +encoded according to the `abbreviation definition`_. This allows records to be +encoded significantly more densely than records encoded with the +`UNABBREV_RECORD`_ type, and allows the abbreviation types to be specified in +the stream itself, which allows the files to be completely self describing. The +actual encoding of abbreviations is defined below. + +The record code, which is the first field of an abbreviated record, may be +encoded in the abbreviation definition (as a literal operand) or supplied in the +abbreviated record (as a Fixed or VBR operand value). + +.. _abbreviation definition: + +Abbreviations +------------- + +Abbreviations are an important form of compression for bitstreams. The idea is +to specify a dense encoding for a class of records once, then use that encoding +to emit many records. It takes space to emit the encoding into the file, but +the space is recouped (hopefully plus some) when the records that use it are +emitted. + +Abbreviations can be determined dynamically per client, per file. Because the +abbreviations are stored in the bitstream itself, different streams of the same +format can contain different sets of abbreviations according to the needs of the +specific stream. As a concrete example, LLVM IR files usually emit an +abbreviation for binary operators. If a specific LLVM module contained no or +few binary operators, the abbreviation does not need to be emitted. + +.. _DEFINE_ABBREV: + +DEFINE_ABBREV Encoding +^^^^^^^^^^^^^^^^^^^^^^ + +:raw-html:`<tt>` +[DEFINE_ABBREV, numabbrevops\ :sub:`vbr5`, abbrevop0, abbrevop1, ...] +:raw-html:`</tt>` + +A ``DEFINE_ABBREV`` record adds an abbreviation to the list of currently defined +abbreviations in the scope of this block. 
This definition only exists inside +this immediate block --- it is not visible in subblocks or enclosing blocks. +Abbreviations are implicitly assigned IDs sequentially starting from 4 (the +first application-defined abbreviation ID). Any abbreviations defined in a +``BLOCKINFO`` record for the particular block type receive IDs first, in order, +followed by any abbreviations defined within the block itself. Abbreviated data +records reference this ID to indicate what abbreviation they are invoking. + +An abbreviation definition consists of the ``DEFINE_ABBREV`` abbrevid followed +by a VBR that specifies the number of abbrev operands, then the abbrev operands +themselves. Abbreviation operands come in three forms. They all start with a +single bit that indicates whether the abbrev operand is a literal operand (when +the bit is 1) or an encoding operand (when the bit is 0). + +#. Literal operands --- :raw-html:`<tt>` [1\ :sub:`1`, litvalue\ + :sub:`vbr8`] :raw-html:`</tt>` --- Literal operands specify that the value in + the result is always a single specific value. This specific value is emitted + as a vbr8 after the bit indicating that it is a literal operand. + +#. Encoding info without data --- :raw-html:`<tt>` [0\ :sub:`1`, encoding\ + :sub:`3`] :raw-html:`</tt>` --- Operand encodings that do not have extra data + are just emitted as their code. + +#. Encoding info with data --- :raw-html:`<tt>` [0\ :sub:`1`, encoding\ + :sub:`3`, value\ :sub:`vbr5`] :raw-html:`</tt>` --- Operand encodings that do + have extra data are emitted as their code, followed by the extra data. + +The possible operand encodings are: + +* Fixed (code 1): The field should be emitted as a `fixed-width value`_, whose + width is specified by the operand's extra data. + +* VBR (code 2): The field should be emitted as a `variable-width value`_, whose + width is specified by the operand's extra data. + +* Array (code 3): This field is an array of values. 
The array operand has no + extra data, but expects another operand to follow it, indicating the element + type of the array. When reading an array in an abbreviated record, the first + integer is a vbr6 that indicates the array length, followed by the encoded + elements of the array. An array may only occur as the last operand of an + abbreviation (except for the one final operand that gives the array's + type). + +* Char6 (code 4): This field should be emitted as a `char6-encoded value`_. + This operand type takes no extra data. Char6 encoding is normally used as an + array element type. + +* Blob (code 5): This field is emitted as a vbr6, followed by padding to a + 32-bit boundary (for alignment) and an array of 8-bit objects. The array of + bytes is further followed by tail padding to ensure that its total length is a + multiple of 4 bytes. This makes it very efficient for the reader to decode + the data without having to make a copy of it: it can use a pointer to the data + in the mapped in file and poke directly at it. A blob may only occur as the + last operand of an abbreviation. + +For example, target triples in LLVM modules are encoded as a record of the form +``[TRIPLE, 'a', 'b', 'c', 'd']``. Consider if the bitstream emitted the +following abbrev entry: + +:: + + [0, Fixed, 4] + [0, Array] + [0, Char6] + +When emitting a record with this abbreviation, the above entry would be emitted +as: + +:raw-html:`
<tt><blockquote>` +[4\ :sub:`abbrevwidth`, 2\ :sub:`4`, 4\ :sub:`vbr6`, 0\ :sub:`6`, 1\ :sub:`6`, 2\ :sub:`6`, 3\ :sub:`6`] +:raw-html:`</blockquote></tt>
` + +These values are: + +#. The first value, 4, is the abbreviation ID for this abbreviation. + +#. The second value, 2, is the record code for ``TRIPLE`` records within LLVM IR + file ``MODULE_BLOCK`` blocks. + +#. The third value, 4, is the length of the array. + +#. The rest of the values are the char6 encoded values for ``"abcd"``. + +With this abbreviation, the triple is emitted with only 37 bits (assuming an +abbrev id width of 3). Without the abbreviation, significantly more space would +be required to emit the target triple. Also, because the ``TRIPLE`` value is +not emitted as a literal in the abbreviation, the abbreviation can also be used +for any other string value. + +.. _standard blocks: +.. _standard block: + +Standard Blocks +--------------- + +In addition to the basic block structure and record encodings, the bitstream +also defines specific built-in block types. These block types specify how the +stream is to be decoded or other metadata. In the future, new standard blocks +may be added. Block IDs 0-7 are reserved for standard blocks. + +.. _BLOCKINFO: + +#0 - BLOCKINFO Block +^^^^^^^^^^^^^^^^^^^^ + +The ``BLOCKINFO`` block allows the description of metadata for other blocks. +The currently specified records are: + +:: + + [SETBID (#1), blockid] + [DEFINE_ABBREV, ...] + [BLOCKNAME, ...name...] + [SETRECORDNAME, RecordID, ...name...] + +The ``SETBID`` record (code 1) indicates which block ID is being described. +``SETBID`` records can occur multiple times throughout the block to change which +block ID is being described. There must be a ``SETBID`` record prior to any +other records. + +Standard ``DEFINE_ABBREV`` records can occur inside ``BLOCKINFO`` blocks, but +unlike their occurrence in normal blocks, the abbreviation is defined for blocks +matching the block ID we are describing, *not* the ``BLOCKINFO`` block +itself. The abbreviations defined in ``BLOCKINFO`` blocks receive abbreviation +IDs as described in `DEFINE_ABBREV`_. 
+ +The ``BLOCKNAME`` record (code 2) can optionally occur in this block. The +elements of the record are the bytes of the string name of the block. +llvm-bcanalyzer can use this to dump out bitcode files symbolically. + +The ``SETRECORDNAME`` record (code 3) can also optionally occur in this block. +The first operand value is a record ID number, and the rest of the elements of +the record are the bytes for the string name of the record. llvm-bcanalyzer can +use this to dump out bitcode files symbolically. + +Note that although the data in ``BLOCKINFO`` blocks is described as "metadata," +the abbreviations they contain are essential for parsing records from the +corresponding blocks. It is not safe to skip them. + +.. _wrapper: + +Bitcode Wrapper Format +====================== + +Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper +structure. This structure contains a simple header that indicates the offset +and size of the embedded BC file. This allows additional information to be +stored alongside the BC file. The structure of this file header is: + +:raw-html:`
<tt><blockquote>` +[Magic\ :sub:`32`, Version\ :sub:`32`, Offset\ :sub:`32`, Size\ :sub:`32`, CPUType\ :sub:`32`] +:raw-html:`</blockquote></tt>
` + +Each of the fields is a 32-bit field stored in little endian form (as with the +rest of the bitcode file fields). The Magic number is always ``0x0B17C0DE`` and +the version is currently always ``0``. The Offset field is the offset in bytes +to the start of the bitcode stream in the file, and the Size field is the size +in bytes of the stream. CPUType is a target-specific value that can be used to +encode the CPU of the target. + +.. _native object file: + +Native Object File Wrapper Format +================================= + +Bitcode files for LLVM IR may also be wrapped in a native object file +(i.e. ELF, COFF, Mach-O). The bitcode must be stored in a section of the object +file named ``__LLVM,__bitcode`` for Mach-O and ``.llvmbc`` for the other object +formats. This wrapper format is useful for accommodating LTO in compilation +pipelines where intermediate objects must be native object files which contain +metadata in other sections. + +Not all tools support this format. + +.. _encoding of LLVM IR: + +LLVM IR Encoding +================ + +LLVM IR is encoded into a bitstream by defining blocks and records. It uses +blocks for things like constant pools, functions, symbol tables, etc. It uses +records for things like instructions, global variable descriptors, type +descriptions, etc. This document does not describe the set of abbreviations +that the writer uses, as these are fully self-described in the file, and the +reader is not allowed to build in any knowledge of this. + +Basics +------ + +LLVM IR Magic Number +^^^^^^^^^^^^^^^^^^^^ + +The magic number for LLVM IR files is: + +:raw-html:`
<tt><blockquote>` +['B'\ :sub:`8`, 'C'\ :sub:`8`, 0x0\ :sub:`4`, 0xC\ :sub:`4`, 0xE\ :sub:`4`, 0xD\ :sub:`4`] +:raw-html:`</blockquote></tt>
` + +.. _Signed VBRs: + +Signed VBRs +^^^^^^^^^^^ + +`Variable Width Integer`_ encoding is an efficient way to encode arbitrary sized +unsigned values, but is extremely inefficient for encoding signed values, as +signed values are otherwise treated as maximally large unsigned values. + +As such, signed VBR values of a specific width are emitted as follows: + +* Positive values are emitted as VBRs of the specified width, but with their + value shifted left by one. + +* Negative values are emitted as VBRs of the specified width, but the negated + value is shifted left by one, and the low bit is set. + +With this encoding, small positive and small negative values can both be emitted +efficiently. Signed VBR encoding is used in ``CST_CODE_INTEGER`` and +``CST_CODE_WIDE_INTEGER`` records within ``CONSTANTS_BLOCK`` blocks. +It is also used for phi instruction operands in `MODULE_CODE_VERSION`_ 1. + +LLVM IR Blocks +^^^^^^^^^^^^^^ + +LLVM IR is defined with the following blocks: + +* 8 --- `MODULE_BLOCK`_ --- This is the top-level block that contains the entire + module, and describes a variety of per-module information. + +* 9 --- `PARAMATTR_BLOCK`_ --- This enumerates the parameter attributes. + +* 10 --- `PARAMATTR_GROUP_BLOCK`_ --- This describes the attribute group table. + +* 11 --- `CONSTANTS_BLOCK`_ --- This describes constants for a module or + function. + +* 12 --- `FUNCTION_BLOCK`_ --- This describes a function body. + +* 14 --- `VALUE_SYMTAB_BLOCK`_ --- This describes a value symbol table. + +* 15 --- `METADATA_BLOCK`_ --- This describes metadata items. + +* 16 --- `METADATA_ATTACHMENT`_ --- This contains records associating metadata + with function instruction values. + +* 17 --- `TYPE_BLOCK`_ --- This describes all of the types in the module. + +* 23 --- `STRTAB_BLOCK`_ --- The bitcode file's string table. + +.. 
_MODULE_BLOCK: + +MODULE_BLOCK Contents +--------------------- + +The ``MODULE_BLOCK`` block (id 8) is the top-level block for LLVM bitcode files, +and each bitcode file must contain exactly one. In addition to records +(described below) containing information about the module, a ``MODULE_BLOCK`` +block may contain the following sub-blocks: + +* `BLOCKINFO`_ +* `PARAMATTR_BLOCK`_ +* `PARAMATTR_GROUP_BLOCK`_ +* `TYPE_BLOCK`_ +* `VALUE_SYMTAB_BLOCK`_ +* `CONSTANTS_BLOCK`_ +* `FUNCTION_BLOCK`_ +* `METADATA_BLOCK`_ + +.. _MODULE_CODE_VERSION: + +MODULE_CODE_VERSION Record +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[VERSION, version#]`` + +The ``VERSION`` record (code 1) contains a single value indicating the format +version. Versions 0, 1 and 2 are supported at this time. The difference between +version 0 and 1 is in the encoding of instruction operands in +each `FUNCTION_BLOCK`_. + +In version 0, each value defined by an instruction is assigned an ID +unique to the function. Function-level value IDs are assigned starting from +``NumModuleValues`` since they share the same namespace as module-level +values. The value enumerator resets after each function. When a value is +an operand of an instruction, the value ID is used to represent the operand. +For large functions or large modules, these operand values can be large. + +The encoding in version 1 attempts to avoid large operand values +in common cases. Instead of using the value ID directly, operands are +encoded as relative to the current instruction. Thus, if an operand +is the value defined by the previous instruction, the operand +will be encoded as 1. + +For example, instead of + +.. code-block:: none + + #n = load #n-1 + #n+1 = icmp eq #n, #const0 + br #n+1, label #(bb1), label #(bb2) + +version 1 will encode the instructions as + +.. 
code-block:: none + + #n = load #1 + #n+1 = icmp eq #1, (#n+1)-#const0 + br #1, label #(bb1), label #(bb2) + +Note in the example that operands which are constants also use +the relative encoding, while operands like basic block labels +do not use the relative encoding. + +Forward references will result in a negative value. +This can be inefficient, as operands are normally encoded +as unsigned VBRs. However, forward references are rare, except in the +case of phi instructions. For phi instructions, operands are encoded as +`Signed VBRs`_ to deal with forward references. + +In version 2, the meaning of module records ``FUNCTION``, ``GLOBALVAR``, +``ALIAS``, ``IFUNC`` and ``COMDAT`` change such that the first two operands +specify an offset and size of a string in a string table (see `STRTAB_BLOCK +Contents`_), the function name is removed from the ``FNENTRY`` record in the +value symbol table, and the top-level ``VALUE_SYMTAB_BLOCK`` may only contain +``FNENTRY`` records. + +MODULE_CODE_TRIPLE Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[TRIPLE, ...string...]`` + +The ``TRIPLE`` record (code 2) contains a variable number of values representing +the bytes of the ``target triple`` specification string. + +MODULE_CODE_DATALAYOUT Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[DATALAYOUT, ...string...]`` + +The ``DATALAYOUT`` record (code 3) contains a variable number of values +representing the bytes of the ``target datalayout`` specification string. + +MODULE_CODE_ASM Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[ASM, ...string...]`` + +The ``ASM`` record (code 4) contains a variable number of values representing +the bytes of ``module asm`` strings, with individual assembly blocks separated +by newline (ASCII 10) characters. + +.. 
_MODULE_CODE_SECTIONNAME: + +MODULE_CODE_SECTIONNAME Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[SECTIONNAME, ...string...]`` + +The ``SECTIONNAME`` record (code 5) contains a variable number of values +representing the bytes of a single section name string. There should be one +``SECTIONNAME`` record for each section name referenced (e.g., in global +variable or function ``section`` attributes) within the module. These records +can be referenced by the 1-based index in the *section* fields of ``GLOBALVAR`` +or ``FUNCTION`` records. + +MODULE_CODE_DEPLIB Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[DEPLIB, ...string...]`` + +The ``DEPLIB`` record (code 6) contains a variable number of values representing +the bytes of a single dependent library name string, one of the libraries +mentioned in a ``deplibs`` declaration. There should be one ``DEPLIB`` record +for each library name referenced. + +MODULE_CODE_GLOBALVAR Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat, attributes, preemptionspecifier]`` + +The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a +global variable. The operand fields are: + +* *strtab offset*, *strtab size*: Specifies the name of the global variable. + See `STRTAB_BLOCK Contents`_. + +* *pointer type*: The type index of the pointer type used to point to this + global variable + +* *isconst*: Non-zero if the variable is treated as constant within the module, + or zero if it is not + +* *initid*: If non-zero, the value index of the initializer for this variable, + plus 1. + +.. 
_linkage type: + +* *linkage*: An encoding of the linkage type for this variable: + + * ``external``: code 0 + * ``weak``: code 1 + * ``appending``: code 2 + * ``internal``: code 3 + * ``linkonce``: code 4 + * ``dllimport``: code 5 + * ``dllexport``: code 6 + * ``extern_weak``: code 7 + * ``common``: code 8 + * ``private``: code 9 + * ``weak_odr``: code 10 + * ``linkonce_odr``: code 11 + * ``available_externally``: code 12 + * deprecated : code 13 + * deprecated : code 14 + +* *alignment*: The logarithm base 2 of the variable's requested alignment, plus 1 + +* *section*: If non-zero, the 1-based section index in the table of + `MODULE_CODE_SECTIONNAME`_ entries. + +.. _visibility: + +* *visibility*: If present, an encoding of the visibility of this variable: + + * ``default``: code 0 + * ``hidden``: code 1 + * ``protected``: code 2 + +.. _bcthreadlocal: + +* *threadlocal*: If present, an encoding of the thread local storage mode of the + variable: + + * ``not thread local``: code 0 + * ``thread local; default TLS model``: code 1 + * ``localdynamic``: code 2 + * ``initialexec``: code 3 + * ``localexec``: code 4 + +.. _bcunnamedaddr: + +* *unnamed_addr*: If present, an encoding of the ``unnamed_addr`` attribute of this + variable: + + * not ``unnamed_addr``: code 0 + * ``unnamed_addr``: code 1 + * ``local_unnamed_addr``: code 2 + +.. _bcdllstorageclass: + +* *dllstorageclass*: If present, an encoding of the DLL storage class of this variable: + + * ``default``: code 0 + * ``dllimport``: code 1 + * ``dllexport``: code 2 + +* *comdat*: An encoding of the COMDAT of this variable + +* *attributes*: If nonzero, the 1-based index into the table of AttributeLists. + +.. _bcpreemptionspecifier: + +* *preemptionspecifier*: If present, an encoding of the runtime preemption specifier of this variable: + + * ``dso_preemptable``: code 0 + * ``dso_local``: code 1 + +.. 
_FUNCTION: + +MODULE_CODE_FUNCTION Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn, preemptionspecifier]`` + +The ``FUNCTION`` record (code 8) marks the declaration or definition of a +function. The operand fields are: + +* *strtab offset*, *strtab size*: Specifies the name of the function. + See `STRTAB_BLOCK Contents`_. + +* *type*: The type index of the function type describing this function + +* *callingconv*: The calling convention number: + * ``ccc``: code 0 + * ``fastcc``: code 8 + * ``coldcc``: code 9 + * ``webkit_jscc``: code 12 + * ``anyregcc``: code 13 + * ``preserve_mostcc``: code 14 + * ``preserve_allcc``: code 15 + * ``swiftcc`` : code 16 + * ``cxx_fast_tlscc``: code 17 + * ``tailcc`` : code 18 + * ``cfguard_checkcc`` : code 19 + * ``swifttailcc`` : code 20 + * ``x86_stdcallcc``: code 64 + * ``x86_fastcallcc``: code 65 + * ``arm_apcscc``: code 66 + * ``arm_aapcscc``: code 67 + * ``arm_aapcs_vfpcc``: code 68 + +* *isproto*: Non-zero if this entry represents a declaration rather than a + definition + +* *linkage*: An encoding of the `linkage type`_ for this function + +* *paramattr*: If nonzero, the 1-based parameter attribute index into the table + of `PARAMATTR_CODE_ENTRY`_ entries. + +* *alignment*: The logarithm base 2 of the function's requested alignment, plus + 1 + +* *section*: If non-zero, the 1-based section index in the table of + `MODULE_CODE_SECTIONNAME`_ entries. + +* *visibility*: An encoding of the `visibility`_ of this function + +* *gc*: If present and nonzero, the 1-based garbage collector index in the table + of `MODULE_CODE_GCNAME`_ entries. + +* *unnamed_addr*: If present, an encoding of the + :ref:`unnamed_addr <bcunnamedaddr>` attribute of this function + +* *prologuedata*: If non-zero, the value index of the prologue data for this function, + plus 1. 
+ +* *dllstorageclass*: An encoding of the + :ref:`dllstorageclass <bcdllstorageclass>` of this function + +* *comdat*: An encoding of the COMDAT of this function + +* *prefixdata*: If non-zero, the value index of the prefix data for this function, + plus 1. + +* *personalityfn*: If non-zero, the value index of the personality function for this function, + plus 1. + +* *preemptionspecifier*: If present, an encoding of the :ref:`runtime preemption specifier <bcpreemptionspecifier>` of this function. + +MODULE_CODE_ALIAS Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr, preemptionspecifier]`` + +The ``ALIAS`` record (code 9) marks the definition of an alias. The operand +fields are: + +* *strtab offset*, *strtab size*: Specifies the name of the alias. + See `STRTAB_BLOCK Contents`_. + +* *alias type*: The type index of the alias + +* *aliasee val#*: The value index of the aliased value + +* *linkage*: An encoding of the `linkage type`_ for this alias + +* *visibility*: If present, an encoding of the `visibility`_ of the alias + +* *dllstorageclass*: If present, an encoding of the + :ref:`dllstorageclass <bcdllstorageclass>` of the alias + +* *threadlocal*: If present, an encoding of the + :ref:`thread local property <bcthreadlocal>` of the alias + +* *unnamed_addr*: If present, an encoding of the + :ref:`unnamed_addr <bcunnamedaddr>` attribute of this alias + +* *preemptionspecifier*: If present, an encoding of the :ref:`runtime preemption specifier <bcpreemptionspecifier>` of this alias. + +.. _MODULE_CODE_GCNAME: + +MODULE_CODE_GCNAME Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[GCNAME, ...string...]`` + +The ``GCNAME`` record (code 11) contains a variable number of values +representing the bytes of a single garbage collector name string. There should +be one ``GCNAME`` record for each garbage collector name referenced in function +``gc`` attributes within the module. These records can be referenced by 1-based +index in the *gc* fields of ``FUNCTION`` records. + +.. 
_PARAMATTR_BLOCK: + +PARAMATTR_BLOCK Contents +------------------------ + +The ``PARAMATTR_BLOCK`` block (id 9) contains a table of entries describing the +attributes of function parameters. These entries are referenced by 1-based index +in the *paramattr* field of module block `FUNCTION`_ records, or within the +*attr* field of function block ``INST_INVOKE`` and ``INST_CALL`` records. + +Entries within ``PARAMATTR_BLOCK`` are constructed to ensure that each is unique +(i.e., no two indices represent equivalent attribute lists). + +.. _PARAMATTR_CODE_ENTRY: + +PARAMATTR_CODE_ENTRY Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[ENTRY, attrgrp0, attrgrp1, ...]`` + +The ``ENTRY`` record (code 2) contains a variable number of values describing a +unique set of function parameter attributes. Each *attrgrp* value is used as a +key with which to look up an entry in the attribute group table described +in the ``PARAMATTR_GROUP_BLOCK`` block. + +.. _PARAMATTR_CODE_ENTRY_OLD: + +PARAMATTR_CODE_ENTRY_OLD Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + This is a legacy encoding for attributes, produced by LLVM versions 3.2 and + earlier. It is guaranteed to be understood by the current LLVM version, as + specified in the :ref:`IR backwards compatibility` policy. + +``[ENTRY, paramidx0, attr0, paramidx1, attr1...]`` + +The ``ENTRY`` record (code 1) contains an even number of values describing a +unique set of function parameter attributes. Each *paramidx* value indicates +which set of attributes is represented, with 0 representing the return value +attributes, 0xFFFFFFFF representing function attributes, and other values +representing 1-based function parameters. 
Each *attr* value is a bitmap with the +following interpretation: + +* bit 0: ``zeroext`` +* bit 1: ``signext`` +* bit 2: ``noreturn`` +* bit 3: ``inreg`` +* bit 4: ``sret`` +* bit 5: ``nounwind`` +* bit 6: ``noalias`` +* bit 7: ``byval`` +* bit 8: ``nest`` +* bit 9: ``readnone`` +* bit 10: ``readonly`` +* bit 11: ``noinline`` +* bit 12: ``alwaysinline`` +* bit 13: ``optsize`` +* bit 14: ``ssp`` +* bit 15: ``sspreq`` +* bits 16-31: ``align n`` +* bit 32: ``nocapture`` +* bit 33: ``noredzone`` +* bit 34: ``noimplicitfloat`` +* bit 35: ``naked`` +* bit 36: ``inlinehint`` +* bits 37-39: ``alignstack n``, represented as the logarithm + base 2 of the requested alignment, plus 1 + +.. _PARAMATTR_GROUP_BLOCK: + +PARAMATTR_GROUP_BLOCK Contents +------------------------------ + +The ``PARAMATTR_GROUP_BLOCK`` block (id 10) contains a table of entries +describing the attribute groups present in the module. These entries can be +referenced within ``PARAMATTR_CODE_ENTRY`` entries. + +.. _PARAMATTR_GRP_CODE_ENTRY: + +PARAMATTR_GRP_CODE_ENTRY Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[ENTRY, grpid, paramidx, attr0, attr1, ...]`` + +The ``ENTRY`` record (code 3) contains *grpid* and *paramidx* values, followed +by a variable number of values describing a unique group of attributes. The +*grpid* value is a unique key for the attribute group, which can be referenced +within ``PARAMATTR_CODE_ENTRY`` entries. The *paramidx* value indicates which +set of attributes is represented, with 0 representing the return value +attributes, 0xFFFFFFFF representing function attributes, and other values +representing 1-based function parameters. + +Each *attr* is itself represented as a variable number of values: + +``kind, key [, ...], [value [, ...]]`` + +Each attribute is either a well-known LLVM attribute (possibly with an integer +value associated with it), or an arbitrary string (possibly with an arbitrary +string value associated with it). 
The *kind* value is an integer code +distinguishing between these possibilities: + +* code 0: well-known attribute +* code 1: well-known attribute with an integer value +* code 3: string attribute +* code 4: string attribute with a string value + +For well-known attributes (code 0 or 1), the *key* value is an integer code +identifying the attribute. For attributes with an integer argument (code 1), +the *value* value indicates the argument. + +For string attributes (code 3 or 4), the *key* value is actually a variable +number of values representing the bytes of a null-terminated string. For +attributes with a string argument (code 4), the *value* value is similarly a +variable number of values representing the bytes of a null-terminated string. + +The integer codes are mapped to well-known attributes as follows. + +* code 1: ``align()`` +* code 2: ``alwaysinline`` +* code 3: ``byval`` +* code 4: ``inlinehint`` +* code 5: ``inreg`` +* code 6: ``minsize`` +* code 7: ``naked`` +* code 8: ``nest`` +* code 9: ``noalias`` +* code 10: ``nobuiltin`` +* code 11: ``nocapture`` +* code 12: ``nodeduplicate`` +* code 13: ``noimplicitfloat`` +* code 14: ``noinline`` +* code 15: ``nonlazybind`` +* code 16: ``noredzone`` +* code 17: ``noreturn`` +* code 18: ``nounwind`` +* code 19: ``optsize`` +* code 20: ``readnone`` +* code 21: ``readonly`` +* code 22: ``returned`` +* code 23: ``returns_twice`` +* code 24: ``signext`` +* code 25: ``alignstack()`` +* code 26: ``ssp`` +* code 27: ``sspreq`` +* code 28: ``sspstrong`` +* code 29: ``sret`` +* code 30: ``sanitize_address`` +* code 31: ``sanitize_thread`` +* code 32: ``sanitize_memory`` +* code 33: ``uwtable`` +* code 34: ``zeroext`` +* code 35: ``builtin`` +* code 36: ``cold`` +* code 37: ``optnone`` +* code 38: ``inalloca`` +* code 39: ``nonnull`` +* code 40: ``jumptable`` +* code 41: ``dereferenceable()`` +* code 42: ``dereferenceable_or_null()`` +* code 43: ``convergent`` +* code 44: ``safestack`` +* code 45: ``argmemonly`` +* code 
46: ``swiftself`` +* code 47: ``swifterror`` +* code 48: ``norecurse`` +* code 49: ``inaccessiblememonly`` +* code 50: ``inaccessiblememonly_or_argmemonly`` +* code 51: ``allocsize([, ])`` +* code 52: ``writeonly`` +* code 53: ``speculatable`` +* code 54: ``strictfp`` +* code 55: ``sanitize_hwaddress`` +* code 56: ``nocf_check`` +* code 57: ``optforfuzzing`` +* code 58: ``shadowcallstack`` +* code 59: ``speculative_load_hardening`` +* code 60: ``immarg`` +* code 61: ``willreturn`` +* code 62: ``nofree`` +* code 63: ``nosync`` +* code 64: ``sanitize_memtag`` +* code 65: ``preallocated`` +* code 66: ``no_merge`` +* code 67: ``null_pointer_is_valid`` +* code 68: ``noundef`` +* code 69: ``byref`` +* code 70: ``mustprogress`` +* code 74: ``vscale_range([, ])`` +* code 75: ``swiftasync`` +* code 76: ``nosanitize_coverage`` +* code 77: ``elementtype`` +* code 78: ``disable_sanitizer_instrumentation`` + +.. note:: + The ``allocsize`` attribute has a special encoding for its arguments. Its two + arguments, which are 32-bit integers, are packed into one 64-bit integer value + (i.e. ``(EltSizeParam << 32) | NumEltsParam``), with ``NumEltsParam`` taking on + the sentinel value -1 if it is not specified. + +.. note:: + The ``vscale_range`` attribute has a special encoding for its arguments. Its two + arguments, which are 32-bit integers, are packed into one 64-bit integer value + (i.e. ``(Min << 32) | Max``), with ``Max`` taking on the value of ``Min`` if + it is not specified. + +.. _TYPE_BLOCK: + +TYPE_BLOCK Contents +------------------- + +The ``TYPE_BLOCK`` block (id 17) contains records which constitute a table of +type operator entries used to represent types referenced within an LLVM +module. Each record (with the exception of `NUMENTRY`_) generates a single type +table entry, which may be referenced by 0-based index from instructions, +constants, metadata, type symbol table entries, or other type operator records. 
+ +Entries within ``TYPE_BLOCK`` are constructed to ensure that each entry is +unique (i.e., no two indices represent structurally equivalent types). + +.. _TYPE_CODE_NUMENTRY: +.. _NUMENTRY: + +TYPE_CODE_NUMENTRY Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[NUMENTRY, numentries]`` + +The ``NUMENTRY`` record (code 1) contains a single value which indicates the +total number of type code entries in the type table of the module. If present, +``NUMENTRY`` should be the first record in the block. + +TYPE_CODE_VOID Record +^^^^^^^^^^^^^^^^^^^^^ + +``[VOID]`` + +The ``VOID`` record (code 2) adds a ``void`` type to the type table. + +TYPE_CODE_HALF Record +^^^^^^^^^^^^^^^^^^^^^ + +``[HALF]`` + +The ``HALF`` record (code 10) adds a ``half`` (16-bit floating point) type to +the type table. + +TYPE_CODE_BFLOAT Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[BFLOAT]`` + +The ``BFLOAT`` record (code 23) adds a ``bfloat`` (16-bit brain floating point) +type to the type table. + +TYPE_CODE_FLOAT Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[FLOAT]`` + +The ``FLOAT`` record (code 3) adds a ``float`` (32-bit floating point) type to +the type table. + +TYPE_CODE_DOUBLE Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[DOUBLE]`` + +The ``DOUBLE`` record (code 4) adds a ``double`` (64-bit floating point) type to +the type table. + +TYPE_CODE_LABEL Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[LABEL]`` + +The ``LABEL`` record (code 5) adds a ``label`` type to the type table. + +TYPE_CODE_OPAQUE Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[OPAQUE]`` + +The ``OPAQUE`` record (code 6) adds an ``opaque`` type to the type table, with +a name defined by a previously encountered ``STRUCT_NAME`` record. Note that +distinct ``opaque`` types are not unified. + +TYPE_CODE_INTEGER Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[INTEGER, width]`` + +The ``INTEGER`` record (code 7) adds an integer type to the type table. The +single *width* field indicates the width of the integer type. 
+ +TYPE_CODE_POINTER Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[POINTER, pointee type, address space]`` + +The ``POINTER`` record (code 8) adds a pointer type to the type table. The +operand fields are + +* *pointee type*: The type index of the pointed-to type + +* *address space*: If supplied, the target-specific numbered address space where + the pointed-to object resides. Otherwise, the default address space is zero. + +TYPE_CODE_FUNCTION_OLD Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + This is a legacy encoding for functions, produced by LLVM versions 3.0 and + earlier. It is guaranteed to be understood by the current LLVM version, as + specified in the :ref:`IR backwards compatibility` policy. + +``[FUNCTION_OLD, vararg, ignored, retty, ...paramty... ]`` + +The ``FUNCTION_OLD`` record (code 9) adds a function type to the type table. +The operand fields are + +* *vararg*: Non-zero if the type represents a varargs function + +* *ignored*: This value field is present for backward compatibility only, and is + ignored + +* *retty*: The type index of the function's return type + +* *paramty*: Zero or more type indices representing the parameter types of the + function + +TYPE_CODE_ARRAY Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[ARRAY, numelts, eltty]`` + +The ``ARRAY`` record (code 11) adds an array type to the type table. The +operand fields are + +* *numelts*: The number of elements in arrays of this type + +* *eltty*: The type index of the array element type + +TYPE_CODE_VECTOR Record +^^^^^^^^^^^^^^^^^^^^^^^ + +``[VECTOR, numelts, eltty]`` + +The ``VECTOR`` record (code 12) adds a vector type to the type table. The +operand fields are + +* *numelts*: The number of elements in vectors of this type + +* *eltty*: The type index of the vector element type + +TYPE_CODE_X86_FP80 Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[X86_FP80]`` + +The ``X86_FP80`` record (code 13) adds an ``x86_fp80`` (80-bit floating point) +type to the type table. 
+ +TYPE_CODE_FP128 Record +^^^^^^^^^^^^^^^^^^^^^^ + +``[FP128]`` + +The ``FP128`` record (code 14) adds an ``fp128`` (128-bit floating point) type +to the type table. + +TYPE_CODE_PPC_FP128 Record +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[PPC_FP128]`` + +The ``PPC_FP128`` record (code 15) adds a ``ppc_fp128`` (128-bit floating point) +type to the type table. + +TYPE_CODE_METADATA Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[METADATA]`` + +The ``METADATA`` record (code 16) adds a ``metadata`` type to the type table. + +TYPE_CODE_X86_MMX Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[X86_MMX]`` + +The ``X86_MMX`` record (code 17) adds an ``x86_mmx`` type to the type table. + +TYPE_CODE_STRUCT_ANON Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[STRUCT_ANON, ispacked, ...eltty...]`` + +The ``STRUCT_ANON`` record (code 18) adds a literal struct type to the type +table. The operand fields are + +* *ispacked*: Non-zero if the type represents a packed structure + +* *eltty*: Zero or more type indices representing the element types of the + structure + +TYPE_CODE_STRUCT_NAME Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[STRUCT_NAME, ...string...]`` + +The ``STRUCT_NAME`` record (code 19) contains a variable number of values +representing the bytes of a struct name. The next ``OPAQUE`` or +``STRUCT_NAMED`` record will use this name. + +TYPE_CODE_STRUCT_NAMED Record +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[STRUCT_NAMED, ispacked, ...eltty...]`` + +The ``STRUCT_NAMED`` record (code 20) adds an identified struct type to the +type table, with a name defined by a previously encountered ``STRUCT_NAME`` +record. The operand fields are + +* *ispacked*: Non-zero if the type represents a packed structure + +* *eltty*: Zero or more type indices representing the element types of the + structure + +TYPE_CODE_FUNCTION Record +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``[FUNCTION, vararg, retty, ...paramty... ]`` + +The ``FUNCTION`` record (code 21) adds a function type to the type table. 
The +operand fields are + +* *vararg*: Non-zero if the type represents a varargs function + +* *retty*: The type index of the function's return type + +* *paramty*: Zero or more type indices representing the parameter types of the + function + +TYPE_CODE_X86_AMX Record +^^^^^^^^^^^^^^^^^^^^^^^^ + +``[X86_AMX]`` + +The ``X86_AMX`` record (code 24) adds an ``x86_amx`` type to the type table. + +.. _CONSTANTS_BLOCK: + +CONSTANTS_BLOCK Contents +------------------------ + +The ``CONSTANTS_BLOCK`` block (id 11) ... + +.. _FUNCTION_BLOCK: + +FUNCTION_BLOCK Contents +----------------------- + +The ``FUNCTION_BLOCK`` block (id 12) ... + +In addition to the record types described below, a ``FUNCTION_BLOCK`` block may +contain the following sub-blocks: + +* `CONSTANTS_BLOCK`_ +* `VALUE_SYMTAB_BLOCK`_ +* `METADATA_ATTACHMENT`_ + +.. _VALUE_SYMTAB_BLOCK: + +VALUE_SYMTAB_BLOCK Contents +--------------------------- + +The ``VALUE_SYMTAB_BLOCK`` block (id 14) ... + +.. _METADATA_BLOCK: + +METADATA_BLOCK Contents +----------------------- + +The ``METADATA_BLOCK`` block (id 15) ... + +.. _METADATA_ATTACHMENT: + +METADATA_ATTACHMENT Contents +---------------------------- + +The ``METADATA_ATTACHMENT`` block (id 16) ... + +.. _STRTAB_BLOCK: + +STRTAB_BLOCK Contents +--------------------- + +The ``STRTAB`` block (id 23) contains a single record (``STRTAB_BLOB``, id 1) +with a single blob operand containing the bitcode file's string table. + +Strings in the string table are not null terminated. A record's *strtab +offset* and *strtab size* operands specify the byte offset and size of a +string within the string table. + +The string table is used by all preceding blocks in the bitcode file that are +not succeeded by another intervening ``STRTAB`` block. Normally a bitcode +file will have a single string table, but it may have more than one if it +was created by binary concatenation of multiple bitcode files. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BlockFrequencyTerminology.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BlockFrequencyTerminology.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BlockFrequencyTerminology.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BlockFrequencyTerminology.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,130 @@ +================================ +LLVM Block Frequency Terminology +================================ + +.. contents:: + :local: + +Introduction +============ + +Block Frequency is a metric for estimating the relative frequency of different +basic blocks. This document describes the terminology that the +``BlockFrequencyInfo`` and ``MachineBlockFrequencyInfo`` analysis passes use. + +Branch Probability +================== + +Blocks with multiple successors have probabilities associated with each +outgoing edge. These are called branch probabilities. For a given block, the +sum of its outgoing branch probabilities should be 1.0. + +Branch Weight +============= + +Rather than storing fractions on each edge, we store an integer weight. +Weights are relative to the other edges of a given predecessor block. The +branch probability associated with a given edge is its own weight divided by +the sum of the weights on the predecessor's outgoing edges. + +For example, consider this IR: + +.. code-block:: llvm + + define void @foo() { + ; ... + A: + br i1 %cond, label %B, label %C, !prof !0 + ; ... + } + !0 = metadata !{metadata !"branch_weights", i32 7, i32 8} + +and this simple graph representation:: + + A -> B (edge-weight: 7) + A -> C (edge-weight: 8) + +The probability of branching from block A to block B is 7/15, and the +probability of branching from block A to block C is 8/15. + +See :doc:`BranchWeightMetadata` for details about the branch weight IR +representation. 
+ +Block Frequency +=============== + +Block frequency is a relative metric that represents the number of times a +block executes. The ratio of a block frequency to the entry block frequency is +the expected number of times the block will execute per entry to the function. + +Block frequency is the main output of the ``BlockFrequencyInfo`` and +``MachineBlockFrequencyInfo`` analysis passes. + +Implementation: a series of DAGs +================================ + +The implementation of the block frequency calculation analyses each loop, +bottom-up, ignoring backedges; i.e., as a DAG. After each loop is processed, +it's packaged up to act as a pseudo-node in its parent loop's (or the +function's) DAG analysis. + +Block Mass +========== + +For each DAG, the entry node is assigned a mass of ``UINT64_MAX`` and mass is +distributed to successors according to branch weights. Block Mass uses a +fixed-point representation where ``UINT64_MAX`` represents ``1.0`` and ``0`` +represents a number just above ``0.0``. + +After mass is fully distributed, in any cut of the DAG that separates the exit +nodes from the entry node, the sum of the block masses of the nodes succeeded +by a cut edge should equal ``UINT64_MAX``. In other words, mass is conserved +as it "falls" through the DAG. + +If a function's basic block graph is a DAG, then block masses are valid block +frequencies. This works poorly in practice though, since downstream users rely +on adding block frequencies together without hitting the maximum. + +Loop Scale +========== + +Loop scale is a metric that indicates how many times a loop iterates per entry. +As mass is distributed through the loop's DAG, the (otherwise ignored) backedge +mass is collected. This backedge mass is used to compute the exit frequency, +and thus the loop scale. 
+ +Implementation: Getting from mass and scale to frequency +======================================================== + +After analysing the complete series of DAGs, each block has a mass (local to +its containing loop, if any), and each loop pseudo-node has a loop scale and +its own mass (from its parent's DAG). + +We can get an initial frequency assignment (with entry frequency of 1.0) by +multiplying these masses and loop scales together. A given block's frequency +is the product of its mass, the mass of containing loops' pseudo nodes, and the +containing loops' loop scales. + +Since downstream users need integers (not floating point), this initial +frequency assignment is shifted as necessary into the range of ``uint64_t``. + +Block Bias +========== + +Block bias is a proposed *absolute* metric to indicate a bias toward or away +from a given block during a function's execution. The idea is that bias can be +used in isolation to indicate whether a block is relatively hot or cold, or to +compare two blocks to indicate whether one is hotter or colder than the other. + +The proposed calculation involves calculating a *reference* block frequency, +where: + +* every branch weight is assumed to be 1 (i.e., every branch probability + distribution is even) and + +* loop scales are ignored. + +This reference frequency represents what the block frequency would be in an +unbiased graph. + +The bias is the ratio of the block frequency to this reference block frequency. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BranchWeightMetadata.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BranchWeightMetadata.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BranchWeightMetadata.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BranchWeightMetadata.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,226 @@ +=========================== +LLVM Branch Weight Metadata +=========================== + +.. contents:: + :local: + +Introduction +============ + +Branch Weight Metadata represents each branch weight as the likelihood of the branch being taken +(see :doc:`BlockFrequencyTerminology`). Metadata is assigned to an +``Instruction`` that is a terminator as a ``MDNode`` of the ``MD_prof`` kind. +The first operand is always a ``MDString`` node with the string +"branch_weights". The number of operands depends on the terminator type. + +Branch weights might be fetched from the profiling file, or generated based on +`__builtin_expect`_ and `__builtin_expect_with_probability`_ instructions. + +All weights are represented as unsigned 32-bit values, where a higher value +indicates a greater chance of being taken. + +Supported Instructions +====================== + +``BranchInst`` +^^^^^^^^^^^^^^ + +Metadata is only assigned to the conditional branches. There are two extra +operands for the true and the false branch. + +.. code-block:: none + + !0 = metadata !{ + metadata !"branch_weights", + i32 <TRUE_BRANCH_WEIGHT>, + i32 <FALSE_BRANCH_WEIGHT> + } + +``SwitchInst`` +^^^^^^^^^^^^^^ + +Branch weights are assigned to every case (including the ``default`` case which +is always case #0). + +.. code-block:: none + + !0 = metadata !{ + metadata !"branch_weights", + i32 <DEFAULT_BRANCH_WEIGHT> + [ , i32 <CASE_BRANCH_WEIGHT> ... ] + } + +``IndirectBrInst`` +^^^^^^^^^^^^^^^^^^ + +Branch weights are assigned to every destination. + +.. code-block:: none + + !0 = metadata !{ + metadata !"branch_weights", + i32 <LABEL_BRANCH_WEIGHT> + [ , i32 <LABEL_BRANCH_WEIGHT> ... 
] + } + +``CallInst`` +^^^^^^^^^^^^^^^^^^ + +Calls may have branch weight metadata, containing the execution count of +the call. It is currently used in SamplePGO mode only, to augment the +block and entry counts which may not be accurate with sampling. + +.. code-block:: none + + !0 = metadata !{ + metadata !"branch_weights", + i32 <CALL_BRANCH_WEIGHT> + } + +``InvokeInst`` +^^^^^^^^^^^^^^^^^^ + +Invoke instruction may have branch weight metadata with one or two weights. +The second weight is optional and corresponds to the unwind branch. +If only one weight is set then it contains the execution count of the call +and is used in SamplePGO mode only as described for the call instruction. If both +weights are specified then the second weight contains the count of unwind branches +taken and the first weight contains the execution count of the call minus +the count of unwind branches taken. Both weights specified are used to calculate +BranchProbability as for BranchInst and for SamplePGO the sum of both weights +is used. + +.. code-block:: none + + !0 = metadata !{ + metadata !"branch_weights", + i32 <INVOKE_NORMAL_WEIGHT> + [ , i32 <INVOKE_UNWIND_WEIGHT> ] + } + +Other +^^^^^ + +Other terminator instructions are not allowed to contain Branch Weight Metadata. + +.. _\__builtin_expect: + +Built-in ``expect`` Instructions +================================ + +``__builtin_expect(long exp, long c)`` instruction provides branch prediction +information. The return value is the value of ``exp``. + +It is especially useful in conditional statements. Currently Clang supports two +conditional statements: + +``if`` statement +^^^^^^^^^^^^^^^^ + +The ``exp`` parameter is the condition. The ``c`` parameter is the expected +comparison value. If it is equal to 1 (true), the condition is likely to be +true; otherwise the condition is likely to be false. For example: + +.. code-block:: c++ + + if (__builtin_expect(x > 0, 1)) { + // This block is likely to be taken. + } + +``switch`` statement +^^^^^^^^^^^^^^^^^^^^ + +The ``exp`` parameter is the value. 
The ``c`` parameter is the expected +value. If the expected value doesn't show on the cases list, the ``default`` +case is assumed to be likely taken. + +.. code-block:: c++ + + switch (__builtin_expect(x, 5)) { + default: break; + case 0: // ... + case 3: // ... + case 5: // This case is likely to be taken. + } + +.. _\__builtin_expect_with_probability: + +Built-in ``expect.with.probability`` Instruction +================================================ + +``__builtin_expect_with_probability(long exp, long c, double probability)`` has +the same semantics as ``__builtin_expect``, but the caller provides the +probability that ``exp == c``. The last argument ``probability`` must be +a constant floating-point expression and be in the range [0.0, 1.0] inclusive. +The usage is also similar to that of ``__builtin_expect``, for example: + +``if`` statement +^^^^^^^^^^^^^^^^ + +If the expected comparison value ``c`` is equal to 1 (true), and probability +value ``probability`` is set to 0.8, that means the probability of condition +to be true is 80% while that of false is 20%. + +.. code-block:: c++ + + if (__builtin_expect_with_probability(x > 0, 1, 0.8)) { + // This block is likely to be taken with probability 80%. + } + +``switch`` statement +^^^^^^^^^^^^^^^^^^^^ + +This is basically the same as ``switch`` statement in ``__builtin_expect``. +The probability that ``exp`` is equal to the expected value is given in +the third argument ``probability``, while the probability of each other value is +the average of the remaining probability (``1.0 - probability``). For example: + +.. code-block:: c++ + + switch (__builtin_expect_with_probability(x, 5, 0.7)) { + default: break; // Take this case with probability 10% + case 0: break; // Take this case with probability 10% + case 3: break; // Take this case with probability 10% + case 5: break; // This case is likely to be taken with probability 70% + } + +CFG Modifications +================= + +Branch Weight Metadata is not proof against CFG changes. 
If a terminator's operands +are changed, some action should be taken. Otherwise some misoptimizations may +occur due to incorrect branch prediction information. + +Function Entry Counts +===================== + +To allow comparing different functions during inter-procedural analysis and +optimization, ``MD_prof`` nodes can also be assigned to a function definition. +The first operand is a string indicating the name of the associated counter. + +Currently, one counter is supported: "function_entry_count". The second operand +is a 64-bit counter that indicates the number of times that this function was +invoked (in the case of instrumentation-based profiles). In the case of +sampling-based profiles, this operand is an approximation of how many times +the function was invoked. + +For example, in the code below, the instrumentation for function foo() +indicates that it was called 2,590 times at runtime. + +.. code-block:: llvm + + define i32 @foo() !prof !1 { + ret i32 0 + } + !1 = !{!"function_entry_count", i64 2590} + +If "function_entry_count" has more than 2 operands, the later operands are +the GUIDs of the functions that need to be imported by ThinLTO. This is only +set by sampling based profile. It is needed because the sampling based profile +was collected on a binary that had already imported and inlined these functions, +and we need to ensure the IR matches in the ThinLTO backends for profile +annotation. The reason why we cannot annotate this on the callsite is that it +can only go down 1 level in the call chain. For the cases where +foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels +in the call chain to import both bar_in_b_cc and baz_in_c_cc. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BugLifeCycle.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BugLifeCycle.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BugLifeCycle.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BugLifeCycle.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,140 @@ +=================== +LLVM Bug Life Cycle +=================== + +.. contents:: + :local: + + + +Introduction - Achieving consistency in how we deal with bug reports +==================================================================== + +We aim to achieve a basic level of consistency in how reported bugs evolve from +being reported, to being worked on, and finally getting closed out. The +consistency helps reporters, developers and others to gain a better +understanding of what a particular bug state actually means and what to expect +might happen next. + +At the same time, we aim to not over-specify the life cycle of bugs in the +`the LLVM Bug Tracking System `_, as the +overall goal is to make it easier to work with and understand the bug reports. + +The main parts of the life cycle documented here are: + +#. `Reporting`_ +#. `Triaging`_ +#. `Actively working on fixing`_ +#. `Closing`_ + +Furthermore, some of the metadata in the bug tracker, such as who to notify on +newly reported bugs or what the breakdown into products & components is we use, +needs to be maintained. See the following for details: + +#. `Maintenance of Bug products/component metadata`_ +#. `Maintenance of cc-by-default settings`_ + + +.. _Reporting: + +Reporting bugs +============== + +See :doc:`HowToSubmitABug` on further details on how to submit good bug reports. + +Make sure that you have one or more people on cc on the bug report that you +think will react to it. 
We aim to automatically add specific people on cc for +most products/components, but may not always succeed in doing so. + +If you know the area of LLVM code the root cause of the bug is in, good +candidates to add as cc may be the same people you'd ask for a code review in +that area. See :ref:`finding-potential-reviewers` for more details. + + +.. _Triaging: + +Triaging bugs +============= + +Bugs with status NEW indicate that they still need to be triaged. +When triage is complete, the status of the bug is moved to CONFIRMED. + +The goal of triaging a bug is to make sure a newly reported bug ends up in a +good, actionable, state. Try to answer the following questions while triaging. + +* Is the reported behavior actually wrong? + + * E.g. does a miscompile example depend on undefined behavior? + +* Can you easily reproduce the bug? + + * If not, are there reasonable excuses why it cannot easily be reproduced? + +* Is it related to an already reported bug? + + * Use the "See also"/"depends on"/"blocks" fields if so. + * Close it as a duplicate if so, pointing to the issue it duplicates. + +* Are the following fields filled in correctly? + + * Product + * Component + * Title + +* CC others not already cc’ed that you happen to know would be good to pull in. +* Add the "beginner" keyword if you think this would be a good bug to be fixed + by someone new to LLVM. + +.. _Actively working on fixing: + +Actively working on fixing bugs +=============================== + +Please remember to assign the bug to yourself if you're actively working on +fixing it and to unassign it when you're no longer actively working on it. You +unassign a bug by setting the Assignee field to "unassignedbugs@nondot.org". + +.. _Closing: + +Resolving/Closing bugs +====================== + +For simplicity, we only have 1 status for all resolved or closed bugs: +RESOLVED. + +Resolving bugs is good! Make sure to properly record the reason for resolving. 
+Examples of reasons for resolving are: + +* Revision NNNNNN fixed the bug. +* The bug cannot be reproduced with revision NNNNNN. +* The circumstances for the bug don't apply anymore. +* There is a sound reason for not fixing it (WONTFIX). +* There is a specific and plausible reason to think that a given bug is + otherwise inapplicable or obsolete. + + * One example is an old open bug that doesn't contain enough information to + clearly understand the problem being reported (e.g. not reproducible). It is + fine to resolve such a bug e.g. with resolution WORKSFORME and leaving a + comment to encourage the reporter to reopen the bug with more information + if it's still reproducible on their end. + +If a bug is resolved, please fill in the revision number it was fixed in in the +"Fixed by Commit(s)" field. + + +.. _Maintenance of Bug products/component metadata: + +Maintenance of products/components metadata +=========================================== + +Please raise a bug against "Bugzilla Admin"/"Products" to request any changes +to be made to the breakdown of products & components modeled in Bugzilla. + + +.. _Maintenance of cc-by-default settings: + +Maintenance of cc-by-default settings +===================================== + +Please raise a bug against "Bugzilla Admin"/"Products" to request any changes +to be made to the cc-by-default settings for specific components. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BugpointRedesign.md.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BugpointRedesign.md.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BugpointRedesign.md.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BugpointRedesign.md.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,102 @@ +# Bugpoint Redesign +Author: Diego Treviño (diegotf@google.com) + +Date: 2019-06-05 + +Status: Draft + + +## Introduction +As use of bugpoint has grown several areas of improvement have been identified +through years of use: confusing to use, slow, it doesn’t always produce high +quality test cases, etc. This document proposes a new approach with a narrower +focus: minimization of IR test cases. + + +## Proposed New Design + + +### Narrow focus: test-case reduction +The main focus will be a code reduction strategy to obtain much smaller test +cases that still have the same property as the original one. This will be done +via classic delta debugging and by adding some IR-specific reductions (e.g. +replacing globals, removing unused instructions, etc), similar to what +already exists, but with more in-depth minimization. + + +Granted, if the community differs on this proposal, the legacy code could still +be present in the tool, but with the caveat of still being documented and +designed towards delta reduction. + + +### Command-Line Options +We are proposing to reduce the plethora of bugpoint’s options to just two: an +interesting-ness test and the arguments for said test, similar to other delta +reduction tools such as CReduce, Delta, and Lithium; the tool should feel less + cluttered, and there should also be no uncertainty about how to operate it. 
+ + +The interesting-ness test that’s going to be run to reduce the code is given +by name: + `--test=<test_name>` +If a `--test` option is not given, the program exits; this option is similar +to bugpoint’s current `-compile-custom` option, which lets the user run a +custom script. + + +The interesting-ness test would be defined as a script that returns 0 when the +IR achieves a user-defined behaviour (e.g. failure to compile on clang) and a +nonzero value otherwise. This leaves the user the freedom to determine what is +and isn’t interesting to the tool, and thus streamlines the process of +reducing a test-case. + + +If the test accepts any arguments (excluding the input ll/bc file), they are +given via the following flag: + `--test_args=<test_arguments>` +If unspecified, the test is run as given. It’s worth noting that the input file +would be passed as a parameter to the test, similar to how `-compile-custom` +currently operates. + + +### Implementation +The tool would behave similarly to CReduce’s functionality in that it would have a +list of passes that try to minimize the given test-case. We should be able to +modularize the tool’s behavior, as well as making it easier to maintain and +expand. + + +The first version of this redesign would try to: + + +* Discard functions, instructions and metadata that don’t influence the + interesting-ness test +* Remove unused parameters from functions +* Eliminate unvisited conditional paths +* Rename variables to more regular ones (such as “a”, “b”, “c”, etc.) + + +Once these passes are implemented, more meaningful reductions (such as type +reduction) would be added to the tool, to even further reduce IR.
+ + +## Background on historical bugpoint issues + + +### Root Cause Analysis +Presently, bugpoint takes a long time to find the source problem in a given IR +file, mainly due to the fact that it tries to debug the input by running +various strategies to classify the bug, which in turn run multiple optimizer +and compilation passes over the input, taking up a lot of time. Furthermore, +when the IR crashes, it tries to reduce it by performing some sub-optimal +passes (e.g. a lot of unreachable blocks), and sometimes even fails to minimize +at all. + + +### "Quirky" Interface +Bugpoint’s current interface overwhelms and confuses the user; the help screen +alone ends up confusing rather than providing guidance. And, not only are there +numerous features and options, but some of them also work in unexpected ways +and most of the time the user ends up using a custom script. Pruning and +simplifying the interface will be worth considering in order to make the tool +more useful in the general case and easier to maintain. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/Bugpoint.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/Bugpoint.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/Bugpoint.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/Bugpoint.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,227 @@ +==================================== +LLVM bugpoint tool: design and usage +==================================== + +.. contents:: + :local: + +Description +=========== + +``bugpoint`` narrows down the source of problems in LLVM tools and passes. It +can be used to debug three types of failures: optimizer crashes, miscompilations +by optimizers, or bad native code generation (including problems in the static +and JIT compilers). It aims to reduce large test cases to small, useful ones.
+For example, if ``opt`` crashes while optimizing a file, it will identify the +optimization (or combination of optimizations) that causes the crash, and reduce +the file down to a small example which triggers the crash. + +For detailed case scenarios, such as debugging ``opt``, or one of the LLVM code +generators, see :doc:`HowToSubmitABug`. + +Design Philosophy +================= + +``bugpoint`` is designed to be a useful tool without requiring any hooks into +the LLVM infrastructure at all. It works with any and all LLVM passes and code +generators, and does not need to "know" how they work. Because of this, it may +appear to do stupid things or miss obvious simplifications. ``bugpoint`` is +also designed to trade off programmer time for computer time in the +compiler-debugging process; consequently, it may take a long period of +(unattended) time to reduce a test case, but we feel it is still worth it. Note +that ``bugpoint`` is generally very quick unless debugging a miscompilation +where each test of the program (which requires executing it) takes a long time. + +Automatic Debugger Selection +---------------------------- + +``bugpoint`` reads each ``.bc`` or ``.ll`` file specified on the command line +and links them together into a single module, called the test program. If any +LLVM passes are specified on the command line, it runs these passes on the test +program. If any of the passes crash, or if they produce malformed output (which +causes the verifier to abort), ``bugpoint`` starts the `crash debugger`_. + +Otherwise, if the ``-output`` option was not specified, ``bugpoint`` runs the +test program with the "safe" backend (which is assumed to generate good code) to +generate a reference output. Once ``bugpoint`` has a reference output for the +test program, it tries executing it with the selected code generator. If the +selected code generator crashes, ``bugpoint`` starts the `crash debugger`_ on +the code generator. 
Otherwise, if the resulting output differs from the +reference output, it assumes the difference resulted from a code generator +failure, and starts the `code generator debugger`_. + +Finally, if the output of the selected code generator matches the reference +output, ``bugpoint`` runs the test program after all of the LLVM passes have +been applied to it. If its output differs from the reference output, it assumes +the difference resulted from a failure in one of the LLVM passes, and enters the +`miscompilation debugger`_. Otherwise, there is no problem ``bugpoint`` can +debug. + +.. _crash debugger: + +Crash debugger +-------------- + +If an optimizer or code generator crashes, ``bugpoint`` will try as hard as it +can to reduce the list of passes (for optimizer crashes) and the size of the +test program. First, ``bugpoint`` figures out which combination of optimizer +passes triggers the bug. This is useful when debugging a problem exposed by +``opt``, for example, because it runs over 38 passes. + +Next, ``bugpoint`` tries removing functions from the test program, to reduce its +size. Usually it is able to reduce a test program to a single function, when +debugging intraprocedural optimizations. Once the number of functions has been +reduced, it attempts to delete various edges in the control flow graph, to +reduce the size of the function as much as possible. Finally, ``bugpoint`` +deletes any individual LLVM instructions whose absence does not eliminate the +failure. At the end, ``bugpoint`` should tell you what passes crash, give you a +bitcode file, and give you instructions on how to reproduce the failure with +``opt`` or ``llc``. + +.. _code generator debugger: + +Code generator debugger +----------------------- + +The code generator debugger attempts to narrow down the amount of code that is +being miscompiled by the selected code generator. 
To do this, it takes the test +program and partitions it into two pieces: one piece which it compiles with the +"safe" backend (into a shared object), and one piece which it runs with either +the JIT or the static LLC compiler. It uses several techniques to reduce the +amount of code pushed through the LLVM code generator, to reduce the potential +scope of the problem. After it is finished, it emits two bitcode files (called +"test" [to be compiled with the code generator] and "safe" [to be compiled with +the "safe" backend], respectively), and instructions for reproducing the +problem. The code generator debugger assumes that the "safe" backend produces +good code. + +.. _miscompilation debugger: + +Miscompilation debugger +----------------------- + +The miscompilation debugger works similarly to the code generator debugger. It +works by splitting the test program into two pieces, running the optimizations +specified on one piece, linking the two pieces back together, and then executing +the result. It attempts to narrow down the list of passes to the one (or few) +which are causing the miscompilation, then reduce the portion of the test +program which is being miscompiled. The miscompilation debugger assumes that +the selected code generator is working properly. + +Advice for using bugpoint +========================= + +``bugpoint`` can be a remarkably useful tool, but it sometimes works in +non-obvious ways. Here are some hints and tips: + +* In the code generator and miscompilation debuggers, ``bugpoint`` only works + with programs that have deterministic output. Thus, if the program outputs + ``argv[0]``, the date, time, or any other "random" data, ``bugpoint`` may + misinterpret differences in these data, when output, as the result of a + miscompilation. Programs should be temporarily modified to disable outputs + that are likely to vary from run to run. + +* In the `crash debugger`_, ``bugpoint`` does not distinguish different crashes + during reduction. 
Thus, if a new crash or miscompilation happens, ``bugpoint`` + will continue with the new crash instead. If you would like to stick to a + particular crash, you should write check scripts to validate the error + message; see ``-compile-command`` in :doc:`CommandGuide/bugpoint`. + +* In the code generator and miscompilation debuggers, debugging will go faster + if you manually modify the program or its inputs to reduce the runtime, but + still exhibit the problem. + +* ``bugpoint`` is extremely useful when working on a new optimization: it helps + track down regressions quickly. To avoid having to relink ``bugpoint`` every + time you change your optimization however, have ``bugpoint`` dynamically load + your optimization with the ``-load`` option. + +* ``bugpoint`` can generate a lot of output and run for a long period of time. + It is often useful to capture the output of the program to file. For example, + in the C shell, you can run: + + .. code-block:: console + + $ bugpoint ... |& tee bugpoint.log + + to get a copy of ``bugpoint``'s output in the file ``bugpoint.log``, as well + as on your terminal. + +* ``bugpoint`` cannot debug problems with the LLVM linker. If ``bugpoint`` + crashes before you see its "All input ok" message, you might try ``llvm-link + -v`` on the same set of input files. If that also crashes, you may be + experiencing a linker bug. + +* ``bugpoint`` is useful for proactively finding bugs in LLVM. Invoking + ``bugpoint`` with the ``-find-bugs`` option will cause the list of specified + optimizations to be randomized and applied to the program. This process will + repeat until a bug is found or the user kills ``bugpoint``. + +* ``bugpoint`` can produce IR which contains long names. Run ``opt + -metarenamer`` over the IR to rename everything using easy-to-read, + metasyntactic names. Alternatively, run ``opt -strip -instnamer`` to rename + everything with very short (often purely numeric) names.
+ +What to do when bugpoint isn't enough +===================================== + +Sometimes, ``bugpoint`` is not enough. In particular, InstCombine and +TargetLowering both have visitor structured code with lots of potential +transformations. If the process of using bugpoint has left you with still too +much code to figure out and the problem seems to be in instcombine, the +following steps may help. These same techniques are useful with TargetLowering +as well. + +Turn on ``-debug-only=instcombine`` and see which transformations within +instcombine are firing by selecting out lines with "``IC``" in them. + +At this point, you have a decision to make. Is the number of transformations +small enough to step through them using a debugger? If so, then try that. + +If there are too many transformations, then a source modification approach may +be helpful. In this approach, you can modify the source code of instcombine to +disable just those transformations that are being performed on your test input +and perform a binary search over the set of transformations. One set of places +to modify are the "``visit*``" methods of ``InstCombiner`` (*e.g.* +``visitICmpInst``) by adding a "``return false``" as the first line of the +method. + +If that still doesn't remove enough, then change the caller of +``InstCombiner::DoOneIteration``, ``InstCombiner::runOnFunction`` to limit the +number of iterations. + +You may also find it useful to use "``-stats``" now to see what parts of +instcombine are firing. This can guide where to put additional reporting code. + +At this point, if the amount of transformations is still too large, then +inserting code to limit whether or not to execute the body of the code in the +visit function can be helpful. Add a static counter which is incremented on +every invocation of the function. Then add code which simply returns false on +desired ranges. For example: + +.. 
code-block:: c++ + + + static int calledCount = 0; + calledCount++; + LLVM_DEBUG(if (calledCount < 212) return false); + LLVM_DEBUG(if (calledCount > 217) return false); + LLVM_DEBUG(if (calledCount == 213) return false); + LLVM_DEBUG(if (calledCount == 214) return false); + LLVM_DEBUG(if (calledCount == 215) return false); + LLVM_DEBUG(if (calledCount == 216) return false); + LLVM_DEBUG(dbgs() << "visitXOR calledCount: " << calledCount << "\n"); + LLVM_DEBUG(dbgs() << "I: "; I->dump()); + +could be added to ``visitXOR`` to limit ``visitXor`` to being applied only to +calls 212 and 217. This is from an actual test case and raises an important +point---a simple binary search may not be sufficient, as transformations that +interact may require isolating more than one call. In TargetLowering, use +``return SDNode();`` instead of ``return false;``. + +Now that the number of transformations is down to a manageable number, try +examining the output to see if you can figure out which transformations are +being done. If that can be figured out, then do the usual debugging. If which +code corresponds to the transformation being performed isn't obvious, set a +breakpoint after the call count based disabling and step through the code. +Alternatively, you can use "``printf``" style debugging to report waypoints. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BuildingADistribution.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BuildingADistribution.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/BuildingADistribution.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/BuildingADistribution.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,246 @@ +=============================== +Building a Distribution of LLVM +=============================== + +.. 
contents:: + :local: + +Introduction +============ + +This document is geared toward people who want to build and package LLVM and any +combination of LLVM sub-project tools for distribution. This document covers +useful features of the LLVM build system as well as best practices and general +information about packaging LLVM. + +If you are new to CMake you may find the :doc:`CMake` or :doc:`CMakePrimer` +documentation useful. Some of the things covered in this document are the inner +workings of the builds described in the :doc:`AdvancedBuilds` document. + +General Distribution Guidance +============================= + +When building a distribution of a compiler it is generally advised to perform a +bootstrap build of the compiler. That means building a "stage 1" compiler with +your host toolchain, then building the "stage 2" compiler using the "stage 1" +compiler. This is done so that the compiler you distribute benefits from all the +bug fixes, performance optimizations and general improvements provided by the +new compiler. + +In deciding how to build your distribution there are a few trade-offs that you +will need to evaluate. The big two are: + +#. Compile time of the distribution against performance of the built compiler + +#. Binary size of the distribution against performance of the built compiler + +The guidance for maximizing performance of the generated compiler is to use LTO, +PGO, and statically link everything. This will result in an overall larger +distribution, and it will take longer to generate, but it provides the most +opportunity for the compiler to optimize. + +The guidance for minimizing distribution size is to dynamically link LLVM and +Clang libraries into the tools to reduce code duplication. This will come at a +substantial performance penalty to the generated binary both because it reduces +optimization opportunity, and because dynamic linking requires resolving symbols +at process launch time, which can be very slow for C++ code. + +.. 
_shared_libs: + +.. warning:: + One very important note: Distributions should never be built using the + *BUILD_SHARED_LIBS* CMake option. That option exists for optimizing developer + workflow only. Due to design and implementation decisions, LLVM relies on + global data which can end up being duplicated across shared libraries + resulting in bugs. As such this is not a safe way to distribute LLVM or + LLVM-based tools. + +The simplest example of building a distribution with reasonable performance is +captured in the DistributionExample CMake cache file located at +clang/cmake/caches/DistributionExample.cmake. The following command will perform +and install the distribution build: + +.. code-block:: console + + $ cmake -G Ninja -C <path to clang>/cmake/caches/DistributionExample.cmake <path to LLVM source> + $ ninja stage2-distribution + $ ninja stage2-install-distribution + +Difference between ``install`` and ``install-distribution`` +----------------------------------------------------------- + +One subtle but important thing to note is the difference between the ``install`` +and ``install-distribution`` targets. The ``install`` target is expected to +install every part of LLVM that your build is configured to generate except the +LLVM testing tools. Alternatively the ``install-distribution`` target, which is +recommended for building distributions, only installs specific parts of LLVM as +specified at configuration time by *LLVM_DISTRIBUTION_COMPONENTS*. + +Additionally by default the ``install`` target will install the LLVM testing +tools as the public tools. This can be changed as well by setting +*LLVM_INSTALL_TOOLCHAIN_ONLY* to ``On``. The LLVM tools are intended for +development and testing of LLVM, and should only be included in distributions +that support LLVM development. + +When building with *LLVM_DISTRIBUTION_COMPONENTS* the build system also +generates a ``distribution`` target which builds all the components specified in +the list.
This is a convenience build target to allow building just the +distributed pieces without needing to build all configured targets. + +.. _Multi-distribution configurations: + +Multi-distribution configurations +--------------------------------- + +The ``install-distribution`` target described above is for building a single +distribution. LLVM's build system also supports building multiple distributions, +which can be used to e.g. have one distribution containing just tools and +another for libraries (to enable development). These are configured by setting +the *LLVM_DISTRIBUTIONS* variable to hold a list of all distribution names +(which conventionally start with an uppercase letter, e.g. "Development"), and +then setting the *LLVM_<distribution>_DISTRIBUTION_COMPONENTS* variable to the +list of targets for that distribution. For each distribution, the build system +generates an ``install-${distribution}-distribution`` target, where +``${distribution}`` is the name of the distribution in lowercase, to install +that distribution. Each target can only be in one distribution. + +Each distribution creates its own set of CMake exports, and the target to +install the CMake exports for a particular distribution for a project is named +``${project}-${distribution}-cmake-exports``, where ``${project}`` is the name +of the project in lowercase and ``${distribution}`` is the name of the +distribution in lowercase, unless the project is LLVM, in which case the target +is just named ``${distribution}-cmake-exports``. These targets need to be +explicitly included in the *LLVM_<distribution>_DISTRIBUTION_COMPONENTS* +variable in order to be included as part of the distribution. + +Unlike with the single distribution setup, when building multiple distributions, +any components specified in *LLVM_RUNTIME_DISTRIBUTION_COMPONENTS* are not +automatically added to any distribution. Instead, you must include the targets +explicitly in some *LLVM_<distribution>_DISTRIBUTION_COMPONENTS* list.
+ +We strongly encourage looking at ``clang/cmake/caches/MultiDistributionExample.cmake`` +as an example of configuring multiple distributions. + +Special Notes for Library-only Distributions +-------------------------------------------- + +One of the most powerful features of LLVM is its library-first design mentality +and the way you can compose a wide variety of tools using different portions of +LLVM. Even in this situation using *BUILD_SHARED_LIBS* is not supported. If you +want to distribute LLVM as a shared library for use in a tool, the recommended +method is using *LLVM_BUILD_LLVM_DYLIB*, and you can use *LLVM_DYLIB_COMPONENTS* +to configure which LLVM components are part of libLLVM. +Note: *LLVM_BUILD_LLVM_DYLIB* is not available on Windows. + +Options for Optimizing LLVM +=========================== + +There are four main build optimizations that our CMake build system supports. +When performing a bootstrap build it is not beneficial to do anything other than +setting *CMAKE_BUILD_TYPE* to ``Release`` for the stage-1 compiler. This is +because the more intensive optimizations are expensive to perform and the +stage-1 compiler is thrown away. All of the further options described should be +set on the stage-2 compiler either using a CMake cache file, or by prefixing the +option with *BOOTSTRAP_*. + +The first and simplest to use is the compiler optimization level by setting the +*CMAKE_BUILD_TYPE* option. The main values of interest are ``Release`` or +``RelWithDebInfo``. By default the ``Release`` option uses the ``-O3`` +optimization level, and ``RelWithDebInfo`` uses ``-O2``. If you want to generate +debug information and use ``-O3`` you can override the +*CMAKE_<LANG>_FLAGS_RELWITHDEBINFO* option for C and CXX. +DistributionExample.cmake does this. + +Another easy to use option is Link-Time-Optimization. You can set the +*LLVM_ENABLE_LTO* option on your stage-2 build to ``Thin`` or ``Full`` to enable +building LLVM with LTO.
These options will significantly increase link time of +the binaries in the distribution, but it will create much faster binaries. This +option should not be used if your distribution includes static archives, as the +objects inside the archive will be LLVM bitcode, which is not portable. + +The :doc:`AdvancedBuilds` documentation describes the built-in tooling for +generating LLVM profiling information to drive Profile-Guided-Optimization. The +in-tree profiling tests are very limited, and generating the profile takes a +significant amount of time, but it can result in a significant improvement in +the performance of the generated binaries. + +In addition to PGO profiling we also have limited support in-tree for generating +linker order files. These files provide the linker with a suggested ordering for +functions in the final binary layout. This can measurably speed up clang by +physically grouping functions that are called temporally close to each other. +The current tooling is only available on Darwin systems with ``dtrace(1)``. It +is worth noting that dtrace is non-deterministic, and so the order file +generation using dtrace is also non-deterministic. + +Options for Reducing Size +========================= + +.. warning:: + Any steps taken to reduce the binary size will come at a cost of runtime + performance in the generated binaries. + +The simplest and least significant way to reduce binary size is to set the +*CMAKE_BUILD_TYPE* variable to ``MinSizeRel``, which will set the compiler +optimization level to ``-Os`` which optimizes for binary size. This will have +both the least benefit to size and the least impact on performance. + +The most impactful way to reduce binary size is to dynamically link LLVM into +all the tools. This reduces code size by decreasing duplication of common code +between the LLVM-based tools. This can be done by setting the following two +CMake options to ``On``: *LLVM_BUILD_LLVM_DYLIB* and *LLVM_LINK_LLVM_DYLIB*. + +.. 
warning:: + Distributions should never be built using the *BUILD_SHARED_LIBS* CMake + option. (:ref:`See the warning above for more explanation <shared_libs>`.) + +Relevant CMake Options +====================== + +This section provides documentation of the CMake options that are intended to +help construct distributions. This is not an exhaustive list, and many +additional options are documented in the :doc:`CMake` page. Some key options +that are already documented include: *LLVM_TARGETS_TO_BUILD*, +*LLVM_ENABLE_PROJECTS*, *LLVM_BUILD_LLVM_DYLIB*, and *LLVM_LINK_LLVM_DYLIB*. + +**LLVM_ENABLE_RUNTIMES**:STRING + When building a distribution that includes LLVM runtime projects (i.e. libcxx, + compiler-rt, libcxxabi, libunwind...), it is important to build those projects + with the just-built compiler. + +**LLVM_DISTRIBUTION_COMPONENTS**:STRING + This variable can be set to a semi-colon separated list of LLVM build system + components to install. All LLVM-based tools are components, as well as most + of the libraries and runtimes. Component names match the names of the build + system targets. + +**LLVM_DISTRIBUTIONS**:STRING + This variable can be set to a semi-colon separated list of distributions. See + the :ref:`Multi-distribution configurations` section above for details on this + and other CMake variables to configure multiple distributions. + +**LLVM_RUNTIME_DISTRIBUTION_COMPONENTS**:STRING + This variable can be set to a semi-colon separated list of runtime library + components. This is used in conjunction with *LLVM_ENABLE_RUNTIMES* to specify + components of runtime libraries that you want to include in your distribution. + Just like with *LLVM_DISTRIBUTION_COMPONENTS*, component names match the names + of the build system targets. + +**LLVM_DYLIB_COMPONENTS**:STRING + This variable can be set to a semi-colon separated name of LLVM library + components. LLVM library components are either library names with the LLVM + prefix removed (i.e.
Support, Demangle...), LLVM target names, or special + purpose component names. The special purpose component names are: + + #. ``all`` - All LLVM available component libraries + #. ``Native`` - The LLVM target for the Native system + #. ``AllTargetsAsmParsers`` - All the included target ASM parsers libraries + #. ``AllTargetsDescs`` - All the included target descriptions libraries + #. ``AllTargetsDisassemblers`` - All the included target disassemblers libraries + #. ``AllTargetsInfos`` - All the included target info libraries + +**LLVM_INSTALL_TOOLCHAIN_ONLY**:BOOL + This option defaults to ``Off``: when set to ``On`` it removes many of the + LLVM development and testing tools as well as component libraries from the + default ``install`` target. Including the development tools is not recommended + for distributions as many of the LLVM tools are only intended for development + and testing use. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CFIVerify.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CFIVerify.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CFIVerify.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CFIVerify.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,93 @@ +============================================== +Control Flow Verification Tool Design Document +============================================== + +.. contents:: + :local: + +Objective +========= + +This document provides an overview of an external tool to verify the protection +mechanisms implemented by Clang's *Control Flow Integrity* (CFI) schemes +(``-fsanitize=cfi``). This tool, provided a binary or DSO, should infer whether +indirect control flow operations are protected by CFI, and should output these +results in a human-readable form.
+ +This tool should also be added as part of Clang's continuous integration testing +framework, where modifications to the compiler ensure that CFI protection +schemes are still present in the final binary. + +Location +======== + +This tool will be present as a part of the LLVM toolchain, and will reside in +the "/llvm/tools/llvm-cfi-verify" directory, relative to the LLVM trunk. It will +be tested in two methods: + +- Unit tests to validate code sections, present in + "/llvm/unittests/tools/llvm-cfi-verify". +- Integration tests, present in "/llvm/tools/clang/test/LLVMCFIVerify". These + integration tests are part of clang as part of a continuous integration + framework, ensuring updates to the compiler that reduce CFI coverage on + indirect control flow instructions are identified. + +Background +========== + +This tool will continuously validate that CFI directives are properly +implemented around all indirect control flows by analysing the output machine +code. The analysis of machine code is important as it ensures that any bugs +present in linker or compiler do not subvert CFI protections in the final +shipped binary. + +Unprotected indirect control flow instructions will be flagged for manual +review. These unexpected control flows may simply have not been accounted for in +the compiler implementation of CFI (e.g. indirect jumps to facilitate switch +statements may not be fully protected). + +It may be possible in the future to extend this tool to flag unnecessary CFI +directives (e.g. CFI directives around a static call to a non-polymorphic base +type). This type of directive has no security implications, but may present +performance impacts. + +Design Ideas +============ + +This tool will disassemble binaries and DSO's from their machine code format and +analyse the disassembled machine code. The tool will inspect virtual calls and +indirect function calls. 
This tool will also inspect indirect jumps, as inlined +functions and jump tables should also be subject to CFI protections. Non-virtual +calls (``-fsanitize=cfi-nvcall``) and cast checks (``-fsanitize=cfi-*cast*``) +are not implemented due to a lack of information provided by the bytecode. + +The tool would operate by searching for indirect control flow instructions in +the disassembly. A control flow graph would be generated from a small buffer of +the instructions surrounding the 'target' control flow instruction. If the +target instruction is branched-to, the fallthrough of the branch should be the +CFI trap (on x86, this is a ``ud2`` instruction). If the target instruction is +the fallthrough (i.e. immediately succeeds) of a conditional jump, the +conditional jump target should be the CFI trap. If an indirect control flow +instruction does not conform to one of these formats, the target will be noted +as being CFI-unprotected. + +Note that in the second case outlined above (where the target instruction is the +fallthrough of a conditional jump), if the target represents a vcall that takes +arguments, these arguments may be pushed to the stack after the branch but +before the target instruction. In these cases, a secondary 'spill graph' is +constructed, to ensure the register argument used by the indirect jump/call is +not spilled from the stack at any point in the interim period. If there are no +spills that affect the target register, the target is marked as CFI-protected. + +Other Design Notes +~~~~~~~~~~~~~~~~~~ + +Only machine code sections that are marked as executable will be subject to this +analysis. Non-executable sections do not require analysis as any execution +present in these sections has already violated the control flow integrity. + +Suitable extensions may be made at a later date to include analysis for indirect +control flow operations across DSO boundaries.
Currently, these CFI features are +only experimental with an unstable ABI, making them unsuitable for analysis. + +The tool currently only supports the x86, x86_64, and AArch64 architectures. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CMakePrimer.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CMakePrimer.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CMakePrimer.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CMakePrimer.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,439 @@ +============ +CMake Primer +============ + +.. contents:: + :local: + +.. warning:: + Disclaimer: This documentation is written by LLVM project contributors `not` + anyone affiliated with the CMake project. This document may contain + inaccurate terminology, phrasing, or technical details. It is provided with + the best intentions. + + +Introduction +============ + +The LLVM project and many of the core projects built on LLVM build using CMake. +This document aims to provide a brief overview of CMake for developers modifying +LLVM projects or building their own projects on top of LLVM. + +The official CMake language references is available in the cmake-language +manpage and `cmake-language online documentation +`_. + +10,000 ft View +============== + +CMake is a tool that reads script files in its own language that describe how a +software project builds. As CMake evaluates the scripts it constructs an +internal representation of the software project. Once the scripts have been +fully processed, if there are no errors, CMake will generate build files to +actually build the project. CMake supports generating build files for a variety +of command line build tools as well as for popular IDEs. + +When a user runs CMake it performs a variety of checks similar to how autoconf +worked historically. 
During the checks and the evaluation of the build +description scripts CMake caches values into the CMakeCache. This is useful +because it allows the build system to skip long-running checks during +incremental development. CMake caching also has some drawbacks, but that will be +discussed later. + +Scripting Overview +================== + +CMake's scripting language has a very simple grammar. Every language construct +is a command that matches the pattern _name_(_args_). Commands come in three +primary types: language-defined (commands implemented in C++ in CMake), defined +functions, and defined macros. The CMake distribution also contains a suite of +CMake modules that contain definitions for useful functionality. + +The example below is the full CMake build for building a C++ "Hello World" +program. The example uses only CMake language-defined functions. + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.15) + project(HelloWorld) + add_executable(HelloWorld HelloWorld.cpp) + +The CMake language provides control flow constructs in the form of foreach loops +and if blocks. To make the example above more complicated you could add an if +block to define "APPLE" when targeting Apple platforms: + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.15) + project(HelloWorld) + add_executable(HelloWorld HelloWorld.cpp) + if(APPLE) + target_compile_definitions(HelloWorld PUBLIC APPLE) + endif() + +Variables, Types, and Scope +=========================== + +Dereferencing +------------- + +In CMake variables are "stringly" typed. All variables are represented as +strings throughout evaluation. Wrapping a variable in ``${}`` dereferences it +and results in a literal substitution of the name for the value. CMake refers to +this as "variable evaluation" in their documentation. Dereferences are performed +*before* the command being called receives the arguments. 
This means +dereferencing a list results in multiple separate arguments being passed to the +command. + +Variable dereferences can be nested and be used to model complex data. For +example: + +.. code-block:: cmake + + set(var_name var1) + set(${var_name} foo) # same as "set(var1 foo)" + set(${${var_name}}_var bar) # same as "set(foo_var bar)" + +Dereferencing an unset variable results in an empty expansion. It is a common +pattern in CMake to conditionally set variables knowing that it will be used in +code paths that the variable isn't set. There are examples of this throughout +the LLVM CMake build system. + +An example of variable empty expansion is: + +.. code-block:: cmake + + if(APPLE) + set(extra_sources Apple.cpp) + endif() + add_executable(HelloWorld HelloWorld.cpp ${extra_sources}) + +In this example the ``extra_sources`` variable is only defined if you're +targeting an Apple platform. For all other targets the ``extra_sources`` will be +evaluated as empty before add_executable is given its arguments. + +Lists +----- + +In CMake lists are semi-colon delimited strings, and it is strongly advised that +you avoid using semi-colons in lists; it doesn't go smoothly. A few examples of +defining lists: + +.. code-block:: cmake + + # Creates a list with members a, b, c, and d + set(my_list a b c d) + set(my_list "a;b;c;d") + + # Creates a string "a b c d" + set(my_string "a b c d") + +Lists of Lists +-------------- + +One of the more complicated patterns in CMake is lists of lists. Because a list +cannot contain an element with a semi-colon to construct a list of lists you +make a list of variable names that refer to other lists. For example: + +.. code-block:: cmake + + set(list_of_lists a b c) + set(a 1 2 3) + set(b 4 5 6) + set(c 7 8 9) + +With this layout you can iterate through the list of lists printing each value +with the following code: + +.. 
code-block:: cmake + + foreach(list_name IN LISTS list_of_lists) + foreach(value IN LISTS ${list_name}) + message(${value}) + endforeach() + endforeach() + +You'll notice that the inner foreach loop's list is doubly dereferenced. This is +because the first dereference turns ``list_name`` into the name of the sub-list +(a, b, or c in the example), then the second dereference is to get the value of +the list. + +This pattern is used throughout CMake, the most common example is the compiler +flags options, which CMake refers to using the following variable expansions: +CMAKE_${LANGUAGE}_FLAGS and CMAKE_${LANGUAGE}_FLAGS_${CMAKE_BUILD_TYPE}. + +Other Types +----------- + +Variables that are cached or specified on the command line can have types +associated with them. The variable's type is used by CMake's UI tool to display +the right input field. A variable's type generally doesn't impact evaluation, +however CMake does have special handling for some variables such as PATH. +You can read more about the special handling in `CMake's set documentation +`_. + +Scope +----- + +CMake inherently has a directory-based scoping. Setting a variable in a +CMakeLists file, will set the variable for that file, and all subdirectories. +Variables set in a CMake module that is included in a CMakeLists file will be +set in the scope they are included from, and all subdirectories. + +When a variable that is already set is set again in a subdirectory it overrides +the value in that scope and any deeper subdirectories. + +The CMake set command provides two scope-related options. PARENT_SCOPE sets a +variable into the parent scope, and not the current scope. The CACHE option sets +the variable in the CMakeCache, which results in it being set in all scopes. The +CACHE option will not set a variable that already exists in the CACHE unless the +FORCE option is specified. + +In addition to directory-based scope, CMake functions also have their own scope. 
+This means variables set inside functions do not bleed into the parent scope. +This is not true of macros, and it is for this reason LLVM prefers functions +over macros whenever reasonable. + +.. note:: + Unlike C-based languages, CMake's loop and control flow blocks do not have + their own scopes. + +Control Flow +============ + +CMake features the same basic control flow constructs you would expect in any +scripting language, but there are a few quirks because, as with everything in +CMake, control flow constructs are commands. + +If, ElseIf, Else +---------------- + +.. note:: + For the full documentation on the CMake if command go + `here <https://cmake.org/cmake/help/v3.4/command/if.html>`_. That resource is + far more complete. + +In general CMake if blocks work the way you'd expect: + +.. code-block:: cmake + + if(<condition>) + message("do stuff") + elseif(<condition>) + message("do other stuff") + else() + message("do other other stuff") + endif() + +The single most important thing to know about CMake's if blocks coming from a C +background is that they do not have their own scope. Variables set inside +conditional blocks persist after the ``endif()``. + +Loops +----- + +The most common form of the CMake ``foreach`` block is: + +.. code-block:: cmake + + foreach(var ...) + message("do stuff") + endforeach() + +The variable argument portion of the ``foreach`` block can contain dereferenced +lists, values to iterate, or a mix of both: + +.. code-block:: cmake + + foreach(var foo bar baz) + message(${var}) + endforeach() + # prints: + # foo + # bar + # baz + + set(my_list 1 2 3) + foreach(var ${my_list}) + message(${var}) + endforeach() + # prints: + # 1 + # 2 + # 3 + + foreach(var ${my_list} out_of_bounds) + message(${var}) + endforeach() + # prints: + # 1 + # 2 + # 3 + # out_of_bounds + +There is also a more modern CMake foreach syntax. The code below is equivalent +to the code above: + +..
code-block:: cmake + + foreach(var IN ITEMS foo bar baz) + message(${var}) + endforeach() + # prints: + # foo + # bar + # baz + + set(my_list 1 2 3) + foreach(var IN LISTS my_list) + message(${var}) + endforeach() + # prints: + # 1 + # 2 + # 3 + + foreach(var IN LISTS my_list ITEMS out_of_bounds) + message(${var}) + endforeach() + # prints: + # 1 + # 2 + # 3 + # out_of_bounds + +Similar to the conditional statements, these generally behave how you would +expect, and they do not have their own scope. + +CMake also supports ``while`` loops, although they are not widely used in LLVM. + +Modules, Functions and Macros +============================= + +Modules +------- + +Modules are CMake's vehicle for enabling code reuse. CMake modules are just +CMake script files. They can contain code to execute on include as well as +definitions for commands. + +In CMake macros and functions are universally referred to as commands, and they +are the primary method of defining code that can be called multiple times. + +In LLVM we have several CMake modules that are included as part of our +distribution for developers who don't build our project from source. Those +modules are the fundamental pieces needed to build LLVM-based projects with +CMake. We also rely on modules as a way of organizing the build system's +functionality for maintainability and re-use within LLVM projects. + +Argument Handling +----------------- + +When defining a CMake command handling arguments is very useful. The examples +in this section will all use the CMake ``function`` block, but this all applies +to the ``macro`` block as well. + +CMake commands can have named arguments that are required at every call site. In +addition, all commands will implicitly accept a variable number of extra +arguments (In C parlance, all commands are varargs functions). 
When a command is +invoked with extra arguments (beyond the named ones) CMake will store the full +list of arguments (both named and unnamed) in a list named ``ARGV``, and the +sublist of unnamed arguments in ``ARGN``. Below is a trivial example of +providing a wrapper function for CMake's built-in function ``add_dependencies``. + +.. code-block:: cmake + + function(add_deps target) + add_dependencies(${target} ${ARGN}) + endfunction() + +This example defines a new function named ``add_deps`` which takes a required first +argument, and just calls another function passing through the first argument and +all trailing arguments. + +CMake provides a module ``CMakeParseArguments`` which provides an implementation +of advanced argument parsing. We use this all over LLVM, and it is recommended +for any function that has complex argument-based behaviors or optional +arguments. CMake's official documentation for the module is in the +``cmake-modules`` manpage, and is also available at the +`cmake-modules online documentation +<https://cmake.org/cmake/help/v3.4/module/CMakeParseArguments.html>`_. + +.. note:: + As of CMake 3.5 the cmake_parse_arguments command has become a native command + and the CMakeParseArguments module is empty and only left around for + compatibility. + +Functions Vs Macros +------------------- + +Functions and Macros look very similar in how they are used, but there is one +fundamental difference between the two. Functions have their own scope, and +macros don't. This means variables set in macros will bleed out into the calling +scope. That makes macros suitable for defining very small bits of functionality +only. + +The other difference between CMake functions and macros is how arguments are +passed. Arguments to macros are not set as variables, instead dereferences to +the parameters are resolved across the macro before executing it. This can +result in some unexpected behavior if using unreferenced variables. For example: + +..
code-block:: cmake + + macro(print_list my_list) + foreach(var IN LISTS my_list) + message("${var}") + endforeach() + endmacro() + + set(my_list a b c d) + set(my_list_of_numbers 1 2 3 4) + print_list(my_list_of_numbers) + # prints: + # a + # b + # c + # d + +Generally speaking this issue is uncommon because it requires using +non-dereferenced variables with names that overlap in the parent scope, but it +is important to be aware of because it can lead to subtle bugs. + +LLVM Project Wrappers +===================== + +LLVM projects provide lots of wrappers around critical CMake built-in commands. +We use these wrappers to provide consistent behaviors across LLVM components +and to reduce code duplication. + +We generally (but not always) follow the convention that commands prefaced with +``llvm_`` are intended to be used only as building blocks for other commands. +Wrapper commands that are intended for direct use are generally named following +with the project in the middle of the command name (i.e. ``add_llvm_executable`` +is the wrapper for ``add_executable``). The LLVM ``add_*`` wrapper functions are +all defined in ``AddLLVM.cmake`` which is installed as part of the LLVM +distribution. It can be included and used by any LLVM sub-project that requires +LLVM. + +.. note:: + + Not all LLVM projects require LLVM for all use cases. For example compiler-rt + can be built without LLVM, and the compiler-rt sanitizer libraries are used + with GCC. + +Useful Built-in Commands +======================== + +CMake has a bunch of useful built-in commands. This document isn't going to +go into details about them because The CMake project has excellent +documentation. 
To highlight a few useful functions see: + +* `add_custom_command `_ +* `add_custom_target `_ +* `file `_ +* `list `_ +* `math `_ +* `string `_ + +The full documentation for CMake commands is in the ``cmake-commands`` manpage +and available on `CMake's website `_ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CMake.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CMake.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CMake.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CMake.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,995 @@ +======================== +Building LLVM with CMake +======================== + +.. contents:: + :local: + +Introduction +============ + +`CMake `_ is a cross-platform build-generator tool. CMake +does not build the project, it generates the files needed by your build tool +(GNU make, Visual Studio, etc.) for building LLVM. + +If **you are a new contributor**, please start with the :doc:`GettingStarted` +page. This page is geared for existing contributors moving from the +legacy configure/make system. + +If you are really anxious about getting a functional LLVM build, go to the +`Quick start`_ section. If you are a CMake novice, start with `Basic CMake usage`_ +and then go back to the `Quick start`_ section once you know what you are doing. The +`Options and variables`_ section is a reference for customizing your build. If +you already have experience with CMake, this is the recommended starting point. + +This page is geared towards users of the LLVM CMake build. If you're looking for +information about modifying the LLVM CMake build system you may want to see the +:doc:`CMakePrimer` page. It has a basic overview of the CMake language. + +.. _Quick start: + +Quick start +=========== + +We use here the command-line, non-interactive CMake interface. + +#. `Download `_ and install + CMake. 
Version 3.13.4 is the minimum required. + +#. Open a shell. Your development tools must be reachable from this shell + through the PATH environment variable. + +#. Create a build directory. Building LLVM in the source + directory is not supported. cd to this directory: + + .. code-block:: console + + $ mkdir mybuilddir + $ cd mybuilddir + +#. Execute this command in the shell replacing `path/to/llvm/source/root` with + the path to the root of your LLVM source tree: + + .. code-block:: console + + $ cmake path/to/llvm/source/root + + CMake will detect your development environment, perform a series of tests, and + generate the files required for building LLVM. CMake will use default values + for all build parameters. See the `Options and variables`_ section for + a list of build parameters that you can modify. + + This can fail if CMake can't detect your toolset, or if it thinks that the + environment is not sane enough. In this case, make sure that the toolset that + you intend to use is the only one reachable from the shell, and that the shell + itself is the correct one for your development environment. CMake will refuse + to build MinGW makefiles if you have a POSIX shell reachable through the PATH + environment variable, for instance. You can force CMake to use a given build + tool; for instructions, see the `Usage`_ section, below. You may + also wish to control which targets LLVM enables, or which LLVM + components are built; see the `Frequently Used LLVM-related + variables`_ below. + +#. After CMake has finished running, proceed to use IDE project files, or start + the build from the build directory: + + .. code-block:: console + + $ cmake --build . + + The ``--build`` option tells ``cmake`` to invoke the underlying build + tool (``make``, ``ninja``, ``xcodebuild``, ``msbuild``, etc.) + + The underlying build tool can be invoked directly, of course, but + the ``--build`` option is portable. + +#. 
After LLVM has finished building, install it from the build directory: + + .. code-block:: console + + $ cmake --build . --target install + + The ``--target`` option with ``install`` parameter in addition to + the ``--build`` option tells ``cmake`` to build the ``install`` target. + + It is possible to set a different install prefix at installation time + by invoking the ``cmake_install.cmake`` script generated in the + build directory: + + .. code-block:: console + + $ cmake -DCMAKE_INSTALL_PREFIX=/tmp/llvm -P cmake_install.cmake + +.. _Basic CMake usage: +.. _Usage: + +Basic CMake usage +================= + +This section explains basic aspects of CMake +which you may need in your day-to-day usage. + +CMake comes with extensive documentation, in the form of html files, and as +online help accessible via the ``cmake`` executable itself. Execute ``cmake +--help`` for further help options. + +CMake allows you to specify a build tool (e.g., GNU make, Visual Studio, +or Xcode). If not specified on the command line, CMake tries to guess which +build tool to use, based on your environment. Once it has identified your +build tool, CMake uses the corresponding *Generator* to create files for your +build tool (e.g., Makefiles or Visual Studio or Xcode project files). You can +explicitly specify the generator with the command line option ``-G "Name of the +generator"``. To see a list of the available generators on your system, execute + +.. code-block:: console + + $ cmake --help + +This will list the generator names at the end of the help text. + +Generators' names are case-sensitive, and may contain spaces. For this reason, +you should enter them exactly as they are listed in the ``cmake --help`` +output, in quotes. For example, to generate project files specifically for +Visual Studio 12, you can execute: + +.. code-block:: console + + $ cmake -G "Visual Studio 12" path/to/llvm/source/root + +For a given development platform there can be more than one adequate +generator. 
If you use Visual Studio, "NMake Makefiles" is a generator you can use +for building with NMake. By default, CMake chooses the most specific generator +supported by your development environment. If you want an alternative generator, +you must tell this to CMake with the ``-G`` option. + +.. todo:: + + Explain variables and cache. Move explanation here from #options section. + +.. _Options and variables: + +Options and variables +===================== + +Variables customize how the build will be generated. Options are boolean +variables, with possible values ON/OFF. Options and variables are defined on the +CMake command line like this: + +.. code-block:: console + + $ cmake -DVARIABLE=value path/to/llvm/source + +You can set a variable after the initial CMake invocation to change its +value. You can also undefine a variable: + +.. code-block:: console + + $ cmake -UVARIABLE path/to/llvm/source + +Variables are stored in the CMake cache. This is a file named ``CMakeCache.txt`` +stored at the root of your build directory that is generated by ``cmake``. +Editing it yourself is not recommended. + +Variables are listed in the CMake cache and later in this document with +the variable name and type separated by a colon. You can also specify the +variable and type on the CMake command line: + +.. code-block:: console + + $ cmake -DVARIABLE:TYPE=value path/to/llvm/source + +Frequently-used CMake variables +------------------------------- + +Here are some of the CMake variables that are used often, along with a +brief explanation. For full documentation, consult the CMake manual, +or execute ``cmake --help-variable VARIABLE_NAME``. See `Frequently +Used LLVM-related Variables`_ below for information about commonly +used variables that control features of LLVM and enabled subprojects. + +**CMAKE_BUILD_TYPE**:STRING + Sets the build type for ``make``-based generators. Possible values are + Release, Debug, RelWithDebInfo and MinSizeRel. 
If you are using an IDE such as + Visual Studio, you should use the IDE settings to set the build type. + Be aware that Release and RelWithDebInfo use different optimization levels on + most platforms, and that the default value of ``LLVM_ENABLE_ASSERTIONS`` + is affected. + +**CMAKE_INSTALL_PREFIX**:PATH + Path where LLVM will be installed when the "install" target is built. + +**CMAKE_{C,CXX}_FLAGS**:STRING + Extra flags to use when compiling C and C++ source files respectively. + +**CMAKE_{C,CXX}_COMPILER**:STRING + Specify the C and C++ compilers to use. If you have multiple + compilers installed, CMake might not default to the one you wish to + use. + +.. _Frequently Used LLVM-related variables: + +Frequently Used LLVM-related variables +-------------------------------------- + +The default configuration may not match your requirements. Here are +LLVM variables that are frequently used to control that. The full +description is in `LLVM-related variables`_ below. + +**LLVM_ENABLE_PROJECTS**:STRING + Control which projects are enabled. For example you may want to work on clang + or lldb by specifying ``-DLLVM_ENABLE_PROJECTS="clang;lldb"``. + +**LLVM_LIBDIR_SUFFIX**:STRING + Extra suffix to append to the directory where libraries are to be + installed. On a 64-bit architecture, one could use ``-DLLVM_LIBDIR_SUFFIX=64`` + to install libraries to ``/usr/lib64``. + +**LLVM_PARALLEL_{COMPILE,LINK}_JOBS**:STRING + Building the llvm toolchain can use a lot of resources, particularly + linking. These options, when you use the Ninja generator, allow you + to restrict the parallelism. For example, to avoid OOMs or going + into swap, permit only one link job per 15GB of RAM available on a + 32GB machine, specify ``-G Ninja -DLLVM_PARALLEL_LINK_JOBS=2``. + +**LLVM_TARGETS_TO_BUILD**:STRING + Control which targets are enabled.
For example you may only need to enable + your native target with, for example, ``-DLLVM_TARGETS_TO_BUILD=X86``. + +**LLVM_USE_LINKER**:STRING + Override the system's default linker. For instance use ``lld`` with + ``-DLLVM_USE_LINKER=lld``. + +Rarely-used CMake variables +--------------------------- + +Here are some of the CMake variables that are rarely used, along with a brief +explanation and LLVM-related notes. For full documentation, consult the CMake +manual, or execute ``cmake --help-variable VARIABLE_NAME``. + +**CMAKE_CXX_STANDARD**:STRING + Sets the C++ standard to conform to when building LLVM. Possible values are + 14, 17, 20. LLVM Requires C++ 14 or higher. This defaults to 14. + +.. _LLVM-related variables: + +LLVM-related variables +----------------------- + +These variables provide fine control over the build of LLVM and +enabled sub-projects. Nearly all of these variable names begin with +``LLVM_``. + +**BUILD_SHARED_LIBS**:BOOL + Flag indicating if each LLVM component (e.g. Support) is built as a shared + library (ON) or as a static library (OFF). Its default value is OFF. On + Windows, shared libraries may be used when building with MinGW, including + mingw-w64, but not when building with the Microsoft toolchain. + + .. note:: BUILD_SHARED_LIBS is only recommended for use by LLVM developers. + If you want to build LLVM as a shared library, you should use the + ``LLVM_BUILD_LLVM_DYLIB`` option. + +**LLVM_ABI_BREAKING_CHECKS**:STRING + Used to decide if LLVM should be built with ABI breaking checks or + not. Allowed values are `WITH_ASSERTS` (default), `FORCE_ON` and + `FORCE_OFF`. `WITH_ASSERTS` turns on ABI breaking checks in an + assertion enabled build. `FORCE_ON` (`FORCE_OFF`) turns them on + (off) irrespective of whether normal (`NDEBUG`-based) assertions are + enabled or not. A version of LLVM built with ABI breaking checks + is not ABI compatible with a version built without it. 
+ +**LLVM_APPEND_VC_REV**:BOOL + Embed version control revision info (Git revision id). + The version info is provided by the ``LLVM_REVISION`` macro in + ``llvm/include/llvm/Support/VCSRevision.h``. Developers using git who don't + need revision info can disable this option to avoid re-linking most binaries + after a branch switch. Defaults to ON. + +**LLVM_BUILD_32_BITS**:BOOL + Build 32-bit executables and libraries on 64-bit systems. This option is + available only on some 64-bit Unix systems. Defaults to OFF. + +**LLVM_BUILD_BENCHMARKS**:BOOL + Adds benchmarks to the list of default targets. Defaults to OFF. + +**LLVM_BUILD_DOCS**:BOOL + Adds all *enabled* documentation targets (i.e. Doxygen and Sphinx targets) as + dependencies of the default build targets. This results in all of the (enabled) + documentation targets being built as part of a normal build. If the ``install`` + target is run then this also enables all built documentation targets to be + installed. Defaults to OFF. To enable a particular documentation target, + see LLVM_ENABLE_SPHINX and LLVM_ENABLE_DOXYGEN. + +**LLVM_BUILD_EXAMPLES**:BOOL + Build LLVM examples. Defaults to OFF. Targets for building each example are + generated in any case. See documentation for *LLVM_BUILD_TOOLS* below for more + details. + +**LLVM_BUILD_INSTRUMENTED_COVERAGE**:BOOL + If enabled, `source-based code coverage + <https://clang.llvm.org/docs/SourceBasedCodeCoverage.html>`_ instrumentation + is enabled while building llvm. If CMake can locate the code coverage + scripts and the llvm-cov and llvm-profdata tools that pair to your compiler, + the build will also generate the `generate-coverage-report` target to generate + the code coverage report for LLVM, and the `clear-profile-data` utility target + to delete captured profile data. See documentation for + *LLVM_CODE_COVERAGE_TARGETS* and *LLVM_COVERAGE_SOURCE_DIRS* for more + information on configuring code coverage reports.
+ +**LLVM_CODE_COVERAGE_TARGETS**:STRING + If set to a semicolon separated list of targets, those targets will be used + to drive the code coverage reports. If unset, the target list will be + constructed using the LLVM build's CMake export list. + +**LLVM_COVERAGE_SOURCE_DIRS**:STRING + If set to a semicolon separated list of directories, the coverage reports + will limit code coverage summaries to just the listed directories. If unset, + coverage reports will include all sources identified by the tooling. + +**LLVM_BUILD_LLVM_DYLIB**:BOOL + If enabled, the target for building the libLLVM shared library is added. + This library contains all of LLVM's components in a single shared library. + Defaults to OFF. This cannot be used in conjunction with BUILD_SHARED_LIBS. + Tools will only be linked to the libLLVM shared library if LLVM_LINK_LLVM_DYLIB + is also ON. + The components in the library can be customised by setting LLVM_DYLIB_COMPONENTS + to a list of the desired components. + This option is not available on Windows. + +**LLVM_BUILD_TESTS**:BOOL + Include LLVM unit tests in the 'all' build target. Defaults to OFF. Targets + for building each unit test are generated in any case. You can build a + specific unit test using the targets defined under *unittests*, such as + ADTTests, IRTests, SupportTests, etc. (Search for ``add_llvm_unittest`` in + the subdirectories of *unittests* for a complete list of unit tests.) It is + possible to build all unit tests with the target *UnitTests*. + +**LLVM_BUILD_TOOLS**:BOOL + Build LLVM tools. Defaults to ON. Targets for building each tool are generated + in any case. You can build a tool separately by invoking its target. For + example, you can build *llvm-as* with a Makefile-based system by executing *make + llvm-as* at the root of your build directory. 
+ +**LLVM_CCACHE_BUILD**:BOOL + If enabled and the ``ccache`` program is available, then LLVM will be + built using ``ccache`` to speed up rebuilds of LLVM and its components. + Defaults to OFF. The size and location of the cache maintained + by ``ccache`` can be adjusted via the LLVM_CCACHE_MAXSIZE and LLVM_CCACHE_DIR + options, which are passed to the CCACHE_MAXSIZE and CCACHE_DIR environment + variables, respectively. + +**LLVM_CREATE_XCODE_TOOLCHAIN**:BOOL + macOS Only: If enabled CMake will generate a target named + 'install-xcode-toolchain'. This target will create a directory at + $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can + be used to override the default system tools. + +**LLVM_DOXYGEN_QCH_FILENAME**:STRING + The filename of the Qt Compressed Help file that will be generated when + ``-DLLVM_ENABLE_DOXYGEN=ON`` and + ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON`` are given. Defaults to + ``org.llvm.qch``. + This option is only useful in combination with + ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; + otherwise it has no effect. + +**LLVM_DOXYGEN_QHELPGENERATOR_PATH**:STRING + The path to the ``qhelpgenerator`` executable. Defaults to whatever CMake's + ``find_program()`` can find. This option is only useful in combination with + ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise it has no + effect. + +**LLVM_DOXYGEN_QHP_CUST_FILTER_NAME**:STRING + See `Qt Help Project`_ for + more information. Defaults to the CMake variable ``${PACKAGE_STRING}`` which + is a combination of the package name and version string. This filter can then + be used in Qt Creator to select only documentation from LLVM when browsing + through all the help files that you might have loaded. This option is only + useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; + otherwise it has no effect. + +.. 
_Qt Help Project: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters + +**LLVM_DOXYGEN_QHP_NAMESPACE**:STRING + Namespace under which the intermediate Qt Help Project file lives. See `Qt + Help Project`_ + for more information. Defaults to "org.llvm". This option is only useful in + combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise + it has no effect. + +**LLVM_DOXYGEN_SVG**:BOOL + Uses .svg files instead of .png files for graphs in the Doxygen output. + Defaults to OFF. + +**LLVM_ENABLE_ASSERTIONS**:BOOL + Enables code assertions. Defaults to ON if and only if ``CMAKE_BUILD_TYPE`` + is *Debug*. + +**LLVM_ENABLE_BINDINGS**:BOOL + If disabled, do not try to build the OCaml and go bindings. + +**LLVM_ENABLE_DIA_SDK**:BOOL + Enable building with MSVC DIA SDK for PDB debugging support. Available + only with MSVC. Defaults to ON. + +**LLVM_ENABLE_DOXYGEN**:BOOL + Enables the generation of browsable HTML documentation using doxygen. + Defaults to OFF. + +**LLVM_ENABLE_DOXYGEN_QT_HELP**:BOOL + Enables the generation of a Qt Compressed Help file. Defaults to OFF. + This affects the make target ``doxygen-llvm``. When enabled, apart from + the normal HTML output generated by doxygen, this will produce a QCH file + named ``org.llvm.qch``. You can then load this file into Qt Creator. + This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN=ON``; + otherwise this has no effect. + +**LLVM_ENABLE_EH**:BOOL + Build LLVM with exception-handling support. This is necessary if you wish to + link against LLVM libraries and make use of C++ exceptions in your own code + that need to propagate through LLVM code. Defaults to OFF. + +**LLVM_ENABLE_EXPENSIVE_CHECKS**:BOOL + Enable additional time/memory expensive checking. Defaults to OFF. + +**LLVM_ENABLE_FFI**:BOOL + Indicates whether the LLVM Interpreter will be linked with the Foreign Function + Interface library (libffi) in order to enable calling external functions. 
+ If the library or its headers are installed in a custom + location, you can also set the variables FFI_INCLUDE_DIR and + FFI_LIBRARY_DIR to the directories where ffi.h and libffi.so can be found, + respectively. Defaults to OFF. + +**LLVM_ENABLE_IDE**:BOOL + Tell the build system that an IDE is being used. This in turn disables the + creation of certain convenience build system targets, such as the various + ``install-*`` and ``check-*`` targets, since IDEs don't always deal well with + a large number of targets. This is usually autodetected, but it can be + configured manually to explicitly control the generation of those targets. One + scenario where a manual override may be desirable is when using Visual Studio + 2017's CMake integration, which would not be detected as an IDE otherwise. + +**LLVM_ENABLE_LIBCXX**:BOOL + If the host compiler and linker support the stdlib flag, -stdlib=libc++ is + passed to invocations of both so that the project is built using libc++ + instead of libstdc++. Defaults to OFF. + +**LLVM_ENABLE_LIBPFM**:BOOL + Enable building with libpfm to support hardware counter measurements in LLVM + tools. + Defaults to ON. + +**LLVM_ENABLE_LLD**:BOOL + This option is equivalent to `-DLLVM_USE_LINKER=lld`, except during a 2-stage + build where a dependency is added from the first stage to the second ensuring + that lld is built before stage2 begins. + +**LLVM_ENABLE_LTO**:STRING + Add ``-flto`` or ``-flto=`` flags to the compile and link command + lines, enabling link-time optimization. Possible values are ``Off``, + ``On``, ``Thin`` and ``Full``. Defaults to OFF. + +**LLVM_ENABLE_MODULES**:BOOL + Compile with `Clang Header Modules + <https://clang.llvm.org/docs/Modules.html>`_. + +**LLVM_ENABLE_PEDANTIC**:BOOL + Enable pedantic mode. This disables compiler-specific extensions, if + possible. Defaults to ON. + +**LLVM_ENABLE_PIC**:BOOL + Add the ``-fPIC`` flag to the compiler command-line, if the compiler supports + this flag. Some systems, like Windows, do not need this flag.
Defaults to ON. + +**LLVM_ENABLE_PROJECTS**:STRING + Semicolon-separated list of projects to build, or *all* for building all + (clang, libcxx, libcxxabi, lldb, compiler-rt, lld, polly, etc) projects. + This flag assumes that projects are checked out side-by-side and not nested, + i.e. clang needs to be in parallel of llvm instead of nested in `llvm/tools`. + This feature allows having one build for only LLVM and another for clang+llvm + using the same source checkout. + The full list is: + ``clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;libcxx;libcxxabi;libunwind;lld;lldb;openmp;parallel-libs;polly;pstl`` + +**LLVM_ENABLE_RUNTIMES**:STRING + Build libc++, libc++abi or other projects using a just-built compiler. + This is the correct way to build libc++ when putting together a toolchain. + It will build the builtins separately from the other runtimes to preserve + correct dependency ordering. + Note: the list should not have duplicates with `LLVM_ENABLE_PROJECTS`. + The full list is: + ``compiler-rt;libc;libcxx;libcxxabi;libunwind;openmp`` + To enable all of them, use: + ``LLVM_ENABLE_RUNTIMES=all`` + + +**LLVM_ENABLE_RTTI**:BOOL + Build LLVM with run-time type information. Defaults to OFF. + +**LLVM_ENABLE_SPHINX**:BOOL + If specified, CMake will search for the ``sphinx-build`` executable and will make + the ``SPHINX_OUTPUT_HTML`` and ``SPHINX_OUTPUT_MAN`` CMake options available. + Defaults to OFF. + +**LLVM_ENABLE_THREADS**:BOOL + Build with threads support, if available. Defaults to ON. + +**LLVM_ENABLE_UNWIND_TABLES**:BOOL + Enable unwind tables in the binary. Disabling unwind tables can reduce the + size of the libraries. Defaults to ON. + +**LLVM_ENABLE_WARNINGS**:BOOL + Enable all compiler warnings. Defaults to ON. + +**LLVM_ENABLE_WERROR**:BOOL + Stop and fail the build, if a compiler warning is triggered. Defaults to OFF.
+ +**LLVM_ENABLE_Z3_SOLVER**:BOOL + If enabled, the Z3 constraint solver is activated for the Clang static analyzer. + A recent version of the z3 library needs to be available on the system. + +**LLVM_ENABLE_ZLIB**:BOOL + Enable building with zlib to support compression/uncompression in LLVM tools. + Defaults to ON. + +**LLVM_EXPERIMENTAL_TARGETS_TO_BUILD**:STRING + Semicolon-separated list of experimental targets to build and link into + llvm. This will build the experimental target without needing to add it to the + list of all the targets available in the LLVM's main CMakeLists.txt. + +**LLVM_EXTERNAL_{CLANG,LLD,POLLY}_SOURCE_DIR**:PATH + These variables specify the path to the source directory for the external + LLVM projects Clang, lld, and Polly, respectively, relative to the top-level + source directory. If the in-tree subdirectory for an external project + exists (e.g., llvm/tools/clang for Clang), then the corresponding variable + will not be used. If the variable for an external project does not point + to a valid path, then that project will not be built. + +**LLVM_EXTERNAL_PROJECTS**:STRING + Semicolon-separated list of additional external projects to build as part of + llvm. For each project LLVM_EXTERNAL_<NAME>_SOURCE_DIR has to be specified + with the path for the source code of the project. Example: + ``-DLLVM_EXTERNAL_PROJECTS="Foo;Bar" + -DLLVM_EXTERNAL_FOO_SOURCE_DIR=/src/foo + -DLLVM_EXTERNAL_BAR_SOURCE_DIR=/src/bar``. + +**LLVM_EXTERNALIZE_DEBUGINFO**:BOOL + Generate dSYM files and strip executables and libraries (Darwin Only). + Defaults to OFF. + +**LLVM_FORCE_USE_OLD_TOOLCHAIN**:BOOL + If enabled, the compiler and standard library versions won't be checked. LLVM + may not compile at all, or might fail at runtime due to known bugs in these + toolchains. + +**LLVM_INCLUDE_BENCHMARKS**:BOOL + Generate build targets for the LLVM benchmarks. Defaults to ON. + +**LLVM_INCLUDE_EXAMPLES**:BOOL + Generate build targets for the LLVM examples.
Defaults to ON. You can use this + option to disable the generation of build targets for the LLVM examples. + +**LLVM_INCLUDE_TESTS**:BOOL + Generate build targets for the LLVM unit tests. Defaults to ON. You can use + this option to disable the generation of build targets for the LLVM unit + tests. + +**LLVM_INCLUDE_TOOLS**:BOOL + Generate build targets for the LLVM tools. Defaults to ON. You can use this + option to disable the generation of build targets for the LLVM tools. + +**LLVM_INSTALL_BINUTILS_SYMLINKS**:BOOL + Install symlinks from the binutils tool names to the corresponding LLVM tools. + For example, ar will be symlinked to llvm-ar. + +**LLVM_INSTALL_CCTOOLS_SYMLINKS**:BOOL + Install symlinks from the cctools tool names to the corresponding LLVM tools. + For example, lipo will be symlinked to llvm-lipo. + +**LLVM_INSTALL_OCAMLDOC_HTML_DIR**:STRING + The path to install OCamldoc-generated HTML documentation to. This path can + either be absolute or relative to the CMAKE_INSTALL_PREFIX. Defaults to + `share/doc/llvm/ocaml-html`. + +**LLVM_INSTALL_SPHINX_HTML_DIR**:STRING + The path to install Sphinx-generated HTML documentation to. This path can + either be absolute or relative to the CMAKE_INSTALL_PREFIX. Defaults to + `share/doc/llvm/html`. + +**LLVM_INSTALL_UTILS**:BOOL + If enabled, utility binaries like ``FileCheck`` and ``not`` will be installed + to CMAKE_INSTALL_PREFIX. + +**LLVM_INTEGRATED_CRT_ALLOC**:PATH + On Windows, allows embedding a different C runtime allocator into the LLVM + tools and libraries. Using a lock-free allocator such as the ones listed below + greatly decreases ThinLTO link time by about an order of magnitude. It also + mildly improves Clang build times, by about 5-10%. At the moment, rpmalloc, + snmalloc and mimalloc are supported. Use the path to `git clone` to select + the respective allocator, for example: + + .. code-block:: console + + $ D:\git> git clone https://github.com/mjansson/rpmalloc + $ D:\llvm-project> cmake ...
-DLLVM_INTEGRATED_CRT_ALLOC=D:\git\rpmalloc + + This flag needs to be used along with the static CRT, i.e. if building the + Release target, add -DLLVM_USE_CRT_RELEASE=MT. + +**LLVM_INSTALL_DOXYGEN_HTML_DIR**:STRING + The path to install Doxygen-generated HTML documentation to. This path can + either be absolute or relative to the CMAKE_INSTALL_PREFIX. Defaults to + `share/doc/llvm/doxygen-html`. + +**LLVM_LINK_LLVM_DYLIB**:BOOL + If enabled, tools will be linked with the libLLVM shared library. Defaults + to OFF. Setting LLVM_LINK_LLVM_DYLIB to ON also sets LLVM_BUILD_LLVM_DYLIB + to ON. + This option is not available on Windows. + +**LLVM_LIT_ARGS**:STRING + Arguments given to lit. ``make check`` and ``make clang-test`` are affected. + By default, ``'-sv --no-progress-bar'`` on Visual C++ and Xcode, ``'-sv'`` on + others. + +**LLVM_LIT_TOOLS_DIR**:PATH + The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to + the empty string, in which case lit will look for tools needed for tests + (e.g. ``grep``, ``sort``, etc.) in your %PATH%. If GnuWin32 is not in your + %PATH%, then you can set this variable to the GnuWin32 directory so that + lit can find tools needed for tests in that directory. + +**LLVM_OPTIMIZED_TABLEGEN**:BOOL + If enabled and building a debug or asserts build the CMake build system will + generate a Release build tree to build a fully optimized tablegen for use + during the build. Enabling this option can significantly speed up build times + especially when building LLVM in Debug configurations. + +**LLVM_PARALLEL_COMPILE_JOBS**:STRING + Define the maximum number of concurrent compilation jobs. + +**LLVM_PARALLEL_LINK_JOBS**:STRING + Define the maximum number of concurrent link jobs. + +**LLVM_PROFDATA_FILE**:PATH + Path to a profdata file to pass into clang's -fprofile-instr-use flag. This + can only be specified if you're building with clang.
+ +**LLVM_REVERSE_ITERATION**:BOOL + If enabled, all supported unordered llvm containers would be iterated in + reverse order. This is useful for uncovering non-determinism caused by + iteration of unordered containers. + +**LLVM_STATIC_LINK_CXX_STDLIB**:BOOL + Statically link to the C++ standard library if possible. This uses the flag + "-static-libstdc++", but a Clang host compiler will statically link to libc++ + if used in conjunction with the **LLVM_ENABLE_LIBCXX** flag. Defaults to OFF. + +**LLVM_TABLEGEN**:STRING + Full path to a native TableGen executable (usually named ``llvm-tblgen``). This is + intended for cross-compiling: if the user sets this variable, no native + TableGen will be created. + +**LLVM_TARGET_ARCH**:STRING + LLVM target to use for native code generation. This is required for JIT + generation. It defaults to "host", meaning that it shall pick the architecture + of the machine where LLVM is being built. If you are cross-compiling, set it + to the target architecture name. + +**LLVM_TARGETS_TO_BUILD**:STRING + Semicolon-separated list of targets to build, or *all* for building all + targets. Case-sensitive. Defaults to *all*. Example: + ``-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"``. + +**LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN**:BOOL + If enabled, the compiler version check will only warn when using a toolchain + which is about to be deprecated, instead of emitting an error. + +**LLVM_UBSAN_FLAGS**:STRING + Defines the set of compile flags used to enable UBSan. Only used if + ``LLVM_USE_SANITIZER`` contains ``Undefined``. This can be used to override + the default set of UBSan flags. + +**LLVM_USE_CRT_{target}**:STRING + On Windows, tells which version of the C runtime library (CRT) should be used. + For example, -DLLVM_USE_CRT_RELEASE=MT would statically link the CRT into the + LLVM tools and library. + +**LLVM_USE_INTEL_JITEVENTS**:BOOL + Enable building support for Intel JIT Events API. Defaults to OFF. 
+ +**LLVM_USE_LINKER**:STRING + Add ``-fuse-ld={name}`` to the link invocation. The possible values depend on + your compiler; for clang the value can be an absolute path to your custom + linker, otherwise clang will prefix the name with ``ld.`` and apply its usual + search. For example to link LLVM with the Gold linker, cmake can be invoked + with ``-DLLVM_USE_LINKER=gold``. + +**LLVM_USE_NEWPM**:BOOL + If enabled, use the experimental new pass manager. + +**LLVM_USE_OPROFILE**:BOOL + Enable building OProfile JIT support. Defaults to OFF. + +**LLVM_USE_PERF**:BOOL + Enable building support for Perf (linux profiling tool) JIT support. Defaults to OFF. + +**LLVM_USE_RELATIVE_PATHS_IN_FILES**:BOOL + Rewrite absolute source paths in sources and debug info to relative ones. The + source prefix can be adjusted via the LLVM_SOURCE_PREFIX variable. + +**LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO**:BOOL + Rewrite absolute source paths in debug info to relative ones. The source prefix + can be adjusted via the LLVM_SOURCE_PREFIX variable. + +**LLVM_USE_SANITIZER**:STRING + Define the sanitizer used to build LLVM binaries and tests. Possible values + are ``Address``, ``Memory``, ``MemoryWithOrigins``, ``Undefined``, ``Thread``, + ``DataFlow``, and ``Address;Undefined``. Defaults to empty string. + +**SPHINX_EXECUTABLE**:STRING + The path to the ``sphinx-build`` executable detected by CMake. + For installation instructions, see + https://www.sphinx-doc.org/en/master/usage/installation.html + +**SPHINX_OUTPUT_HTML**:BOOL + If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) then the targets for + building the documentation as html are added (but not built by default unless + ``LLVM_BUILD_DOCS`` is enabled). There is a target for each project in the + source tree that uses sphinx (e.g. ``docs-llvm-html``, ``docs-clang-html`` + and ``docs-lld-html``). Defaults to ON.
+ +**SPHINX_OUTPUT_MAN**:BOOL + If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) the targets for building + the man pages are added (but not built by default unless ``LLVM_BUILD_DOCS`` + is enabled). Currently the only target added is ``docs-llvm-man``. Defaults + to ON. + +**SPHINX_WARNINGS_AS_ERRORS**:BOOL + If enabled then sphinx documentation warnings will be treated as + errors. Defaults to ON. + +CMake Caches +============ + +Recently LLVM and Clang have been adding some more complicated build system +features. Utilizing these new features often involves a complicated chain of +CMake variables passed on the command line. Clang provides a collection of CMake +cache scripts to make these features more approachable. + +CMake cache files are utilized using CMake's -C flag: + +.. code-block:: console + + $ cmake -C <path to cache file> <path to sources> + +CMake cache scripts are processed in an isolated scope; only cached variables +remain set when the main configuration runs. CMake cached variables do not reset +variables that are already set unless the FORCE option is specified. + +A few notes about CMake Caches: + +- Order of command line arguments is important + + - -D arguments specified before -C are set before the cache is processed and + can be read inside the cache file + - -D arguments specified after -C are set after the cache is processed and + are unset inside the cache file + +- All -D arguments will override cache file settings +- CMAKE_TOOLCHAIN_FILE is evaluated after both the cache file and the command + line arguments +- It is recommended that all -D options should be specified *before* -C + +For more information about some of the advanced build configurations supported +via Cache files see :doc:`AdvancedBuilds`. + +Executing the Tests +=================== + +Testing is performed when the *check-all* target is built. For instance, if you are +using Makefiles, execute this command in the root of your build directory: + +.. 
code-block:: console + + $ make check-all + +On Visual Studio, you may run tests by building the project "check-all". +For more information about testing, see the :doc:`TestingGuide`. + +Cross compiling +=============== + +See `this wiki page <https://gitlab.kitware.com/cmake/community/wikis/doc/cmake/CrossCompiling>`_ for +generic instructions on how to cross-compile with CMake. It goes into detailed +explanations and may seem daunting, but it is not. On the wiki page there are +several examples including toolchain files. Go directly to the +``Information how to set up various cross compiling toolchains`` section +for a quick solution. + +Also see the `LLVM-related variables`_ section for variables used when +cross-compiling. + +Embedding LLVM in your project +============================== + +From LLVM 3.5 onwards the CMake build system exports LLVM libraries as +importable CMake targets. This means that clients of LLVM can now reliably use +CMake to develop their own LLVM-based projects against an installed version of +LLVM regardless of how it was built. + +Here is a simple example of a CMakeLists.txt file that imports the LLVM libraries +and uses them to build a simple application ``simple-tool``. + +.. code-block:: cmake + + cmake_minimum_required(VERSION 3.13.4) + project(SimpleProject) + + find_package(LLVM REQUIRED CONFIG) + + message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + + # Set your project compile flags. + # E.g. if using the C++ header files + # you will need to enable C++11 support + # for your compiler.
+ + include_directories(${LLVM_INCLUDE_DIRS}) + separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) + add_definitions(${LLVM_DEFINITIONS_LIST}) + + # Now build our tools + add_executable(simple-tool tool.cpp) + + # Find the libraries that correspond to the LLVM components + # that we wish to use + llvm_map_components_to_libnames(llvm_libs support core irreader) + + # Link against LLVM libraries + target_link_libraries(simple-tool ${llvm_libs}) + +The ``find_package(...)`` directive when used in CONFIG mode (as in the above +example) will look for the ``LLVMConfig.cmake`` file in various locations (see +cmake manual for details). It creates a ``LLVM_DIR`` cache entry to save the +directory where ``LLVMConfig.cmake`` is found or allows the user to specify the +directory (e.g. by passing ``-DLLVM_DIR=/usr/lib/cmake/llvm`` to +the ``cmake`` command or by setting it directly in ``ccmake`` or ``cmake-gui``). + +This file is available in two different locations. + +* ``<INSTALL_PREFIX>/lib/cmake/llvm/LLVMConfig.cmake`` where + ``<INSTALL_PREFIX>`` is the install prefix of an installed version of LLVM. + On Linux typically this is ``/usr/lib/cmake/llvm/LLVMConfig.cmake``. + +* ``<LLVM_BUILD_ROOT>/lib/cmake/llvm/LLVMConfig.cmake`` where + ``<LLVM_BUILD_ROOT>`` is the root of the LLVM build tree. **Note: this is only + available when building LLVM with CMake.** + +If LLVM is installed in your operating system's normal installation prefix (e.g. +on Linux this is usually ``/usr/``) ``find_package(LLVM ...)`` will +automatically find LLVM if it is installed correctly. If LLVM is not installed +or you wish to build directly against the LLVM build tree you can use +``LLVM_DIR`` as previously mentioned. + +The ``LLVMConfig.cmake`` file sets various useful variables. Notable variables +include + +``LLVM_CMAKE_DIR`` + The path to the LLVM CMake directory (i.e. the directory containing + LLVMConfig.cmake). + +``LLVM_DEFINITIONS`` + A list of preprocessor defines that should be used when building against LLVM.
+ +``LLVM_ENABLE_ASSERTIONS`` + This is set to ON if LLVM was built with assertions, otherwise OFF. + +``LLVM_ENABLE_EH`` + This is set to ON if LLVM was built with exception handling (EH) enabled, + otherwise OFF. + +``LLVM_ENABLE_RTTI`` + This is set to ON if LLVM was built with run time type information (RTTI), + otherwise OFF. + +``LLVM_INCLUDE_DIRS`` + A list of include paths to directories containing LLVM header files. + +``LLVM_PACKAGE_VERSION`` + The LLVM version. This string can be used with CMake conditionals, e.g., ``if + (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.5")``. + +``LLVM_TOOLS_BINARY_DIR`` + The path to the directory containing the LLVM tools (e.g. ``llvm-as``). + +Notice that in the above example we link ``simple-tool`` against several LLVM +libraries. The list of libraries is determined by using the +``llvm_map_components_to_libnames()`` CMake function. For a list of available +components look at the output of running ``llvm-config --components``. + +Note that for LLVM < 3.5 ``llvm_map_components_to_libraries()`` was +used instead of ``llvm_map_components_to_libnames()``. This is now deprecated +and will be removed in a future version of LLVM. + +.. _cmake-out-of-source-pass: + +Developing LLVM passes out of source +------------------------------------ + +It is possible to develop LLVM passes out of LLVM's source tree (i.e. against an +installed or built LLVM). An example of a project layout is provided below. + +.. code-block:: none + + <project dir>/ + | + CMakeLists.txt + <pass name>/ + | + CMakeLists.txt + Pass.cpp + ... + +Contents of ``<project dir>/CMakeLists.txt``: + +.. code-block:: cmake + + find_package(LLVM REQUIRED CONFIG) + + separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) + add_definitions(${LLVM_DEFINITIONS_LIST}) + include_directories(${LLVM_INCLUDE_DIRS}) + + add_subdirectory(<pass name>) + +Contents of ``<project dir>/<pass name>/CMakeLists.txt``: + +.. 
code-block:: cmake + + add_library(LLVMPassname MODULE Pass.cpp) + +Note if you intend for this pass to be merged into the LLVM source tree at some +point in the future it might make more sense to use LLVM's internal +``add_llvm_library`` function with the MODULE argument instead by... + + +Adding the following to ``<project dir>/CMakeLists.txt`` (after +``find_package(LLVM ...)``) + +.. code-block:: cmake + + list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") + include(AddLLVM) + +And then changing ``<project dir>/<pass name>/CMakeLists.txt`` to + +.. code-block:: cmake + + add_llvm_library(LLVMPassname MODULE + Pass.cpp + ) + +When you are done developing your pass, you may wish to integrate it +into the LLVM source tree. You can achieve it in two easy steps: + +#. Copying ``<pass name>`` folder into ``<LLVM root>/lib/Transform`` directory. + +#. Adding ``add_subdirectory(<pass name>)`` line into + ``<LLVM root>/lib/Transform/CMakeLists.txt``. + +Compiler/Platform-specific topics +================================= + +Notes for specific compilers and/or platforms. + +Microsoft Visual C++ +-------------------- + +**LLVM_COMPILER_JOBS**:STRING + Specifies the maximum number of parallel compiler jobs to use per project + when building with msbuild or Visual Studio. Only supported for the Visual + Studio 2010 CMake generator. 0 means use all processors. Default is 0. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodeGenerator.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodeGenerator.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodeGenerator.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodeGenerator.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,2704 @@ +========================================== +The LLVM Target-Independent Code Generator +========================================== + +.. role:: raw-html(raw) + :format: html + +.. raw:: html + + + +.. contents:: + :local: + +.. 
warning:: + This is a work in progress. + +Introduction +============ + +The LLVM target-independent code generator is a framework that provides a suite +of reusable components for translating the LLVM internal representation to the +machine code for a specified target---either in assembly form (suitable for a +static compiler) or in binary machine code format (usable for a JIT +compiler). The LLVM target-independent code generator consists of six main +components: + +1. `Abstract target description`_ interfaces which capture important properties + about various aspects of the machine, independently of how they will be used. + These interfaces are defined in ``include/llvm/Target/``. + +2. Classes used to represent the `code being generated`_ for a target. These + classes are intended to be abstract enough to represent the machine code for + *any* target machine. These classes are defined in + ``include/llvm/CodeGen/``. At this level, concepts like "constant pool + entries" and "jump tables" are explicitly exposed. + +3. Classes and algorithms used to represent code at the object file level, the + `MC Layer`_. These classes represent assembly level constructs like labels, + sections, and instructions. At this level, concepts like "constant pool + entries" and "jump tables" don't exist. + +4. `Target-independent algorithms`_ used to implement various phases of native + code generation (register allocation, scheduling, stack frame representation, + etc). This code lives in ``lib/CodeGen/``. + +5. `Implementations of the abstract target description interfaces`_ for + particular targets. These machine descriptions make use of the components + provided by LLVM, and can optionally provide custom target-specific passes, + to build complete code generators for a specific target. Target descriptions + live in ``lib/Target/``. + +6. The target-independent JIT components. 
The LLVM JIT is completely target + independent (it uses the ``TargetJITInfo`` structure to interface for + target-specific issues). The code for the target-independent JIT lives in + ``lib/ExecutionEngine/JIT``. + +Depending on which part of the code generator you are interested in working on, +different pieces of this will be useful to you. In any case, you should be +familiar with the `target description`_ and `machine code representation`_ +classes. If you want to add a backend for a new target, you will need to +`implement the target description`_ classes for your new target and understand +the :doc:`LLVM code representation <LangRef>`. If you are interested in +implementing a new `code generation algorithm`_, it should only depend on the +target-description and machine code representation classes, ensuring that it is +portable. + +Required components in the code generator +----------------------------------------- + +The two pieces of the LLVM code generator are the high-level interface to the +code generator and the set of reusable components that can be used to build +target-specific backends. The two most important interfaces (:raw-html:`<tt>` +`TargetMachine`_ :raw-html:`</tt>` and :raw-html:`<tt>` `DataLayout`_ +:raw-html:`</tt>`) are the only ones that are required to be defined for a +backend to fit into the LLVM system, but the others must be defined if the +reusable code generator components are going to be used. + +This design has two important implications. The first is that LLVM can support +completely non-traditional code generation targets. For example, the C backend +does not require register allocation, instruction selection, or any of the other +standard components provided by the system. As such, it only implements these +two interfaces, and does its own thing. Note that the C backend was removed from +trunk as of the LLVM 3.1 release.
Another example of a code generator like this is a +(purely hypothetical) backend that converts LLVM to the GCC RTL form and uses +GCC to emit machine code for a target. + +This design also implies that it is possible to design and implement radically +different code generators in the LLVM system that do not make use of any of the +built-in components. Doing so is not recommended at all, but could be required +for radically different targets that do not fit into the LLVM machine +description model: FPGAs for example. + +.. _high-level design of the code generator: + +The high-level design of the code generator +------------------------------------------- + +The LLVM target-independent code generator is designed to support efficient and +quality code generation for standard register-based microprocessors. Code +generation in this model is divided into the following stages: + +1. `Instruction Selection`_ --- This phase determines an efficient way to + express the input LLVM code in the target instruction set. This stage + produces the initial code for the program in the target instruction set, then + makes use of virtual registers in SSA form and physical registers that + represent any required register assignments due to target constraints or + calling conventions. This step turns the LLVM code into a DAG of target + instructions. + +2. `Scheduling and Formation`_ --- This phase takes the DAG of target + instructions produced by the instruction selection phase, determines an + ordering of the instructions, then emits the instructions as :raw-html:`<tt>` + `MachineInstr`_\s :raw-html:`</tt>` with that ordering. Note that we + describe this in the `instruction selection section`_ because it operates on + a `SelectionDAG`_. + +3. `SSA-based Machine Code Optimizations`_ --- This optional stage consists of a + series of machine-code optimizations that operate on the SSA-form produced by + the instruction selector.
Optimizations like modulo-scheduling or peephole + optimization work here. + +4. `Register Allocation`_ --- The target code is transformed from an infinite + virtual register file in SSA form to the concrete register file used by the + target. This phase introduces spill code and eliminates all virtual register + references from the program. + +5. `Prolog/Epilog Code Insertion`_ --- Once the machine code has been generated + for the function and the amount of stack space required is known (used for + LLVM alloca's and spill slots), the prolog and epilog code for the function + can be inserted and "abstract stack location references" can be eliminated. + This stage is responsible for implementing optimizations like frame-pointer + elimination and stack packing. + +6. `Late Machine Code Optimizations`_ --- Optimizations that operate on "final" + machine code can go here, such as spill code scheduling and peephole + optimizations. + +7. `Code Emission`_ --- The final stage actually puts out the code for the + current function, either in the target assembler format or in machine + code. + +The code generator is based on the assumption that the instruction selector will +use an optimal pattern matching selector to create high-quality sequences of +native instructions. Alternative code generator designs based on pattern +expansion and aggressive iterative peephole optimization are much slower. This +design permits efficient compilation (important for JIT environments) and +aggressive optimization (used when generating code offline) by allowing +components of varying levels of sophistication to be used for any step of +compilation. + +In addition to these stages, target implementations can insert arbitrary +target-specific passes into the flow. For example, the X86 target uses a +special pass to handle the 80x87 floating point stack architecture. Other +targets with unusual requirements can be supported with custom passes as needed. 
+ +Using TableGen for target description +------------------------------------- + +The target description classes require a detailed description of the target +architecture. These target descriptions often have a large amount of common +information (e.g., an ``add`` instruction is almost identical to a ``sub`` +instruction). In order to allow the maximum amount of commonality to be +factored out, the LLVM code generator uses the +:doc:`TableGen/index` tool to describe big chunks of the +target machine, which allows the use of domain-specific and target-specific +abstractions to reduce the amount of repetition. + +As LLVM continues to be developed and refined, we plan to move more and more of +the target description to the ``.td`` form. Doing so gives us a number of +advantages. The most important is that it makes it easier to port LLVM because +it reduces the amount of C++ code that has to be written, and the surface area +of the code generator that needs to be understood before someone can get +something working. Second, it makes it easier to change things. In particular, +if tables and other things are all emitted by ``tblgen``, we only need a change +in one place (``tblgen``) to update all of the targets to a new interface. + +.. _Abstract target description: +.. _target description: + +Target description classes +========================== + +The LLVM target description classes (located in the ``include/llvm/Target`` +directory) provide an abstract description of the target machine independent of +any particular client. These classes are designed to capture the *abstract* +properties of the target (such as the instructions and registers it has), and do +not incorporate any particular pieces of code generation algorithms. + +All of the target description classes (except the :raw-html:`` `DataLayout`_ +:raw-html:`` class) are designed to be subclassed by the concrete target +implementation, and have virtual methods implemented. 
To get to these +implementations, the :raw-html:`<tt>` `TargetMachine`_ :raw-html:`</tt>` class +provides accessors that should be implemented by the target. + +.. _TargetMachine: + +The ``TargetMachine`` class +--------------------------- + +The ``TargetMachine`` class provides virtual methods that are used to access the +target-specific implementations of the various target description classes via +the ``get*Info`` methods (``getInstrInfo``, ``getRegisterInfo``, +``getFrameInfo``, etc.). This class is designed to be specialized by a concrete +target implementation (e.g., ``X86TargetMachine``) which implements the various +virtual methods. The only required target description class is the +:raw-html:`<tt>` `DataLayout`_ :raw-html:`</tt>` class, but if the code +generator components are to be used, the other interfaces should be implemented +as well. + +.. _DataLayout: + +The ``DataLayout`` class +------------------------ + +The ``DataLayout`` class is the only required target description class, and it +is the only class that is not extensible (you cannot derive a new class from +it). ``DataLayout`` specifies information about how the target lays out memory +for structures, the alignment requirements for various data types, the size of +pointers in the target, and whether the target is little-endian or +big-endian. + +.. _TargetLowering: + +The ``TargetLowering`` class +---------------------------- + +The ``TargetLowering`` class is used by SelectionDAG based instruction selectors +primarily to describe how LLVM code should be lowered to SelectionDAG +operations. Among other things, this class indicates: + +* an initial register class to use for various ``ValueType``\s, + +* which operations are natively supported by the target machine, + +* the return type of ``setcc`` operations, + +* the type to use for shift amounts, and + +* various high-level characteristics, like whether it is profitable to turn + division by a constant into a multiplication sequence. + +.. 
_TargetRegisterInfo: + +The ``TargetRegisterInfo`` class +-------------------------------- + +The ``TargetRegisterInfo`` class is used to describe the register file of the +target and any interactions between the registers. + +Registers are represented in the code generator by unsigned integers. Physical +registers (those that actually exist in the target description) are unique +small numbers, and virtual registers are generally large. Note that +register ``#0`` is reserved as a flag value. + +Each register in the processor description has an associated +``TargetRegisterDesc`` entry, which provides a textual name for the register +(used for assembly output and debugging dumps) and a set of aliases (used to +indicate whether one register overlaps with another). + +In addition to the per-register description, the ``TargetRegisterInfo`` class +exposes a set of processor specific register classes (instances of the +``TargetRegisterClass`` class). Each register class contains sets of registers +that have the same properties (for example, they are all 32-bit integer +registers). Each SSA virtual register created by the instruction selector has +an associated register class. When the register allocator runs, it replaces +virtual registers with a physical register in the set. + +The target-specific implementations of these classes are auto-generated from a +:doc:`TableGen/index` description of the register file. + +.. _TargetInstrInfo: + +The ``TargetInstrInfo`` class +----------------------------- + +The ``TargetInstrInfo`` class is used to describe the machine instructions +supported by the target. Descriptions define things like the mnemonic for +the opcode, the number of operands, the list of implicit register uses and defs, +whether the instruction has certain target-independent properties (accesses +memory, is commutable, etc), and hold any target-specific flags. 
+ +The ``TargetFrameLowering`` class +--------------------------------- + +The ``TargetFrameLowering`` class is used to provide information about the stack +frame layout of the target. It holds the direction of stack growth, the known +stack alignment on entry to each function, and the offset to the local area. +The offset to the local area is the offset from the stack pointer on function +entry to the first location where function data (local variables, spill +locations) can be stored. + +The ``TargetSubtarget`` class +----------------------------- + +The ``TargetSubtarget`` class is used to provide information about the specific +chip set being targeted. A sub-target informs code generation of which +instructions are supported, instruction latencies and instruction execution +itinerary; i.e., which processing units are used, in what order, and for how +long. + +The ``TargetJITInfo`` class +--------------------------- + +The ``TargetJITInfo`` class exposes an abstract interface used by the +Just-In-Time code generator to perform target-specific activities, such as +emitting stubs. If a ``TargetMachine`` supports JIT code generation, it should +provide one of these objects through the ``getJITInfo`` method. + +.. _code being generated: +.. _machine code representation: + +Machine code description classes +================================ + +At the high-level, LLVM code is translated to a machine specific representation +formed out of :raw-html:`` `MachineFunction`_ :raw-html:``, +:raw-html:`` `MachineBasicBlock`_ :raw-html:``, and :raw-html:`` +`MachineInstr`_ :raw-html:`` instances (defined in +``include/llvm/CodeGen``). This representation is completely target agnostic, +representing instructions in their most abstract form: an opcode and a series of +operands. This representation is designed to support both an SSA representation +for machine code, as well as a register allocated, non-SSA form. + +.. 
_MachineInstr: + +The ``MachineInstr`` class +-------------------------- + +Target machine instructions are represented as instances of the ``MachineInstr`` +class. This class is an extremely abstract way of representing machine +instructions. In particular, it only keeps track of an opcode number and a set +of operands. + +The opcode number is a simple unsigned integer that only has meaning to a +specific backend. All of the instructions for a target should be defined in the +``*InstrInfo.td`` file for the target. The opcode enum values are auto-generated +from this description. The ``MachineInstr`` class does not have any information +about how to interpret the instruction (i.e., what the semantics of the +instruction are); for that you must refer to the :raw-html:`<tt>` +`TargetInstrInfo`_ :raw-html:`</tt>` class. + +The operands of a machine instruction can be of several different types: a +register reference, a constant integer, a basic block reference, etc. In +addition, a machine operand should be marked as a def or a use of the value +(though only registers are allowed to be defs). + +By convention, the LLVM code generator orders instruction operands so that all +register definitions come before the register uses, even on architectures that +are normally printed in other orders. For example, the SPARC add instruction: +"``add %i1, %i2, %i3``" adds the "%i1" and "%i2" registers and stores the +result into the "%i3" register. In the LLVM code generator, the operands should +be stored as "``%i3, %i1, %i2``": with the destination first. + +Keeping destination (definition) operands at the beginning of the operand list +has several advantages. In particular, the debugging printer will print the +instruction like this: + +.. code-block:: llvm + + %r3 = add %i1, %i2 + +Also if the first operand is a def, it is easier to `create instructions`_ whose +only def is the first operand. + +.. 
_create instructions: + +Using the ``MachineInstrBuilder.h`` functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Machine instructions are created by using the ``BuildMI`` functions, located in +the ``include/llvm/CodeGen/MachineInstrBuilder.h`` file. The ``BuildMI`` +functions make it easy to build arbitrary machine instructions. Usage of the +``BuildMI`` functions looks like this: + +.. code-block:: c++ + + // Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42') + // instruction and insert it at the end of the given MachineBasicBlock. + const TargetInstrInfo &TII = ... + MachineBasicBlock &MBB = ... + DebugLoc DL; + MachineInstr *MI = BuildMI(MBB, DL, TII.get(X86::MOV32ri), DestReg).addImm(42); + + // Create the same instr, but insert it before a specified iterator point. + MachineBasicBlock::iterator MBBI = ... + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), DestReg).addImm(42); + + // Create a 'cmp Reg, 42' instruction, no destination reg. + MI = BuildMI(MBB, DL, TII.get(X86::CMP32ri8)).addReg(Reg).addImm(42); + + // Create an 'sahf' instruction which takes no operands and stores nothing. + MI = BuildMI(MBB, DL, TII.get(X86::SAHF)); + + // Create a self looping branch instruction. + BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(&MBB); + +If you need to add a definition operand (other than the optional destination +register), you must explicitly mark it as such: + +.. code-block:: c++ + + MI.addReg(Reg, RegState::Define); + +Fixed (preassigned) registers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +One important issue that the code generator needs to be aware of is the presence +of fixed registers. In particular, there are often places in the instruction +stream where the register allocator *must* arrange for a particular value to be +in a particular register. This can occur due to limitations of the instruction +set (e.g., the X86 can only do a 32-bit divide with the ``EAX``/``EDX`` +registers), or external factors like calling conventions. 
In any case, the +instruction selector should emit code that copies a virtual register into or out +of a physical register when needed. + +For example, consider this simple LLVM example: + +.. code-block:: llvm + + define i32 @test(i32 %X, i32 %Y) { + %Z = sdiv i32 %X, %Y + ret i32 %Z + } + +The X86 instruction selector might produce this machine code for the ``div`` and +``ret``: + +.. code-block:: text + + ;; Start of div + %EAX = mov %reg1024 ;; Copy X (in reg1024) into EAX + %reg1027 = sar %reg1024, 31 + %EDX = mov %reg1027 ;; Sign extend X into EDX + idiv %reg1025 ;; Divide by Y (in reg1025) + %reg1026 = mov %EAX ;; Read the result (Z) out of EAX + + ;; Start of ret + %EAX = mov %reg1026 ;; 32-bit return value goes in EAX + ret + +By the end of code generation, the register allocator would coalesce the +registers and delete the resultant identity moves, producing the following +code: + +.. code-block:: text + + ;; X is in EAX, Y is in ECX + mov %EAX, %EDX + sar %EDX, 31 + idiv %ECX + ret + +This approach is extremely general (if it can handle the X86 architecture, it +can handle anything!) and allows all of the target specific knowledge about the +instruction stream to be isolated in the instruction selector. Note that +physical registers should have a short lifetime for good code generation, and +all physical registers are assumed dead on entry to and exit from basic blocks +(before register allocation). Thus, if you need a value to be live across basic +block boundaries, it *must* live in a virtual register. + +Call-clobbered registers +^^^^^^^^^^^^^^^^^^^^^^^^ + +Some machine instructions, like calls, clobber a large number of physical +registers. Rather than adding ``<def,dead>`` operands for all of them, it is +possible to use an ``MO_RegisterMask`` operand instead. The register mask +operand holds a bit mask of preserved registers, and everything else is +considered to be clobbered by the instruction. 
+ +Machine code in SSA form +^^^^^^^^^^^^^^^^^^^^^^^^ + +``MachineInstr``'s are initially selected in SSA-form, and are maintained in +SSA-form until register allocation happens. For the most part, this is +trivially simple since LLVM is already in SSA form; LLVM PHI nodes become +machine code PHI nodes, and virtual registers are only allowed to have a single +definition. + +After register allocation, machine code is no longer in SSA-form because there +are no virtual registers left in the code. + +.. _MachineBasicBlock: + +The ``MachineBasicBlock`` class +------------------------------- + +The ``MachineBasicBlock`` class contains a list of machine instructions +(:raw-html:`<tt>` `MachineInstr`_ :raw-html:`</tt>` instances). It roughly +corresponds to the LLVM code input to the instruction selector, but there can be +a one-to-many mapping (i.e. one LLVM basic block can map to multiple machine +basic blocks). The ``MachineBasicBlock`` class has a "``getBasicBlock``" method, +which returns the LLVM basic block that it comes from. + +.. _MachineFunction: + +The ``MachineFunction`` class +----------------------------- + +The ``MachineFunction`` class contains a list of machine basic blocks +(:raw-html:`<tt>` `MachineBasicBlock`_ :raw-html:`</tt>` instances). It +corresponds one-to-one with the LLVM function input to the instruction selector. +In addition to a list of basic blocks, the ``MachineFunction`` contains a +``MachineConstantPool``, a ``MachineFrameInfo``, a ``MachineFunctionInfo``, and +a ``MachineRegisterInfo``. See ``include/llvm/CodeGen/MachineFunction.h`` for +more information. + +``MachineInstr Bundles`` +------------------------ + +The LLVM code generator can model sequences of instructions as MachineInstr +bundles. An MI bundle can model a VLIW group / pack which contains an arbitrary +number of parallel instructions. It can also be used to model a sequential list +of instructions (potentially with data dependencies) that cannot be legally +separated (e.g. 
ARM Thumb2 IT blocks). + +Conceptually a MI bundle is a MI with a number of other MIs nested within: + +:: + + -------------- + | Bundle | --------- + -------------- \ + | ---------------- + | | MI | + | ---------------- + | | + | ---------------- + | | MI | + | ---------------- + | | + | ---------------- + | | MI | + | ---------------- + | + -------------- + | Bundle | -------- + -------------- \ + | ---------------- + | | MI | + | ---------------- + | | + | ---------------- + | | MI | + | ---------------- + | | + | ... + | + -------------- + | Bundle | -------- + -------------- \ + | + ... + +MI bundle support does not change the physical representations of +MachineBasicBlock and MachineInstr. All the MIs (including top level and nested +ones) are stored as sequential list of MIs. The "bundled" MIs are marked with +the 'InsideBundle' flag. A top level MI with the special BUNDLE opcode is used +to represent the start of a bundle. It's legal to mix BUNDLE MIs with individual +MIs that are not inside bundles nor represent bundles. + +MachineInstr passes should operate on a MI bundle as a single unit. Member +methods have been taught to correctly handle bundles and MIs inside bundles. +The MachineBasicBlock iterator has been modified to skip over bundled MIs to +enforce the bundle-as-a-single-unit concept. An alternative iterator +instr_iterator has been added to MachineBasicBlock to allow passes to iterate +over all of the MIs in a MachineBasicBlock, including those which are nested +inside bundles. The top level BUNDLE instruction must have the correct set of +register MachineOperand's that represent the cumulative inputs and outputs of +the bundled MIs. + +Packing / bundling of MachineInstrs for VLIW architectures should +generally be done as part of the register allocation super-pass. More +specifically, the pass which determines what MIs should be bundled +together should be done after code generator exits SSA form +(i.e. 
after two-address pass, PHI elimination, and copy coalescing). +Such bundles should be finalized (i.e. adding BUNDLE MIs and input and +output register MachineOperands) after virtual registers have been +rewritten into physical registers. This eliminates the need to add +virtual register operands to BUNDLE instructions which would +effectively double the virtual register def and use lists. Bundles may +use virtual registers and be formed in SSA form, but may not be +appropriate for all use cases. + +.. _MC Layer: + +The "MC" Layer +============== + +The MC Layer is used to represent and process code at the raw machine code +level, devoid of "high level" information like "constant pools", "jump tables", +"global variables" or anything like that. At this level, LLVM handles things +like label names, machine instructions, and sections in the object file. The +code in this layer is used for a number of important purposes: the tail end of +the code generator uses it to write a .s or .o file, and it is also used by the +llvm-mc tool to implement standalone machine code assemblers and disassemblers. + +This section describes some of the important classes. There are also a number +of important subsystems that interact at this layer; they are described later in +this manual. + +.. _MCStreamer: + +The ``MCStreamer`` API +---------------------- + +MCStreamer is best thought of as an assembler API. It is an abstract API which +is *implemented* in different ways (e.g. to output a .s file, output an ELF .o +file, etc) but whose API corresponds directly to what you see in a .s file. +MCStreamer has one method per directive, such as EmitLabel, EmitSymbolAttribute, +SwitchSection, EmitValue (for .byte, .word), etc, which directly correspond to +assembly level directives. It also has an EmitInstruction method, which is used +to output an MCInst to the streamer. 
+ +This API is most important for two clients: the llvm-mc stand-alone assembler is +effectively a parser that parses a line, then invokes a method on MCStreamer. In +the code generator, the `Code Emission`_ phase of the code generator lowers +higher level LLVM IR and Machine* constructs down to the MC layer, emitting +directives through MCStreamer. + +On the implementation side of MCStreamer, there are two major implementations: +one for writing out a .s file (MCAsmStreamer), and one for writing out a .o +file (MCObjectStreamer). MCAsmStreamer is a straightforward implementation +that prints out a directive for each method (e.g. ``EmitValue -> .byte``), but +MCObjectStreamer implements a full assembler. + +For target specific directives, the MCStreamer has a MCTargetStreamer instance. +Each target that needs it defines a class that inherits from it and is a lot +like MCStreamer itself: It has one method per directive and two classes that +inherit from it, a target object streamer and a target asm streamer. The target +asm streamer just prints it (``emitFnStart -> .fnstart``), and the object +streamer implements the assembler logic for it. + +To make llvm use these classes, the target initialization must call +TargetRegistry::RegisterAsmStreamer and TargetRegistry::RegisterMCObjectStreamer +passing callbacks that allocate the corresponding target streamer and pass it +to createAsmStreamer or to the appropriate object streamer constructor. + +The ``MCContext`` class +----------------------- + +The MCContext class is the owner of a variety of uniqued data structures at the +MC layer, including symbols, sections, etc. As such, this is the class that you +interact with to create symbols and sections. This class cannot be subclassed. + +The ``MCSymbol`` class +---------------------- + +The MCSymbol class represents a symbol (aka label) in the assembly file. There +are two interesting kinds of symbols: assembler temporary symbols, and normal +symbols. 
Assembler temporary symbols are used and processed by the assembler +but are discarded when the object file is produced. The distinction is usually +represented by adding a prefix to the label, for example "L" labels are +assembler temporary labels in MachO. + +MCSymbols are created by MCContext and uniqued there. This means that MCSymbols +can be compared for pointer equivalence to find out if they are the same symbol. +Note that pointer inequality does not guarantee the labels will end up at +different addresses though. It's perfectly legal to output something like this +to the .s file: + +:: + + foo: + bar: + .byte 4 + +In this case, both the foo and bar symbols will have the same address. + +The ``MCSection`` class +----------------------- + +The ``MCSection`` class represents an object-file specific section. It is +subclassed by object file specific implementations (e.g. ``MCSectionMachO``, +``MCSectionCOFF``, ``MCSectionELF``) and these are created and uniqued by +MCContext. The MCStreamer has a notion of the current section, which can be +changed with the SwitchToSection method (which corresponds to a ".section" +directive in a .s file). + +.. _MCInst: + +The ``MCInst`` class +-------------------- + +The ``MCInst`` class is a target-independent representation of an instruction. +It is a simple class (much more so than `MachineInstr`_) that holds a +target-specific opcode and a vector of MCOperands. MCOperand, in turn, is a +simple discriminated union of three cases: 1) a simple immediate, 2) a target +register ID, 3) a symbolic expression (e.g. "``Lfoo-Lbar+42``") as an MCExpr. + +MCInst is the common currency used to represent machine instructions at the MC +layer. It is the type used by the instruction encoder, the instruction printer, +and the type generated by the assembly parser and disassembler. + +.. _Target-independent algorithms: +.. 
_code generation algorithm: + +Target-independent code generation algorithms +============================================= + +This section documents the phases described in the `high-level design of the +code generator`_. It explains how they work and some of the rationale behind +their design. + +.. _Instruction Selection: +.. _instruction selection section: + +Instruction Selection +--------------------- + +Instruction Selection is the process of translating LLVM code presented to the +code generator into target-specific machine instructions. There are several +well-known ways to do this in the literature. LLVM uses a SelectionDAG based +instruction selector. + +Portions of the DAG instruction selector are generated from the target +description (``*.td``) files. Our goal is for the entire instruction selector +to be generated from these ``.td`` files, though currently there are still +things that require custom C++ code. + +`GlobalISel <https://llvm.org/docs/GlobalISel/index.html>`_ is another +instruction selection framework. + +.. _SelectionDAG: + +Introduction to SelectionDAGs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The SelectionDAG provides an abstraction for code representation in a way that +is amenable to instruction selection using automatic techniques +(e.g. dynamic-programming based optimal pattern matching selectors). It is also +well-suited to other phases of code generation; in particular, instruction +scheduling (SelectionDAG's are very close to scheduling DAGs post-selection). +Additionally, the SelectionDAG provides a host representation where a large +variety of very-low-level (but target-independent) `optimizations`_ may be +performed; ones which require extensive information about the instructions +efficiently supported by the target. + +The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the +``SDNode`` class. The primary payload of the ``SDNode`` is its operation code +(Opcode) that indicates what operation the node performs and the operands to the +operation. 
The various operation node types are described at the top of the +``include/llvm/CodeGen/ISDOpcodes.h`` file. + +Although most operations define a single value, each node in the graph may +define multiple values. For example, a combined div/rem operation will define +both the quotient and the remainder. Many other situations require multiple +values as well. Each node also has some number of operands, which are edges to +the node defining the used value. Because nodes may define multiple values, +edges are represented by instances of the ``SDValue`` class, which is a +``<SDNode, unsigned>`` pair, indicating the node and result value being used, +respectively. Each value produced by an ``SDNode`` has an associated ``MVT`` +(Machine Value Type) indicating what the type of the value is. + +SelectionDAGs contain two different kinds of values: those that represent data +flow and those that represent control flow dependencies. Data values are simple +edges with an integer or floating point value type. Control edges are +represented as "chain" edges which are of type ``MVT::Other``. These edges +provide an ordering between nodes that have side effects (such as loads, stores, +calls, returns, etc). All nodes that have side effects should take a token +chain as input and produce a new one as output. By convention, token chain +inputs are always operand #0, and chain results are always the last value +produced by an operation. However, after instruction selection, the +machine nodes have their chain after the instruction's operands, and +may be followed by glue nodes. + +A SelectionDAG has designated "Entry" and "Root" nodes. The Entry node is +always a marker node with an Opcode of ``ISD::EntryToken``. The Root node is +the final side-effecting node in the token chain. For example, in a single basic +block function it would be the return node. + +One important concept for SelectionDAGs is the notion of a "legal" vs. +"illegal" DAG. 
A legal DAG for a target is one that only uses supported +operations and supported types. On a 32-bit PowerPC, for example, a DAG with a +value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a +SREM or UREM operation. The `legalize types`_ and `legalize operations`_ phases +are responsible for turning an illegal DAG into a legal DAG. + +.. _SelectionDAG-Process: + +SelectionDAG Instruction Selection Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +SelectionDAG-based instruction selection consists of the following steps: + +#. `Build initial DAG`_ --- This stage performs a simple translation from the + input LLVM code to an illegal SelectionDAG. + +#. `Optimize SelectionDAG`_ --- This stage performs simple optimizations on the + SelectionDAG to simplify it, and recognize meta instructions (like rotates + and ``div``/``rem`` pairs) for targets that support these meta operations. + This makes the resultant code more efficient and the `select instructions + from DAG`_ phase (below) simpler. + +#. `Legalize SelectionDAG Types`_ --- This stage transforms SelectionDAG nodes + to eliminate any types that are unsupported on the target. + +#. `Optimize SelectionDAG`_ --- The SelectionDAG optimizer is run to clean up + redundancies exposed by type legalization. + +#. `Legalize SelectionDAG Ops`_ --- This stage transforms SelectionDAG nodes to + eliminate any operations that are unsupported on the target. + +#. `Optimize SelectionDAG`_ --- The SelectionDAG optimizer is run to eliminate + inefficiencies introduced by operation legalization. + +#. `Select instructions from DAG`_ --- Finally, the target instruction selector + matches the DAG operations to target instructions. This process translates + the target-independent input DAG into another DAG of target instructions. + +#. 
`SelectionDAG Scheduling and Formation`_ --- The last phase assigns a linear + order to the instructions in the target-instruction DAG and emits them into + the MachineFunction being compiled. This step uses traditional prepass + scheduling techniques. + +After all of these steps are complete, the SelectionDAG is destroyed and the +rest of the code generation passes are run. + +One great way to visualize what is going on here is to take advantage of a few +LLC command line options. The following options pop up a window displaying the +SelectionDAG at specific times (if you only get errors printed to the console +while using this, you probably `need to configure your +system <ProgrammersManual.html#viewing-graphs-while-debugging-code>`_ to add support for it). + +* ``-view-dag-combine1-dags`` displays the DAG after being built, before the + first optimization pass. + +* ``-view-legalize-dags`` displays the DAG before Legalization. + +* ``-view-dag-combine2-dags`` displays the DAG before the second optimization + pass. + +* ``-view-isel-dags`` displays the DAG before the Select phase. + +* ``-view-sched-dags`` displays the DAG before Scheduling. + +The ``-view-sunit-dags`` option displays the Scheduler's dependency graph. This graph +is based on the final SelectionDAG, with nodes that must be scheduled together +bundled into a single scheduling-unit node, and with immediate operands and +other nodes that aren't relevant for scheduling omitted. + +The option ``-filter-view-dags`` lets you select the name of the basic block +that you are interested in visualizing and filters all the previous +``view-*-dags`` options. + +.. _Build initial DAG: + +Initial SelectionDAG Construction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The initial SelectionDAG is na\ :raw-html:`&iuml;`\ vely peephole expanded from +the LLVM input by the ``SelectionDAGBuilder`` class. The intent of this pass +is to expose as many low-level, target-specific details to the SelectionDAG as +possible. This pass is mostly hard-coded (e.g. 
an LLVM ``add`` turns into an +``SDNode add`` while a ``getelementptr`` is expanded into the obvious +arithmetic). This pass requires target-specific hooks to lower calls, returns, +varargs, etc. For these features, the :raw-html:`<tt>` `TargetLowering`_ +:raw-html:`</tt>` interface is used. + +.. _legalize types: +.. _Legalize SelectionDAG Types: +.. _Legalize SelectionDAG Ops: + +SelectionDAG LegalizeTypes Phase +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Legalize phase is in charge of converting a DAG to only use the types that +are natively supported by the target. + +There are two main ways of converting values of unsupported scalar types to +values of supported types: converting small types to larger types ("promoting"), +and breaking up large integer types into smaller ones ("expanding"). For +example, a target might require that all f32 values are promoted to f64 and that +all i1/i8/i16 values are promoted to i32. The same target might require that +all i64 values be expanded into pairs of i32 values. These changes can insert +sign and zero extensions as needed to make sure that the final code has the same +behavior as the input. + +There are two main ways of converting values of unsupported vector types to +values of supported types: splitting vector types, multiple times if necessary, +until a legal type is found, and extending vector types by adding elements to +the end to round them out to legal types ("widening"). If a vector gets split +all the way down to single-element parts with no supported vector type being +found, the elements are converted to scalars ("scalarizing"). + +A target implementation tells the legalizer which types are supported (and which +register class to use for them) by calling the ``addRegisterClass`` method in +its ``TargetLowering`` constructor. + +.. _legalize operations: +.. 
_Legalizer: + +SelectionDAG Legalize Phase +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Legalize phase is in charge of converting a DAG to only use the operations +that are natively supported by the target. + +Targets often have weird constraints, such as not supporting every operation on +every supported datatype (e.g. X86 does not support byte conditional moves and +PowerPC does not support sign-extending loads from a 16-bit memory location). +Legalize takes care of this by open-coding another sequence of operations to +emulate the operation ("expansion"), by promoting one type to a larger type that +supports the operation ("promotion"), or by using a target-specific hook to +implement the legalization ("custom"). + +A target implementation tells the legalizer which operations are not supported +(and which of the above three actions to take) by calling the +``setOperationAction`` method in its ``TargetLowering`` constructor. + +If a target has legal vector types, it is expected to produce efficient machine +code for common forms of the shufflevector IR instruction using those types. +This may require custom legalization for SelectionDAG vector operations that +are created from the shufflevector IR. The shufflevector forms that should be +handled include: + +* Vector select --- Each element of the vector is chosen from either of the + corresponding elements of the 2 input vectors. This operation may also be + known as a "blend" or "bitwise select" in target assembly. This type of shuffle + maps directly to the ``shuffle_vector`` SelectionDAG node. + +* Insert subvector --- A vector is placed into a longer vector type starting + at index 0. This type of shuffle maps directly to the ``insert_subvector`` + SelectionDAG node with the ``index`` operand set to 0. + +* Extract subvector --- A vector is pulled from a longer vector type starting + at index 0. This type of shuffle maps directly to the ``extract_subvector`` + SelectionDAG node with the ``index`` operand set to 0. 
+ +* Splat --- All elements of the vector have identical scalar elements. This + operation may also be known as a "broadcast" or "duplicate" in target assembly. + The shufflevector IR instruction may change the vector length, so this operation + may map to multiple SelectionDAG nodes including ``shuffle_vector``, + ``concat_vectors``, ``insert_subvector``, and ``extract_subvector``. + +Prior to the existence of the Legalize passes, we required that every target +`selector`_ supported and handled every operator and type even if they are not +natively supported. The introduction of the Legalize phases allows all of the +canonicalization patterns to be shared across targets, and makes it very easy to +optimize the canonicalized code because it is still in the form of a DAG. + +.. _optimizations: +.. _Optimize SelectionDAG: +.. _selector: + +SelectionDAG Optimization Phase: the DAG Combiner +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The SelectionDAG optimization phase is run multiple times for code generation, +immediately after the DAG is built and once after each legalization. The first +run of the pass allows the initial code to be cleaned up (e.g. performing +optimizations that depend on knowing that the operators have restricted type +inputs). Subsequent runs of the pass clean up the messy code generated by the +Legalize passes, which allows Legalize to be very simple (it can focus on making +code legal instead of focusing on generating *good* and legal code). + +One important class of optimizations performed is optimizing inserted sign and +zero extension instructions. We currently use ad-hoc techniques, but could move +to more rigorous techniques in the future. Here are some good papers on the +subject: + +"`Widening integer arithmetic `_" :raw-html:`
` +Kevin Redwine and Norman Ramsey :raw-html:`
` +International Conference on Compiler Construction (CC) 2004 + +"`Effective sign extension elimination `_" :raw-html:`
` +Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani :raw-html:`
` +Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language Design +and Implementation. + +.. _Select instructions from DAG: + +SelectionDAG Select Phase +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The Select phase is the bulk of the target-specific code for instruction +selection. This phase takes a legal SelectionDAG as input, pattern matches the +instructions supported by the target to this DAG, and produces a new DAG of +target code. For example, consider the following LLVM fragment: + +.. code-block:: llvm + + %t1 = fadd float %W, %X + %t2 = fmul float %t1, %Y + %t3 = fadd float %t2, %Z + +This LLVM code corresponds to a SelectionDAG that looks basically like this: + +.. code-block:: text + + (fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z) + +If a target supports floating point multiply-and-add (FMA) operations, one of +the adds can be merged with the multiply. On the PowerPC, for example, the +output of the instruction selector might look like this DAG: + +:: + + (FMADDS (FADDS W, X), Y, Z) + +The ``FMADDS`` instruction is a ternary instruction that multiplies its first +two operands and adds the third (as single-precision floating-point numbers). +The ``FADDS`` instruction is a simple binary single-precision add instruction. +To perform this pattern match, the PowerPC backend includes the following +instruction definitions: + +.. code-block:: text + :emphasize-lines: 4-5,9 + + def FMADDS : AForm_1<59, 29, + (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), + "fmadds $FRT, $FRA, $FRC, $FRB", + [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), + F4RC:$FRB))]>; + def FADDS : AForm_2<59, 21, + (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB), + "fadds $FRT, $FRA, $FRB", + [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; + +The highlighted portion of the instruction definitions indicates the pattern +used to match the instructions. The DAG operators (like ``fmul``/``fadd``) +are defined in the ``include/llvm/Target/TargetSelectionDAG.td`` file. 
+"``F4RC``" is the register class of the input and result values. + +The TableGen DAG instruction selector generator reads the instruction patterns +in the ``.td`` file and automatically builds parts of the pattern matching code +for your target. It has the following strengths: + +* At compiler-compile time, it analyzes your instruction patterns and tells you + if your patterns make sense or not. + +* It can handle arbitrary constraints on operands for the pattern match. In + particular, it is straight-forward to say things like "match any immediate + that is a 13-bit sign-extended value". For examples, see the ``immSExt16`` + and related ``tblgen`` classes in the PowerPC backend. + +* It knows several important identities for the patterns defined. For example, + it knows that addition is commutative, so it allows the ``FMADDS`` pattern + above to match "``(fadd X, (fmul Y, Z))``" as well as "``(fadd (fmul X, Y), + Z)``", without the target author having to specially handle this case. + +* It has a full-featured type-inferencing system. In particular, you should + rarely have to explicitly tell the system what type parts of your patterns + are. In the ``FMADDS`` case above, we didn't have to tell ``tblgen`` that all + of the nodes in the pattern are of type 'f32'. It was able to infer and + propagate this knowledge from the fact that ``F4RC`` has type 'f32'. + +* Targets can define their own (and rely on built-in) "pattern fragments". + Pattern fragments are chunks of reusable patterns that get inlined into your + patterns during compiler-compile time. For example, the integer "``(not + x)``" operation is actually defined as a pattern fragment that expands as + "``(xor x, -1)``", since the SelectionDAG does not have a native '``not``' + operation. Targets can define their own short-hand fragments as they see fit. + See the definition of '``not``' and '``ineg``' for examples. 
+ +* In addition to instructions, targets can specify arbitrary patterns that map + to one or more instructions using the 'Pat' class. For example, the PowerPC + has no way to load an arbitrary integer immediate into a register in one + instruction. To tell tblgen how to do this, it defines: + + :: + + // Arbitrary immediate support. Implement in terms of LIS/ORI. + def : Pat<(i32 imm:$imm), + (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; + + If none of the single-instruction patterns for loading an immediate into a + register match, this will be used. This rule says "match an arbitrary i32 + immediate, turning it into an ``ORI`` ('or a 16-bit immediate') and an ``LIS`` + ('load 16-bit immediate, where the immediate is shifted to the left 16 bits') + instruction". To make this work, the ``LO16``/``HI16`` node transformations + are used to manipulate the input immediate (in this case, take the high or low + 16-bits of the immediate). + +* When using the 'Pat' class to map a pattern to an instruction that has one + or more complex operands (like e.g. `X86 addressing mode`_), the pattern may + either specify the operand as a whole using a ``ComplexPattern``, or else it + may specify the components of the complex operand separately. The latter is + done e.g. for pre-increment instructions by the PowerPC back end: + + :: + + def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + + def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff), + (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>; + + Here, the pair of ``ptroff`` and ``ptrreg`` operands is matched onto the + complex operand ``dst`` of class ``memri`` in the ``STWU`` instruction. + +* While the system does automate a lot, it still allows you to write custom C++ + code to match special cases if there is something that is hard to + express. 
+ +While it has many strengths, the system currently has some limitations, +primarily because it is a work in progress and is not yet finished: + +* Overall, there is no way to define or match SelectionDAG nodes that define + multiple values (e.g. ``SMUL_LOHI``, ``LOAD``, ``CALL``, etc). This is the + biggest reason that you currently still *have to* write custom C++ code + for your instruction selector. + +* There is no great way to support matching complex addressing modes yet. In + the future, we will extend pattern fragments to allow them to define multiple + values (e.g. the four operands of the `X86 addressing mode`_, which are + currently matched with custom C++ code). In addition, we'll extend fragments + so that a fragment can match multiple different patterns. + +* We don't automatically infer flags like ``isStore``/``isLoad`` yet. + +* We don't automatically generate the set of supported registers and operations + for the `Legalizer`_ yet. + +* We don't have a way of tying in custom legalized nodes yet. + +Despite these limitations, the instruction selector generator is still quite +useful for most of the binary and logical operations in typical instruction +sets. If you run into any problems or can't figure out how to do something, +please let Chris know! + +.. _Scheduling and Formation: +.. _SelectionDAG Scheduling and Formation: + +SelectionDAG Scheduling and Formation Phase +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The scheduling phase takes the DAG of target instructions from the selection +phase and assigns an order. The scheduler can pick an order depending on +various constraints of the machines (i.e. order for minimal register pressure or +try to cover instruction latencies). Once an order is established, the DAG is +converted to a list of :raw-html:`` `MachineInstr`_\s :raw-html:`` and +the SelectionDAG is destroyed. 
+ +Note that this phase is logically separate from the instruction selection phase, +but is tied to it closely in the code because it operates on SelectionDAGs. + +Future directions for the SelectionDAG +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +#. Optional function-at-a-time selection. + +#. Auto-generate entire selector from ``.td`` file. + +.. _SSA-based Machine Code Optimizations: + +SSA-based Machine Code Optimizations +------------------------------------ + +To Be Written + +Live Intervals +-------------- + +Live Intervals are the ranges (intervals) where a variable is *live*. They are +used by some `register allocator`_ passes to determine if two or more virtual +registers which require the same physical register are live at the same point in +the program (i.e., they conflict). When this situation occurs, one virtual +register must be *spilled*. + +Live Variable Analysis +^^^^^^^^^^^^^^^^^^^^^^ + +The first step in determining the live intervals of variables is to calculate +the set of registers that are immediately dead after the instruction (i.e., the +instruction calculates the value, but it is never used) and the set of registers +that are used by the instruction, but are never used after the instruction +(i.e., they are killed). Live variable information is computed for +each *virtual* register and *register allocatable* physical register +in the function. This is done in a very efficient manner because it uses SSA to +sparsely compute lifetime information for virtual registers (which are in SSA +form) and only has to track physical registers within a block. Before register +allocation, LLVM can assume that physical registers are only live within a +single basic block. This allows it to do a single, local analysis to resolve +physical register lifetimes within each basic block. If a physical register is +not register allocatable (e.g., a stack pointer or condition codes), it is not +tracked. + +Physical registers may be live in to or out of a function. 
Live in values are +typically arguments in registers. Live out values are typically return values in +registers. Live in values are marked as such, and are given a dummy "defining" +instruction during live intervals analysis. If the last basic block of a +function is a ``return``, then it's marked as using all live out values in the +function. + +``PHI`` nodes need to be handled specially, because the calculation of the live +variable information from a depth first traversal of the CFG of the function +won't guarantee that a virtual register used by the ``PHI`` node is defined +before it's used. When a ``PHI`` node is encountered, only the definition is +handled, because the uses will be handled in other basic blocks. + +For each ``PHI`` node of the current basic block, we simulate an assignment at +the end of the current basic block and traverse the successor basic blocks. If a +successor basic block has a ``PHI`` node and one of the ``PHI`` node's operands +is coming from the current basic block, then the variable is marked as *alive* +within the current basic block and all of its predecessor basic blocks, until +the basic block with the defining instruction is encountered. + +Live Intervals Analysis +^^^^^^^^^^^^^^^^^^^^^^^ + +We now have the information available to perform the live intervals analysis and +build the live intervals themselves. We start off by numbering the basic blocks +and machine instructions. We then handle the "live-in" values. These are in +physical registers, so the physical register is assumed to be killed by the end +of the basic block. Live intervals for virtual registers are computed for some +ordering of the machine instructions ``[1, N]``. A live interval is an interval +``[i, j)``, where ``1 <= i <= j < N``, for which a variable is live. + +.. note:: + More to come... + +.. _Register Allocation: +..
_register allocator: + +Register Allocation +------------------- + +The *Register Allocation problem* consists in mapping a program +:raw-html:`` P\ :sub:`v`\ :raw-html:``, that can use an unbounded +number of virtual registers, to a program :raw-html:`` P\ :sub:`p`\ +:raw-html:`` that contains a finite (possibly small) number of physical +registers. Each target architecture has a different number of physical +registers. If the number of physical registers is not enough to accommodate all +the virtual registers, some of them will have to be mapped into memory. These +virtuals are called *spilled virtuals*. + +How registers are represented in LLVM +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In LLVM, physical registers are denoted by integer numbers that normally range +from 1 to 1023. To see how this numbering is defined for a particular +architecture, you can read the ``GenRegisterNames.inc`` file for that +architecture. For instance, by inspecting +``lib/Target/X86/X86GenRegisterInfo.inc`` we see that the 32-bit register +``EAX`` is denoted by 43, and the MMX register ``MM0`` is mapped to 65. + +Some architectures contain registers that share the same physical location. A +notable example is the X86 platform. For instance, in the X86 architecture, the +registers ``EAX``, ``AX`` and ``AL`` share the first eight bits. These physical +registers are marked as *aliased* in LLVM. Given a particular architecture, you +can check which registers are aliased by inspecting its ``RegisterInfo.td`` +file. Moreover, the class ``MCRegAliasIterator`` enumerates all the physical +registers aliased to a register. + +Physical registers, in LLVM, are grouped in *Register Classes*. Elements in the +same register class are functionally equivalent, and can be interchangeably +used. Each virtual register can only be mapped to physical registers of a +particular class. For instance, in the X86 architecture, some virtuals can only +be allocated to 8 bit registers. 
A register class is described by +``TargetRegisterClass`` objects. To discover if a virtual register is +compatible with a given physical, this code can be used: + +.. code-block:: c++ + + bool RegMapping_Fer::compatible_class(MachineFunction &mf, + unsigned v_reg, + unsigned p_reg) { + assert(TargetRegisterInfo::isPhysicalRegister(p_reg) && + "Target register must be physical"); + const TargetRegisterClass *trc = mf.getRegInfo().getRegClass(v_reg); + return trc->contains(p_reg); + } + +Sometimes, mostly for debugging purposes, it is useful to change the number of +physical registers available in the target architecture. This must be done +statically, inside the ``TargetRegisterInfo.td`` file. Just ``grep`` for +``RegisterClass``, the last parameter of which is a list of registers. Just +commenting some out is one simple way to avoid them being used. A more polite +way is to explicitly exclude some registers from the *allocation order*. See the +definition of the ``GR8`` register class in +``lib/Target/X86/X86RegisterInfo.td`` for an example of this. + +Virtual registers are also denoted by integer numbers. Contrary to physical +registers, different virtual registers never share the same number. Whereas +physical registers are statically defined in a ``TargetRegisterInfo.td`` file +and cannot be created by the application developer, that is not the case with +virtual registers. In order to create new virtual registers, use the method +``MachineRegisterInfo::createVirtualRegister()``. This method will return a new +virtual register. Use an ``IndexedMap`` to hold +information per virtual register. If you need to enumerate all virtual +registers, use the function ``TargetRegisterInfo::index2VirtReg()`` to find the +virtual register numbers: + +.. 
code-block:: c++ + + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i); + stuff(VirtReg); + } + +Before register allocation, the operands of an instruction are mostly virtual +registers, although physical registers may also be used. In order to check if a +given machine operand is a register, use the boolean function +``MachineOperand::isRegister()``. To obtain the integer code of a register, use +``MachineOperand::getReg()``. An instruction may define or use a register. For +instance, ``ADD reg:1026 := reg:1025 reg:1024`` defines the register 1026, and +uses registers 1025 and 1024. Given a register operand, the method +``MachineOperand::isUse()`` informs if that register is being used by the +instruction. The method ``MachineOperand::isDef()`` informs if that register is +being defined. + +We will call physical registers present in the LLVM bitcode before register +allocation *pre-colored registers*. Pre-colored registers are used in many +different situations, for instance, to pass parameters of function calls, and +to store results of particular instructions. There are two types of pre-colored +registers: the ones *implicitly* defined, and those *explicitly* +defined. Explicitly defined registers are normal operands, and can be accessed +with ``MachineInstr::getOperand(int)::getReg()``. In order to check which +registers are implicitly defined by an instruction, use the +``TargetInstrInfo::get(opcode)::ImplicitDefs``, where ``opcode`` is the opcode +of the target instruction. One important difference between explicit and +implicit physical registers is that the latter are defined statically for each +instruction, whereas the former may vary depending on the program being +compiled. For example, an instruction that represents a function call will +always implicitly define or use the same set of physical registers.
To read the +registers implicitly used by an instruction, use +``TargetInstrInfo::get(opcode)::ImplicitUses``. Pre-colored registers impose +constraints on any register allocation algorithm. The register allocator must +make sure that none of them are overwritten by the values of virtual registers +while still alive. + +Mapping virtual registers to physical registers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There are two ways to map virtual registers to physical registers (or to memory +slots). The first way, that we will call *direct mapping*, is based on the use +of methods of the classes ``TargetRegisterInfo``, and ``MachineOperand``. The +second way, that we will call *indirect mapping*, relies on the ``VirtRegMap`` +class in order to insert loads and stores sending and getting values to and from +memory. + +The direct mapping provides more flexibility to the developer of the register +allocator; however, it is more error prone, and demands more implementation +work. Basically, the programmer will have to specify where load and store +instructions should be inserted in the target function being compiled in order +to get and store values in memory. To assign a physical register to a virtual +register present in a given operand, use ``MachineOperand::setReg(p_reg)``. To +insert a store instruction, use ``TargetInstrInfo::storeRegToStackSlot(...)``, +and to insert a load instruction, use ``TargetInstrInfo::loadRegFromStackSlot``. + +The indirect mapping shields the application developer from the complexities of +inserting load and store instructions. In order to map a virtual register to a +physical one, use ``VirtRegMap::assignVirt2Phys(vreg, preg)``. In order to map +a certain virtual register to memory, use +``VirtRegMap::assignVirt2StackSlot(vreg)``. This method will return the stack +slot where ``vreg``'s value will be located. 
If it is necessary to map another +virtual register to the same stack slot, use +``VirtRegMap::assignVirt2StackSlot(vreg, stack_location)``. One important point +to consider when using the indirect mapping, is that even if a virtual register +is mapped to memory, it still needs to be mapped to a physical register. This +physical register is the location where the virtual register is supposed to be +found before being stored or after being reloaded. + +If the indirect strategy is used, after all the virtual registers have been +mapped to physical registers or stack slots, it is necessary to use a spiller +object to place load and store instructions in the code. Every virtual that has +been mapped to a stack slot will be stored to memory after being defined and will +be loaded before being used. The implementation of the spiller tries to recycle +load/store instructions, avoiding unnecessary instructions. For an example of +how to invoke the spiller, see ``RegAllocLinearScan::runOnMachineFunction`` in +``lib/CodeGen/RegAllocLinearScan.cpp``. + +Handling two address instructions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +With very rare exceptions (e.g., function calls), the LLVM machine code +instructions are three address instructions. That is, each instruction is +expected to define at most one register, and to use at most two registers. +However, some architectures use two address instructions. In this case, the +defined register is also one of the used registers. For instance, an instruction +such as ``ADD %EAX, %EBX``, in X86 is actually equivalent to ``%EAX = %EAX + +%EBX``. + +In order to produce correct code, LLVM must convert three address instructions +that represent two address instructions into true two address instructions. LLVM +provides the pass ``TwoAddressInstructionPass`` for this specific purpose. It +must be run before register allocation takes place. After its execution, the +resulting code may no longer be in SSA form. 
This happens, for instance, in +situations where an instruction such as ``%a = ADD %b %c`` is converted to two +instructions such as: + +:: + + %a = MOVE %b + %a = ADD %a %c + +Notice that, internally, the second instruction is represented as ``ADD +%a[def/use] %c``. I.e., the register operand ``%a`` is both used and defined by +the instruction. + +The SSA deconstruction phase +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An important transformation that happens during register allocation is called +the *SSA Deconstruction Phase*. The SSA form simplifies many analyses that are +performed on the control flow graph of programs. However, traditional +instruction sets do not implement PHI instructions. Thus, in order to generate +executable code, compilers must replace PHI instructions with other instructions +that preserve their semantics. + +There are many ways in which PHI instructions can safely be removed from the +target code. The most traditional PHI deconstruction algorithm replaces PHI +instructions with copy instructions. That is the strategy adopted by LLVM. The +SSA deconstruction algorithm is implemented in +``lib/CodeGen/PHIElimination.cpp``. In order to invoke this pass, the identifier +``PHIEliminationID`` must be marked as required in the code of the register +allocator. + +Instruction folding +^^^^^^^^^^^^^^^^^^^ + +*Instruction folding* is an optimization performed during register allocation +that removes unnecessary copy instructions. For instance, a sequence of +instructions such as: + +:: + + %EBX = LOAD %mem_address + %EAX = COPY %EBX + +can be safely substituted by the single instruction: + +:: + + %EAX = LOAD %mem_address + +Instructions can be folded with the +``TargetRegisterInfo::foldMemoryOperand(...)`` method. Care must be taken when +folding instructions; a folded instruction can be quite different from the +original instruction. See ``LiveIntervals::addIntervalsForSpills`` in +``lib/CodeGen/LiveIntervalAnalysis.cpp`` for an example of its use. 
+ +Built in register allocators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The LLVM infrastructure provides the application developer with four different +register allocators: + +* *Fast* --- This register allocator is the default for debug builds. It + allocates registers on a basic block level, attempting to keep values in + registers and reusing registers as appropriate. + +* *Basic* --- This is an incremental approach to register allocation. Live + ranges are assigned to registers one at a time in an order that is driven by + heuristics. Since code can be rewritten on-the-fly during allocation, this + framework allows interesting allocators to be developed as extensions. It is + not itself a production register allocator but is a potentially useful + stand-alone mode for triaging bugs and as a performance baseline. + +* *Greedy* --- *The default allocator*. This is a highly tuned implementation of + the *Basic* allocator that incorporates global live range splitting. This + allocator works hard to minimize the cost of spill code. + +* *PBQP* --- A Partitioned Boolean Quadratic Programming (PBQP) based register + allocator. This allocator works by constructing a PBQP problem representing + the register allocation problem under consideration, solving this using a PBQP + solver, and mapping the solution back to a register assignment. + +The type of register allocator used in ``llc`` can be chosen with the command +line option ``-regalloc=...``: + +.. code-block:: bash + + $ llc -regalloc=greedy file.bc -o gr.s + $ llc -regalloc=fast file.bc -o fa.s + $ llc -regalloc=pbqp file.bc -o pbqp.s + +.. _Prolog/Epilog Code Insertion: + +Prolog/Epilog Code Insertion +---------------------------- + +Compact Unwind +^^^^^^^^^^^^^^ + +Throwing an exception requires *unwinding* out of a function. The information on +how to unwind a given function is traditionally expressed in DWARF unwind +(a.k.a. frame) info.
But that format was originally developed for debuggers to +backtrace, and each Frame Description Entry (FDE) requires ~20-30 bytes per +function. There is also the cost of mapping from an address in a function to the +corresponding FDE at runtime. An alternative unwind encoding is called *compact +unwind* and requires just 4-bytes per function. + +The compact unwind encoding is a 32-bit value, which is encoded in an +architecture-specific way. It specifies which registers to restore and from +where, and how to unwind out of the function. When the linker creates a final +linked image, it will create a ``__TEXT,__unwind_info`` section. This section is +a small and fast way for the runtime to access unwind info for any given +function. If we emit compact unwind info for the function, that compact unwind +info will be encoded in the ``__TEXT,__unwind_info`` section. If we emit DWARF +unwind info, the ``__TEXT,__unwind_info`` section will contain the offset of the +FDE in the ``__TEXT,__eh_frame`` section in the final linked image. + +For X86, there are three modes for the compact unwind encoding: + +*Function with a Frame Pointer (``EBP`` or ``RBP``)* + ``EBP/RBP``-based frame, where ``EBP/RBP`` is pushed onto the stack + immediately after the return address, then ``ESP/RSP`` is moved to + ``EBP/RBP``. Thus to unwind, ``ESP/RSP`` is restored with the current + ``EBP/RBP`` value, then ``EBP/RBP`` is restored by popping the stack, and the + return is done by popping the stack once more into the PC. All non-volatile + registers that need to be restored must have been saved in a small range on + the stack that starts ``EBP-4`` to ``EBP-1020`` (``RBP-8`` to + ``RBP-1020``). The offset (divided by 4 in 32-bit mode and 8 in 64-bit mode) + is encoded in bits 16-23 (mask: ``0x00FF0000``). 
The registers saved are + encoded in bits 0-14 (mask: ``0x00007FFF``) as five 3-bit entries from the + following table: + + ============== ============= =============== + Compact Number i386 Register x86-64 Register + ============== ============= =============== + 1 ``EBX`` ``RBX`` + 2 ``ECX`` ``R12`` + 3 ``EDX`` ``R13`` + 4 ``EDI`` ``R14`` + 5 ``ESI`` ``R15`` + 6 ``EBP`` ``RBP`` + ============== ============= =============== + +*Frameless with a Small Constant Stack Size (``EBP`` or ``RBP`` is not used as a frame pointer)* + To return, a constant (encoded in the compact unwind encoding) is added to the + ``ESP/RSP``. Then the return is done by popping the stack into the PC. All + non-volatile registers that need to be restored must have been saved on the + stack immediately after the return address. The stack size (divided by 4 in + 32-bit mode and 8 in 64-bit mode) is encoded in bits 16-23 (mask: + ``0x00FF0000``). There is a maximum stack size of 1024 bytes in 32-bit mode + and 2048 in 64-bit mode. The number of registers saved is encoded in bits 10-12 + (mask: ``0x00001C00``). Bits 0-9 (mask: ``0x000003FF``) contain which + registers were saved and their order. (See the + ``encodeCompactUnwindRegistersWithoutFrame()`` function in + ``lib/Target/X86/X86FrameLowering.cpp`` for the encoding algorithm.) + +*Frameless with a Large Constant Stack Size (``EBP`` or ``RBP`` is not used as a frame pointer)* + This case is like the "Frameless with a Small Constant Stack Size" case, but + the stack size is too large to encode in the compact unwind encoding. Instead + it requires that the function contains "``subl $nnnnnn, %esp``" in its + prolog. The compact encoding contains the offset to the ``$nnnnnn`` value in + the function in bits 10-12 (mask: ``0x00001C00``). + +.. _Late Machine Code Optimizations: + +Late Machine Code Optimizations +------------------------------- + +.. note:: + + To Be Written + +..
_Code Emission: + +Code Emission +------------- + +The code emission step of code generation is responsible for lowering from the +code generator abstractions (like `MachineFunction`_, `MachineInstr`_, etc) down +to the abstractions used by the MC layer (`MCInst`_, `MCStreamer`_, etc). This +is done with a combination of several different classes: the (misnamed) +target-independent AsmPrinter class, target-specific subclasses of AsmPrinter +(such as SparcAsmPrinter), and the TargetLoweringObjectFile class. + +Since the MC layer works at the level of abstraction of object files, it doesn't +have a notion of functions, global variables etc. Instead, it thinks about +labels, directives, and instructions. A key class used at this time is the +MCStreamer class. This is an abstract API that is implemented in different ways +(e.g. to output a .s file, output an ELF .o file, etc) that is effectively an +"assembler API". MCStreamer has one method per directive, such as EmitLabel, +EmitSymbolAttribute, SwitchSection, etc, which directly correspond to assembly +level directives. + +If you are interested in implementing a code generator for a target, there are +three important things that you have to implement for your target: + +#. First, you need a subclass of AsmPrinter for your target. This class + implements the general lowering process converting MachineFunction's into MC + label constructs. The AsmPrinter base class provides a number of useful + methods and routines, and also allows you to override the lowering process in + some important ways. You should get much of the lowering for free if you are + implementing an ELF, COFF, or MachO target, because the + TargetLoweringObjectFile class implements much of the common logic. + +#. Second, you need to implement an instruction printer for your target. The + instruction printer takes an `MCInst`_ and renders it to a raw_ostream as + text. 
Most of this is automatically generated from the .td file (when you + specify something like "``add $dst, $src1, $src2``" in the instructions), but + you need to implement routines to print operands. + +#. Third, you need to implement code that lowers a `MachineInstr`_ to an MCInst, + usually implemented in "MCInstLower.cpp". This lowering process is + often target specific, and is responsible for turning jump table entries, + constant pool indices, global variable addresses, etc into MCLabels as + appropriate. This translation layer is also responsible for expanding pseudo + ops used by the code generator into the actual machine instructions they + correspond to. The MCInsts that are generated by this are fed into the + instruction printer or the encoder. + +Finally, at your choosing, you can also implement a subclass of MCCodeEmitter +which lowers MCInst's into machine code bytes and relocations. This is +important if you want to support direct .o file emission, or would like to +implement an assembler for your target. + +Emitting function stack size information +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A section containing metadata on function stack sizes will be emitted when +``TargetLoweringObjectFile::StackSizesSection`` is not null, and +``TargetOptions::EmitStackSizeSection`` is set (-stack-size-section). The +section will contain an array of pairs of function symbol values (pointer size) +and stack sizes (unsigned LEB128). The stack size values only include the space +allocated in the function prologue. Functions with dynamic stack allocations are +not included. + +VLIW Packetizer +--------------- + +In a Very Long Instruction Word (VLIW) architecture, the compiler is responsible +for mapping instructions to functional-units available on the architecture. To +that end, the compiler creates groups of instructions called *packets* or +*bundles*. 
The VLIW packetizer in LLVM is a target-independent mechanism to +enable the packetization of machine instructions. + +Mapping from instructions to functional units +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Instructions in a VLIW target can typically be mapped to multiple functional +units. During the process of packetizing, the compiler must be able to reason +about whether an instruction can be added to a packet. This decision can be +complex since the compiler has to examine all possible mappings of instructions +to functional units. Therefore to alleviate compilation-time complexity, the +VLIW packetizer parses the instruction classes of a target and generates tables +at compiler build time. These tables can then be queried by the provided +machine-independent API to determine if an instruction can be accommodated in a +packet. + +How the packetization tables are generated and used +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The packetizer reads instruction classes from a target's itineraries and creates +a deterministic finite automaton (DFA) to represent the state of a packet. A DFA +consists of three major elements: inputs, states, and transitions. The set of +inputs for the generated DFA represents the instruction being added to a +packet. The states represent the possible consumption of functional units by +instructions in a packet. In the DFA, transitions from one state to another +occur on the addition of an instruction to an existing packet. If there is a +legal mapping of functional units to instructions, then the DFA contains a +corresponding transition. The absence of a transition indicates that a legal +mapping does not exist and that the instruction cannot be added to the packet. + +To generate tables for a VLIW target, add *Target*\ GenDFAPacketizer.inc as a +target to the Makefile in the target directory. 
The exported API provides three +functions: ``DFAPacketizer::clearResources()``, +``DFAPacketizer::reserveResources(MachineInstr *MI)``, and +``DFAPacketizer::canReserveResources(MachineInstr *MI)``. These functions allow +a target packetizer to add an instruction to an existing packet and to check +whether an instruction can be added to a packet. See +``llvm/CodeGen/DFAPacketizer.h`` for more information. + +Implementing a Native Assembler +=============================== + +Though you're probably reading this because you want to write or maintain a +compiler backend, LLVM also fully supports building a native assembler. +We've tried hard to automate the generation of the assembler from the .td files +(in particular the instruction syntax and encodings), which means that a large +part of the manual and repetitive data entry can be factored and shared with the +compiler. + +Instruction Parsing +------------------- + +.. note:: + + To Be Written + + +Instruction Alias Processing +---------------------------- + +Once the instruction is parsed, it enters the MatchInstructionImpl function. +The MatchInstructionImpl function performs alias processing and then does actual +matching. + +Alias processing is the phase that canonicalizes different lexical forms of the +same instructions down to one representation. There are several different kinds +of alias that are possible to implement and they are listed below in the order +that they are processed (which is in order from simplest/weakest to most +complex/powerful). Generally you want to use the first alias mechanism that +meets the needs of your instruction, because it will allow a more concise +description. + +Mnemonic Aliases +^^^^^^^^^^^^^^^^ + +The first phase of alias processing is simple instruction mnemonic remapping for +classes of instructions which are allowed with two different mnemonics. This +phase is a simple and unconditional remapping from one input mnemonic to one +output mnemonic.
It isn't possible for this form of alias to look at the +operands at all, so the remapping must apply for all forms of a given mnemonic. +Mnemonic aliases are defined simply, for example X86 has: + +:: + + def : MnemonicAlias<"cbw", "cbtw">; + def : MnemonicAlias<"smovq", "movsq">; + def : MnemonicAlias<"fldcww", "fldcw">; + def : MnemonicAlias<"fucompi", "fucomip">; + def : MnemonicAlias<"ud2a", "ud2">; + +... and many others. With a MnemonicAlias definition, the mnemonic is remapped +simply and directly. Though MnemonicAlias's can't look at any aspect of the +instruction (such as the operands) they can depend on global modes (the same +ones supported by the matcher), through a Requires clause: + +:: + + def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; + def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; + +In this example, the mnemonic gets mapped into a different one depending on +the current instruction set. + +Instruction Aliases +^^^^^^^^^^^^^^^^^^^ + +The most general phase of alias processing occurs while matching is happening: +it provides new forms for the matcher to match along with a specific instruction +to generate. An instruction alias has two parts: the string to match and the +instruction to generate. For example: + +:: + + def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8 :$src)>; + def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>; + def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8 :$src)>; + def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)>; + def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8 :$src)>; + def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)>; + def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)>; + +This shows a powerful example of the instruction aliases, matching the same +mnemonic in multiple different ways depending on what operands are present in +the assembly. 
The result of instruction aliases can include operands in a +different order than the destination instruction, and can use an input multiple +times, for example: + +:: + + def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>; + def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>; + def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>; + def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>; + +This example also shows that tied operands are only listed once. In the X86 +backend, XOR8rr has two input GR8's and one output GR8 (where an input is tied +to the output). InstAliases take a flattened operand list without duplicates +for tied operands. The result of an instruction alias can also use immediates +and fixed physical registers which are added as simple immediate operands in the +result, for example: + +:: + + // Fixed Immediate operand. + def : InstAlias<"aad", (AAD8i8 10)>; + + // Fixed register operand. + def : InstAlias<"fcomi", (COM_FIr ST1)>; + + // Simple alias. + def : InstAlias<"fcomi $reg", (COM_FIr RST:$reg)>; + +Instruction aliases can also have a Requires clause to make them subtarget +specific. + +If the back-end supports it, the instruction printer can automatically emit the +alias rather than what's being aliased. It typically leads to better, more +readable code. If it's better to print out what's being aliased, then pass a '0' +as the third parameter to the InstAlias definition. + +Instruction Matching +-------------------- + +.. note:: + + To Be Written + +.. _Implementations of the abstract target description interfaces: +.. _implement the target description: + +Target-specific Implementation Notes +==================================== + +This section of the document explains features or design decisions that are +specific to the code generator for a particular target. First we start with a +table that summarizes what features are supported by each target. + +.. 
_target-feature-matrix: + +Target Feature Matrix +--------------------- + +Note that this table does not list features that are not supported fully by any +target yet. It considers a feature to be supported if at least one subtarget +supports it. A feature being supported means that it is useful and works for +most cases, it does not indicate that there are zero known bugs in the +implementation. Here is the key: + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`
UnknownNot ApplicableNo supportPartial SupportComplete Support
` + +Here is the table: + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` + +:raw-html:`
Target
FeatureARMHexagonMSP430MipsNVPTXPowerPCSparcSystemZX86XCoreeBPF
is generally reliable
assembly parser
disassembler
inline asm
jit*
.o file writing
tail calls
segmented stacks *
` + +.. _feat_reliable: + +Is Generally Reliable +^^^^^^^^^^^^^^^^^^^^^ + +This box indicates whether the target is considered to be production quality. +This indicates that the target has been used as a static compiler to compile +large amounts of code by a variety of different people and is in continuous use. + +.. _feat_asmparser: + +Assembly Parser +^^^^^^^^^^^^^^^ + +This box indicates whether the target supports parsing target specific .s files +by implementing the MCAsmParser interface. This is required for llvm-mc to be +able to act as a native assembler and is required for inline assembly support in +the native .o file writer. + +.. _feat_disassembler: + +Disassembler +^^^^^^^^^^^^ + +This box indicates whether the target supports the MCDisassembler API for +disassembling machine opcode bytes into MCInst's. + +.. _feat_inlineasm: + +Inline Asm +^^^^^^^^^^ + +This box indicates whether the target supports most popular inline assembly +constraints and modifiers. + +.. _feat_jit: + +JIT Support +^^^^^^^^^^^ + +This box indicates whether the target supports the JIT compiler through the +ExecutionEngine interface. + +.. _feat_jit_arm: + +The ARM backend has basic support for integer code in ARM codegen mode, but +lacks NEON and full Thumb support. + +.. _feat_objectwrite: + +.o File Writing +^^^^^^^^^^^^^^^ + +This box indicates whether the target supports writing .o files (e.g. MachO, +ELF, and/or COFF) files directly from the target. Note that the target also +must include an assembly parser and general inline assembly support for full +inline assembly support in the .o writer. + +Targets that don't support this feature can obviously still write out .o files, +they just rely on having an external assembler to translate from a .s file to a +.o file (as is the case for many C compilers). + +.. _feat_tailcall: + +Tail Calls +^^^^^^^^^^ + +This box indicates whether the target supports guaranteed tail calls. 
These are +calls marked "`tail <LangRef.html#i_call>`_" and use the fastcc calling +convention. Please see the `tail call section`_ for more details. + +.. _feat_segstacks: + +Segmented Stacks +^^^^^^^^^^^^^^^^ + +This box indicates whether the target supports segmented stacks. This replaces +the traditional large C stack with many linked segments. It is compatible with +the `gcc implementation <http://gcc.gnu.org/wiki/SplitStacks>`_ used by the Go +front end. + +.. _feat_segstacks_x86: + +Basic support exists on the X86 backend. Currently vararg doesn't work and the +object files are not marked the way the gold linker expects, but simple Go +programs can be built by dragonegg. + +.. _tail call section: + +Tail call optimization +---------------------- + +Tail call optimization, callee reusing the stack of the caller, is currently +supported on x86/x86-64, PowerPC, AArch64, and WebAssembly. It is performed on +x86/x86-64, PowerPC, and AArch64 if: + +* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC + calling convention), ``cc 11`` (HiPE calling convention), ``tailcc``, or + ``swifttailcc``. + +* The call is a tail call - in tail position (ret immediately follows call and + ret uses value of call or is void). + +* Option ``-tailcallopt`` is enabled or the calling convention is ``tailcc``. + +* Platform-specific constraints are met. + +x86/x86-64 constraints: + +* No variable argument lists are used. + +* On x86-64 when generating GOT/PIC code only module-local calls (visibility = + hidden or protected) are supported. + +PowerPC constraints: + +* No variable argument lists are used. + +* No byval parameters are used. + +* On ppc32/64 GOT/PIC only module-local calls (visibility = hidden or protected) + are supported. + +WebAssembly constraints: + +* No variable argument lists are used. + +* The 'tail-call' target attribute is enabled. + +* The caller and callee's return types must match. The caller cannot + be void unless the callee is, too.
+ +AArch64 constraints: + +* No variable argument lists are used. + +Example: + +Call as ``llc -tailcallopt test.ll``. + +.. code-block:: llvm + + declare fastcc i32 @tailcallee(i32 inreg %a1, i32 inreg %a2, i32 %a3, i32 %a4) + + define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { + %l1 = add i32 %in1, %in2 + %tmp = tail call fastcc i32 @tailcallee(i32 inreg %in1, i32 inreg %in2, i32 %in1, i32 %l1) + ret i32 %tmp + } + +Implications of ``-tailcallopt``: + +To support tail call optimization in situations where the callee has more +arguments than the caller a 'callee pops arguments' convention is used. This +currently causes each ``fastcc`` call that is not tail call optimized (because +one or more of above constraints are not met) to be followed by a readjustment +of the stack. So performance might be worse in such cases. + +Sibling call optimization +------------------------- + +Sibling call optimization is a restricted form of tail call optimization. +Unlike tail call optimization described in the previous section, it can be +performed automatically on any tail calls when ``-tailcallopt`` option is not +specified. + +Sibling call optimization is currently performed on x86/x86-64 when the +following constraints are met: + +* Caller and callee have the same calling convention. It can be either ``c`` or + ``fastcc``. + +* The call is a tail call - in tail position (ret immediately follows call and + ret uses value of call or is void). + +* Caller and callee have matching return type or the callee result is not used. + +* If any of the callee arguments are being passed in stack, they must be + available in caller's own incoming argument stack and the frame offsets must + be the same. + +Example: + +.. code-block:: llvm + + declare i32 @bar(i32, i32) + + define i32 @foo(i32 %a, i32 %b, i32 %c) { + entry: + %0 = tail call i32 @bar(i32 %a, i32 %b) + ret i32 %0 + } + +The X86 backend +--------------- + +The X86 code generator lives in the ``lib/Target/X86`` directory. 
This code +generator is capable of targeting a variety of x86-32 and x86-64 processors, and +includes support for ISA extensions such as MMX and SSE. + +X86 Target Triples supported +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following are the known target triples that are supported by the X86 +backend. This is not an exhaustive list, and it would be useful to add those +that people test. + +* **i686-pc-linux-gnu** --- Linux + +* **i386-unknown-freebsd5.3** --- FreeBSD 5.3 + +* **i686-pc-cygwin** --- Cygwin on Win32 + +* **i686-pc-mingw32** --- MingW on Win32 + +* **i386-pc-mingw32msvc** --- MingW crosscompiler on Linux + +* **i686-apple-darwin*** --- Apple Darwin on X86 + +* **x86_64-unknown-linux-gnu** --- Linux + +X86 Calling Conventions supported +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following target-specific calling conventions are known to backend: + +* **x86_StdCall** --- stdcall calling convention seen on Microsoft Windows + platform (CC ID = 64). + +* **x86_FastCall** --- fastcall calling convention seen on Microsoft Windows + platform (CC ID = 65). + +* **x86_ThisCall** --- Similar to X86_StdCall. Passes first argument in ECX, + others via stack. Callee is responsible for stack cleaning. This convention is + used by MSVC by default for methods in its ABI (CC ID = 70). + +.. _X86 addressing mode: + +Representing X86 addressing modes in MachineInstrs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The x86 has a very flexible way of accessing memory. It is capable of forming +memory addresses of the following expression directly in integer instructions +(which use ModR/M addressing): + +:: + + SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32 + +In order to represent this, LLVM tracks no less than 5 operands for each memory +operand of this form. 
This means that the "load" form of '``mov``' has the +following ``MachineOperand``\s in this order: + +:: + + Index: 0 | 1 2 3 4 5 + Meaning: DestReg, | BaseReg, Scale, IndexReg, Displacement Segment + OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm PhysReg + +Stores, and all other instructions, treat the five memory operands in the same +way and in the same order. If the segment register is unspecified (regno = 0), +then no segment override is generated. "Lea" operations do not have a segment +register specified, so they only have 4 operands for their memory reference. + +X86 address spaces supported +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +x86 has a feature which provides the ability to perform loads and stores to +different address spaces via the x86 segment registers. A segment override +prefix byte on an instruction causes the instruction's memory access to go to +the specified segment. LLVM address space 0 is the default address space, which +includes the stack, and any unqualified memory accesses in a program. Address +spaces 1-255 are currently reserved for user-defined code. The GS-segment is +represented by address space 256, the FS-segment is represented by address space +257, and the SS-segment is represented by address space 258. Other x86 segments +have yet to be allocated address space numbers. + +While these address spaces may seem similar to TLS via the ``thread_local`` +keyword, and often use the same underlying hardware, there are some fundamental +differences. + +The ``thread_local`` keyword applies to global variables and specifies that they +are to be allocated in thread-local memory. There are no type qualifiers +involved, and these variables can be pointed to with normal pointers and +accessed with normal loads and stores. The ``thread_local`` keyword is +target-independent at the LLVM IR level (though LLVM doesn't yet have +implementations of it for some configurations). + +Special address spaces, in contrast, apply to static types.
Every load and store +has a particular address space in its address operand type, and this is what +determines which address space is accessed. LLVM ignores these special address +space qualifiers on global variables, and does not provide a way to directly +allocate storage in them. At the LLVM IR level, the behavior of these special +address spaces depends in part on the underlying OS or runtime environment, and +they are specific to x86 (and LLVM doesn't yet handle them correctly in some +cases). + +Some operating systems and runtime environments use (or may in the future use) +the FS/GS-segment registers for various low-level purposes, so care should be +taken when considering them. + +Instruction naming +^^^^^^^^^^^^^^^^^^ + +An instruction name consists of the base name, a default operand size, and a +character per operand with an optional special size. For example: + +:: + + ADD8rr -> add, 8-bit register, 8-bit register + IMUL16rmi -> imul, 16-bit register, 16-bit memory, 16-bit immediate + IMUL16rmi8 -> imul, 16-bit register, 16-bit memory, 8-bit immediate + MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory + +The PowerPC backend +------------------- + +The PowerPC code generator lives in the lib/Target/PowerPC directory. The code +generation is retargetable to several variations or *subtargets* of the PowerPC +ISA; including ppc32, ppc64 and altivec. + +LLVM PowerPC ABI +^^^^^^^^^^^^^^^^ + +LLVM follows the AIX PowerPC ABI, with two deviations. LLVM uses a PC relative +(PIC) or static addressing for accessing global values, so no TOC (r2) is +used. Second, r31 is used as a frame pointer to allow dynamic growth of a stack +frame. LLVM takes advantage of having no TOC to provide space to save the frame +pointer in the PowerPC linkage area of the caller frame. Other details of +PowerPC ABI can be found at `PowerPC ABI +`_\ +. Note: This link describes the 32 bit ABI.
The 64 bit ABI is similar except +space for GPRs are 8 bytes wide (not 4) and r13 is reserved for system use. + +Frame Layout +^^^^^^^^^^^^ + +The size of a PowerPC frame is usually fixed for the duration of a function's +invocation. Since the frame is fixed size, all references into the frame can be +accessed via fixed offsets from the stack pointer. The exception to this is +when dynamic alloca or variable sized arrays are present, then a base pointer +(r31) is used as a proxy for the stack pointer and stack pointer is free to grow +or shrink. A base pointer is also used if llvm-gcc is not passed the +-fomit-frame-pointer flag. The stack pointer is always aligned to 16 bytes, so +that space allocated for altivec vectors will be properly aligned. + +An invocation frame is laid out as follows (low memory at top): + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`
Linkage

Parameter area

Dynamic area

Locals area

Saved registers area


Previous Frame

` + +The *linkage* area is used by a callee to save special registers prior to +allocating its own frame. Only three entries are relevant to LLVM. The first +entry is the previous stack pointer (sp), aka link. This allows probing tools +like gdb or exception handlers to quickly scan the frames in the stack. A +function epilog can also use the link to pop the frame from the stack. The +third entry in the linkage area is used to save the return address from the lr +register. Finally, as mentioned above, the last entry is used to save the +previous frame pointer (r31.) The entries in the linkage area are the size of a +GPR, thus the linkage area is 24 bytes long in 32 bit mode and 48 bytes in 64 +bit mode. + +32 bit linkage area: + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`
0Saved SP (r1)
4Saved CR
8Saved LR
12Reserved
16Reserved
20Saved FP (r31)
` + +64 bit linkage area: + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`
0Saved SP (r1)
8Saved CR
16Saved LR
24Reserved
32Reserved
40Saved FP (r31)
` + +The *parameter area* is used to store arguments being passed to a callee +function. Following the PowerPC ABI, the first few arguments are actually +passed in registers, with the space in the parameter area unused. However, if +there are not enough registers or the callee is a thunk or vararg function, +these register arguments can be spilled into the parameter area. Thus, the +parameter area must be large enough to store all the parameters for the largest +call sequence made by the caller. The size must also be minimally large enough +to spill registers r3-r10. This allows callees blind to the call signature, +such as thunks and vararg functions, enough space to cache the argument +registers. Therefore, the parameter area is minimally 32 bytes (64 bytes in 64 +bit mode.) Also note that since the parameter area is a fixed offset from the +top of the frame, a callee can access its spilled arguments using fixed +offsets from the stack pointer (or base pointer.) + +Combining the information about the linkage and parameter areas and alignment, a +stack frame is minimally 64 bytes in 32 bit mode and 128 bytes in 64 bit mode. + +The *dynamic area* starts out as size zero. If a function uses dynamic alloca +then space is added to the stack, the linkage and parameter areas are shifted to +top of stack, and the new space is available immediately below the linkage and +parameter areas. The cost of shifting the linkage and parameter areas is minor +since only the link value needs to be copied. The link value can be easily +fetched by adding the original frame size to the base pointer. Note that +allocations in the dynamic space need to observe 16 byte alignment. + +The *locals area* is where the llvm compiler reserves space for local variables. + +The *saved registers area* is where the llvm compiler spills callee saved +registers on entry to the callee.
+ +Prolog/Epilog +^^^^^^^^^^^^^ + +The llvm prolog and epilog are the same as described in the PowerPC ABI, with +the following exceptions. Callee saved registers are spilled after the frame is +created. This allows the llvm epilog/prolog support to be common with other +targets. The base pointer callee saved register r31 is saved in the TOC slot of +linkage area. This simplifies allocation of space for the base pointer and +makes it convenient to locate programmatically and during debugging. + +Dynamic Allocation +^^^^^^^^^^^^^^^^^^ + +.. note:: + + TODO - More to come. + +The NVPTX backend +----------------- + +The NVPTX code generator under lib/Target/NVPTX is an open-source version of +the NVIDIA NVPTX code generator for LLVM. It is contributed by NVIDIA and is +a port of the code generator used in the CUDA compiler (nvcc). It targets the +PTX 3.0/3.1 ISA and can target any compute capability greater than or equal to +2.0 (Fermi). + +This target is of production quality and should be completely compatible with +the official NVIDIA toolchain. + +Code Generator Options: + +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`` +:raw-html:`
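<table border="1" cellspacing="0">
<tr><th>Option</th><th>Description</th></tr>
<tr><td>sm_20</td><td align="left">Set shader model/compute capability to 2.0</td></tr>
<tr><td>sm_21</td><td align="left">Set shader model/compute capability to 2.1</td></tr>
<tr><td>sm_30</td><td align="left">Set shader model/compute capability to 3.0</td></tr>
<tr><td>sm_35</td><td align="left">Set shader model/compute capability to 3.5</td></tr>
<tr><td>ptx30</td><td align="left">Target PTX 3.0</td></tr>
<tr><td>ptx31</td><td align="left">Target PTX 3.1</td></tr>
</table>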
` + +The extended Berkeley Packet Filter (eBPF) backend +-------------------------------------------------- + +Extended BPF (or eBPF) is similar to the original ("classic") BPF (cBPF) used +to filter network packets. The +`bpf() system call <http://man7.org/linux/man-pages/man2/bpf.2.html>`_ +performs a range of operations related to eBPF. For both cBPF and eBPF +programs, the Linux kernel statically analyzes the programs before loading +them, in order to ensure that they cannot harm the running system. eBPF is +a 64-bit RISC instruction set designed for one to one mapping to 64-bit CPUs. +Opcodes are 8-bit encoded, and 87 instructions are defined. There are 10 +registers, grouped by function as outlined below. + +:: + + R0 return value from in-kernel functions; exit value for eBPF program + R1 - R5 function call arguments to in-kernel functions + R6 - R9 callee-saved registers preserved by in-kernel functions + R10 stack frame pointer (read only) + +Instruction encoding (arithmetic and jump) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +eBPF is reusing most of the opcode encoding from classic to simplify conversion +of classic BPF to eBPF. 
For arithmetic and jump instructions the 8-bit 'code' +field is divided into three parts: + +:: + + +----------------+--------+--------------------+ + | 4 bits | 1 bit | 3 bits | + | operation code | source | instruction class | + +----------------+--------+--------------------+ + (MSB) (LSB) + +Three LSB bits store instruction class which is one of: + +:: + + BPF_LD 0x0 + BPF_LDX 0x1 + BPF_ST 0x2 + BPF_STX 0x3 + BPF_ALU 0x4 + BPF_JMP 0x5 + (unused) 0x6 + BPF_ALU64 0x7 + +When BPF_CLASS(code) == BPF_ALU or BPF_ALU64 or BPF_JMP, +4th bit encodes source operand + +:: + + BPF_X 0x1 use src_reg register as source operand + BPF_K 0x0 use 32 bit immediate as source operand + +and four MSB bits store operation code + +:: + + BPF_ADD 0x0 add + BPF_SUB 0x1 subtract + BPF_MUL 0x2 multiply + BPF_DIV 0x3 divide + BPF_OR 0x4 bitwise logical OR + BPF_AND 0x5 bitwise logical AND + BPF_LSH 0x6 left shift + BPF_RSH 0x7 right shift (zero extended) + BPF_NEG 0x8 arithmetic negation + BPF_MOD 0x9 modulo + BPF_XOR 0xa bitwise logical XOR + BPF_MOV 0xb move register to register + BPF_ARSH 0xc right shift (sign extended) + BPF_END 0xd endianness conversion + +If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of + +:: + + BPF_JA 0x0 unconditional jump + BPF_JEQ 0x1 jump == + BPF_JGT 0x2 jump > + BPF_JGE 0x3 jump >= + BPF_JSET 0x4 jump if (DST & SRC) + BPF_JNE 0x5 jump != + BPF_JSGT 0x6 jump signed > + BPF_JSGE 0x7 jump signed >= + BPF_CALL 0x8 function call + BPF_EXIT 0x9 function return + +Instruction encoding (load, store) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +For load and store instructions the 8-bit 'code' field is divided as: + +:: + + +--------+--------+-------------------+ + | 3 bits | 2 bits | 3 bits | + | mode | size | instruction class | + +--------+--------+-------------------+ + (MSB) (LSB) + +Size modifier is one of + +:: + + BPF_W 0x0 word + BPF_H 0x1 half word + BPF_B 0x2 byte + BPF_DW 0x3 double word + +Mode modifier is one of + +:: + + BPF_IMM 0x0 immediate + BPF_ABS 0x1 
used to access packet data + BPF_IND 0x2 used to access packet data + BPF_MEM 0x3 memory + (reserved) 0x4 + (reserved) 0x5 + BPF_XADD 0x6 exclusive add + + +Packet data access (BPF_ABS, BPF_IND) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and +(BPF_IND | <size> | BPF_LD) which are used to access packet data. +Register R6 is an implicit input that must contain pointer to sk_buff. +Register R0 is an implicit output which contains the data fetched +from the packet. Registers R1-R5 are scratch registers and must not +be used to store the data across BPF_ABS | BPF_LD or BPF_IND | BPF_LD +instructions. These instructions have implicit program exit condition +as well. When eBPF program is trying to access the data beyond +the packet boundary, the interpreter will abort the execution of the program. + +BPF_IND | BPF_W | BPF_LD is equivalent to: + R0 = ntohl(\*(u32 \*) (((struct sk_buff \*) R6)->data + src_reg + imm32)) + +eBPF maps +^^^^^^^^^ + +eBPF maps are provided for sharing data between kernel and user-space. +Currently implemented types are hash and array, with potential extension to +support bloom filters, radix trees, etc. A map is defined by its type, +maximum number of elements, key size and value size in bytes. eBPF syscall +supports create, update, find and delete functions on maps. + +Function calls +^^^^^^^^^^^^^^ + +Function call arguments are passed using up to five registers (R1 - R5). +The return value is passed in a dedicated register (R0). Four additional +registers (R6 - R9) are callee-saved, and the values in these registers +are preserved within kernel functions. R0 - R5 are scratch registers within +kernel functions, and eBPF programs must therefore store/restore values in +these registers if needed across function calls. The stack can be accessed +using the read-only frame pointer R10. eBPF registers map 1:1 to hardware +registers on x86_64 and other 64-bit architectures. 
For example, x86_64 +in-kernel JIT maps them as + +:: + + R0 - rax + R1 - rdi + R2 - rsi + R3 - rdx + R4 - rcx + R5 - r8 + R6 - rbx + R7 - r13 + R8 - r14 + R9 - r15 + R10 - rbp + +since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing +and rbx, r12 - r15 are callee saved. + +Program start +^^^^^^^^^^^^^ + +An eBPF program receives a single argument and contains +a single eBPF main routine; the program does not contain eBPF functions. +Function calls are limited to a predefined set of kernel functions. The size +of a program is limited to 4K instructions: this ensures fast termination and +a limited number of kernel function calls. Prior to running an eBPF program, +a verifier performs static analysis to prevent loops in the code and +to ensure valid register usage and operand types. + +The AMDGPU backend +------------------ + +The AMDGPU code generator lives in the ``lib/Target/AMDGPU`` +directory. This code generator is capable of targeting a variety of +AMD GPU processors. Refer to :doc:`AMDGPUUsage` for more information. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodeOfConduct.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodeOfConduct.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodeOfConduct.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodeOfConduct.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,112 @@ +============================== +LLVM Community Code of Conduct +============================== + +.. note:: + + This document is currently a **DRAFT** document while it is being discussed + by the community. + +The LLVM community has always worked to be a welcoming and respectful +community, and we want to ensure that doesn't change as we grow and evolve. 
To +that end, we have a few ground rules that we ask people to adhere to: + +* `be friendly and patient`_, +* `be welcoming`_, +* `be considerate`_, +* `be respectful`_, +* `be careful in the words that you choose and be kind to others`_, and +* `when we disagree, try to understand why`_. + +This isn't an exhaustive list of things that you can't do. Rather, take it in +the spirit in which it's intended - a guide to make it easier to communicate +and participate in the community. + +This code of conduct applies to all spaces managed by the LLVM project or The +LLVM Foundation. This includes IRC channels, mailing lists, bug trackers, LLVM +events such as the developer meetings and socials, and any other forums created +by the project that the community uses for communication. It applies to all of +your communication and conduct in these spaces, including emails, chats, things +you say, slides, videos, posters, signs, or even t-shirts you display in these +spaces. In addition, violations of this code outside these spaces may, in rare +cases, affect a person's ability to participate within them, when the conduct +amounts to an egregious violation of this code. + +If you believe someone is violating the code of conduct, we ask that you report +it by emailing conduct@llvm.org. For more details please see our +:doc:`Reporting Guide <ReportingGuide>`. + +.. _be friendly and patient: + +* **Be friendly and patient.** + +.. _be welcoming: + +* **Be welcoming.** We strive to be a community that welcomes and supports + people of all backgrounds and identities. This includes, but is not limited + to members of any race, ethnicity, culture, national origin, colour, + immigration status, social and economic class, educational level, sex, sexual + orientation, gender identity and expression, age, size, family status, + political belief, religion or lack thereof, and mental and physical ability. + +.. 
_be considerate: + +* **Be considerate.** Your work will be used by other people, and you in turn + will depend on the work of others. Any decision you take will affect users + and colleagues, and you should take those consequences into account. Remember + that we're a world-wide community, so you might not be communicating in + someone else's primary language. + +.. _be respectful: + +* **Be respectful.** Not all of us will agree all the time, but disagreement is + no excuse for poor behavior and poor manners. We might all experience some + frustration now and then, but we cannot allow that frustration to turn into + a personal attack. It's important to remember that a community where people + feel uncomfortable or threatened is not a productive one. Members of the LLVM + community should be respectful when dealing with other members as well as + with people outside the LLVM community. + +.. _be careful in the words that you choose and be kind to others: + +* **Be careful in the words that you choose and be kind to others.** Do not + insult or put down other participants. Harassment and other exclusionary + behavior aren't acceptable. This includes, but is not limited to: + + * Violent threats or language directed against another person. + * Discriminatory jokes and language. + * Posting sexually explicit or violent material. + * Posting (or threatening to post) other people's personally identifying + information ("doxing"). + * Personal insults, especially those using racist or sexist terms. + * Unwelcome sexual attention. + * Advocating for, or encouraging, any of the above behavior. + + In general, if someone asks you to stop, then stop. Persisting in such + behavior after being asked to stop is considered harassment. + +.. _when we disagree, try to understand why: + +* **When we disagree, try to understand why.** Disagreements, both social and + technical, happen all the time and LLVM is no exception. 
It is important that + we resolve disagreements and differing views constructively. Remember that + we're different. The strength of LLVM comes from its varied community, people + from a wide range of backgrounds. Different people have different + perspectives on issues. Being unable to understand why someone holds + a viewpoint doesn't mean that they're wrong. Don't forget that it is human to + err and blaming each other doesn't get us anywhere. Instead, focus on helping + to resolve issues and learning from mistakes. + +Questions? +========== + +If you have questions, please feel free to contact the LLVM Foundation Code of +Conduct Advisory Committee by emailing conduct@llvm.org. + + +(This text is based on the `Django Project`_ Code of Conduct, which is in turn +based on wording from the `Speak Up! project`_.) + +.. _Django Project: https://www.djangoproject.com/conduct/ +.. _Speak Up! project: http://speakup.io/coc.html + diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodeReview.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodeReview.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodeReview.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodeReview.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,251 @@ +===================================== +LLVM Code-Review Policy and Practices +===================================== + +LLVM's code-review policy and practices help maintain high code quality across +the project. Specifically, our code review process aims to: + + * Improve readability and maintainability. + * Improve robustness and prevent the introduction of defects. + * Best leverage the experience of other contributors for each proposed change. + * Help grow and develop new contributors, through mentorship by community leaders. 
+ +It is important for all contributors to understand our code-review +practices and participate in the code-review process. + +General Policies +================ + +What Code Should Be Reviewed? +----------------------------- + +All developers are required to have significant changes reviewed before they +are committed to the repository. + +Must Code Be Reviewed Prior to Being Committed? +----------------------------------------------- + +Code can be reviewed either before it is committed or after. We expect +significant patches to be reviewed before being committed. Smaller patches +(or patches where the developer owns the component) that meet +likely-community-consensus requirements (as apply to all patch approvals) can +be committed prior to an explicit review. In situations where there is any +uncertainty, a patch should be reviewed prior to being committed. + +Please note that the developer responsible for a patch is also +responsible for making all necessary review-related changes, including +those requested during any post-commit review. + +.. _post_commit_review: + +Can Code Be Reviewed After It Is Committed? +------------------------------------------- + +Post-commit review is encouraged, and can be accomplished using any of the +tools detailed below. There is a strong expectation that authors respond +promptly to post-commit feedback and address it. Failure to do so is cause for +the patch to be :ref:`reverted <revert_policy>`. + +If a community member expresses a concern about a recent commit, and this +concern would have been significant enough to warrant a conversation during +pre-commit review (including around the need for more design discussions), +they may ask the original author for a revert; the author is then responsible +for reverting the patch promptly. Developers often disagree, and erring on the side of the +developer asking for more review prevents any lingering disagreement over +code in the tree. 
This does not indicate any fault from the patch author, +this is inherent to our post-commit review practices. +Reverting a patch ensures that design discussions can happen without blocking +other development; it's entirely possible the patch will end up being reapplied +essentially as-is once concerns have been resolved. + +Before being recommitted, the patch generally should undergo further review. +The community member who identified the problem is expected to engage +actively in the review. In cases where the problem is identified by a buildbot, +a community member with access to hardware similar to that on the buildbot is +expected to engage in the review. + +Please note: The bar for post-commit feedback is not higher than for pre-commit +feedback. Don't delay unnecessarily in providing feedback. However, if you see +something after code has been committed about which you would have commented +pre-commit (had you noticed it earlier), please feel free to provide that +feedback at any time. + +That having been said, if a substantial period of time has passed since the +original change was committed, it may be better to create a new patch to +address the issues than comment on the original commit. The original patch +author, for example, might no longer be an active contributor to the project. + +What Tools Are Used for Code Review? +------------------------------------ + +Pre-commit code reviews are conducted on our web-based code-review tool (see +:doc:`Phabricator`). Post-commit reviews can be done on Phabricator, by email +on the relevant project's commit mailing list, on the project's development +list, or on the bug tracker. + +When Is an RFC Required? +------------------------ + +Some changes are too significant for just a code review. 
Changes that should +change the LLVM Language Reference (e.g., adding new target-independent +intrinsics), adding language extensions in Clang, and so on, require an RFC +(Request for Comment) email on the project's ``*-dev`` mailing list first. For +changes that promise significant impact on users and/or downstream code bases, +reviewers can request an RFC achieving consensus before proceeding with code +review. That having been said, posting initial patches can help with +discussions on an RFC. + +Code-Review Workflow +==================== + +Code review can be an iterative process, which continues until the patch is +ready to be committed. Specifically, once a patch is sent out for review, it +needs an explicit approval before it is committed. Do not assume silent +approval, or solicit objections to a patch with a deadline. + +Acknowledge All Reviewer Feedback +--------------------------------- + +All comments by reviewers should be acknowledged by the patch author. It is +generally expected that suggested changes will be incorporated into a future +revision of the patch unless the author and/or other reviewers can articulate a +good reason to do otherwise (and then the reviewers must agree). If a new patch +does not address all outstanding feedback, the author should explicitly state +that when providing the updated patch. When using the web-based code-review +tool, such notes can be provided in the "Diff" description (which is different +from the description of the "Differential Revision" as a whole used for the +commit message). + +If you suggest changes in a code review, but don't wish the suggestion to be +interpreted this strongly, please state so explicitly. + +Aim to Make Efficient Use of Everyone's Time +-------------------------------------------- + +Aim to limit the number of iterations in the review process. 
For example, when +suggesting a change, if you want the author to make a similar set of changes at +other places in the code, please explain the requested set of changes so that +the author can make all of the changes at once. If a patch will require +multiple steps prior to approval (e.g., splitting, refactoring, posting data +from specific performance tests), please explain as many of these up front as +possible. This allows the patch author and reviewers to make the most efficient +use of their time. + +LGTM - How a Patch Is Accepted +------------------------------ + +A patch is approved to be committed when a reviewer accepts it, and this is +almost always associated with a message containing the text "LGTM" (which +stands for Looks Good To Me). Only approval from a single reviewer is required. + +When providing an unqualified LGTM (approval to commit), it is the +responsibility of the reviewer to have reviewed all of the discussion and +feedback from all reviewers ensuring that all feedback has been addressed and +that all other reviewers will almost surely be satisfied with the patch being +approved. If unsure, the reviewer should provide a qualified approval, (e.g., +"LGTM, but please wait for @someone, @someone_else"). You may also do this if +you are fairly certain that a particular community member will wish to review, +even if that person hasn't done so yet. + +Note that, if a reviewer has requested a particular community member to review, +and after a week that community member has yet to respond, feel free to ping +the patch (which literally means submitting a comment on the patch with the +word, "Ping."), or alternatively, ask the original reviewer for further +suggestions. 
+ +If it is likely that others will want to review a recently-posted patch, +especially if there might be objections, but no one else has done so yet, it is +also polite to provide a qualified approval (e.g., "LGTM, but please wait for a +couple of days in case others wish to review"). If approval is received very +quickly, a patch author may also elect to wait before committing (and this is +certainly considered polite for non-trivial patches). Especially given the +global nature of our community, this waiting time should be at least 24 hours. +Please also be mindful of weekends and major holidays. + +Our goal is to ensure community consensus around design decisions and +significant implementation choices, and one responsibility of a reviewer, when +providing an overall approval for a patch, is to be reasonably sure that such +consensus exists. If you're not familiar enough with the community to know, +then you shouldn't be providing final approval to commit. A reviewer providing +final approval should have commit access to the LLVM project. + +Every patch should be reviewed by at least one technical expert in the areas of +the project affected by the change. + +Splitting Requests and Conditional Acceptance +--------------------------------------------- + +Reviewers may request certain aspects of a patch to be broken out into separate +patches for independent review. Reviewers may also accept a patch +conditioned on the author providing a follow-up patch addressing some +particular issue or concern (although no committed patch should leave the +project in a broken state). Moreover, reviewers can accept a patch conditioned on +the author applying some set of minor updates prior to committing, and when +applicable, it is polite for reviewers to do so. + +Don't Unintentionally Block a Review +------------------------------------ + +If you review a patch, but don't intend for the review process to block on your +approval, please state that explicitly. 
Out of courtesy, we generally wait on +committing a patch until all reviewers are satisfied, and if you don't intend +to look at the patch again in a timely fashion, please communicate that fact in +the review. + +Who Can/Should Review Code? +=========================== + +Non-Experts Should Review Code +------------------------------ + +You do not need to be an expert in some area of the code base to review patches; +it's fine to ask questions about what some piece of code is doing. If it's not +clear to you what is going on, you're unlikely to be the only one. Please +remember that it is not in the long-term best interest of the community to have +components that are only understood well by a small number of people. Extra +comments and/or test cases can often help (and asking for comments in the test +cases is fine as well). + +Moreover, authors are encouraged to interpret questions as a reason to reexamine +the readability of the code in question. Structural changes, or further +comments, may be appropriate. + +If you're new to the LLVM community, you might also find this presentation helpful: +.. _How to Contribute to LLVM, A 2019 LLVM Developers' Meeting Presentation: https://youtu.be/C5Y977rLqpw + +A good way for new contributors to increase their knowledge of the code base is +to review code. It is perfectly acceptable to review code and explicitly +defer to others for approval decisions. + +Experts Should Review Code +-------------------------- + +If you are an expert in an area of the compiler affected by a proposed patch, +then you are highly encouraged to review the code. If you are a relevant code +owner, and no other experts are reviewing a patch, you must either help arrange +for an expert to review the patch or review it yourself. + +Code Reviews, Speed, and Reciprocity +------------------------------------ + +Sometimes code reviews will take longer than you might hope, especially for +larger features. 
Common ways to speed up review times for your patches are: + +* Review other people's patches. If you help out, everybody will be more + willing to do the same for you; goodwill is our currency. +* Ping the patch. If it is urgent, provide reasons why it is important to you to + get this patch landed and ping it every couple of days. If it is + not urgent, the common courtesy ping rate is one week. Remember that you're + asking for valuable time from other professional developers. +* Ask for help on IRC. Developers on IRC will be able to either help you + directly, or tell you who might be a good reviewer. +* Split your patch into multiple smaller patches that build on each other. The + smaller your patch is, the higher the probability that somebody will take a quick + look at it. When doing this, it is helpful to add "[N/M]" (for 1 <= N <= M) to + the title of each patch in the series, so it is clear that there is an order + and what that order is. + +Developers should participate in code reviews as both reviewers and +authors. If someone is kind enough to review your code, you should return the +favor for someone else. Note that anyone is welcome to review and give feedback +on a patch, but approval of patches should be consistent with the policy above. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodingStandards.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodingStandards.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CodingStandards.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CodingStandards.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,1695 @@ +===================== +LLVM Coding Standards +===================== + +.. contents:: + :local: + +Introduction +============ + +This document describes coding standards that are used in the LLVM project. 
+Although no coding standards should be regarded as absolute requirements to be +followed in all instances, coding standards are +particularly important for large-scale code bases that follow a library-based +design (like LLVM). + +While this document may provide guidance for some mechanical formatting issues, +whitespace, or other "microscopic details", these are not fixed standards. +Always follow the golden rule: + +.. _Golden Rule: + + **If you are extending, enhancing, or bug fixing already implemented code, + use the style that is already being used so that the source is uniform and + easy to follow.** + +Note that some code bases (e.g. ``libc++``) have special reasons to deviate +from the coding standards. For example, in the case of ``libc++``, this is +because the naming and other conventions are dictated by the C++ standard. + +There are some conventions that are not uniformly followed in the code base +(e.g. the naming convention). This is because they are relatively new, and a +lot of code was written before they were put in place. Our long term goal is +for the entire codebase to follow the convention, but we explicitly *do not* +want patches that do large-scale reformatting of existing code. On the other +hand, it is reasonable to rename the methods of a class if you're about to +change it in some other way. Please commit such changes separately to +make code review easier. + +The ultimate goal of these guidelines is to increase the readability and +maintainability of our common source base. + +Languages, Libraries, and Standards +=================================== + +Most source code in LLVM and other LLVM projects using these coding standards +is C++ code. There are some places where C code is used either due to +environment restrictions, historical restrictions, or due to third-party source +code imported into the tree. Generally, our preference is for standards +conforming, modern, and portable C++ code as the implementation language of +choice. 
+ +C++ Standard Versions +--------------------- + +Unless otherwise documented, LLVM subprojects are written using standard C++14 +code and avoid unnecessary vendor-specific extensions. + +Nevertheless, we restrict ourselves to features which are available in the +major toolchains supported as host compilers (see :doc:`GettingStarted` page, +section `Software`). + +Each toolchain provides a good reference for what it accepts: + +* Clang: https://clang.llvm.org/cxx_status.html +* GCC: https://gcc.gnu.org/projects/cxx-status.html#cxx14 +* MSVC: https://msdn.microsoft.com/en-us/library/hh567368.aspx + + +C++ Standard Library +-------------------- + +Instead of implementing custom data structures, we encourage the use of C++ +standard library facilities or LLVM support libraries whenever they are +available for a particular task. LLVM and related projects emphasize and rely +on the standard library facilities and the LLVM support libraries as much as +possible. + +LLVM support libraries (for example, `ADT +<https://github.com/llvm/llvm-project/tree/main/llvm/include/llvm/ADT>`_) +implement specialized data structures or functionality missing in the standard +library. Such libraries are usually implemented in the ``llvm`` namespace and +follow the expected standard interface, when there is one. + +When both C++ and the LLVM support libraries provide similar functionality, and +there isn't a specific reason to favor the C++ implementation, it is generally +preferable to use the LLVM library. For example, ``llvm::DenseMap`` should +almost always be used instead of ``std::map`` or ``std::unordered_map``, and +``llvm::SmallVector`` should usually be used instead of ``std::vector``. + +We explicitly avoid some standard facilities, like the I/O streams, and instead +use LLVM's streams library (raw_ostream_). More detailed information on these +subjects is available in the :doc:`ProgrammersManual`. 
+ +For more information about LLVM's data structures and the tradeoffs they make, +please consult `that section of the programmer's +manual <https://llvm.org/docs/ProgrammersManual.html#picking-the-right-data-structure-for-a-task>`_. + +Guidelines for Go code +---------------------- + +Any code written in the Go programming language is not subject to the +formatting rules below. Instead, we adopt the formatting rules enforced by +the `gofmt`_ tool. + +Go code should strive to be idiomatic. Two good sets of guidelines for what +this means are `Effective Go`_ and `Go Code Review Comments`_. + +.. _gofmt: + https://golang.org/cmd/gofmt/ + +.. _Effective Go: + https://golang.org/doc/effective_go.html + +.. _Go Code Review Comments: + https://github.com/golang/go/wiki/CodeReviewComments + +Mechanical Source Issues +======================== + +Source Code Formatting +---------------------- + +Commenting +^^^^^^^^^^ + +Comments are important for readability and maintainability. When writing comments, +write them as English prose, using proper capitalization, punctuation, etc. +Aim to describe what the code is trying to do and why, not *how* it does it at +a micro level. Here are a few important things to document: + +.. _header file comment: + +File Headers +"""""""""""" + +Every source file should have a header on it that describes the basic purpose of +the file. The standard header looks like this: + +.. code-block:: c++ + + //===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + /// + /// \file + /// This file contains the declaration of the Instruction class, which is the + /// base class for all of the VM instructions. 
+ /// + //===----------------------------------------------------------------------===// + +A few things to note about this particular format: The "``-*- C++ -*-``" string +on the first line is there to tell Emacs that the source file is a C++ file, not +a C file (Emacs assumes ``.h`` files are C files by default). + +.. note:: + + This tag is not necessary in ``.cpp`` files. The name of the file is also + on the first line, along with a very short description of the purpose of the + file. + +The next section in the file is a concise note that defines the license that the +file is released under. This makes it perfectly clear what terms the source +code can be distributed under and should not be modified in any way. + +The main body is a `Doxygen <http://www.doxygen.nl/>`_ comment (identified by +the ``///`` comment marker instead of the usual ``//``) describing the purpose +of the file. The first sentence (or a passage beginning with ``\brief``) is +used as an abstract. Any additional information should be separated by a blank +line. If an algorithm is based on a paper or is described in another source, +provide a reference. + +Header Guard +"""""""""""" + +The header file's guard should be the all-caps path that a user of this header +would #include, using '_' instead of path separator and extension marker. +For example, the header file +``llvm/include/llvm/Analysis/Utils/Local.h`` would be ``#include``-ed as +``#include "llvm/Analysis/Utils/Local.h"``, so its guard is +``LLVM_ANALYSIS_UTILS_LOCAL_H``. + +Class overviews +""""""""""""""" + +Classes are a fundamental part of an object-oriented design. As such, a +class definition should have a comment block that explains what the class is +used for and how it works. Every non-trivial class is expected to have a +``doxygen`` comment block. + +Method information +"""""""""""""""""" + +Methods and global functions should also be documented. A quick note about +what it does and a description of the edge cases is all that is necessary here. 
+The reader should be able to understand how to use interfaces without reading +the code itself. + +Good things to talk about here are what happens when something unexpected +happens, for instance, does the method return null? + +Comment Formatting +^^^^^^^^^^^^^^^^^^ + +In general, prefer C++-style comments (``//`` for normal comments, ``///`` for +``doxygen`` documentation comments). There are a few cases when it is +useful to use C-style (``/* */``) comments however: + +#. When writing C code to be compatible with C89. + +#. When writing a header file that may be ``#include``\d by a C source file. + +#. When writing a source file that is used by a tool that only accepts C-style + comments. + +#. When documenting the significance of constants used as actual parameters in + a call. This is most helpful for ``bool`` parameters, or passing ``0`` or + ``nullptr``. The comment should contain the parameter name, which ought to be + meaningful. For example, it's not clear what the parameter means in this call: + + .. code-block:: c++ + + Object.emitName(nullptr); + + An in-line C-style comment makes the intent obvious: + + .. code-block:: c++ + + Object.emitName(/*Prefix=*/nullptr); + +Commenting out large blocks of code is discouraged, but if you really have to do +this (for documentation purposes or as a suggestion for debug printing), use +``#if 0`` and ``#endif``. These nest properly and are better behaved in general +than C style comments. + +Doxygen Use in Documentation Comments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use the ``\file`` command to turn the standard file header into a file-level +comment. + +Include descriptive paragraphs for all public interfaces (public classes, +member and non-member functions). Avoid restating the information that can +be inferred from the API name. The first sentence (or a paragraph beginning +with ``\brief``) is used as an abstract. Try to use a single sentence as the +``\brief`` adds visual clutter. 
Put detailed discussion into separate +paragraphs. + +To refer to parameter names inside a paragraph, use the ``\p name`` command. +Don't use the ``\arg name`` command since it starts a new paragraph that +contains documentation for the parameter. + +Wrap non-inline code examples in ``\code ... \endcode``. + +To document a function parameter, start a new paragraph with the +``\param name`` command. If the parameter is used as an out or an in/out +parameter, use the ``\param [out] name`` or ``\param [in,out] name`` command, +respectively. + +To describe a function's return value, start a new paragraph with the ``\returns`` +command. + +A minimal documentation comment: + +.. code-block:: c++ + + /// Sets the xyzzy property to \p Baz. + void setXyzzy(bool Baz); + +A documentation comment that uses all Doxygen features in a preferred way: + +.. code-block:: c++ + + /// Does foo and bar. + /// + /// Does not do foo the usual way if \p Baz is true. + /// + /// Typical usage: + /// \code + /// fooBar(false, "quux", Res); + /// \endcode + /// + /// \param Quux kind of foo to do. + /// \param [out] Result filled with bar sequence on foo success. + /// + /// \returns true on success. + bool fooBar(bool Baz, StringRef Quux, std::vector<int> &Result); + +Don't duplicate the documentation comment in the header file and in the +implementation file. Put the documentation comments for public APIs into the +header file. Documentation comments for private APIs can go to the +implementation file. In any case, implementation files can include additional +comments (not necessarily in Doxygen markup) to explain implementation details +as needed. + +Don't duplicate function or class name at the beginning of the comment. +For humans it is obvious which function or class is being documented; +automatic documentation processing tools are smart enough to bind the comment +to the correct declaration. + +Avoid: + +.. code-block:: c++ + + // Example.h: + + // example - Does something important.
+ void example(); + + // Example.cpp: + + // example - Does something important. + void example() { ... } + +Preferred: + +.. code-block:: c++ + + // Example.h: + + /// Does something important. + void example(); + + // Example.cpp: + + /// Builds a B-tree in order to do foo. See paper by... + void example() { ... } + +Error and Warning Messages +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Clear diagnostic messages are important to help users identify and fix issues in +their inputs. Use succinct but correct English prose that gives the user the +context needed to understand what went wrong. Also, to match error message +styles commonly produced by other tools, start the first sentence with a +lower-case letter, and finish the last sentence without a period, if it would +end in one otherwise. Sentences which end with different punctuation, such as +"did you forget ';'?", should still do so. + +For example this is a good error message: + +.. code-block:: none + + error: file.o: section header 3 is corrupt. Size is 10 when it should be 20 + +This is a bad message, since it does not provide useful information and uses the +wrong style: + +.. code-block:: none + + error: file.o: Corrupt section header. + +As with other coding standards, individual projects, such as the Clang Static +Analyzer, may have preexisting styles that do not conform to this. If a +different formatting scheme is used consistently throughout the project, use +that style instead. Otherwise, this standard applies to all LLVM tools, +including clang, clang-tidy, and so on. + +If the tool or project does not have existing functions to emit warnings or +errors, use the error and warning handlers provided in ``Support/WithColor.h`` +to ensure they are printed in the appropriate style, rather than printing to +stderr directly. + +When using ``report_fatal_error``, follow the same standards for the message as +regular error messages. 
Assertion messages and ``llvm_unreachable`` calls do not +necessarily need to follow these same styles as they are automatically +formatted, and thus these guidelines may not be suitable. + +``#include`` Style +^^^^^^^^^^^^^^^^^^ + +Immediately after the `header file comment`_ (and include guards if working on a +header file), the `minimal list of #includes`_ required by the file should be +listed. We prefer these ``#include``\s to be listed in this order: + +.. _Main Module Header: +.. _Local/Private Headers: + +#. Main Module Header +#. Local/Private Headers +#. LLVM project/subproject headers (``clang/...``, ``lldb/...``, ``llvm/...``, etc) +#. System ``#include``\s + +and each category should be sorted lexicographically by the full path. + +The `Main Module Header`_ file applies to ``.cpp`` files which implement an +interface defined by a ``.h`` file. This ``#include`` should always be included +**first** regardless of where it lives on the file system. By including a +header file first in the ``.cpp`` files that implement the interfaces, we ensure +that the header does not have any hidden dependencies which are not explicitly +``#include``\d in the header, but should be. It is also a form of documentation +in the ``.cpp`` file to indicate where the interfaces it implements are defined. + +LLVM project and subproject headers should be grouped from most specific to least +specific, for the same reasons described above. For example, LLDB depends on +both clang and LLVM, and clang depends on LLVM. So an LLDB source file should +include ``lldb`` headers first, followed by ``clang`` headers, followed by +``llvm`` headers, to reduce the possibility (for example) of an LLDB header +accidentally picking up a missing include due to the previous inclusion of that +header in the main source file or some earlier header file. clang should +similarly include its own headers before including llvm headers. This rule +applies to all LLVM subprojects. + +.. 
_fit into 80 columns: + +Source Code Width +^^^^^^^^^^^^^^^^^ + +Write your code to fit within 80 columns. + +There must be some limit to the width of the code in +order to allow developers to have multiple files side-by-side in +windows on a modest display. If you are going to pick a width limit, it is +somewhat arbitrary but you might as well pick something standard. Going with 90 +columns (for example) instead of 80 columns wouldn't add any significant value +and would be detrimental to printing out code. Also many other projects have +standardized on 80 columns, so some people have already configured their editors +for it (vs something else, like 90 columns). + +Whitespace +^^^^^^^^^^ + +In all cases, prefer spaces to tabs in source files. People have different +preferred indentation levels, and different styles of indentation that they +like; this is fine. What isn't fine is that different editors/viewers expand +tabs out to different tab stops. This can cause your code to look completely +unreadable, and it is not worth dealing with. + +As always, follow the `Golden Rule`_ above: follow the style of existing code +if you are modifying and extending it. + +Do not add trailing whitespace. Some common editors will automatically remove +trailing whitespace when saving a file which causes unrelated changes to appear +in diffs and commits. + +Format Lambdas Like Blocks Of Code +"""""""""""""""""""""""""""""""""" + +When formatting a multi-line lambda, format it like a block of code. If there +is only one multi-line lambda in a statement, and there are no expressions +lexically after it in the statement, drop the indent to the standard two space +indent for a block of code, as if it were an if-block opened by the preceding +part of the statement: + +.. 
code-block:: c++ + + std::sort(foo.begin(), foo.end(), [&](Foo a, Foo b) -> bool { + if (a.blah < b.blah) + return true; + if (a.baz < b.baz) + return true; + return a.bam < b.bam; + }); + +To take best advantage of this formatting, if you are designing an API which +accepts a continuation or single callable argument (be it a function object, or +a ``std::function``), it should be the last argument if at all possible. + +If there are multiple multi-line lambdas in a statement, or additional +parameters after the lambda, indent the block two spaces from the indent of the +``[]``: + +.. code-block:: c++ + + dyn_switch(V->stripPointerCasts(), + [] (PHINode *PN) { + // process phis... + }, + [] (SelectInst *SI) { + // process selects... + }, + [] (LoadInst *LI) { + // process loads... + }, + [] (AllocaInst *AI) { + // process allocas... + }); + +Braced Initializer Lists +"""""""""""""""""""""""" + +Starting from C++11, there are significantly more uses of braced lists to +perform initialization. For example, they can be used to construct aggregate +temporaries in expressions. They now have a natural way of ending up nested +within each other and within function calls in order to build up aggregates +(such as option structs) from local variables. + +The historically common formatting of braced initialization of aggregate +variables does not mix cleanly with deep nesting, general expression contexts, +function arguments, and lambdas. We suggest new code use a simple rule for +formatting braced initialization lists: act as-if the braces were parentheses +in a function call. The formatting rules exactly match those already well +understood for formatting nested function calls. Examples: + +.. 
code-block:: c++ + + foo({a, b, c}, {1, 2, 3}); + + llvm::Constant *Mask[] = { + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 0), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 1), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 2)}; + +This formatting scheme also makes it particularly easy to get predictable, +consistent, and automatic formatting with tools like `Clang Format`_. + +.. _Clang Format: https://clang.llvm.org/docs/ClangFormat.html + +Language and Compiler Issues +---------------------------- + +Treat Compiler Warnings Like Errors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Compiler warnings are often useful and help improve the code. Those that are +not useful can often be suppressed with a small code change. For example, an +assignment in the ``if`` condition is often a typo: + +.. code-block:: c++ + + if (V = getValue()) { + ... + } + +Several compilers will print a warning for the code above. It can be suppressed +by adding parentheses: + +.. code-block:: c++ + + if ((V = getValue())) { + ... + } + +Write Portable Code +^^^^^^^^^^^^^^^^^^^ + +In almost all cases, it is possible to write completely portable code. When +you need to rely on non-portable code, put it behind a well-defined and +well-documented interface. + +Do not use RTTI or Exceptions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In an effort to reduce code and executable size, LLVM does not use exceptions +or RTTI (`runtime type information +<https://en.wikipedia.org/wiki/Run-time_type_information>`_, for example, +``dynamic_cast<>``). + +That said, LLVM does make extensive use of a hand-rolled form of RTTI that uses +templates like :ref:`isa\<>, cast\<>, and dyn_cast\<> <isa>`. +This form of RTTI is opt-in and can be +:doc:`added to any class <HowToSetUpLLVMStyleRTTI>`. + +..
_static constructor: + +Do not use Static Constructors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Static constructors and destructors (e.g., global variables whose types have a +constructor or destructor) should not be added to the code base, and should be +removed wherever possible. + +Globals in different source files are initialized in `arbitrary order +<https://yosefk.com/c++fqa/ctors.html#fqa-10.12>`_, making the code more +difficult to reason about. + +Static constructors have a negative impact on launch time of programs that use +LLVM as a library. We would really like for there to be zero cost for linking +in an additional LLVM target or other library into an application, but static +constructors undermine this goal. + +Use of ``class`` and ``struct`` Keywords +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In C++, the ``class`` and ``struct`` keywords can be used almost +interchangeably. The only difference is when they are used to declare a class: +``class`` makes all members private by default while ``struct`` makes all +members public by default. + +* All declarations and definitions of a given ``class`` or ``struct`` must use + the same keyword. For example: + +.. code-block:: c++ + + // Avoid if `Example` is defined as a struct. + class Example; + + // OK. + struct Example; + + struct Example { ... }; + +* ``struct`` should be used when *all* members are declared public. + +.. code-block:: c++ + + // Avoid using `struct` here, use `class` instead. + struct Foo { + private: + int Data; + public: + Foo() : Data(0) { } + int getData() const { return Data; } + void setData(int D) { Data = D; } + }; + + // OK to use `struct`: all members are public. + struct Bar { + int Data; + Bar() : Data(0) { } + }; + +Do not use Braced Initializer Lists to Call a Constructor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Starting from C++11 there is a "generalized initialization syntax" which allows +calling constructors using braced initializer lists.
Do not use these to call +constructors with non-trivial logic or if you care that you're calling some +*particular* constructor. Those should look like function calls using +parentheses rather than like aggregate initialization. Similarly, if you need +to explicitly name the type and call its constructor to create a temporary, +don't use a braced initializer list. Instead, use a braced initializer list +(without any type for temporaries) when doing aggregate initialization or +something notionally equivalent. Examples: + +.. code-block:: c++ + + class Foo { + public: + // Construct a Foo by reading data from the disk in the whizbang format, ... + Foo(std::string filename); + + // Construct a Foo by looking up the Nth element of some global data ... + Foo(int N); + + // ... + }; + + // The Foo constructor call is reading a file, don't use braces to call it. + std::fill(foo.begin(), foo.end(), Foo("name")); + + // The pair is being constructed like an aggregate, use braces. + bar_map.insert({my_key, my_value}); + +If you use a braced initializer list when initializing a variable, use an equals sign before the open curly brace: + +.. code-block:: c++ + + int data[] = {0, 1, 2, 3}; + +Use ``auto`` Type Deduction to Make Code More Readable +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some are advocating a policy of "almost always ``auto``" in C++11, however LLVM +uses a more moderate stance. Use ``auto`` if and only if it makes the code more +readable or easier to maintain. Don't "almost always" use ``auto``, but do use +``auto`` with initializers like ``cast<Foo>(...)`` or other places where the +type is already obvious from the context. Another time when ``auto`` works well +for these purposes is when the type would have been abstracted away anyways, +often behind a container's typedef such as ``std::vector<T>::iterator``. + +Similarly, C++14 adds generic lambda expressions where parameter types can be +``auto``. Use these where you would have used a template.
+ +Beware unnecessary copies with ``auto`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The convenience of ``auto`` makes it easy to forget that its default behavior +is a copy. Particularly in range-based ``for`` loops, careless copies are +expensive. + +Use ``auto &`` for values and ``auto *`` for pointers unless you need to make a +copy. + +.. code-block:: c++ + + // Typically there's no reason to copy. + for (const auto &Val : Container) observe(Val); + for (auto &Val : Container) Val.change(); + + // Remove the reference if you really want a new copy. + for (auto Val : Container) { Val.change(); saveSomewhere(Val); } + + // Copy pointers, but make it clear that they're pointers. + for (const auto *Ptr : Container) observe(*Ptr); + for (auto *Ptr : Container) Ptr->change(); + +Beware of non-determinism due to ordering of pointers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In general, there is no relative ordering among pointers. As a result, +when unordered containers like sets and maps are used with pointer keys +the iteration order is undefined. Hence, iterating such containers may +result in non-deterministic code generation. While the generated code +might work correctly, non-determinism can make it harder to reproduce bugs and +debug the compiler. + +In case an ordered result is expected, remember to +sort an unordered container before iteration. Or use ordered containers +like ``vector``/``MapVector``/``SetVector`` if you want to iterate pointer +keys. + +Beware of non-deterministic sorting order of equal elements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``std::sort`` uses a non-stable sorting algorithm in which the order of equal +elements is not guaranteed to be preserved. Thus using ``std::sort`` for a +container having equal elements may result in non-deterministic behavior. +To uncover such instances of non-determinism, LLVM has introduced a new +llvm::sort wrapper function. 
For an EXPENSIVE_CHECKS build this will randomly +shuffle the container before sorting. Default to using ``llvm::sort`` instead +of ``std::sort``. + +Style Issues +============ + +The High-Level Issues +--------------------- + +Self-contained Headers +^^^^^^^^^^^^^^^^^^^^^^ + +Header files should be self-contained (compile on their own) and end in ``.h``. +Non-header files that are meant for inclusion should end in ``.inc`` and be +used sparingly. + +All header files should be self-contained. Users and refactoring tools should +not have to adhere to special conditions to include the header. Specifically, a +header should have header guards and include all other headers it needs. + +There are rare cases where a file designed to be included is not +self-contained. These are typically intended to be included at unusual +locations, such as the middle of another file. They might not use header +guards, and might not include their prerequisites. Name such files with the +.inc extension. Use sparingly, and prefer self-contained headers when possible. + +In general, a header should be implemented by one or more ``.cpp`` files. Each +of these ``.cpp`` files should include the header that defines their interface +first. This ensures that all of the dependences of the header have been +properly added to the header itself, and are not implicit. System headers +should be included after user headers for a translation unit. + +Library Layering +^^^^^^^^^^^^^^^^ + +A directory of header files (for example ``include/llvm/Foo``) defines a +library (``Foo``). One library (both +its headers and implementation) should only use things from the libraries +listed in its dependencies. + +Some of this constraint can be enforced by classic Unix linkers (Mac & Windows +linkers, as well as lld, do not enforce this constraint). A Unix linker +searches left to right through the libraries specified on its command line and +never revisits a library. 
In this way, no circular dependencies between +libraries can exist. + +This doesn't fully enforce all inter-library dependencies, and importantly +doesn't enforce header file circular dependencies created by inline functions. +A good way to answer the "is this layered correctly" would be to consider +whether a Unix linker would succeed at linking the program if all inline +functions were defined out-of-line. (& for all valid orderings of dependencies +- since linking resolution is linear, it's possible that some implicit +dependencies can sneak through: A depends on B and C, so valid orderings are +"C B A" or "B C A", in both cases the explicit dependencies come before their +use. But in the first case, B could still link successfully if it implicitly +depended on C, or the opposite in the second case) + +.. _minimal list of #includes: + +``#include`` as Little as Possible +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``#include`` hurts compile time performance. Don't do it unless you have to, +especially in header files. + +But wait! Sometimes you need to have the definition of a class to use it, or to +inherit from it. In these cases go ahead and ``#include`` that header file. Be +aware however that there are many cases where you don't need to have the full +definition of a class. If you are using a pointer or reference to a class, you +don't need the header file. If you are simply returning a class instance from a +prototyped function or method, you don't need it. In fact, for most cases, you +simply don't need the definition of a class. And not ``#include``\ing speeds up +compilation. + +It is easy to try to go too overboard on this recommendation, however. You +**must** include all of the header files that you are using --- you can include +them either directly or indirectly through another header file. 
To make sure +that you don't accidentally forget to include a header file in your module +header, make sure to include your module header **first** in the implementation +file (as mentioned above). This way there won't be any hidden dependencies that +you'll find out about later. + +Keep "Internal" Headers Private +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Many modules have a complex implementation that causes them to use more than one +implementation (``.cpp``) file. It is often tempting to put the internal +communication interface (helper classes, extra functions, etc) in the public +module header file. Don't do this! + +If you really need to do something like this, put a private header file in the +same directory as the source files, and include it locally. This ensures that +your private interface remains private and undisturbed by outsiders. + +.. note:: + + It's okay to put extra implementation methods in a public class itself. Just + make them private (or protected) and all is well. + +Use Namespace Qualifiers to Implement Previously Declared Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When providing an out of line implementation of a function in a source file, do +not open namespace blocks in the source file. Instead, use namespace qualifiers +to help ensure that your definition matches an existing declaration. Do this: + +.. code-block:: c++ + + // Foo.h + namespace llvm { + int foo(const char *s); + } + + // Foo.cpp + #include "Foo.h" + using namespace llvm; + int llvm::foo(const char *s) { + // ... + } + +Doing this helps to avoid bugs where the definition does not match the +declaration from the header. For example, the following C++ code defines a new +overload of ``llvm::foo`` instead of providing a definition for the existing +function declared in the header: + +.. 
code-block:: c++ + + // Foo.cpp + #include "Foo.h" + namespace llvm { + int foo(char *s) { // Mismatch between "const char *" and "char *" + } + } // end namespace llvm + +This error will not be caught until the build is nearly complete, when the +linker fails to find a definition for any uses of the original function. If the +function were instead defined with a namespace qualifier, the error would have +been caught immediately when the definition was compiled. + +Class method implementations must already name the class and new overloads +cannot be introduced out of line, so this recommendation does not apply to them. + +.. _early exits: + +Use Early Exits and ``continue`` to Simplify Code +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When reading code, keep in mind how much state and how many previous decisions +have to be remembered by the reader to understand a block of code. Aim to +reduce indentation where possible when it doesn't make it more difficult to +understand the code. One great way to do this is by making use of early exits +and the ``continue`` keyword in long loops. Consider this code that does not +use an early exit: + +.. code-block:: c++ + + Value *doSomething(Instruction *I) { + if (!I->isTerminator() && + I->hasOneUse() && doOtherThing(I)) { + ... some long code .... + } + + return 0; + } + +This code has several problems if the body of the ``'if'`` is large. When +you're looking at the top of the function, it isn't immediately clear that this +*only* does interesting things with non-terminator instructions, and only +applies to things with the other predicates. Second, it is relatively difficult +to describe (in comments) why these predicates are important because the ``if`` +statement makes it difficult to lay out the comments. Third, when you're deep +within the body of the code, it is indented an extra level. 
Finally, when +reading the top of the function, it isn't clear what the result is if the +predicate isn't true; you have to read to the end of the function to know that +it returns null. + +It is much preferred to format the code like this: + +.. code-block:: c++ + + Value *doSomething(Instruction *I) { + // Terminators never need 'something' done to them because ... + if (I->isTerminator()) + return 0; + + // We conservatively avoid transforming instructions with multiple uses + // because goats like cheese. + if (!I->hasOneUse()) + return 0; + + // This is really just here for example. + if (!doOtherThing(I)) + return 0; + + ... some long code .... + } + +This fixes these problems. A similar problem frequently happens in ``for`` +loops. A silly example is something like this: + +.. code-block:: c++ + + for (Instruction &I : BB) { + if (auto *BO = dyn_cast<BinaryOperator>(&I)) { + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + if (LHS != RHS) { + ... + } + } + } + +When you have very, very small loops, this sort of structure is fine. But if it +exceeds 10-15 lines, it becomes difficult for people to read and +understand at a glance. The problem with this sort of code is that it gets very +nested very quickly. Meaning that the reader of the code has to keep a lot of +context in their brain to remember what is going on immediately in the loop, +because they don't know if/when the ``if`` conditions will have ``else``\s etc. +It is strongly preferred to structure the loop like this: + +.. code-block:: c++ + + for (Instruction &I : BB) { + auto *BO = dyn_cast<BinaryOperator>(&I); + if (!BO) continue; + + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + if (LHS == RHS) continue; + + ...
+ } + +This has all the benefits of using early exits for functions: it reduces nesting +of the loop, it makes it easier to describe why the conditions are true, and it +makes it obvious to the reader that there is no ``else`` coming up that they +have to push context into their brain for. If a loop is large, this can be a +big understandability win. + +Don't use ``else`` after a ``return`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For similar reasons as above (reduction of indentation and easier reading), please +do not use ``'else'`` or ``'else if'`` after something that interrupts control +flow --- like ``return``, ``break``, ``continue``, ``goto``, etc. For example: + +.. code-block:: c++ + + case 'J': { + if (Signed) { + Type = Context.getsigjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_sigjmp_buf; + return QualType(); + } else { + break; // Unnecessary. + } + } else { + Type = Context.getjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_jmp_buf; + return QualType(); + } else { + break; // Unnecessary. + } + } + } + +It is better to write it like this: + +.. code-block:: c++ + + case 'J': + if (Signed) { + Type = Context.getsigjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_sigjmp_buf; + return QualType(); + } + } else { + Type = Context.getjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_jmp_buf; + return QualType(); + } + } + break; + +Or better yet (in this case) as: + +.. code-block:: c++ + + case 'J': + if (Signed) + Type = Context.getsigjmp_bufType(); + else + Type = Context.getjmp_bufType(); + + if (Type.isNull()) { + Error = Signed ? ASTContext::GE_Missing_sigjmp_buf : + ASTContext::GE_Missing_jmp_buf; + return QualType(); + } + break; + +The idea is to reduce indentation and the amount of code you have to keep track +of when reading the code. 
+ +Turn Predicate Loops into Predicate Functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is very common to write small loops that just compute a boolean value. There +are a number of ways that people commonly write these, but an example of this +sort of thing is: + +.. code-block:: c++ + + bool FoundFoo = false; + for (unsigned I = 0, E = BarList.size(); I != E; ++I) + if (BarList[I]->isFoo()) { + FoundFoo = true; + break; + } + + if (FoundFoo) { + ... + } + +Instead of this sort of loop, we prefer to use a predicate function (which may +be `static`_) that uses `early exits`_: + +.. code-block:: c++ + + /// \returns true if the specified list has an element that is a foo. + static bool containsFoo(const std::vector<Bar*> &List) { + for (unsigned I = 0, E = List.size(); I != E; ++I) + if (List[I]->isFoo()) + return true; + return false; + } + ... + + if (containsFoo(BarList)) { + ... + } + +There are many reasons for doing this: it reduces indentation and factors out +code which can often be shared by other code that checks for the same predicate. +More importantly, it *forces you to pick a name* for the function, and forces +you to write a comment for it. In this silly example, this doesn't add much +value. However, if the condition is complex, this can make it a lot easier for +the reader to understand the code that queries for this predicate. Instead of +being faced with the in-line details of how we check to see if the BarList +contains a foo, we can trust the function name and continue reading with better +locality. + +The Low-Level Issues +-------------------- + +Name Types, Functions, Variables, and Enumerators Properly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Poorly-chosen names can mislead the reader and cause bugs. We cannot stress +enough how important it is to use *descriptive* names. Pick names that match +the semantics and role of the underlying entities, within reason. Avoid +abbreviations unless they are well known.
After picking a good name, make sure +to use consistent capitalization for the name, as inconsistency requires clients +to either memorize the APIs or to look it up to find the exact spelling. + +In general, names should be in camel case (e.g. ``TextFileReader`` and +``isLValue()``). Different kinds of declarations have different rules: + +* **Type names** (including classes, structs, enums, typedefs, etc) should be + nouns and start with an upper-case letter (e.g. ``TextFileReader``). + +* **Variable names** should be nouns (as they represent state). The name should + be camel case, and start with an upper case letter (e.g. ``Leader`` or + ``Boats``). + +* **Function names** should be verb phrases (as they represent actions), and + command-like function should be imperative. The name should be camel case, + and start with a lower case letter (e.g. ``openFile()`` or ``isFoo()``). + +* **Enum declarations** (e.g. ``enum Foo {...}``) are types, so they should + follow the naming conventions for types. A common use for enums is as a + discriminator for a union, or an indicator of a subclass. When an enum is + used for something like this, it should have a ``Kind`` suffix + (e.g. ``ValueKind``). + +* **Enumerators** (e.g. ``enum { Foo, Bar }``) and **public member variables** + should start with an upper-case letter, just like types. Unless the + enumerators are defined in their own small namespace or inside a class, + enumerators should have a prefix corresponding to the enum declaration name. + For example, ``enum ValueKind { ... };`` may contain enumerators like + ``VK_Argument``, ``VK_BasicBlock``, etc. Enumerators that are just + convenience constants are exempt from the requirement for a prefix. For + instance: + + .. code-block:: c++ + + enum { + MaxSize = 42, + Density = 12 + }; + +As an exception, classes that mimic STL classes can have member names in STL's +style of lower-case words separated by underscores (e.g. 
``begin()``, +``push_back()``, and ``empty()``). Classes that provide multiple +iterators should add a singular prefix to ``begin()`` and ``end()`` +(e.g. ``global_begin()`` and ``use_begin()``). + +Here are some examples: + +.. code-block:: c++ + + class VehicleMaker { + ... + Factory<Tire> F; // Avoid: a non-descriptive abbreviation. + Factory<Tire> Factory; // Better: more descriptive. + Factory<Tire> TireFactory; // Even better: if VehicleMaker has more than one + // kind of factories. + }; + + Vehicle makeVehicle(VehicleType Type) { + VehicleMaker M; // Might be OK if scope is small. + Tire Tmp1 = M.makeTire(); // Avoid: 'Tmp1' provides no information. + Light Headlight = M.makeLight("head"); // Good: descriptive. + ... + } + +Assert Liberally +^^^^^^^^^^^^^^^^ + +Use the "``assert``" macro to its fullest. Check all of your preconditions and +assumptions; you never know when a bug (not necessarily even yours) might be +caught early by an assertion, which reduces debugging time dramatically. The +"``<cassert>``" header file is probably already included by the header files you +are using, so it doesn't cost anything to use it. + +To further assist with debugging, make sure to put some kind of error message in +the assertion statement, which is printed if the assertion is tripped. This +helps the poor debugger make sense of why an assertion is being made and +enforced, and hopefully what to do about it. Here is one complete example: + +.. code-block:: c++ + + inline Value *getOperand(unsigned I) { + assert(I < Operands.size() && "getOperand() out of range!"); + return Operands[I]; + } + +Here are more examples: + +.. 
code-block:: c++ + + assert(Ty->isPointerType() && "Can't allocate a non-pointer type!"); + + assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!"); + + assert(idx < getNumSuccessors() && "Successor # out of range!"); + + assert(V1.getType() == V2.getType() && "Constant types must be identical!"); + + assert(isa<PHINode>(Succ->front()) && "Only works on PHId BBs!"); + +You get the idea. + +In the past, asserts were used to indicate a piece of code that should not be +reached. These were typically of the form: + +.. code-block:: c++ + + assert(0 && "Invalid radix for integer literal"); + +This has a few issues, the main one being that some compilers might not +understand the assertion, or warn about a missing return in builds where +assertions are compiled out. + +Today, we have something much better: ``llvm_unreachable``: + +.. code-block:: c++ + + llvm_unreachable("Invalid radix for integer literal"); + +When assertions are enabled, this will print the message if it's ever reached +and then exit the program. When assertions are disabled (i.e. in release +builds), ``llvm_unreachable`` becomes a hint to compilers to skip generating +code for this branch. If the compiler does not support this, it will fall back +to the "abort" implementation. + +Use ``llvm_unreachable`` to mark a specific point in code that should never be +reached. This is especially desirable for addressing warnings about unreachable +branches, etc., but can be used whenever reaching a particular code path is +unconditionally a bug (not originating from user input; see below) of some kind. +Use of ``assert`` should always include a testable predicate (as opposed to +``assert(false)``). + +If the error condition can be triggered by user input then the +recoverable error mechanism described in :doc:`ProgrammersManual` should be +used instead. In cases where this is not practical, ``report_fatal_error`` may +be used. 
+ +Another issue is that values used only by assertions will produce an "unused +value" warning when assertions are disabled. For example, this code will warn: + +.. code-block:: c++ + + unsigned Size = V.size(); + assert(Size > 42 && "Vector smaller than it should be"); + + bool NewToSet = Myset.insert(Value); + assert(NewToSet && "The value shouldn't be in the set yet"); + +These are two interesting different cases. In the first case, the call to +``V.size()`` is only useful for the assert, and we don't want it executed when +assertions are disabled. Code like this should move the call into the assert +itself. In the second case, the side effects of the call must happen whether +the assert is enabled or not. In this case, the value should be cast to void to +disable the warning. To be specific, it is preferred to write the code like +this: + +.. code-block:: c++ + + assert(V.size() > 42 && "Vector smaller than it should be"); + + bool NewToSet = Myset.insert(Value); (void)NewToSet; + assert(NewToSet && "The value shouldn't be in the set yet"); + +Do Not Use ``using namespace std`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In LLVM, we prefer to explicitly prefix all identifiers from the standard +namespace with an "``std::``" prefix, rather than rely on "``using namespace +std;``". + +In header files, adding a ``'using namespace XXX'`` directive pollutes the +namespace of any source file that ``#include``\s the header, creating +maintenance issues. + +In implementation files (e.g. ``.cpp`` files), the rule is more of a stylistic +rule, but is still important. Basically, using explicit namespace prefixes +makes the code **clearer**, because it is immediately obvious what facilities +are being used and where they are coming from. And **more portable**, because +namespace clashes cannot occur between LLVM code and other namespaces. 
The +portability rule is important because different standard library implementations +expose different symbols (potentially ones they shouldn't), and future revisions +to the C++ standard will add more symbols to the ``std`` namespace. As such, we +never use ``'using namespace std;'`` in LLVM. + +The exception to the general rule (i.e. it's not an exception for the ``std`` +namespace) is for implementation files. For example, all of the code in the +LLVM project implements code that lives in the 'llvm' namespace. As such, it is +ok, and actually clearer, for the ``.cpp`` files to have a ``'using namespace +llvm;'`` directive at the top, after the ``#include``\s. This reduces +indentation in the body of the file for source editors that indent based on +braces, and keeps the conceptual context cleaner. The general form of this rule +is that any ``.cpp`` file that implements code in any namespace may use that +namespace (and its parents'), but should not use any others. + +Provide a Virtual Method Anchor for Classes in Headers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If a class is defined in a header file and has a vtable (either it has virtual +methods or it derives from classes with virtual methods), it must always have at +least one out-of-line virtual method in the class. Without this, the compiler +will copy the vtable and RTTI into every ``.o`` file that ``#include``\s the +header, bloating ``.o`` file sizes and increasing link times. + +Don't use default labels in fully covered switches over enumerations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``-Wswitch`` warns if a switch, without a default label, over an enumeration +does not cover every enumeration value. If you write a default label on a fully +covered switch over an enumeration then the ``-Wswitch`` warning won't fire +when new elements are added to that enumeration. 
To help avoid adding these +kinds of defaults, Clang has the warning ``-Wcovered-switch-default`` which is +off by default but turned on when building LLVM with a version of Clang that +supports the warning. + +A knock-on effect of this stylistic requirement is that when building LLVM with +GCC you may get warnings related to "control may reach end of non-void function" +if you return from each case of a covered switch-over-enum because GCC assumes +that the enum expression may take any representable value, not just those of +individual enumerators. To suppress this warning, use ``llvm_unreachable`` after +the switch. + +Use range-based ``for`` loops wherever possible +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The introduction of range-based ``for`` loops in C++11 means that explicit +manipulation of iterators is rarely necessary. We use range-based ``for`` +loops wherever possible for all newly added code. For example: + +.. code-block:: c++ + + BasicBlock *BB = ... + for (Instruction &I : *BB) + ... use I ... + +Usage of ``std::for_each()``/``llvm::for_each()`` functions is discouraged, +unless the callable object already exists. + +Don't evaluate ``end()`` every time through a loop +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In cases where range-based ``for`` loops can't be used and it is necessary +to write an explicit iterator-based loop, pay close attention to whether +``end()`` is re-evaluated on each loop iteration. One common mistake is to +write a loop in this style: + +.. code-block:: c++ + + BasicBlock *BB = ... + for (auto I = BB->begin(); I != BB->end(); ++I) + ... use I ... + +The problem with this construct is that it evaluates "``BB->end()``" every time +through the loop. Instead of writing the loop like this, we strongly prefer +loops to be written so that they evaluate it once before the loop starts. A +convenient way to do this is like so: + +.. code-block:: c++ + + BasicBlock *BB = ... 
+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) + ... use I ... + +The observant may quickly point out that these two loops may have different +semantics: if the container (a basic block in this case) is being mutated, then +"``BB->end()``" may change its value every time through the loop and the second +loop may not in fact be correct. If you actually do depend on this behavior, +please write the loop in the first form and add a comment indicating that you +did it intentionally. + +Why do we prefer the second form (when correct)? Writing the loop in the first +form has two problems. First it may be less efficient than evaluating it at the +start of the loop. In this case, the cost is probably minor --- a few extra +loads every time through the loop. However, if the base expression is more +complex, then the cost can rise quickly. I've seen loops where the end +expression was actually something like: "``SomeMap[X]->end()``" and map lookups +really aren't cheap. By writing it in the second form consistently, you +eliminate the issue entirely and don't even have to think about it. + +The second (even bigger) issue is that writing the loop in the first form hints +to the reader that the loop is mutating the container (a fact that a comment +would handily confirm!). If you write the loop in the second form, it is +immediately obvious without even looking at the body of the loop that the +container isn't being modified, which makes it easier to read the code and +understand what it does. + +While the second form of the loop is a few extra keystrokes, we do strongly +prefer it. + +``#include <iostream>`` is Forbidden +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The use of ``#include <iostream>`` in library files is hereby **forbidden**, +because many common implementations transparently inject a `static constructor`_ +into every translation unit that includes it. + +Note that using the other stream headers (``<sstream>`` for example) is not +problematic in this regard --- just ``<iostream>``. 
However, ``raw_ostream`` +provides various APIs that are better performing for almost every use than +``std::ostream`` style APIs. + +.. note:: + + New code should always use `raw_ostream`_ for writing, or the + ``llvm::MemoryBuffer`` API for reading files. + +.. _raw_ostream: + +Use ``raw_ostream`` +^^^^^^^^^^^^^^^^^^^ + +LLVM includes a lightweight, simple, and efficient stream implementation in +``llvm/Support/raw_ostream.h``, which provides all of the common features of +``std::ostream``. All new code should use ``raw_ostream`` instead of +``ostream``. + +Unlike ``std::ostream``, ``raw_ostream`` is not a template and can be forward +declared as ``class raw_ostream``. Public headers should generally not include +the ``raw_ostream`` header, but use forward declarations and constant references +to ``raw_ostream`` instances. + +Avoid ``std::endl`` +^^^^^^^^^^^^^^^^^^^ + +The ``std::endl`` modifier, when used with ``iostreams`` outputs a newline to +the output stream specified. In addition to doing this, however, it also +flushes the output stream. In other words, these are equivalent: + +.. code-block:: c++ + + std::cout << std::endl; + std::cout << '\n' << std::flush; + +Most of the time, you probably have no reason to flush the output stream, so +it's better to use a literal ``'\n'``. + +Don't use ``inline`` when defining a function in a class definition +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A member function defined in a class definition is implicitly inline, so don't +put the ``inline`` keyword in this case. + +Don't: + +.. code-block:: c++ + + class Foo { + public: + inline void bar() { + // ... + } + }; + +Do: + +.. code-block:: c++ + + class Foo { + public: + void bar() { + // ... + } + }; + +Microscopic Details +------------------- + +This section describes preferred low-level formatting guidelines along with +reasoning on why we prefer them. 
+ +Spaces Before Parentheses +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Put a space before an open parenthesis only in control flow statements, but not +in normal function call expressions and function-like macros. For example: + +.. code-block:: c++ + + if (X) ... + for (I = 0; I != 100; ++I) ... + while (LLVMRocks) ... + + somefunc(42); + assert(3 != 4 && "laws of math are failing me"); + + A = foo(42, 92) + bar(X); + +The reason for doing this is not completely arbitrary. This style makes control +flow operators stand out more, and makes expressions flow better. + +Prefer Preincrement +^^^^^^^^^^^^^^^^^^^ + +Hard and fast rule: Preincrement (``++X``) may be no slower than postincrement +(``X++``) and could very well be a lot faster than it. Use preincrementation +whenever possible. + +The semantics of postincrement include making a copy of the value being +incremented, returning it, and then preincrementing the "work value". For +primitive types, this isn't a big deal. But for iterators, it can be a huge +issue (for example, some iterators contain stack and set objects in them... +copying an iterator could invoke the copy ctors of these as well). In general, +get in the habit of always using preincrement, and you won't have a problem. + + +Namespace Indentation +^^^^^^^^^^^^^^^^^^^^^ + +In general, we strive to reduce indentation wherever possible. This is useful +because we want code to `fit into 80 columns`_ without excessive wrapping, but +also because it makes it easier to understand the code. To facilitate this and +avoid some insanely deep nesting on occasion, don't indent namespaces. If it +helps readability, feel free to add a comment indicating what namespace is +being closed by a ``}``. For example: + +.. code-block:: c++ + + namespace llvm { + namespace knowledge { + + /// This class represents things that Smith can have an intimate + /// understanding of and contains the data associated with it. + class Grokable { + ... + public: + explicit Grokable() { ... 
} + virtual ~Grokable() = 0; + + ... + + }; + + } // end namespace knowledge + } // end namespace llvm + + +Feel free to skip the closing comment when the namespace being closed is +obvious for any reason. For example, the outer-most namespace in a header file +is rarely a source of confusion. But namespaces both anonymous and named in +source files that are being closed half way through the file probably could use +clarification. + +.. _static: + +Anonymous Namespaces +^^^^^^^^^^^^^^^^^^^^ + +After talking about namespaces in general, you may be wondering about anonymous +namespaces in particular. Anonymous namespaces are a great language feature +that tells the C++ compiler that the contents of the namespace are only visible +within the current translation unit, allowing more aggressive optimization and +eliminating the possibility of symbol name collisions. Anonymous namespaces are +to C++ as "static" is to C functions and global variables. While "``static``" +is available in C++, anonymous namespaces are more general: they can make entire +classes private to a file. + +The problem with anonymous namespaces is that they naturally want to encourage +indentation of their body, and they reduce locality of reference: if you see a +random function definition in a C++ file, it is easy to see if it is marked +static, but seeing if it is in an anonymous namespace requires scanning a big +chunk of the file. + +Because of this, we have a simple guideline: make anonymous namespaces as small +as possible, and only use them for class declarations. For example: + +.. code-block:: c++ + + namespace { + class StringSort { + ... + public: + StringSort(...) + bool operator<(const char *RHS) const; + }; + } // end anonymous namespace + + static void runHelper() { + ... + } + + bool StringSort::operator<(const char *RHS) const { + ... + } + +Avoid putting declarations other than classes into anonymous namespaces: + +.. code-block:: c++ + + namespace { + + // ... 
many declarations ... + + void runHelper() { + ... + } + + // ... many declarations ... + + } // end anonymous namespace + +When you are looking at "``runHelper``" in the middle of a large C++ file, +you have no immediate way to tell if this function is local to the file. In +contrast, when the function is marked static, you don't need to cross-reference +faraway places in the file to tell that the function is local. + +Don't Use Braces on Simple Single-Statement Bodies of if/else/loop Statements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When writing the body of an ``if``, ``else``, or loop statement, we prefer to +omit the braces to avoid unnecessary line noise. However, braces should be used +in cases where the omission of braces harms the readability and maintainability +of the code. + +We consider that readability is harmed when omitting the brace in the presence +of a single statement that is accompanied by a comment (assuming the comment +can't be hoisted above the ``if`` or loop statement, see below). +Similarly, braces should be used when a single-statement body is complex enough +that it becomes difficult to see where the block containing the following +statement began. An ``if``/``else`` chain or a loop is considered a single +statement for this rule, and this rule applies recursively. + +This list is not exhaustive, for example, readability is also harmed if an +``if``/``else`` chain does not use braced bodies for either all or none of its +members, with complex conditionals, deep nesting, etc. The examples below +intend to provide some guidelines. + +Maintainability is harmed if the body of an ``if`` ends with a (directly or +indirectly) nested ``if`` statement with no ``else``. Braces on the outer ``if`` +would help to avoid running into a "dangling else" situation. + + +.. code-block:: c++ + + // Omit the braces, since the body is simple and clearly associated with the if. 
+ if (isa(D)) + handleFunctionDecl(D); + else if (isa(D)) + handleVarDecl(D); + + + // Here we document the condition itself and not the body. + if (isa(D)) { + // It is necessary that we explain the situation with this surprisingly long + // comment, so it would be unclear without the braces whether the following + // statement is in the scope of the `if`. + // Because the condition is documented, we can't really hoist this + // comment that applies to the body above the if. + handleOtherDecl(D); + } + + // Use braces on the outer `if` to avoid a potential dangling else situation. + if (isa(D)) { + for (auto *A : D.attrs()) + if (shouldProcessAttr(A)) + handleAttr(A); + } + + // Use braces for the `if` block to keep it uniform with the else block. + if (isa(D)) { + handleFunctionDecl(D); + } else { + // In this else case, it is necessary that we explain the situation with this + // surprisingly long comment, so it would be unclear without the braces whether + // the following statement is in the scope of the `if`. + handleOtherDecl(D); + } + + // This should also omit braces. The `for` loop contains only a single statement, + // so it shouldn't have braces. The `if` also only contains a single simple + // statement (the for loop), so it also should omit braces. + if (isa(D)) + for (auto *A : D.attrs()) + handleAttr(A); + + // Use braces for the outer `if` since the nested `for` is braced. + if (isa(D)) { + for (auto *A : D.attrs()) { + // In this for loop body, it is necessary that we explain the situation + // with this surprisingly long comment, forcing braces on the `for` block. + handleAttr(A); + } + } + + // Use braces on the outer block because there are more than two levels of nesting. 
+ if (isa(D)) { + for (auto *A : D.attrs()) + for (ssize_t i : llvm::seq(count)) + handleAttrOnDecl(D, A, i); + } + + // Use braces on the outer block because of a nested `if`, otherwise the + // compiler would warn: `add explicit braces to avoid dangling else` + if (auto *D = dyn_cast(D)) { + if (shouldProcess(D)) + handleVarDecl(D); + else + markAsIgnored(D); + } + + +See Also +======== + +A lot of these comments and recommendations have been culled from other sources. +Two particularly important books for our work are: + +#. `Effective C++ + `_ + by Scott Meyers. Also interesting and useful are "More Effective C++" and + "Effective STL" by the same author. + +#. `Large-Scale C++ Software Design + `_ + by John Lakos + +If you get some free time, and you haven't read them: do so, you might learn +something. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/bugpoint.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/bugpoint.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/bugpoint.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/bugpoint.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,197 @@ +bugpoint - automatic test case reduction tool +============================================= + +.. program:: bugpoint + +SYNOPSIS +-------- + +**bugpoint** [*options*] [*input LLVM ll/bc files*] [*LLVM passes*] **--args** +*program arguments* + +DESCRIPTION +----------- + +**bugpoint** narrows down the source of problems in LLVM tools and passes. It +can be used to debug three types of failures: optimizer crashes, miscompilations +by optimizers, or bad native code generation (including problems in the static +and JIT compilers). It aims to reduce large test cases to small, useful ones. 
+For more information on the design and inner workings of **bugpoint**, as well as +advice for using bugpoint, see :doc:`/Bugpoint` in the LLVM +distribution. + +OPTIONS +------- + +**--additional-so** *library* + + Load the dynamic shared object *library* into the test program whenever it is + run. This is useful if you are debugging programs which depend on non-LLVM + libraries (such as the X or curses libraries) to run. + +**--append-exit-code**\ =\ *{true,false}* + + Append the test program's exit code to the output file so that a change in exit + code is considered a test failure. Defaults to false. + +**--args** *program args* + + Pass all arguments specified after **--args** to the test program whenever it runs. + Note that if any of the *program args* start with a "``-``", you should use: + + .. code-block:: bash + + bugpoint [bugpoint args] --args -- [program args] + + The "``--``" right after the **--args** option tells **bugpoint** to consider + any options starting with "``-``" to be part of the **--args** option, not as + options to **bugpoint** itself. + +**--tool-args** *tool args* + + Pass all arguments specified after **--tool-args** to the LLVM tool under test + (**llc**, **lli**, etc.) whenever it runs. You should use this option in the + following way: + + .. code-block:: bash + + bugpoint [bugpoint args] --tool-args -- [tool args] + + The "``--``" right after the **--tool-args** option tells **bugpoint** to + consider any options starting with "``-``" to be part of the **--tool-args** + option, not as options to **bugpoint** itself. (See **--args**, above.) + +**--safe-tool-args** *tool args* + + Pass all arguments specified after **--safe-tool-args** to the "safe" execution + tool. + +**--gcc-tool-args** *gcc tool args* + + Pass all arguments specified after **--gcc-tool-args** to the invocation of + **gcc**. + +**--opt-args** *opt args* + + Pass all arguments specified after **--opt-args** to the invocation of **opt**. 
+ +**--disable-{dce,simplifycfg}** + + Do not run the specified passes to clean up and reduce the size of the test + program. By default, **bugpoint** uses these passes internally when attempting to + reduce test programs. If you're trying to find a bug in one of these passes, + **bugpoint** may crash. + +**--enable-valgrind** + + Use valgrind to find faults in the optimization phase. This will allow + bugpoint to find otherwise asymptomatic problems caused by memory + mis-management. + +**-find-bugs** + + Continually randomize the specified passes and run them on the test program + until a bug is found or the user kills **bugpoint**. + +**-help** + + Print a summary of command line options. + +**--input** *filename* + + Open *filename* and redirect the standard input of the test program, whenever + it runs, to come from that file. + +**--load** *plugin* + + Load the dynamic object *plugin* into **bugpoint** itself. This object should + register new optimization passes. Once loaded, the object will add new command + line options to enable various optimizations. To see the new complete list of + optimizations, use the **-help** and **--load** options together; for example: + + .. code-block:: bash + + bugpoint --load myNewPass.so -help + +**--mlimit** *megabytes* + + Specifies an upper limit on memory usage of the optimization and codegen. Set + to zero to disable the limit. + +**--output** *filename* + + Whenever the test program produces output on its standard output stream, it + should match the contents of *filename* (the "reference output"). If you + do not use this option, **bugpoint** will attempt to generate a reference output + by compiling the program with the "safe" backend and running it. + +**--run-{int,jit,llc,custom}** + + Whenever the test program is compiled, **bugpoint** should generate code for it + using the specified code generator. 
These options allow you to choose the + interpreter, the JIT compiler, the static native code compiler, or a + custom command (see **--exec-command**) respectively. + +**--safe-{llc,custom}** + + When debugging a code generator, **bugpoint** should use the specified code + generator as the "safe" code generator. This is a known-good code generator + used to generate the "reference output" if it has not been provided, and to + compile portions of the program that are excluded from the testcase. + These options allow you to choose the + static native code compiler, or a custom command, (see **--exec-command**) + respectively. The interpreter and the JIT backends cannot currently + be used as the "safe" backends. + +**--exec-command** *command* + + This option defines the command to use with the **--run-custom** and + **--safe-custom** options to execute the bitcode testcase. This can + be useful for cross-compilation. + +**--compile-command** *command* + + This option defines the command to use with the **--compile-custom** + option to compile the bitcode testcase. The command should exit with a + failure exit code if the file is "interesting" and should exit with a + success exit code (i.e. 0) otherwise (this is the same as if it crashed on + "interesting" inputs). + + This can be useful for + testing compiler output without running any link or execute stages. To + generate a reduced unit test, you may add CHECK directives to the + testcase and pass the name of an executable compile-command script in this form: + + .. code-block:: sh + + #!/bin/sh + llc "$@" + not FileCheck [bugpoint input file].ll < bugpoint-test-program.s + + This script will "fail" as long as FileCheck passes. So the result + will be the minimum bitcode that passes FileCheck. + +**--safe-path** *path* + + This option defines the path to the command to execute with the + **--safe-{int,jit,llc,custom}** + option. 
+ +**--verbose-errors**\ =\ *{true,false}* + + The default behavior of bugpoint is to print "<crash>" when it finds a reduced + test that crashes compilation. This flag prints the output of the crashing + program to stderr. This is useful to make sure it is the same error being + tracked down and not a different error that happens to crash the compiler as + well. Defaults to false. + +EXIT STATUS +----------- + +If **bugpoint** succeeds in finding a problem, it will exit with 0. Otherwise, +if an error occurs, it will exit with a non-zero value. + +SEE ALSO +-------- + +:manpage:`opt(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/clang-tblgen.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/clang-tblgen.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/clang-tblgen.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/clang-tblgen.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,22 @@ +clang-tblgen - Description to C++ Code for Clang +================================================ + +.. program:: clang-tblgen + +SYNOPSIS +-------- + +:program:`clang-tblgen` [*options*] [*filename*] + + +DESCRIPTION +----------- + +:program:`clang-tblgen` is a program that translates compiler-related target +description (``.td``) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler. + +Please see :doc:`tblgen - Description to C++ Code<./tblgen>` +for a description of the *filename* argument and options, including the +options common to all :program:`*-tblgen` programs. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/dsymutil.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/dsymutil.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/dsymutil.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/dsymutil.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,172 @@ +dsymutil - manipulate archived DWARF debug symbol files +======================================================= + +.. program:: dsymutil + +SYNOPSIS +-------- + +| :program:`dsymutil` [*options*] *executable* + +DESCRIPTION +----------- + +:program:`dsymutil` links the DWARF debug information found in the object files +for an executable *executable* by using debug symbols information contained in +its symbol table. By default, the linked debug information is placed in a +``.dSYM`` bundle with the same name as the executable. + +OPTIONS +------- +.. option:: --accelerator=<accelerator type> + + Specify the desired type of accelerator table. Valid options are 'Apple', + 'Dwarf' and 'Default'. + +.. option:: --arch <arch> + + Link DWARF debug information only for specified CPU architecture types. + Architectures may be specified by name. When using this option, an error will + be returned if any architectures can not be properly linked. This option can + be specified multiple times, once for each desired architecture. All CPU + architectures will be linked by default and any architectures that can't be + properly linked will cause :program:`dsymutil` to return an error. + +.. option:: --dump-debug-map + + Dump the *executable*'s debug-map (the list of the object files containing the + debug information) in YAML format and exit. No DWARF link will take place. + +.. option:: --flat, -f + + Produce a flat dSYM file. A ``.dwarf`` extension will be appended to the + executable name unless the output file is specified using the ``-o`` option.
+ +.. option:: --gen-reproducer + + Generate a reproducer consisting of the input object files. + +.. option:: --help, -h + + Print this help output. + +.. option:: --keep-function-for-static + + Make a static variable keep the enclosing function even if it would have been + omitted otherwise. + +.. option:: --minimize, -z + + When used when creating a dSYM file, this option will suppress the emission of + the .debug_inlines, .debug_pubnames, and .debug_pubtypes sections since + dsymutil currently has better equivalents: .apple_names and .apple_types. When + used in conjunction with ``--update`` option, this option will cause redundant + accelerator tables to be removed. + +.. option:: --no-odr + + Do not use ODR (One Definition Rule) for uniquing C++ types. + +.. option:: --no-output + + Do the link in memory, but do not emit the result file. + +.. option:: --no-swiftmodule-timestamp + + Don't check the timestamp for swiftmodule files. + +.. option:: --num-threads <threads>, -j <threads> + + Specifies the maximum number (``n``) of simultaneous threads to use when + linking multiple architectures. + +.. option:: --object-prefix-map <prefix=remapped> + + Remap object file paths (but no source paths) before processing. Use + this for Clang objects where the module cache location was remapped using + ``-fdebug-prefix-map``; to help dsymutil find the Clang module cache. + +.. option:: --oso-prepend-path <path> + + Specifies a ``path`` to prepend to all debug symbol object file paths. + +.. option:: --out <filename>, -o <filename> + + Specifies an alternate ``path`` to place the dSYM bundle. The default dSYM + bundle path is created by appending ``.dSYM`` to the executable name. + +.. option:: --papertrail + + When running dsymutil as part of your build system, it can be desirable for + warnings to be part of the end product, rather than just being emitted to the + output stream. When enabled, warnings are embedded in the linked DWARF debug + information. + +.. 
option:: --remarks-output-format + + Specify the format to be used when serializing the linked remarks. + +.. option:: --remarks-prepend-path + + Specify a directory to prepend the paths of the external remark files. + +.. option:: --statistics + + Print statistics about the contribution of each object file to the linked + debug info. This prints a table after linking with the object file name, the + size of the debug info in the object file (in bytes) and the size contributed + (in bytes) to the linked dSYM. The table is sorted by the output size listing + the object files with the largest contribution first. + +.. option:: --symbol-map + + Update the existing dSYMs inplace using symbol map specified. + +.. option:: -s, --symtab + + Dumps the symbol table found in *executable* or object file(s) and exits. + +.. option:: -S + + Output textual assembly instead of a binary dSYM companion file. + +.. option:: --toolchain + + Embed the toolchain in the dSYM bundle's property list. + +.. option:: -u, --update + + Update an existing dSYM file to contain the latest accelerator tables and + other DWARF optimizations. This option will rebuild the '.apple_names' and + '.apple_types' hashed accelerator tables. + +.. option:: --use-reproducer + + Use the object files from the given reproducer path. + +.. option:: --verbose + + Display verbose information when linking. + +.. option:: --verify + + Run the DWARF verifier on the linked DWARF debug info. + +.. option:: -v, --version + + Display the version of the tool. + +.. option:: -y + + Treat *executable* as a YAML debug-map rather than an executable. + +EXIT STATUS +----------- + +:program:`dsymutil` returns 0 if the DWARF debug information was linked +successfully. Otherwise, it returns 1. 
+ +SEE ALSO +-------- + +:manpage:`llvm-dwarfdump(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/FileCheck.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/FileCheck.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/FileCheck.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/FileCheck.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,964 @@ +FileCheck - Flexible pattern matching file verifier +=================================================== + +.. program:: FileCheck + +SYNOPSIS +-------- + +:program:`FileCheck` *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*] + +DESCRIPTION +----------- + +:program:`FileCheck` reads two files (one from standard input, and one +specified on the command line) and uses one to verify the other. This +behavior is particularly useful for the testsuite, which wants to verify that +the output of some tool (e.g. :program:`llc`) contains the expected information +(for example, a movsd from esp or whatever is interesting). This is similar to +using :program:`grep`, but it is optimized for matching multiple different +inputs in one file in a specific order. + +The ``match-filename`` file specifies the file that contains the patterns to +match. The file to verify is read from standard input unless the +:option:`--input-file` option is used. + +OPTIONS +------- + +Options are parsed from the environment variable ``FILECHECK_OPTS`` +and from the command line. + +.. option:: -help + + Print a summary of command line options. + +.. option:: --check-prefix prefix + + FileCheck searches the contents of ``match-filename`` for patterns to + match. By default, these patterns are prefixed with "``CHECK:``". + If you'd like to use a different prefix (e.g. 
because the same input + file is checking multiple different tool or options), the + :option:`--check-prefix` argument allows you to specify (without the trailing + "``:``") one or more prefixes to match. Multiple prefixes are useful for tests + which might change for different run options, but most lines remain the same. + + FileCheck does not permit duplicate prefixes, even if one is a check prefix + and one is a comment prefix (see :option:`--comment-prefixes` below). + +.. option:: --check-prefixes prefix1,prefix2,... + + An alias of :option:`--check-prefix` that allows multiple prefixes to be + specified as a comma separated list. + +.. option:: --comment-prefixes prefix1,prefix2,... + + By default, FileCheck ignores any occurrence in ``match-filename`` of any check + prefix if it is preceded on the same line by "``COM:``" or "``RUN:``". See the + section `The "COM:" directive`_ for usage details. + + These default comment prefixes can be overridden by + :option:`--comment-prefixes` if they are not appropriate for your testing + environment. However, doing so is not recommended in LLVM's LIT-based test + suites, which should be easier to maintain if they all follow a consistent + comment style. In that case, consider proposing a change to the default + comment prefixes instead. + +.. option:: --allow-unused-prefixes + + This option controls the behavior when using more than one prefix as specified + by :option:`--check-prefix` or :option:`--check-prefixes`, and some of these + prefixes are missing in the test file. If true, this is allowed, if false, + FileCheck will report an error, listing the missing prefixes. + + It is currently, temporarily, true by default, and will be subsequently + switched to false. + +.. option:: --input-file filename + + File to check (defaults to stdin). + +.. option:: --match-full-lines + + By default, FileCheck allows matches of anywhere on a line. This + option will require all positive matches to cover an entire + line. 
Leading and trailing whitespace is ignored, unless + :option:`--strict-whitespace` is also specified. (Note: negative + matches from ``CHECK-NOT`` are not affected by this option!) + + Passing this option is equivalent to inserting ``{{^ *}}`` or + ``{{^}}`` before, and ``{{ *$}}`` or ``{{$}}`` after every positive + check pattern. + +.. option:: --strict-whitespace + + By default, FileCheck canonicalizes input horizontal whitespace (spaces and + tabs) which causes it to ignore these differences (a space will match a tab). + The :option:`--strict-whitespace` argument disables this behavior. End-of-line + sequences are canonicalized to UNIX-style ``\n`` in all modes. + +.. option:: --ignore-case + + By default, FileCheck uses case-sensitive matching. This option causes + FileCheck to use case-insensitive matching. + +.. option:: --implicit-check-not check-pattern + + Adds implicit negative checks for the specified patterns between positive + checks. The option allows writing stricter tests without stuffing them with + ``CHECK-NOT``\ s. + + For example, "``--implicit-check-not warning:``" can be useful when testing + diagnostic messages from tools that don't have an option similar to ``clang + -verify``. With this option FileCheck will verify that input does not contain + warnings not covered by any ``CHECK:`` patterns. + +.. option:: --dump-input <value> + + Dump input to stderr, adding annotations representing currently enabled + diagnostics. When there are multiple occurrences of this option, the + ``<value>`` that appears earliest in the list below has precedence. The + default is ``fail``. + + * ``help`` - Explain input dump and quit + * ``always`` - Always dump input + * ``fail`` - Dump input on failure + * ``never`` - Never dump input + +.. option:: --dump-input-context <N> + + In the dump requested by ``--dump-input``, print ``<N>`` input lines before + and ``<N>`` input lines after any lines specified by ``--dump-input-filter``.
+ When there are multiple occurrences of this option, the largest specified + ``<N>`` has precedence. The default is 5. + +.. option:: --dump-input-filter <value> + + In the dump requested by ``--dump-input``, print only input lines of kind + ``<value>`` plus any context specified by ``--dump-input-context``. When + there are multiple occurrences of this option, the ``<value>`` that appears + earliest in the list below has precedence. The default is ``error`` when + ``--dump-input=fail``, and it's ``all`` when ``--dump-input=always``. + + * ``all`` - All input lines + * ``annotation-full`` - Input lines with annotations + * ``annotation`` - Input lines with starting points of annotations + * ``error`` - Input lines with starting points of error annotations + +.. option:: --enable-var-scope + + Enables scope for regex variables. + + Variables with names that start with ``$`` are considered global and + remain set throughout the file. + + All other variables get undefined after each encountered ``CHECK-LABEL``. + +.. option:: -D<VAR=VALUE> + + Sets a filecheck pattern variable ``VAR`` with value ``VALUE`` that can be + used in ``CHECK:`` lines. + +.. option:: -D#<FMT>,<NUMVAR>=<NUMERIC EXPRESSION> + + Sets a filecheck numeric variable ``NUMVAR`` of matching format ``FMT`` to + the result of evaluating ``<NUMERIC EXPRESSION>`` that can be used in + ``CHECK:`` lines. See section + ``FileCheck Numeric Variables and Expressions`` for details on supported + numeric expressions. + +.. option:: -version + + Show the version number of this program. + +.. option:: -v + + Print good directive pattern matches. However, if ``-dump-input=fail`` or + ``-dump-input=always``, add those matches as input annotations instead. + +.. option:: -vv + + Print information helpful in diagnosing internal FileCheck issues, such as + discarded overlapping ``CHECK-DAG:`` matches, implicit EOF pattern matches, + and ``CHECK-NOT:`` patterns that do not have matches. Implies ``-v``.
+ However, if ``-dump-input=fail`` or ``-dump-input=always``, just add that + information as input annotations instead. + +.. option:: --allow-deprecated-dag-overlap + + Enable overlapping among matches in a group of consecutive ``CHECK-DAG:`` + directives. This option is deprecated and is only provided for convenience + as old tests are migrated to the new non-overlapping ``CHECK-DAG:`` + implementation. + +.. option:: --allow-empty + + Allow checking empty input. By default, empty input is rejected. + +.. option:: --color + + Use colors in output (autodetected by default). + +EXIT STATUS +----------- + +If :program:`FileCheck` verifies that the file matches the expected contents, +it exits with 0. Otherwise, if not, or if an error occurs, it will exit with a +non-zero value. + +TUTORIAL +-------- + +FileCheck is typically used from LLVM regression tests, being invoked on the RUN +line of the test. A simple example of using FileCheck from a RUN line looks +like this: + +.. code-block:: llvm + + ; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s + +This syntax says to pipe the current file ("``%s``") into ``llvm-as``, pipe +that into ``llc``, then pipe the output of ``llc`` into ``FileCheck``. This +means that FileCheck will be verifying its standard input (the llc output) +against the filename argument specified (the original ``.ll`` file specified by +"``%s``"). To see how this works, let's look at the rest of the ``.ll`` file +(after the RUN line): + +.. code-block:: llvm + + define void @sub1(i32* %p, i32 %v) { + entry: + ; CHECK: sub1: + ; CHECK: subl + %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) + ret void + } + + define void @inc4(i64* %p) { + entry: + ; CHECK: inc4: + ; CHECK: incq + %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) + ret void + } + +Here you can see some "``CHECK:``" lines specified in comments. 
Now you can +see how the file is piped into ``llvm-as``, then ``llc``, and the machine code +output is what we are verifying. FileCheck checks the machine code output to +verify that it matches what the "``CHECK:``" lines specify. + +The syntax of the "``CHECK:``" lines is very simple: they are fixed strings that +must occur in order. FileCheck defaults to ignoring horizontal whitespace +differences (e.g. a space is allowed to match a tab) but otherwise, the contents +of the "``CHECK:``" line is required to match some thing in the test file exactly. + +One nice thing about FileCheck (compared to grep) is that it allows merging +test cases together into logical groups. For example, because the test above +is checking for the "``sub1:``" and "``inc4:``" labels, it will not match +unless there is a "``subl``" in between those labels. If it existed somewhere +else in the file, that would not count: "``grep subl``" matches if "``subl``" +exists anywhere in the file. + +The FileCheck -check-prefix option +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The FileCheck `-check-prefix` option allows multiple test +configurations to be driven from one `.ll` file. This is useful in many +circumstances, for example, testing different architectural variants with +:program:`llc`. Here's a simple example: + +.. code-block:: llvm + + ; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \ + ; RUN: | FileCheck %s -check-prefix=X32 + ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \ + ; RUN: | FileCheck %s -check-prefix=X64 + + define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind { + %tmp1 = insertelement <4 x i32>; %tmp, i32 %s, i32 1 + ret <4 x i32> %tmp1 + ; X32: pinsrd_1: + ; X32: pinsrd $1, 4(%esp), %xmm0 + + ; X64: pinsrd_1: + ; X64: pinsrd $1, %edi, %xmm0 + } + +In this case, we're testing that we get the expected code generation with +both 32-bit and 64-bit code generation. 
+ +The "COM:" directive +~~~~~~~~~~~~~~~~~~~~ + +Sometimes you want to disable a FileCheck directive without removing it +entirely, or you want to write comments that mention a directive by name. The +"``COM:``" directive makes it easy to do this. For example, you might have: + +.. code-block:: llvm + + ; X32: pinsrd_1: + ; X32: pinsrd $1, 4(%esp), %xmm0 + + ; COM: FIXME: X64 isn't working correctly yet for this part of codegen, but + ; COM: X64 will have something similar to X32: + ; COM: + ; COM: X64: pinsrd_1: + ; COM: X64: pinsrd $1, %edi, %xmm0 + +Without "``COM:``", you would need to use some combination of rewording and +directive syntax mangling to prevent FileCheck from recognizing the commented +occurrences of "``X32:``" and "``X64:``" above as directives. Moreover, +FileCheck diagnostics have been proposed that might complain about the above +occurrences of "``X64``" that don't have the trailing "``:``" because they look +like directive typos. Dodging all these problems can be tedious for a test +author, and directive syntax mangling can make the purpose of test code unclear. +"``COM:``" avoids all these problems. + +A few important usage notes: + +* "``COM:``" within another directive's pattern does *not* comment out the + remainder of the pattern. For example: + + .. code-block:: llvm + + ; X32: pinsrd $1, 4(%esp), %xmm0 COM: This is part of the X32 pattern! + + If you need to temporarily comment out part of a directive's pattern, move it + to another line. The reason is that FileCheck parses "``COM:``" in the same + manner as any other directive: only the first directive on the line is + recognized as a directive. + +* For the sake of LIT, FileCheck treats "``RUN:``" just like "``COM:``". If this + is not suitable for your test environment, see :option:`--comment-prefixes`. 
+ +* FileCheck does not recognize "``COM``", "``RUN``", or any user-defined comment + prefix as a comment directive if it's combined with one of the usual check + directive suffixes, such as "``-NEXT:``" or "``-NOT:``", discussed below. + FileCheck treats such a combination as plain text instead. If it needs to act + as a comment directive for your test environment, define it as such with + :option:`--comment-prefixes`. + +The "CHECK-NEXT:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes you want to match lines and would like to verify that matches +happen on exactly consecutive lines with no other lines in between them. In +this case, you can use "``CHECK:``" and "``CHECK-NEXT:``" directives to specify +this. If you specified a custom check prefix, just use "``-NEXT:``". +For example, something like this works as you'd expect: + +.. code-block:: llvm + + define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) { + %tmp3 = load <2 x double>* %A, align 16 + %tmp7 = insertelement <2 x double> undef, double %B, i32 0 + %tmp9 = shufflevector <2 x double> %tmp3, + <2 x double> %tmp7, + <2 x i32> < i32 0, i32 2 > + store <2 x double> %tmp9, <2 x double>* %r, align 16 + ret void + + ; CHECK: t2: + ; CHECK: movl 8(%esp), %eax + ; CHECK-NEXT: movapd (%eax), %xmm0 + ; CHECK-NEXT: movhpd 12(%esp), %xmm0 + ; CHECK-NEXT: movl 4(%esp), %eax + ; CHECK-NEXT: movapd %xmm0, (%eax) + ; CHECK-NEXT: ret + } + +"``CHECK-NEXT:``" directives reject the input unless there is exactly one +newline between it and the previous directive. A "``CHECK-NEXT:``" cannot be +the first directive in a file. + +The "CHECK-SAME:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes you want to match lines and would like to verify that matches happen +on the same line as the previous match. In this case, you can use "``CHECK:``" +and "``CHECK-SAME:``" directives to specify this. If you specified a custom +check prefix, just use "``-SAME:``". 
+ +"``CHECK-SAME:``" is particularly powerful in conjunction with "``CHECK-NOT:``" +(described below). + +For example, the following works like you'd expect: + +.. code-block:: llvm + + !0 = !DILocation(line: 5, scope: !1, inlinedAt: !2) + + ; CHECK: !DILocation(line: 5, + ; CHECK-NOT: column: + ; CHECK-SAME: scope: ![[SCOPE:[0-9]+]] + +"``CHECK-SAME:``" directives reject the input if there are any newlines between +it and the previous directive. + +"``CHECK-SAME:``" is also useful to avoid writing matchers for irrelevant +fields. For example, suppose you're writing a test which parses a tool that +generates output like this: + +.. code-block:: text + + Name: foo + Field1: ... + Field2: ... + Field3: ... + Value: 1 + + Name: bar + Field1: ... + Field2: ... + Field3: ... + Value: 2 + + Name: baz + Field1: ... + Field2: ... + Field3: ... + Value: 1 + +To write a test that verifies ``foo`` has the value ``1``, you might first +write this: + +.. code-block:: text + + CHECK: Name: foo + CHECK: Value: 1{{$}} + +However, this would be a bad test: if the value for ``foo`` changes, the test +would still pass because the "``CHECK: Value: 1``" line would match the value +from ``baz``. To fix this, you could add ``CHECK-NEXT`` matchers for every +``FieldN:`` line, but that would be verbose, and need to be updated when +``Field4`` is added. A more succinct way to write the test using the +"``CHECK-SAME:``" matcher would be as follows: + +.. code-block:: text + + CHECK: Name: foo + CHECK: Value: + CHECK-SAME: {{ 1$}} + +This verifies that the *next* time "``Value:``" appears in the output, it has +the value ``1``. + +Note: a "``CHECK-SAME:``" cannot be the first directive in a file. + +The "CHECK-EMPTY:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you need to check that the next line has nothing on it, not even whitespace, +you can use the "``CHECK-EMPTY:``" directive. + +.. 
code-block:: llvm + + declare void @foo() + + declare void @bar() + ; CHECK: foo + ; CHECK-EMPTY: + ; CHECK-NEXT: bar + +Just like "``CHECK-NEXT:``" the directive will fail if there is more than one +newline before it finds the next blank line, and it cannot be the first +directive in a file. + +The "CHECK-NOT:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The "``CHECK-NOT:``" directive is used to verify that a string doesn't occur +between two matches (or before the first match, or after the last match). For +example, to verify that a load is removed by a transformation, a test like this +can be used: + +.. code-block:: llvm + + define i8 @coerce_offset0(i32 %V, i32* %P) { + store i32 %V, i32* %P + + %P2 = bitcast i32* %P to i8* + %P3 = getelementptr i8* %P2, i32 2 + + %A = load i8* %P3 + ret i8 %A + ; CHECK: @coerce_offset0 + ; CHECK-NOT: load + ; CHECK: ret i8 + } + +The "CHECK-COUNT:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you need to match multiple lines with the same pattern over and over again +you can repeat a plain ``CHECK:`` as many times as needed. If that looks too +boring you can instead use a counted check "``CHECK-COUNT-<num>:``", where +``<num>`` is a positive decimal number. It will match the pattern exactly +``<num>`` times, no more and no less. If you specified a custom check prefix, +just use "``<PREFIX>-COUNT-<num>:``" for the same effect. +Here is a simple example: + +.. code-block:: text + + Loop at depth 1 + Loop at depth 1 + Loop at depth 1 + Loop at depth 1 + Loop at depth 2 + Loop at depth 3 + + ; CHECK-COUNT-6: Loop at depth {{[0-9]+}} + ; CHECK-NOT: Loop at depth {{[0-9]+}} + +The "CHECK-DAG:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If it's necessary to match strings that don't occur in a strictly sequential +order, "``CHECK-DAG:``" could be used to verify them between two matches (or +before the first match, or after the last match). For example, clang emits +vtable globals in reverse order.
Using ``CHECK-DAG:``, we can keep the checks +in the natural order: + +.. code-block:: c++ + + // RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s + + struct Foo { virtual void method(); }; + Foo f; // emit vtable + // CHECK-DAG: @_ZTV3Foo = + + struct Bar { virtual void method(); }; + Bar b; + // CHECK-DAG: @_ZTV3Bar = + +``CHECK-NOT:`` directives could be mixed with ``CHECK-DAG:`` directives to +exclude strings between the surrounding ``CHECK-DAG:`` directives. As a result, +the surrounding ``CHECK-DAG:`` directives cannot be reordered, i.e. all +occurrences matching ``CHECK-DAG:`` before ``CHECK-NOT:`` must not fall behind +occurrences matching ``CHECK-DAG:`` after ``CHECK-NOT:``. For example, + +.. code-block:: llvm + + ; CHECK-DAG: BEFORE + ; CHECK-NOT: NOT + ; CHECK-DAG: AFTER + +This case will reject input strings where ``BEFORE`` occurs after ``AFTER``. + +With captured variables, ``CHECK-DAG:`` is able to match valid topological +orderings of a DAG with edges from the definition of a variable to its use. +It's useful, e.g., when your test cases need to match different output +sequences from the instruction scheduler. For example, + +.. code-block:: llvm + + ; CHECK-DAG: add [[REG1:r[0-9]+]], r1, r2 + ; CHECK-DAG: add [[REG2:r[0-9]+]], r3, r4 + ; CHECK: mul r5, [[REG1]], [[REG2]] + +In this case, any order of that two ``add`` instructions will be allowed. + +If you are defining `and` using variables in the same ``CHECK-DAG:`` block, +be aware that the definition rule can match `after` its use. + +So, for instance, the code below will pass: + +.. code-block:: text + + ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0] + ; CHECK-DAG: vmov.32 [[REG2]][1] + vmov.32 d0[1] + vmov.32 d0[0] + +While this other code, will not: + +.. 
code-block:: text + + ; CHECK-DAG: vmov.32 [[REG2:d[0-9]+]][0] + ; CHECK-DAG: vmov.32 [[REG2]][1] + vmov.32 d1[1] + vmov.32 d0[0] + +While this can be very useful, it's also dangerous, because in the case of +register sequence, you must have a strong order (read before write, copy before +use, etc). If the definition your test is looking for doesn't match (because +of a bug in the compiler), it may match further away from the use, and mask +real bugs away. + +In those cases, to enforce the order, use a non-DAG directive between DAG-blocks. + +A ``CHECK-DAG:`` directive skips matches that overlap the matches of any +preceding ``CHECK-DAG:`` directives in the same ``CHECK-DAG:`` block. Not only +is this non-overlapping behavior consistent with other directives, but it's +also necessary to handle sets of non-unique strings or patterns. For example, +the following directives look for unordered log entries for two tasks in a +parallel program, such as the OpenMP runtime: + +.. code-block:: text + + // CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin + // CHECK-DAG: [[THREAD_ID]]: task_end + // + // CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin + // CHECK-DAG: [[THREAD_ID]]: task_end + +The second pair of directives is guaranteed not to match the same log entries +as the first pair even though the patterns are identical and even if the text +of the log entries is identical because the thread ID manages to be reused. + +The "CHECK-LABEL:" directive +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes in a file containing multiple tests divided into logical blocks, one +or more ``CHECK:`` directives may inadvertently succeed by matching lines in a +later block. While an error will usually eventually be generated, the check +flagged as causing the error may not actually bear any relationship to the +actual source of the problem. + +In order to produce better error messages in these cases, the "``CHECK-LABEL:``" +directive can be used. 
It is treated identically to a normal ``CHECK`` +directive except that FileCheck makes an additional assumption that a line +matched by the directive cannot also be matched by any other check present in +``match-filename``; this is intended to be used for lines containing labels or +other unique identifiers. Conceptually, the presence of ``CHECK-LABEL`` divides +the input stream into separate blocks, each of which is processed independently, +preventing a ``CHECK:`` directive in one block matching a line in another block. +If ``--enable-var-scope`` is in effect, all local variables are cleared at the +beginning of the block. + +For example, + +.. code-block:: llvm + + define %struct.C* @C_ctor_base(%struct.C* %this, i32 %x) { + entry: + ; CHECK-LABEL: C_ctor_base: + ; CHECK: mov [[SAVETHIS:r[0-9]+]], r0 + ; CHECK: bl A_ctor_base + ; CHECK: mov r0, [[SAVETHIS]] + %0 = bitcast %struct.C* %this to %struct.A* + %call = tail call %struct.A* @A_ctor_base(%struct.A* %0) + %1 = bitcast %struct.C* %this to %struct.B* + %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x) + ret %struct.C* %this + } + + define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) { + entry: + ; CHECK-LABEL: D_ctor_base: + +The use of ``CHECK-LABEL:`` directives in this case ensures that the three +``CHECK:`` directives only accept lines corresponding to the body of the +``@C_ctor_base`` function, even if the patterns match lines found later in +the file. Furthermore, if one of these three ``CHECK:`` directives fail, +FileCheck will recover by continuing to the next block, allowing multiple test +failures to be detected in a single invocation. + +There is no requirement that ``CHECK-LABEL:`` directives contain strings that +correspond to actual syntactic labels in a source or output language: they must +simply uniquely match a single line in the file being verified. + +``CHECK-LABEL:`` directives cannot contain variable definitions or uses. 
+ +Directive modifiers +~~~~~~~~~~~~~~~~~~~ + +A directive modifier can be appended to a directive by following the directive +with ``{<modifier>}`` where the only supported value for ``<modifier>`` is +``LITERAL``. + +The ``LITERAL`` directive modifier can be used to perform a literal match. The +modifier results in the directive not recognizing any syntax to perform regex +matching, variable capture or any substitutions. This is useful when the text +to match would require excessive escaping otherwise. For example, the +following will perform literal matches rather than considering these as +regular expressions: + +.. code-block:: text + + Input: [[[10, 20]], [[30, 40]]] + Output %r10: [[10, 20]] + Output %r10: [[30, 40]] + + ; CHECK{LITERAL}: [[[10, 20]], [[30, 40]]] + ; CHECK-DAG{LITERAL}: [[30, 40]] + ; CHECK-DAG{LITERAL}: [[10, 20]] + +FileCheck Regex Matching Syntax +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All FileCheck directives take a pattern to match. +For most uses of FileCheck, fixed string matching is perfectly sufficient. For +some things, a more flexible form of matching is desired. To support this, +FileCheck allows you to specify regular expressions in matching strings, +surrounded by double braces: ``{{yourregex}}``. FileCheck implements a POSIX +regular expression matcher; it supports Extended POSIX regular expressions +(ERE). Because we want to use fixed string matching for a majority of what we +do, FileCheck has been designed to support mixing and matching fixed string +matching with regular expressions. This allows you to write things like this: + +.. code-block:: llvm + + ; CHECK: movhpd {{[0-9]+}}(%esp), {{%xmm[0-7]}} + +In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed. + +Because regular expressions are enclosed with double braces, they are +visually distinct, and you don't need to use escape characters within the double +braces like you would in C.
In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +``{{[}][}]}}`` as your pattern. Or if you are using the repetition count +syntax, for example ``[[:xdigit:]]{8}`` to match exactly 8 hex digits, you +would need to add parentheses like this ``{{([[:xdigit:]]{8})}}`` to avoid +confusion with FileCheck's closing double-brace. + +FileCheck String Substitution Blocks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is often useful to match a pattern and then verify that it occurs again +later in the file. For codegen tests, this can be useful to allow any +register, but verify that that register is used consistently later. To do +this, :program:`FileCheck` supports string substitution blocks that allow +string variables to be defined and substituted into patterns. Here is a simple +example: + +.. code-block:: llvm + + ; CHECK: test5: + ; CHECK: notw [[REGISTER:%[a-z]+]] + ; CHECK: andw {{.*}}[[REGISTER]] + +The first check line matches a regex ``%[a-z]+`` and captures it into the +string variable ``REGISTER``. The second line verifies that whatever is in +``REGISTER`` occurs later in the file after an "``andw``". :program:`FileCheck` +string substitution blocks are always contained in ``[[ ]]`` pairs, and string +variable names can be formed with the regex ``[a-zA-Z_][a-zA-Z0-9_]*``. If a +colon follows the name, then it is a definition of the variable; otherwise, it +is a substitution. + +:program:`FileCheck` variables can be defined multiple times, and substitutions +always get the latest value. Variables can also be substituted later on the +same line they were defined on. For example: + +.. code-block:: llvm + + ; CHECK: op [[REG:r[0-9]+]], [[REG]] + +Can be useful if you want the operands of ``op`` to be the same register, +and don't care exactly which register it is. + +If ``--enable-var-scope`` is in effect, variables with names that +start with ``$`` are considered to be global. 
All other variables are +local. All local variables get undefined at the beginning of each +CHECK-LABEL block. Global variables are not affected by CHECK-LABEL. +This makes it easier to ensure that individual tests are not affected +by variables set in preceding tests. + +FileCheck Numeric Substitution Blocks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:program:`FileCheck` also supports numeric substitution blocks that allow +defining numeric variables and checking for numeric values that satisfy a +numeric expression constraint based on those variables via a numeric +substitution. This allows ``CHECK:`` directives to verify a numeric relation +between two numbers, such as the need for consecutive registers to be used. + +The syntax to capture a numeric value is +``[[#%<fmtspec>,<NUMVAR>:]]`` where: + +* ``%<fmtspec>,`` is an optional format specifier to indicate what number + format to match and the minimum number of digits to expect. + +* ``<NUMVAR>:`` is an optional definition of variable ``<NUMVAR>`` from the + captured value. + +The syntax of ``<fmtspec>`` is: ``#.<precision><conversion specifier>`` where: + +* ``#`` is an optional flag available for hex values (see + ``<conversion specifier>`` below) which requires the value matched to be + prefixed by ``0x``. +* ``.<precision>`` is an optional printf-style precision specifier in which + ``<precision>`` indicates the minimum number of digits that the value matched + must have, expecting leading zeros if needed. + +* ``<conversion specifier>`` is an optional scanf-style conversion specifier + to indicate what number format to match (e.g. hex number). Currently + accepted format specifiers are ``%u``, ``%d``, ``%x`` and ``%X``. If absent, + the format specifier defaults to ``%u``. + + +For example: + +.. code-block:: llvm + + ; CHECK: mov r[[#REG:]], 0x[[#%.8X,ADDR:]] + +would match ``mov r5, 0x0000FEFE`` and set ``REG`` to the value ``5`` and +``ADDR`` to the value ``0xFEFE``. Note that due to the precision it would fail +to match ``mov r5, 0xFEFE``.
+ +As a result of the numeric variable definition being optional, it is possible +to only check that a numeric value is present in a given format. This can be +useful when the value itself is not useful, for instance: + +.. code-block:: gas + + ; CHECK-NOT: mov r0, r[[#]] + +to check that a value is synthesized rather than moved around. + + +The syntax of a numeric substitution is +``[[#%<fmtspec>, <constraint> <expr>]]`` where: + +* ``<fmtspec>`` is the same format specifier as for defining a variable but + in this context indicating how a numeric expression value should be matched + against. If absent, both components of the format specifier are inferred from + the matching format of the numeric variable(s) used by the expression + constraint if any, and defaults to ``%u`` if no numeric variable is used, + denoting that the value should be unsigned with no leading zeros. In case of + conflict between format specifiers of several numeric variables, the + conversion specifier becomes mandatory but the precision specifier remains + optional. + +* ``<constraint>`` is the constraint describing how the value to match must + relate to the value of the numeric expression. The only currently accepted + constraint is ``==`` for an exact match and is the default if + ``<constraint>`` is not provided. No matching constraint must be specified + when the ``<expr>`` is empty. + +* ``<expr>`` is an expression. An expression is in turn recursively defined + as: + + * a numeric operand, or + * an expression followed by an operator and a numeric operand. + + A numeric operand is a previously defined numeric variable, an integer + literal, or a function. Spaces are accepted before, after and between any of + these elements. Numeric operands have 64-bit precision. Overflow and underflow + are rejected. There is no support for operator precedence, but parentheses + can be used to change the evaluation order. + +The supported operators are: + + * ``+`` - Returns the sum of its two operands. + * ``-`` - Returns the difference of its two operands.
+ +The syntax of a function call is ``<name>(<arguments>)`` where: + +* ``name`` is a predefined string literal. Accepted values are: + + * add - Returns the sum of its two operands. + * div - Returns the quotient of its two operands. + * max - Returns the largest of its two operands. + * min - Returns the smallest of its two operands. + * mul - Returns the product of its two operands. + * sub - Returns the difference of its two operands. + +* ``<arguments>`` is a comma separated list of expressions. + +For example: + +.. code-block:: llvm + + ; CHECK: load r[[#REG:]], [r0] + ; CHECK: load r[[#REG+1]], [r1] + ; CHECK: Loading from 0x[[#%x,ADDR:]] + ; CHECK-SAME: to 0x[[#ADDR + 7]] + +The above example would match the text: + +.. code-block:: gas + + load r5, [r0] + load r6, [r1] + Loading from 0xa0463440 to 0xa0463447 + +but would not match the text: + +.. code-block:: gas + + load r5, [r0] + load r7, [r1] + Loading from 0xa0463440 to 0xa0463443 + +Due to ``7`` being unequal to ``5 + 1`` and ``a0463443`` being unequal to +``a0463440 + 7``. + + +A numeric variable can also be defined to the result of a numeric expression, +in which case the numeric expression constraint is checked and if verified the +variable is assigned to the value. The unified syntax for both checking a +numeric expression and capturing its value into a numeric variable is thus +``[[#%<fmtspec>,<NUMVAR>: <constraint> <expr>]]`` with each element as +described previously. One can use this syntax to make a testcase more +self-describing by using variables instead of values: + +.. code-block:: gas + + ; CHECK: mov r[[#REG_OFFSET:]], 0x[[#%X,FIELD_OFFSET:12]] + ; CHECK-NEXT: load r[[#]], [r[[#REG_BASE:]], r[[#REG_OFFSET]]] + +which would match: + +.. code-block:: gas + + mov r4, 0xC + load r6, [r5, r4] + +The ``--enable-var-scope`` option has the same effect on numeric variables as +on string variables. + +Important note: In its current implementation, an expression cannot use a +numeric variable defined earlier in the same CHECK directive.
+ +FileCheck Pseudo Numeric Variables +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Sometimes there's a need to verify output that contains line numbers of the +match file, e.g. when testing compiler diagnostics. This introduces a certain +fragility of the match file structure, as "``CHECK:``" lines contain absolute +line numbers in the same file, which have to be updated whenever line numbers +change due to text addition or deletion. + +To support this case, FileCheck expressions understand the ``@LINE`` pseudo +numeric variable which evaluates to the line number of the CHECK pattern where +it is found. + +This way match patterns can be put near the relevant test lines and include +relative line number references, for example: + +.. code-block:: c++ + + // CHECK: test.cpp:[[# @LINE + 4]]:6: error: expected ';' after top level declarator + // CHECK-NEXT: {{^int a}} + // CHECK-NEXT: {{^ \^}} + // CHECK-NEXT: {{^ ;}} + int a + +To support legacy uses of ``@LINE`` as a special string variable, +:program:`FileCheck` also accepts the following uses of ``@LINE`` with string +substitution block syntax: ``[[@LINE]]``, ``[[@LINE+<offset>]]`` and +``[[@LINE-<offset>]]`` without any spaces inside the brackets and where +``offset`` is an integer. + +Matching Newline Characters +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To match newline characters in regular expressions the character class +``[[:space:]]`` can be used. For example, the following pattern: + +.. code-block:: c++ + + // CHECK: DW_AT_location [DW_FORM_sec_offset] ([[DLOC:0x[0-9a-f]+]]){{[[:space:]].*}}"intd" + +matches output of the form (from llvm-dwarfdump): + +.. code-block:: text + + DW_AT_location [DW_FORM_sec_offset] (0x00000233) + DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000c9] = "intd") + +letting us set the :program:`FileCheck` variable ``DLOC`` to the desired value +``0x00000233``, extracted from the line immediately preceding "``intd``".
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/index.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/index.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/index.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/index.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,84 @@ +LLVM Command Guide +------------------ + +The following documents are command descriptions for all of the LLVM tools. +These pages describe how to use the LLVM commands and what their options are. +Note that these pages do not describe all of the options available for all +tools. To get a complete listing, pass the ``--help`` (general options) or +``--help-hidden`` (general and debugging options) arguments to the tool you are +interested in. + +Basic Commands +~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 1 + + dsymutil + llc + lli + llvm-as + llvm-config + llvm-cov + llvm-cxxmap + llvm-diff + llvm-dis + llvm-dwarfdump + llvm-lib + llvm-libtool-darwin + llvm-link + llvm-lipo + llvm-mca + llvm-otool + llvm-profdata + llvm-readobj + llvm-stress + llvm-symbolizer + opt + +GNU binutils replacements +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 1 + + llvm-addr2line + llvm-ar + llvm-cxxfilt + llvm-install-name-tool + llvm-nm + llvm-objcopy + llvm-objdump + llvm-ranlib + llvm-readelf + llvm-size + llvm-strings + llvm-strip + +Debugging Tools +~~~~~~~~~~~~~~~ + +.. toctree:: + :maxdepth: 1 + + bugpoint + llvm-extract + llvm-bcanalyzer + +Developer Tools +~~~~~~~~~~~~~~~ + +.. 
toctree:: + :maxdepth: 1 + + FileCheck + tblgen + clang-tblgen + lldb-tblgen + llvm-tblgen + mlir-tblgen + lit + llvm-exegesis + llvm-locstats + llvm-pdbutil + llvm-profgen diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/lit.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/lit.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/lit.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/lit.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,628 @@ +lit - LLVM Integrated Tester +============================ + +.. program:: lit + +SYNOPSIS +-------- + +:program:`lit` [*options*] [*tests*] + +DESCRIPTION +----------- + +:program:`lit` is a portable tool for executing LLVM and Clang style test +suites, summarizing their results, and providing indication of failures. +:program:`lit` is designed to be a lightweight testing tool with as simple a +user interface as possible. + +:program:`lit` should be run with one or more *tests* to run specified on the +command line. Tests can be either individual test files or directories to +search for tests (see :ref:`test-discovery`). + +Each specified test will be executed (potentially concurrently) and once all +tests have been run :program:`lit` will print summary information on the number +of tests which passed or failed (see :ref:`test-status-results`). The +:program:`lit` program will execute with a non-zero exit code if any tests +fail. + +By default :program:`lit` will use a succinct progress display and will only +print summary information for test failures. See :ref:`output-options` for +options controlling the :program:`lit` progress display and output. + +:program:`lit` also includes a number of options for controlling how tests are +executed (specific features may depend on the particular test format). See +:ref:`execution-options` for more information. 
+ +Finally, :program:`lit` also supports additional options for only running a +subset of the tests specified on the command line; see +:ref:`selection-options` for more information. + +:program:`lit` parses options from the environment variable ``LIT_OPTS`` after +parsing options from the command line. ``LIT_OPTS`` is primarily useful for +supplementing or overriding the command-line options supplied to :program:`lit` +by ``check`` targets defined by a project's build system. + +Users interested in the :program:`lit` architecture or designing a +:program:`lit` testing implementation should see :ref:`lit-infrastructure`. + +GENERAL OPTIONS +--------------- + +.. option:: -h, --help + + Show the :program:`lit` help message. + +.. option:: -j N, --workers=N + + Run ``N`` tests in parallel. By default, this is automatically chosen to + match the number of detected available CPUs. + +.. option:: --config-prefix=NAME + + Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for + test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`. + +.. option:: -D NAME[=VALUE], --param NAME[=VALUE] + + Add a user defined parameter ``NAME`` with the given ``VALUE`` (or the empty + string if not given). The meaning and use of these parameters is test suite + dependent. + +.. _output-options: + +OUTPUT OPTIONS +-------------- + +.. option:: -q, --quiet + + Suppress any output except for test failures. + +.. option:: -s, --succinct + + Show less output, for example don't show information on tests that pass. + Also show a progress bar, unless ``--no-progress-bar`` is specified. + +.. option:: -v, --verbose + + Show more information on test failures, for example the entire test output + instead of just the test result. + +.. option:: -vv, --echo-all-commands + + Echo all commands to stdout, as they are being executed. + This can be valuable for debugging test failures, as the last echoed command + will be the one which has failed.
+ :program:`lit` normally inserts a no-op command (``:`` in the case of bash) + with argument ``'RUN: at line N'`` before each command pipeline, and this + option also causes those no-op commands to be echoed to stdout to help you + locate the source line of the failed command. + This option implies ``--verbose``. + +.. option:: -a, --show-all + + Show more information about all tests, for example the entire test + commandline and output. + +.. option:: --no-progress-bar + + Do not use curses based progress bar. + +.. option:: --show-unsupported + + Show the names of unsupported tests. + +.. option:: --show-xfail + + Show the names of tests that were expected to fail. + +.. _execution-options: + +EXECUTION OPTIONS +----------------- + +.. option:: --path=PATH + + Specify an additional ``PATH`` to use when searching for executables in tests. + +.. option:: --vg + + Run individual tests under valgrind (using the memcheck tool). The + ``--error-exitcode`` argument for valgrind is used so that valgrind failures + will cause the program to exit with a non-zero status. + + When this option is enabled, :program:`lit` will also automatically provide a + "``valgrind``" feature that can be used to conditionally disable (or expect + failure in) certain tests. + +.. option:: --vg-arg=ARG + + When :option:`--vg` is used, specify an additional argument to pass to + :program:`valgrind` itself. + +.. option:: --vg-leak + + When :option:`--vg` is used, enable memory leak checks. When this option is + enabled, :program:`lit` will also automatically provide a "``vg_leak``" + feature that can be used to conditionally disable (or expect failure in) + certain tests. + +.. option:: --time-tests + + Track the wall time individual tests take to execute and includes the results + in the summary output. This is useful for determining which tests in a test + suite take the most time to execute. + +.. option:: --ignore-fail + + Exit with status zero even if some tests fail. + +.. 
option:: --no-indirectly-run-check + + Do not error if a test would not be run if the user had specified the + containing directory instead of naming the test directly. + +.. _selection-options: + +SELECTION OPTIONS +----------------- + +By default, `lit` will run failing tests first, then run tests in descending +execution time order to optimize concurrency. The execution order can be +changed using the :option:`--order` option. + +The timing data is stored in the `test_exec_root` in a file named +`.lit_test_times.txt`. If this file does not exist, then `lit` checks the +`test_source_root` for the file to optionally accelerate clean builds. + +.. option:: --shuffle + + Run the tests in a random order, not failing/slowest first. Deprecated, + use :option:`--order` instead. + +.. option:: --max-failures N + + Stop execution after the given number ``N`` of failures. + An integer argument should be passed on the command line + prior to execution. + +.. option:: --max-tests=N + + Run at most ``N`` tests and then terminate. + +.. option:: --max-time=N + + Spend at most ``N`` seconds (approximately) running tests and then terminate. + Note that this is not an alias for :option:`--timeout`; the two are + different kinds of maximums. + +.. option:: --num-shards=M + + Divide the set of selected tests into ``M`` equal-sized subsets or + "shards", and run only one of them. Must be used with the + ``--run-shard=N`` option, which selects the shard to run. The environment + variable ``LIT_NUM_SHARDS`` can also be used in place of this + option. These two options provide a coarse mechanism for partitioning large + testsuites, for parallel execution on separate machines (say in a large + testing farm). + +.. option:: --order={lexical,random,smart} + + Define the order in which tests are run. The supported values are: + + - lexical - tests will be run in lexical order according to the test file + path. This option is useful when predictable test order is desired. 
+ + - random - tests will be run in random order. + + - smart - tests that failed previously will be run first, then the remaining + tests, all in descending execution time order. This is the default as it + optimizes concurrency. + +.. option:: --run-shard=N + + Select which shard to run, assuming the ``--num-shards=M`` option was + provided. The two options must be used together, and the value of ``N`` + must be in the range ``1..M``. The environment variable + ``LIT_RUN_SHARD`` can also be used in place of this option. + +.. option:: --timeout=N + + Spend at most ``N`` seconds (approximately) running each individual test. + ``0`` means no time limit, and ``0`` is the default. Note that this is not an + alias for :option:`--max-time`; the two are different kinds of maximums. + +.. option:: --filter=REGEXP + + Run only those tests whose name matches the regular expression specified in + ``REGEXP``. The environment variable ``LIT_FILTER`` can be also used in place + of this option, which is especially useful in environments where the call + to ``lit`` is issued indirectly. + +.. option:: --filter-out=REGEXP + + Filter out those tests whose name matches the regular expression specified in + ``REGEXP``. The environment variable ``LIT_FILTER_OUT`` can be also used in + place of this option, which is especially useful in environments where the + call to ``lit`` is issued indirectly. + +.. option:: --xfail=LIST + + Treat those tests whose name is in the semicolon separated list ``LIST`` as + ``XFAIL``. This can be helpful when one does not want to modify the test + suite. The environment variable ``LIT_XFAIL`` can be also used in place of + this option, which is especially useful in environments where the call to + ``lit`` is issued indirectly. + + A test name can be specified as a file name relative to the test suite directory. + For example: + + ..
code-block:: none + + LIT_XFAIL="affinity/kmp-hw-subset.c;offloading/memory_manager.cpp" + + In this case, all of the following tests are treated as ``XFAIL``: + + .. code-block:: none + + libomp :: affinity/kmp-hw-subset.c + libomptarget :: nvptx64-nvidia-cuda :: offloading/memory_manager.cpp + libomptarget :: x86_64-pc-linux-gnu :: offloading/memory_manager.cpp + + Alternatively, a test name can be specified as the full test name + reported in LIT output. For example, we can adjust the previous + example not to treat the ``nvptx64-nvidia-cuda`` version of + ``offloading/memory_manager.cpp`` as XFAIL: + + .. code-block:: none + + LIT_XFAIL="affinity/kmp-hw-subset.c;libomptarget :: x86_64-pc-linux-gnu :: offloading/memory_manager.cpp" + +.. option:: --xfail-not=LIST + + Do not treat the specified tests as ``XFAIL``. The environment variable + ``LIT_XFAIL_NOT`` can also be used in place of this option. The syntax is the + same as for :option:`--xfail` and ``LIT_XFAIL``. :option:`--xfail-not` and + ``LIT_XFAIL_NOT`` always override all other ``XFAIL`` specifications, + including an :option:`--xfail` appearing later on the command line. The + primary purpose is to suppress an ``XPASS`` result without modifying a test + case that uses the ``XFAIL`` directive. + +ADDITIONAL OPTIONS +------------------ + +.. option:: --debug + + Run :program:`lit` in debug mode, for debugging configuration issues and + :program:`lit` itself. + +.. option:: --show-suites + + List the discovered test suites and exit. + +.. option:: --show-tests + + List all of the discovered tests and exit. + +EXIT STATUS +----------- + +:program:`lit` will exit with an exit code of 1 if there are any FAIL or XPASS +results. Otherwise, it will exit with the status 0. Other exit codes are used +for non-test related failures (for example a user error or an internal program +error). + +.. 
_test-discovery: + +TEST DISCOVERY +-------------- + +The inputs passed to :program:`lit` can be either individual tests, or entire +directories or hierarchies of tests to run. When :program:`lit` starts up, the +first thing it does is convert the inputs into a complete list of tests to run +as part of *test discovery*. + +In the :program:`lit` model, every test must exist inside some *test suite*. +:program:`lit` resolves the inputs specified on the command line to test suites +by searching upwards from the input path until it finds a :file:`lit.cfg` or +:file:`lit.site.cfg` file. These files serve as both a marker of test suites +and as configuration files which :program:`lit` loads in order to understand +how to find and run the tests inside the test suite. + +Once :program:`lit` has mapped the inputs into test suites it traverses the +list of inputs adding tests for individual files and recursively searching for +tests in directories. + +This behavior makes it easy to specify a subset of tests to run, while still +allowing the test suite configuration to control exactly how tests are +interpreted. In addition, :program:`lit` always identifies tests by the test +suite they are in, and their relative path inside the test suite. For +appropriately configured projects, this allows :program:`lit` to provide +convenient and flexible support for out-of-tree builds. + +.. _test-status-results: + +TEST STATUS RESULTS +------------------- + +Each test ultimately produces one of the following eight results: + +**PASS** + + The test succeeded. + +**FLAKYPASS** + + The test succeeded after being re-run more than once. This only applies to + tests containing an ``ALLOW_RETRIES:`` annotation. + +**XFAIL** + + The test failed, but that is expected. This is used for test formats which allow + specifying that a test does not currently work, but wish to leave it in the test + suite. + +**XPASS** + + The test succeeded, but it was expected to fail. 
This is used for tests which + were specified as expected to fail, but are now succeeding (generally because + the feature they test was broken and has been fixed). + +**FAIL** + + The test failed. + +**UNRESOLVED** + + The test result could not be determined. For example, this occurs when the test + could not be run, the test itself is invalid, or the test was interrupted. + +**UNSUPPORTED** + + The test is not supported in this environment. This is used by test formats + which can report unsupported tests. + +**TIMEOUT** + + The test was run, but it timed out before it was able to complete. This is + considered a failure. + +Depending on the test format tests may produce additional information about +their status (generally only for failures). See the :ref:`output-options` +section for more information. + +.. _lit-infrastructure: + +LIT INFRASTRUCTURE +------------------ + +This section describes the :program:`lit` testing architecture for users interested in +creating a new :program:`lit` testing implementation, or extending an existing one. + +:program:`lit` proper is primarily an infrastructure for discovering and running +arbitrary tests, and to expose a single convenient interface to these +tests. :program:`lit` itself doesn't know how to run tests, rather this logic is +defined by *test suites*. + +TEST SUITES +~~~~~~~~~~~ + +As described in :ref:`test-discovery`, tests are always located inside a *test +suite*. Test suites serve to define the format of the tests they contain, the +logic for finding those tests, and any additional information to run the tests. + +:program:`lit` identifies test suites as directories containing ``lit.cfg`` or +``lit.site.cfg`` files (see also :option:`--config-prefix`). Test suites are +initially discovered by recursively searching up the directory hierarchy for +all the input files passed on the command line. You can use +:option:`--show-suites` to display the discovered test suites at startup. 
+ +Once a test suite is discovered, its config file is loaded. Config files +themselves are Python modules which will be executed. When the config file is +executed, two important global variables are predefined: + +**lit_config** + + The global **lit** configuration object (a *LitConfig* instance), which defines + the builtin test formats, global configuration parameters, and other helper + routines for implementing test configurations. + +**config** + + This is the config object (a *TestingConfig* instance) for the test suite, + which the config file is expected to populate. The following variables are also + available on the *config* object, some of which must be set by the config and + others are optional or predefined: + + **name** *[required]* The name of the test suite, for use in reports and + diagnostics. + + **test_format** *[required]* The test format object which will be used to + discover and run tests in the test suite. Generally this will be a builtin test + format available from the *lit.formats* module. + + **test_source_root** The filesystem path to the test suite root. For out-of-dir + builds this is the directory that will be scanned for tests. + + **test_exec_root** For out-of-dir builds, the path to the test suite root inside + the object directory. This is where tests will be run and temporary output files + placed. + + **environment** A dictionary representing the environment to use when executing + tests in the suite. + + **standalone_tests** When true, mark a directory with tests expected to be run + standalone. Test discovery is disabled for that directory and + *--no-indirectly-run-check* is in effect. *lit.suffixes* and *lit.excludes* + must be empty when this variable is true. + + **suffixes** For **lit** test formats which scan directories for tests, this + variable is a list of suffixes to identify test files. Used by: *ShTest*. 
+ + **substitutions** For **lit** test formats which substitute variables into a test + script, the list of substitutions to perform. Used by: *ShTest*. + + **unsupported** Mark an unsupported directory, all tests within it will be + reported as unsupported. Used by: *ShTest*. + + **parent** The parent configuration, this is the config object for the directory + containing the test suite, or None. + + **root** The root configuration. This is the top-most :program:`lit` configuration in + the project. + + **pipefail** Normally a test using a shell pipe fails if any of the commands + on the pipe fail. If this is not desired, setting this variable to false + makes the test fail only if the last command in the pipe fails. + + **available_features** A set of features that can be used in `XFAIL`, + `REQUIRES`, and `UNSUPPORTED` directives. + +TEST DISCOVERY +~~~~~~~~~~~~~~ + +Once test suites are located, :program:`lit` recursively traverses the source +directory (following *test_source_root*) looking for tests. When :program:`lit` +enters a sub-directory, it first checks to see if a nested test suite is +defined in that directory. If so, it loads that test suite recursively, +otherwise it instantiates a local test config for the directory (see +:ref:`local-configuration-files`). + +Tests are identified by the test suite they are contained within, and the +relative path inside that suite. Note that the relative path may not refer to +an actual file on disk; some test formats (such as *GoogleTest*) define +"virtual tests" which have a path that contains both the path to the actual +test file and a subpath to identify the virtual test. + +.. _local-configuration-files: + +LOCAL CONFIGURATION FILES +~~~~~~~~~~~~~~~~~~~~~~~~~ + +When :program:`lit` loads a subdirectory in a test suite, it instantiates a +local test configuration by cloning the configuration for the parent directory +--- the root of this configuration chain will always be a test suite. 
Once the +test configuration is cloned :program:`lit` checks for a *lit.local.cfg* file +in the subdirectory. If present, this file will be loaded and can be used to +specialize the configuration for each individual directory. This facility can +be used to define subdirectories of optional tests, or to change other +configuration parameters --- for example, to change the test format, or the +suffixes which identify test files. + +SUBSTITUTIONS +~~~~~~~~~~~~~ + +:program:`lit` allows patterns to be substituted inside RUN commands. It also +provides the following base set of substitutions, which are defined in +TestRunner.py: + + ======================= ============== + Macro Substitution + ======================= ============== + %s source path (path to the file currently being run) + %S source dir (directory of the file currently being run) + %p same as %S + %{pathsep} path separator + %t temporary file name unique to the test + %basename_t The last path component of %t but without the ``.tmp`` extension + %T parent directory of %t (not unique, deprecated, do not use) + %% % + %/s %s but ``\`` is replaced by ``/`` + %/S %S but ``\`` is replaced by ``/`` + %/p %p but ``\`` is replaced by ``/`` + %/t %t but ``\`` is replaced by ``/`` + %/T %T but ``\`` is replaced by ``/`` + %{/s:regex_replacement} %/s but escaped for use in the replacement of a ``s@@@`` command in sed + %{/S:regex_replacement} %/S but escaped for use in the replacement of a ``s@@@`` command in sed + %{/p:regex_replacement} %/p but escaped for use in the replacement of a ``s@@@`` command in sed + %{/t:regex_replacement} %/t but escaped for use in the replacement of a ``s@@@`` command in sed + %{/T:regex_replacement} %/T but escaped for use in the replacement of a ``s@@@`` command in sed + %:s On Windows, %/s but a ``:`` is removed if its the second character. + Otherwise, %s but with a single leading ``/`` removed. + %:S On Windows, %/S but a ``:`` is removed if its the second character. 
+ Otherwise, %S but with a single leading ``/`` removed. + %:p On Windows, %/p but a ``:`` is removed if it's the second character. + Otherwise, %p but with a single leading ``/`` removed. + %:t On Windows, %/t but a ``:`` is removed if it's the second character. + Otherwise, %t but with a single leading ``/`` removed. + %:T On Windows, %/T but a ``:`` is removed if it's the second character. + Otherwise, %T but with a single leading ``/`` removed. + ======================= ============== + +Other substitutions are provided that are variations on this base set and +further substitution patterns can be defined by each test module. See the +modules :ref:`local-configuration-files`. + +By default, substitutions are expanded exactly once, so that if e.g. a +substitution ``%build`` is defined on top of another substitution ``%cxx``, +``%build`` will expand to ``%cxx`` textually, not to what ``%cxx`` expands to. +However, if the ``recursiveExpansionLimit`` property of the ``TestingConfig`` +is set to a non-negative integer, substitutions will be expanded recursively +until that limit is reached. It is an error if the limit is reached and +expanding substitutions again would yield a different result. + +More detailed information on substitutions can be found in the +:doc:`../TestingGuide`. + +TEST RUN OUTPUT FORMAT +~~~~~~~~~~~~~~~~~~~~~~ + +The :program:`lit` output for a test run conforms to the following schema, in +both short and verbose modes (although in short mode no PASS lines will be +shown). This schema has been chosen to be relatively easy to reliably parse by +a machine (for example in buildbot log scraping), and for other tools to +generate. + +Each test result is expected to appear on a line that matches: + +.. code-block:: none + + <result code>: <test name> (<progress info>) + +where ``<result code>`` is a standard test result such as PASS, FAIL, XFAIL, +XPASS, UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and +REGRESSED are also allowed. 
+ +The ``<test name>`` field can consist of an arbitrary string containing no +newline. + +The ``<progress info>`` field can be used to report progress information such +as (1/300) or can be empty, but even when empty the parentheses are required. + +Each test result may include additional (multiline) log information in the +following format: + +.. code-block:: none + + <log delineator> TEST '(<test name>)' <trailing delineator> + ... log message ... + <log delineator> + +where ``<test name>`` should be the name of a preceding reported test, ``<log delineator>`` is a string of "*" characters *at least* four characters long +(the recommended length is 20), and ``<trailing delineator>`` is an arbitrary +(unparsed) string. + +The following is an example of a test run output which consists of four tests A, +B, C, and D, and a log message for the failing test C: + +.. code-block:: none + + PASS: A (1 of 4) + PASS: B (2 of 4) + FAIL: C (3 of 4) + ******************** TEST 'C' FAILED ******************** + Test 'C' failed as a result of exit code 1. + ******************** + PASS: D (4 of 4) + +LIT EXAMPLE TESTS +~~~~~~~~~~~~~~~~~ + +The :program:`lit` distribution contains several example implementations of +test suites in the *ExampleTests* directory. + +SEE ALSO +-------- + +valgrind(1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llc.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llc.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llc.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llc.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,222 @@ +llc - LLVM static compiler +========================== + +.. program:: llc + +SYNOPSIS +-------- + +:program:`llc` [*options*] [*filename*] + +DESCRIPTION +----------- + +The :program:`llc` command compiles LLVM source inputs into assembly language +for a specified architecture. 
The assembly language output can then be passed +through a native assembler and linker to generate a native executable. + +The choice of architecture for the output assembly code is automatically +determined from the input file, unless the :option:`-march` option is used to +override the default. + +OPTIONS +------- + +If ``filename`` is "``-``" or omitted, :program:`llc` reads from standard input. +Otherwise, it will read from ``filename``. Inputs can be in either the LLVM assembly +language format (``.ll``) or the LLVM bitcode format (``.bc``). + +If the :option:`-o` option is omitted, then :program:`llc` will send its output +to standard output if the input is from standard input. If the :option:`-o` +option specifies "``-``", then the output will also be sent to standard output. + +If no :option:`-o` option is specified and an input file other than "``-``" is +specified, then :program:`llc` creates the output filename by taking the input +filename, removing any existing ``.bc`` extension, and adding a ``.s`` suffix. + +Other :program:`llc` options are described below. + +End-user Options +~~~~~~~~~~~~~~~~ + +.. option:: -help + + Print a summary of command line options. + +.. option:: -o <filename> + + Use ``<filename>`` as the output filename. See the summary above for more + details. + +.. option:: -O=uint + + Generate code at different optimization levels. These correspond to the + ``-O0``, ``-O1``, ``-O2``, and ``-O3`` optimization levels used by + :program:`clang`. + +.. option:: -mtriple=<target triple> + + Override the target triple specified in the input file with the specified + string. + +.. option:: -march=<arch> + + Specify the architecture for which to generate assembly, overriding the target + encoded in the input file. See the output of ``llc -help`` for a list of + valid architectures. By default this is inferred from the target triple or + autodetected to the current architecture. + +.. 
option:: -mcpu=<cpuname> + + Specify a specific chip in the current architecture to generate code for. 
+ By default this is inferred from the target triple and autodetected to + the current architecture. For a list of available CPUs, use: + + .. code-block:: none + + llvm-as < /dev/null | llc -march=xyz -mcpu=help + +.. option:: -filetype=<output file type> + + Specify what kind of output ``llc`` should generate. Options are: ``asm`` + for textual assembly (``'.s'``), ``obj`` for native object files (``'.o'``) + and ``null`` for not emitting anything (for performance testing). + + Note that not all targets support all options. + +.. option:: -mattr=a1,+a2,-a3,... + + Override or control specific attributes of the target, such as whether SIMD + operations are enabled or not. The default set of attributes is set by the + current CPU. For a list of available attributes, use: + + .. code-block:: none + + llvm-as < /dev/null | llc -march=xyz -mattr=help + +.. option:: --frame-pointer + + Specify effect of frame pointer elimination optimization (all,non-leaf,none). + +.. option:: --disable-excess-fp-precision + + Disable optimizations that may produce excess precision for floating point. + Note that this option can dramatically slow down code on some systems + (e.g. X86). + +.. option:: --enable-no-infs-fp-math + + Enable optimizations that assume no Inf values. + +.. option:: --enable-no-nans-fp-math + + Enable optimizations that assume no NAN values. + +.. option:: --enable-no-signed-zeros-fp-math + + Enable FP math optimizations that assume the sign of 0 is insignificant. + +.. option:: --enable-no-trapping-fp-math + + Enable setting the FP exceptions build attribute not to use exceptions. + +.. option:: --enable-unsafe-fp-math + + Enable optimizations that make unsafe assumptions about IEEE math (e.g. that + addition is associative) or may not work for all input ranges. These + optimizations allow the code generator to make use of some instructions which + would otherwise not be usable (such as ``fsin`` on X86). + +.. 
option:: --stats + + Print statistics recorded by code-generation passes. + +.. option:: --time-passes + + Record the amount of time needed for each pass and print a report to standard + error. + +.. option:: --load=<dso_path> + + Dynamically load ``dso_path`` (a path to a dynamically shared object) that + implements an LLVM target. This will permit the target name to be used with + the :option:`-march` option so that code can be generated for that target. + +.. option:: -meabi=[default|gnu|4|5] + + Specify which EABI version the generated code should conform to. Valid EABI versions are *gnu*, + *4* and *5*. Default value (*default*) depends on the triple. + +.. option:: -stack-size-section + + Emit the .stack_sizes section which contains stack size metadata. The section + contains an array of pairs of function symbol values (pointer size) and stack + sizes (unsigned LEB128). The stack size values only include the space allocated + in the function prologue. Functions with dynamic stack allocations are not + included. + +.. option:: -remarks-section + + Emit the __remarks (MachO) section which contains metadata about remark + diagnostics. + +Tuning/Configuration Options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. option:: --print-after-isel + + Print generated machine code after instruction selection (useful for debugging). + +.. option:: --regalloc=<allocator> + + Specify the register allocator to use. + Valid register allocators are: + + *basic* + + Basic register allocator. + + *fast* + + Fast register allocator. It is the default for unoptimized code. + + *greedy* + + Greedy register allocator. It is the default for optimized code. + + *pbqp* + + Register allocator based on 'Partitioned Boolean Quadratic Programming'. + +.. option:: --spiller=<spiller> + + Specify the spiller to use for register allocators that support it. Currently + this option is used only by the linear scan register allocator. The default + ``spiller`` is *local*. 
Valid spillers are: + + *simple* + + Simple spiller + + *local* + + Local spiller + +Intel IA-32-specific Options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. option:: --x86-asm-syntax=[att|intel] + + Specify whether to emit assembly code in AT&T syntax (the default) or Intel + syntax. + +EXIT STATUS +----------- + +If :program:`llc` succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value. + +SEE ALSO +-------- + +:manpage:`lli(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/lldb-tblgen.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/lldb-tblgen.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/lldb-tblgen.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/lldb-tblgen.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,22 @@ +lldb-tblgen - Description to C++ Code for LLDB +============================================== + +.. program:: lldb-tblgen + +SYNOPSIS +-------- + +:program:`lldb-tblgen` [*options*] [*filename*] + + +DESCRIPTION +----------- + +:program:`lldb-tblgen` is a program that translates compiler-related target +description (``.td``) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler. + +Please see :doc:`tblgen - Description to C++ Code<./tblgen>` +for a description of the *filename* argument and options, including the +options common to all :program:`*-tblgen` programs. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/lli.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/lli.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/lli.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/lli.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,217 @@ +lli - directly execute programs from LLVM bitcode +================================================= + +.. program:: lli + +SYNOPSIS +-------- + +:program:`lli` [*options*] [*filename*] [*program args*] + +DESCRIPTION +----------- + +:program:`lli` directly executes programs in LLVM bitcode format. It takes a program +in LLVM bitcode format and executes it using a just-in-time compiler or an +interpreter. + +:program:`lli` is *not* an emulator. It will not execute IR of different architectures +and it can only interpret (or JIT-compile) for the host architecture. + +The JIT compiler takes the same arguments as other tools, like :program:`llc`, +but they don't necessarily work for the interpreter. + +If `filename` is not specified, then :program:`lli` reads the LLVM bitcode for the +program from standard input. + +The optional *args* specified on the command line are passed to the program as +arguments. + +GENERAL OPTIONS +--------------- + +.. option:: -fake-argv0=executable + + Override the ``argv[0]`` value passed into the executing program. + +.. option:: -force-interpreter={false,true} + + If set to true, use the interpreter even if a just-in-time compiler is available + for this architecture. Defaults to false. + +.. option:: -help + + Print a summary of command line options. + +.. option:: -load=pluginfilename + + Causes :program:`lli` to load the plugin (shared object) named *pluginfilename* and use + it for optimization. + +.. option:: -stats + + Print statistics from the code-generation passes. 
This is only meaningful for + the just-in-time compiler, at present. + +.. option:: -time-passes + + Record the amount of time needed for each code-generation pass and print it to + standard error. + +.. option:: -version + + Print out the version of :program:`lli` and exit without doing anything else. + +TARGET OPTIONS +-------------- + +.. option:: -mtriple=target triple + + Override the target triple specified in the input bitcode file with the + specified string. This may result in a crash if you pick an + architecture which is not compatible with the current system. + +.. option:: -march=arch + + Specify the architecture for which to generate assembly, overriding the target + encoded in the bitcode file. See the output of **llc -help** for a list of + valid architectures. By default this is inferred from the target triple or + autodetected to the current architecture. + +.. option:: -mcpu=cpuname + + Specify a specific chip in the current architecture to generate code for. + By default this is inferred from the target triple and autodetected to + the current architecture. For a list of available CPUs, use: + **llvm-as < /dev/null | llc -march=xyz -mcpu=help** + +.. option:: -mattr=a1,+a2,-a3,... + + Override or control specific attributes of the target, such as whether SIMD + operations are enabled or not. The default set of attributes is set by the + current CPU. For a list of available attributes, use: + **llvm-as < /dev/null | llc -march=xyz -mattr=help** + +FLOATING POINT OPTIONS +---------------------- + +.. option:: -disable-excess-fp-precision + + Disable optimizations that may increase floating point precision. + +.. option:: -enable-no-infs-fp-math + + Enable optimizations that assume no Inf values. + +.. option:: -enable-no-nans-fp-math + + Enable optimizations that assume no NAN values. + +.. option:: -enable-unsafe-fp-math + + Causes :program:`lli` to enable optimizations that may decrease floating point + precision. + +.. 
option:: -soft-float + + Causes :program:`lli` to generate software floating point library calls instead of + equivalent hardware instructions. + +CODE GENERATION OPTIONS +----------------------- + +.. option:: -code-model=model + + Choose the code model from: + + .. code-block:: text + + default: Target default code model + tiny: Tiny code model + small: Small code model + kernel: Kernel code model + medium: Medium code model + large: Large code model + +.. option:: -disable-post-RA-scheduler + + Disable scheduling after register allocation. + +.. option:: -disable-spill-fusing + + Disable fusing of spill code into instructions. + +.. option:: -jit-enable-eh + + Exception handling should be enabled in the just-in-time compiler. + +.. option:: -join-liveintervals + + Coalesce copies (default=true). + +.. option:: -nozero-initialized-in-bss + + Don't place zero-initialized symbols into the BSS section. + +.. option:: -pre-RA-sched=scheduler + + Instruction schedulers available (before register allocation): + + .. code-block:: text + + =default: Best scheduler for the target + =none: No scheduling: breadth first sequencing + =simple: Simple two pass scheduling: minimize critical path and maximize processor utilization + =simple-noitin: Simple two pass scheduling: Same as simple except using generic latency + =list-burr: Bottom-up register reduction list scheduling + =list-tdrr: Top-down register reduction list scheduling + =list-td: Top-down list scheduler + +.. option:: -regalloc=allocator + + Register allocator to use (default=linearscan) + + .. code-block:: text + + =bigblock: Big-block register allocator + =linearscan: linear scan register allocator =local - local register allocator + =simple: simple register allocator + +.. option:: -relocation-model=model + + Choose relocation model from: + + .. 
code-block:: text + + =default: Target default relocation model + =static: Non-relocatable code =pic - Fully relocatable, position independent code + =dynamic-no-pic: Relocatable external references, non-relocatable code + +.. option:: -spiller + + Spiller to use (default=local) + + .. code-block:: text + + =simple: simple spiller + =local: local spiller + +.. option:: -x86-asm-syntax=syntax + + Choose style of code to emit from X86 backend: + + .. code-block:: text + + =att: Emit AT&T-style assembly + =intel: Emit Intel-style assembly + +EXIT STATUS +----------- + +If :program:`lli` fails to load the program, it will exit with an exit code of 1. +Otherwise, it will return the exit code of the program it executes. + +SEE ALSO +-------- + +:manpage:`llc(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-addr2line.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-addr2line.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-addr2line.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-addr2line.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,47 @@ +llvm-addr2line - a drop-in replacement for addr2line +==================================================== + +.. program:: llvm-addr2line + +SYNOPSIS +-------- + +:program:`llvm-addr2line` [*options*] + +DESCRIPTION +----------- + +:program:`llvm-addr2line` is an alias for the :manpage:`llvm-symbolizer(1)` +tool with different defaults. The goal is to make it a drop-in replacement for +GNU's :program:`addr2line`. + +Here are some of those differences: + +- ``llvm-addr2line`` interprets all addresses as hexadecimal and ignores an + optional ``0x`` prefix, whereas ``llvm-symbolizer`` attempts to determine + the base from the literal's prefix and defaults to decimal if there is no + prefix. 
+ +- ``llvm-addr2line`` defaults not to print function names. Use `-f`_ to enable + that. + +- ``llvm-addr2line`` defaults not to demangle function names. Use `-C`_ to + switch the demangling on. + +- ``llvm-addr2line`` defaults not to print inlined frames. Use `-i`_ to show + inlined frames for a source code location in an inlined function. + +- ``llvm-addr2line`` uses `--output-style=GNU`_ by default. + +- ``llvm-addr2line`` parses options from the environment variable + ``LLVM_ADDR2LINE_OPTS`` instead of from ``LLVM_SYMBOLIZER_OPTS``. + +SEE ALSO +-------- + +:manpage:`llvm-symbolizer(1)` + +.. _-f: llvm-symbolizer.html#llvm-symbolizer-opt-f +.. _-C: llvm-symbolizer.html#llvm-symbolizer-opt-c +.. _-i: llvm-symbolizer.html#llvm-symbolizer-opt-i +.. _--output-style=GNU: llvm-symbolizer.html#llvm-symbolizer-opt-output-style diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-ar.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-ar.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-ar.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-ar.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,352 @@ +llvm-ar - LLVM archiver +======================= + +.. program:: llvm-ar + +SYNOPSIS +-------- + +:program:`llvm-ar` [-]{dmpqrstx}[abcDilLNoOPsSTuUvV] [relpos] [count] archive [files...] + +DESCRIPTION +----------- + +The :program:`llvm-ar` command is similar to the common Unix utility, +:program:`ar`. It archives several files, such as objects and LLVM bitcode +files into a single archive library that can be linked into a program. However, +the archive can contain any kind of file. By default, :program:`llvm-ar` +generates a symbol table that makes linking faster because only the symbol +table needs to be consulted, not each individual file member of the archive. 
+ +The :program:`llvm-ar` command can be used to *read* archive files in SVR4, +GNU, BSD and Darwin format, and *write* in the GNU, BSD, and Darwin style +archive files. If an SVR4 format archive is used with the :option:`r` +(replace), :option:`d` (delete), :option:`m` (move) or :option:`q` +(quick update) operations, the archive will be reconstructed in the format +defined by :option:`--format`. + +Here's where :program:`llvm-ar` departs from previous :program:`ar` +implementations: + +*The following option is not supported* + + [f] - truncate inserted filenames + +*The following options are ignored for compatibility* + + --plugin= - load a plugin which adds support for other file formats + + [l] - ignored in :program:`ar` + +*Symbol Table* + + Since :program:`llvm-ar` supports bitcode files, the symbol table it creates + includes both native and bitcode symbols. + +*Deterministic Archives* + + By default, :program:`llvm-ar` always uses zero for timestamps and UIDs/GIDs + to write archives in a deterministic mode. This is equivalent to the + :option:`D` modifier being enabled by default. If you wish to maintain + compatibility with other :program:`ar` implementations, you can pass the + :option:`U` modifier to write actual timestamps and UIDs/GIDs. + +*Windows Paths* + + When on Windows :program:`llvm-ar` treats the names of archived *files* in the same + case sensitive manner as the operating system. When on a non-Windows machine + :program:`llvm-ar` does not consider character case. + +OPTIONS +------- + +:program:`llvm-ar` operations are compatible with other :program:`ar` +implementations. However, there are a few modifiers (:option:`L`) that are not +found in other :program:`ar` implementations. The options for +:program:`llvm-ar` specify a single basic Operation to perform on the archive, +a variety of Modifiers for that Operation, the name of the archive file, and an +optional list of file names. 
If the *files* option is not specified, it +generally means either "none" or "all" members, depending on the operation. The +Options, Operations and Modifiers are explained in the sections below. + +The minimal set of options is at least one operator and the name of the +archive. + +Operations +~~~~~~~~~~ + +.. option:: d [NT] + + Delete files from the ``archive``. The :option:`N` and :option:`T` modifiers + apply to this operation. The *files* options specify which members should be + removed from the archive. It is not an error if a specified file does not + appear in the archive. If no *files* are specified, the archive is not + modified. + +.. option:: m [abi] + + Move files from one location in the ``archive`` to another. The :option:`a`, + :option:`b`, and :option:`i` modifiers apply to this operation. The *files* + will all be moved to the location given by the modifiers. If no modifiers are + used, the files will be moved to the end of the archive. If no *files* are + specified, the archive is not modified. + +.. option:: p [v] + + Print *files* to the standard output stream. If no *files* are specified, the + entire ``archive`` is printed. With the :option:`v` modifier, + :program:`llvm-ar` also prints out the name of the file being output. Printing + binary files is ill-advised as they might confuse your terminal settings. The + :option:`p` operation never modifies the archive. + +.. option:: q [LT] + + Quickly append files to the end of the ``archive`` without removing + duplicates. If no *files* are specified, the archive is not modified. The + behavior when appending one archive to another depends upon whether the + :option:`L` and :option:`T` modifiers are used: + + * Appending a regular archive to a regular archive will append the archive + file. If the :option:`L` modifier is specified the members will be appended + instead. + + * Appending a regular archive to a thin archive requires the :option:`T` + modifier and will append the archive file. 
The :option:`L` modifier is not + supported. + + * Appending a thin archive to a regular archive will append the archive file. + If the :option:`L` modifier is specified the members will be appended + instead. + + * Appending a thin archive to a thin archive will always quick append its + members. + +.. option:: r [abTu] + + Replace existing *files* or insert them at the end of the ``archive`` if + they do not exist. The :option:`a`, :option:`b`, :option:`T` and :option:`u` + modifiers apply to this operation. If no *files* are specified, the archive + is not modified. + +t[v] +.. option:: t [vO] + + Print the table of contents. Without any modifiers, this operation just prints + the names of the members to the standard output stream. With the :option:`v` + modifier, :program:`llvm-ar` also prints out the file type (B=bitcode, + S=symbol table, blank=regular file), the permission mode, the owner and group, + are ignored when extracting *files* and set to placeholder values when adding + size, and the date. With the :option:`O` modifier, display member offsets. If + any *files* are specified, the listing is only for those files. If no *files* + are specified, the table of contents for the whole archive is printed. + +.. option:: V + + A synonym for the :option:`--version` option. + +.. option:: x [oP] + + Extract ``archive`` members back to files. The :option:`o` modifier applies + to this operation. This operation retrieves the indicated *files* from the + archive and writes them back to the operating system's file system. If no + *files* are specified, the entire archive is extracted. + +Modifiers (operation specific) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The modifiers below are specific to certain operations. See the Operations +section to determine which modifiers are applicable to which operations. + +.. option:: a + + When inserting or moving member files, this option specifies the destination + of the new files as being after the *relpos* member. 
If *relpos* is not found, + the files are placed at the end of the ``archive``. *relpos* cannot be + consumed without either :option:`a`, :option:`b` or :option:`i`. + +.. option:: b + + When inserting or moving member files, this option specifies the destination + of the new files as being before the *relpos* member. If *relpos* is not + found, the files are placed at the end of the ``archive``. *relpos* cannot + be consumed without either :option:`a`, :option:`b` or :option:`i`. This + modifier is identical to the :option:`i` modifier. + +.. option:: i + + A synonym for the :option:`b` option. + +.. option:: L + + When quick appending an ``archive``, instead quick append its members. This + is a feature for :program:`llvm-ar` that is not found in gnu-ar. + +.. option:: N + + When extracting or deleting a member that shares its name with another member, + the *count* parameter allows you to supply a positive whole number that + selects the instance of the given name, with "1" indicating the first + instance. If :option:`N` is not specified the first member of that name will + be selected. If *count* is not supplied, the operation fails. *count* cannot be consumed without :option:`N`. + +.. option:: o + + When extracting files, use the modification times of any *files* as they + appear in the ``archive``. By default *files* extracted from the archive + use the time of extraction. + +.. option:: O + + Display member offsets inside the archive. + +.. option:: T + + When creating or modifying an archive, this option specifies that the + ``archive`` will be thin. By default, archives are not created as thin + archives and when modifying a thin archive, it will be converted to a regular + archive. + +.. option:: v + + When printing *files* or the ``archive`` table of contents, this modifier + instructs :program:`llvm-ar` to include additional information in the output. + +Modifiers (generic) +~~~~~~~~~~~~~~~~~~~ + +The modifiers below may be applied to any operation. + +.. 
option:: c + + For the :option:`r` (replace) and :option:`q` (quick update) operations, + :program:`llvm-ar` will always create the archive if it doesn't exist. + Normally, :program:`llvm-ar` will print a warning message indicating that the + ``archive`` is being created. Using this modifier turns off + that warning. + +.. option:: D + + Use zero for timestamps and UIDs/GIDs. This is set by default. + +.. option:: P + + Use full paths when matching member names rather than just the file name. + This can be useful when manipulating an ``archive`` generated by another + archiver, as some allow paths as member names. This is the default behavior + for thin archives. + +.. option:: s + + This modifier requests that an archive index (or symbol table) be added to the + ``archive``, as if using ranlib. The symbol table will contain all the + externally visible functions and global variables defined by all the bitcode + files in the archive. By default :program:`llvm-ar` generates symbol tables in + archives. This can also be used as an operation. + +.. option:: S + + This modifier is the opposite of the :option:`s` modifier. It instructs + :program:`llvm-ar` to not build the symbol table. If both :option:`s` and + :option:`S` are used, the last modifier to occur in the options will prevail. + +.. option:: u + + Only update ``archive`` members with *files* that have more recent + timestamps. + +.. option:: U + + Use actual timestamps and UIDs/GIDs. + +Other +~~~~~ + +.. option:: --format=<type> + + This option allows for default, gnu, darwin or bsd ``<type>`` to be selected. + When creating an ``archive``, ``<type>`` will default to that of the host + machine. + +.. option:: -h, --help + + Print a summary of command-line options and their meanings. + +.. option:: -M + + This option allows for MRI scripts to be read through the standard input + stream. No other options are compatible with this option. + +.. 
option:: --rsp-quoting=<type> + This option selects the quoting style ``<type>`` for response files, either + ``posix`` or ``windows``. The default when on Windows is ``windows``, otherwise the + default is ``posix``. + +.. option:: --version + + Display the version of the :program:`llvm-ar` executable. + +.. option:: @<FILE> + + Read command-line options and commands from response file ``<FILE>``. + +MRI SCRIPTS +----------- + +:program:`llvm-ar` understands a subset of the MRI scripting interface commonly +supported by archivers following in the ar tradition. An MRI script contains a +sequence of commands to be executed by the archiver. The :option:`-M` option +allows for an MRI script to be passed to :program:`llvm-ar` through the +standard input stream. + +Note that :program:`llvm-ar` has known limitations regarding the use of MRI +scripts: + +* Each script can only create one archive. +* Existing archives can not be modified. + +MRI Script Commands +~~~~~~~~~~~~~~~~~~~ + +Each command begins with the command's name and must appear on its own line. +Some commands have arguments, which must be separated from the name by +whitespace. An MRI script should begin with either a :option:`CREATE` or +:option:`CREATETHIN` command and will typically end with a :option:`SAVE` +command. Any text after either '*' or ';' is treated as a comment. + +.. option:: CREATE archive + + Begin creation of a regular archive with the specified name. Subsequent + commands act upon this ``archive``. + +.. option:: CREATETHIN archive + + Begin creation of a thin archive with the specified name. Subsequent + commands act upon this ``archive``. + +.. option:: ADDLIB archive + + Append the contents of ``archive`` to the current archive. + +.. option:: ADDMOD <file> + + Append ``<file>`` to the current archive. + +.. option:: DELETE <file> + + Delete the member of the current archive whose file name, excluding directory + components, matches ``<file>``. + +.. 
option:: SAVE + + Write the current archive to the path specified in the previous + :option:`CREATE`/:option:`CREATETHIN` command. + +.. option:: END + + Ends the MRI script (optional). + +EXIT STATUS +----------- + +If :program:`llvm-ar` succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non-zero value. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-as.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-as.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-as.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-as.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,58 @@ +llvm-as - LLVM assembler +======================== + +.. program:: llvm-as + +SYNOPSIS +-------- + +**llvm-as** [*options*] [*filename*] + +DESCRIPTION +----------- + +**llvm-as** is the LLVM assembler. It reads a file containing human-readable +LLVM assembly language, translates it to LLVM bitcode, and writes the result +into a file or to standard output. + +If *filename* is omitted or is ``-``, then **llvm-as** reads its input from +standard input. + +If an output file is not specified with the **-o** option, then +**llvm-as** sends its output to a file or standard output by following +these rules: + +* If the input is standard input, then the output is standard output. + +* If the input is a file that ends with ``.ll``, then the output file is of the + same name, except that the suffix is changed to ``.bc``. + +* If the input is a file that does not end with the ``.ll`` suffix, then the + output file has the same name as the input file, except that the ``.bc`` + suffix is appended. + +OPTIONS +------- + +**-f** + Enable binary output on terminals. Normally, **llvm-as** will refuse to + write raw bitcode output if the output stream is a terminal. 
With this option, + **llvm-as** will write raw bitcode regardless of the output device. + +**-help** + Print a summary of command line options. + +**-o** *filename* + Specify the output file name. If *filename* is ``-``, then **llvm-as** + sends its output to standard output. + +EXIT STATUS +----------- + +If **llvm-as** succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non-zero value. + +SEE ALSO +-------- + +:manpage:`llvm-dis(1)`, as(1) diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-bcanalyzer.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-bcanalyzer.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-bcanalyzer.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-bcanalyzer.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,306 @@ +llvm-bcanalyzer - LLVM bitcode analyzer +======================================= + +.. program:: llvm-bcanalyzer + +SYNOPSIS +-------- + +:program:`llvm-bcanalyzer` [*options*] [*filename*] + +DESCRIPTION +----------- + +The :program:`llvm-bcanalyzer` command is a small utility for analyzing bitcode +files. The tool reads a bitcode file (such as generated with the +:program:`llvm-as` tool) and produces a statistical report on the contents of +the bitcode file. The tool can also dump a low level but human readable +version of the bitcode file. This tool is probably not of much interest or +utility except for those working directly with the bitcode file format. Most +LLVM users can just ignore this tool. + +If *filename* is omitted or is ``-``, then :program:`llvm-bcanalyzer` reads its +input from standard input. This is useful for combining the tool into a +pipeline. Output is written to the standard output. + +OPTIONS +------- + +.. program:: llvm-bcanalyzer + +.. 
option:: -nodetails + + Causes :program:`llvm-bcanalyzer` to abbreviate its output by writing out only + a module level summary. The details for individual functions are not + displayed. + +.. option:: -dump + + Causes :program:`llvm-bcanalyzer` to dump the bitcode in a human readable + format. This format is significantly different from LLVM assembly and + provides details about the encoding of the bitcode file. + +.. option:: -verify + + Causes :program:`llvm-bcanalyzer` to verify the module produced by reading the + bitcode. This ensures that the statistics generated are based on a consistent + module. + +.. option:: -help + + Print a summary of command line options. + +EXIT STATUS +----------- + +If :program:`llvm-bcanalyzer` succeeds, it will exit with 0. Otherwise, if an +error occurs, it will exit with a non-zero value, usually 1. + +SUMMARY OUTPUT DEFINITIONS +-------------------------- + +The following items are always printed by llvm-bcanalyzer. They comprise the +summary output. + +**Bitcode Analysis Of Module** + + This just provides the name of the module for which bitcode analysis is being + generated. + +**Bitcode Version Number** + + The bitcode version (not LLVM version) of the file read by the analyzer. + +**File Size** + + The size, in bytes, of the entire bitcode file. + +**Module Bytes** + + The size, in bytes, of the module block. Percentage is relative to File Size. + +**Function Bytes** + + The size, in bytes, of all the function blocks. Percentage is relative to File + Size. + +**Global Types Bytes** + + The size, in bytes, of the Global Types Pool. Percentage is relative to File + Size. This is the size of the definitions of all types in the bitcode file. + +**Constant Pool Bytes** + + The size, in bytes, of the Constant Pool Blocks. Percentage is relative to File + Size. + +**Module Globals Bytes** + + The size, in bytes, of the Global Variable Definitions and their initializers. + Percentage is relative to File Size. 
+ +**Instruction List Bytes** + + The size, in bytes, of all the instruction lists in all the functions. + Percentage is relative to File Size. Note that this value is also included in + the Function Bytes. + +**Compaction Table Bytes** + + The size, in bytes, of all the compaction tables in all the functions. + Percentage is relative to File Size. Note that this value is also included in + the Function Bytes. + +**Symbol Table Bytes** + + The size, in bytes, of all the symbol tables in all the functions. Percentage is + relative to File Size. Note that this value is also included in the Function + Bytes. + +**Dependent Libraries Bytes** + + The size, in bytes, of the list of dependent libraries in the module. Percentage + is relative to File Size. Note that this value is also included in the Module + Global Bytes. + +**Number Of Bitcode Blocks** + + The total number of blocks of any kind in the bitcode file. + +**Number Of Functions** + + The total number of function definitions in the bitcode file. + +**Number Of Types** + + The total number of types defined in the Global Types Pool. + +**Number Of Constants** + + The total number of constants (of any type) defined in the Constant Pool. + +**Number Of Basic Blocks** + + The total number of basic blocks defined in all functions in the bitcode file. + +**Number Of Instructions** + + The total number of instructions defined in all functions in the bitcode file. + +**Number Of Long Instructions** + + The total number of long instructions defined in all functions in the bitcode + file. Long instructions are those taking greater than 4 bytes. Typically long + instructions are GetElementPtr with several indices, PHI nodes, and calls to + functions with large numbers of arguments. + +**Number Of Operands** + + The total number of operands used in all instructions in the bitcode file. + +**Number Of Compaction Tables** + + The total number of compaction tables in all functions in the bitcode file. 
+ +**Number Of Symbol Tables** + + The total number of symbol tables in all functions in the bitcode file. + +**Number Of Dependent Libs** + + The total number of dependent libraries found in the bitcode file. + +**Total Instruction Size** + + The total size of the instructions in all functions in the bitcode file. + +**Average Instruction Size** + + The average number of bytes per instruction across all functions in the bitcode + file. This value is computed by dividing Total Instruction Size by Number Of + Instructions. + +**Maximum Type Slot Number** + + The maximum value used for a type's slot number. Larger slot number values take + more bytes to encode. + +**Maximum Value Slot Number** + + The maximum value used for a value's slot number. Larger slot number values take + more bytes to encode. + +**Bytes Per Value** + + The average size of a Value definition (of any type). This is computed by + dividing File Size by the total number of values of any type. + +**Bytes Per Global** + + The average size of a global definition (constants and global variables). + +**Bytes Per Function** + + The average number of bytes per function definition. This is computed by + dividing Function Bytes by Number Of Functions. + +**# of VBR 32-bit Integers** + + The total number of 32-bit integers encoded using the Variable Bit Rate + encoding scheme. + +**# of VBR 64-bit Integers** + + The total number of 64-bit integers encoded using the Variable Bit Rate encoding + scheme. + +**# of VBR Compressed Bytes** + + The total number of bytes consumed by the 32-bit and 64-bit integers that use + the Variable Bit Rate encoding scheme. + +**# of VBR Expanded Bytes** + + The total number of bytes that would have been consumed by the 32-bit and 64-bit + integers had they not been compressed with the Variable Bit Rate encoding + scheme. + +**Bytes Saved With VBR** + + The total number of bytes saved by using the Variable Bit Rate encoding scheme. 
+ The percentage is relative to # of VBR Expanded Bytes. + +DETAILED OUTPUT DEFINITIONS +--------------------------- + +The following definitions occur only if the -nodetails option was not given. +The detailed output provides additional information on a per-function basis. + +**Type** + + The type signature of the function. + +**Byte Size** + + The total number of bytes in the function's block. + +**Basic Blocks** + + The number of basic blocks defined by the function. + +**Instructions** + + The number of instructions defined by the function. + +**Long Instructions** + + The number of instructions using the long instruction format in the function. + +**Operands** + + The number of operands used by all instructions in the function. + +**Instruction Size** + + The number of bytes consumed by instructions in the function. + +**Average Instruction Size** + + The average number of bytes consumed by the instructions in the function. + This value is computed by dividing Instruction Size by Instructions. + +**Bytes Per Instruction** + + The average number of bytes used by the function per instruction. This value + is computed by dividing Byte Size by Instructions. Note that this is not the + same as Average Instruction Size. It computes a number relative to the total + function size not just the size of the instruction list. + +**Number of VBR 32-bit Integers** + + The total number of 32-bit integers found in this function (for any use). + +**Number of VBR 64-bit Integers** + + The total number of 64-bit integers found in this function (for any use). + +**Number of VBR Compressed Bytes** + + The total number of bytes in this function consumed by the 32-bit and 64-bit + integers that use the Variable Bit Rate encoding scheme. + +**Number of VBR Expanded Bytes** + + The total number of bytes in this function that would have been consumed by + the 32-bit and 64-bit integers had they not been compressed with the Variable + Bit Rate encoding scheme. 
+ +**Bytes Saved With VBR** + + The total number of bytes saved in this function by using the Variable Bit + Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes. + +SEE ALSO +-------- + +:manpage:`llvm-dis(1)`, :doc:`/BitCodeFormat` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-config.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-config.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-config.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-config.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,126 @@ +llvm-config - Print LLVM compilation options +============================================ + +.. program:: llvm-config + +SYNOPSIS +-------- + +**llvm-config** *option* [*components*...] + +DESCRIPTION +----------- + +**llvm-config** makes it easier to build applications that use LLVM. It can +print the compiler flags, linker flags and object libraries needed to link +against LLVM. + +EXAMPLES +-------- + +To link against the JIT: + +.. code-block:: sh + + g++ `llvm-config --cxxflags` -o HowToUseJIT.o -c HowToUseJIT.cpp + g++ `llvm-config --ldflags` -o HowToUseJIT HowToUseJIT.o \ + `llvm-config --libs engine bcreader scalaropts` + +OPTIONS +------- + +**--version** + + Print the version number of LLVM. + +**-help** + + Print a summary of **llvm-config** arguments. + +**--prefix** + + Print the installation prefix for LLVM. + +**--src-root** + + Print the source root from which LLVM was built. + +**--obj-root** + + Print the object root used to build LLVM. + +**--bindir** + + Print the installation directory for LLVM binaries. + +**--includedir** + + Print the installation directory for LLVM headers. + +**--libdir** + + Print the installation directory for LLVM libraries. 
+ +**--cxxflags** + + Print the C++ compiler flags needed to use LLVM headers. + +**--ldflags** + + Print the flags needed to link against LLVM libraries. + +**--libs** + + Print all the libraries needed to link against the specified LLVM + *components*, including any dependencies. + +**--libnames** + + Similar to **--libs**, but prints the bare filenames of the libraries + without **-l** or pathnames. Useful for linking against a not-yet-installed + copy of LLVM. + +**--libfiles** + + Similar to **--libs**, but print the full path to each library file. This is + useful when creating makefile dependencies, to ensure that a tool is relinked if + any library it uses changes. + +**--components** + + Print all valid component names. + +**--targets-built** + + Print the component names for all targets supported by this copy of LLVM. + +**--build-mode** + + Print the build mode used when LLVM was built (e.g. Debug or Release) + + +COMPONENTS +---------- + +To print a list of all available components, run **llvm-config +--components**. In most cases, components correspond directly to LLVM +libraries. Useful "virtual" components include: + +**all** + + Includes all LLVM libraries. The default if no components are specified. + +**backend** + + Includes either a native backend or the C backend. + +**engine** + + Includes either a native JIT or the bitcode interpreter. + + +EXIT STATUS +----------- + +If **llvm-config** succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-cov.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-cov.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-cov.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-cov.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,487 @@ +llvm-cov - emit coverage information +==================================== + +.. program:: llvm-cov + +SYNOPSIS +-------- + +:program:`llvm-cov` *command* [*args...*] + +DESCRIPTION +----------- + +The :program:`llvm-cov` tool shows code coverage information for +programs that are instrumented to emit profile data. It can be used to +work with ``gcov``\-style coverage or with ``clang``\'s instrumentation +based profiling. + +If the program is invoked with a base name of ``gcov``, it will behave as if +the :program:`llvm-cov gcov` command were called. Otherwise, a command should +be provided. + +COMMANDS +-------- + +* :ref:`gcov <llvm-cov-gcov>` +* :ref:`show <llvm-cov-show>` +* :ref:`report <llvm-cov-report>` +* :ref:`export <llvm-cov-export>` + +.. program:: llvm-cov gcov + +.. _llvm-cov-gcov: + +GCOV COMMAND +------------ + +SYNOPSIS +^^^^^^^^ + +:program:`llvm-cov gcov` [*options*] *SOURCEFILE* + +DESCRIPTION +^^^^^^^^^^^ + +The :program:`llvm-cov gcov` tool reads code coverage data files and displays +the coverage information for a specified source file. It is compatible with the +``gcov`` tool from version 4.2 of ``GCC`` and may also be compatible with some +later versions of ``gcov``. + +To use :program:`llvm-cov gcov`, you must first build an instrumented version +of your application that collects coverage data as it runs. Compile with the +``-fprofile-arcs`` and ``-ftest-coverage`` options to add the +instrumentation. (Alternatively, you can use the ``--coverage`` option, which +includes both of those other options.) 
+ +At the time you compile the instrumented code, a ``.gcno`` data file will be +generated for each object file. These ``.gcno`` files contain half of the +coverage data. The other half of the data comes from ``.gcda`` files that are +generated when you run the instrumented program, with a separate ``.gcda`` +file for each object file. Each time you run the program, the execution counts +are summed into any existing ``.gcda`` files, so be sure to remove any old +files if you do not want their contents to be included. + +By default, the ``.gcda`` files are written into the same directory as the +object files, but you can override that by setting the ``GCOV_PREFIX`` and +``GCOV_PREFIX_STRIP`` environment variables. The ``GCOV_PREFIX_STRIP`` +variable specifies a number of directory components to be removed from the +start of the absolute path to the object file directory. After stripping those +directories, the prefix from the ``GCOV_PREFIX`` variable is added. These +environment variables allow you to run the instrumented program on a machine +where the original object file directories are not accessible, but you will +then need to copy the ``.gcda`` files back to the object file directories +where :program:`llvm-cov gcov` expects to find them. + +Once you have generated the coverage data files, run :program:`llvm-cov gcov` +for each main source file where you want to examine the coverage results. This +should be run from the same directory where you previously ran the +compiler. The results for the specified source file are written to a file named +by appending a ``.gcov`` suffix. A separate output file is also created for +each file included by the main source file, also with a ``.gcov`` suffix added. + +The basic content of an ``.gcov`` output file is a copy of the source file with +an execution count and line number prepended to every line. The execution +count is shown as ``-`` if a line does not contain any executable code. 
If +a line contains code but that code was never executed, the count is displayed +as ``#####``. + +OPTIONS +^^^^^^^ + +.. option:: -a, --all-blocks + + Display all basic blocks. If there are multiple blocks for a single line of + source code, this option causes llvm-cov to show the count for each block + instead of just one count for the entire line. + +.. option:: -b, --branch-probabilities + + Display conditional branch probabilities and a summary of branch information. + +.. option:: -c, --branch-counts + + Display branch counts instead of probabilities (requires -b). + +.. option:: -m, --demangled-names + + Demangle function names. + +.. option:: -f, --function-summaries + + Show a summary of coverage for each function instead of just one summary for + an entire source file. + +.. option:: --help + + Display available options (--help-hidden for more). + +.. option:: -l, --long-file-names + + For coverage output of files included from the main source file, add the + main file name followed by ``##`` as a prefix to the output file names. This + can be combined with the --preserve-paths option to use complete paths for + both the main file and the included file. + +.. option:: -n, --no-output + + Do not output any ``.gcov`` files. Summary information is still + displayed. + +.. option:: -o=<DIR|FILE>, --object-directory=<DIR>, --object-file=<FILE> + + Find objects in DIR or based on FILE's path. If you specify a particular + object file, the coverage data files are expected to have the same base name + with ``.gcno`` and ``.gcda`` extensions. If you specify a directory, the + files are expected in that directory with the same base name as the source + file. + +.. option:: -p, --preserve-paths + + Preserve path components when naming the coverage output files. In addition + to the source file name, include the directories from the path to that + file. The directories are separated by ``#`` characters, with ``.`` directories + removed and ``..`` directories replaced by ``^`` characters. 
When used with + the --long-file-names option, this applies to both the main file name and the + included file name. + +.. option:: -r + + Only dump files with relative paths or absolute paths with the prefix specified + by ``-s``. + +.. option:: -s= + + Source prefix to elide. + +.. option:: -t, --stdout + + Print to stdout instead of producing ``.gcov`` files. + +.. option:: -u, --unconditional-branches + + Include unconditional branches in the output for the --branch-probabilities + option. + +.. option:: -version + + Display the version of llvm-cov. + +.. option:: -x, --hash-filenames + + Use md5 hash of file name when naming the coverage output files. The source + file name will be suffixed by ``##`` followed by MD5 hash calculated for it. + +EXIT STATUS +^^^^^^^^^^^ + +:program:`llvm-cov gcov` returns 1 if it cannot read input files. Otherwise, +it exits with zero. + +.. program:: llvm-cov show + +.. _llvm-cov-show: + +SHOW COMMAND +------------ + +SYNOPSIS +^^^^^^^^ + +:program:`llvm-cov show` [*options*] -instr-profile *PROFILE* *BIN* [*-object BIN,...*] [[*-object BIN*]] [*SOURCES*] + +DESCRIPTION +^^^^^^^^^^^ + +The :program:`llvm-cov show` command shows line by line coverage of the +binaries *BIN*,... using the profile data *PROFILE*. It can optionally be +filtered to only show the coverage for the files listed in *SOURCES*. + +*BIN* may be an executable, object file, dynamic library, or archive (thin or +otherwise). + +To use :program:`llvm-cov show`, you need a program that is compiled with +instrumentation to emit profile and coverage data. To build such a program with +``clang`` use the ``-fprofile-instr-generate`` and ``-fcoverage-mapping`` +flags. If linking with the ``clang`` driver, pass ``-fprofile-instr-generate`` +to the link stage to make sure the necessary runtime libraries are linked in. 
+ +The coverage information is stored in the built executable or library itself, +and this is what you should pass to :program:`llvm-cov show` as a *BIN* +argument. The profile data is generated by running this instrumented program +normally. When the program exits it will write out a raw profile file, +typically called ``default.profraw``, which can be converted to a format that +is suitable for the *PROFILE* argument using the :program:`llvm-profdata merge` +tool. + +OPTIONS +^^^^^^^ + +.. option:: -show-branches= + + Show coverage for branch conditions in terms of either count or percentage. + The supported views are: "count", "percent". + +.. option:: -show-line-counts + + Show the execution counts for each line. Defaults to true, unless another + ``-show`` option is used. + +.. option:: -show-expansions + + Expand inclusions, such as preprocessor macros or textual inclusions, inline + in the display of the source file. Defaults to false. + +.. option:: -show-instantiations + + For source regions that are instantiated multiple times, such as templates in + ``C++``, show each instantiation separately as well as the combined summary. + Defaults to true. + +.. option:: -show-regions + + Show the execution counts for each region by displaying a caret that points to + the character where the region starts. Defaults to false. + +.. option:: -show-line-counts-or-regions + + Show the execution counts for each line if there is only one region on the + line, but show the individual regions if there are multiple on the line. + Defaults to false. + +.. option:: -use-color + + Enable or disable color output. By default this is autodetected. + +.. option:: -arch=[*NAMES*] + + Specify a list of architectures such that the Nth entry in the list + corresponds to the Nth specified binary. If the covered object is a universal + binary, this specifies the architecture to use. 
It is an error to specify an + architecture that is not included in the universal binary or to use an + architecture that does not match a non-universal binary. + +.. option:: -name=<NAME> + + Show code coverage only for functions with the given name. + +.. option:: -name-whitelist=<FILE> + + Show code coverage only for functions listed in the given file. Each line in + the file should start with `whitelist_fun:`, immediately followed by the name + of the function to accept. This name can be a wildcard expression. + +.. option:: -name-regex=<PATTERN> + + Show code coverage only for functions that match the given regular expression. + +.. option:: -ignore-filename-regex=<PATTERN> + + Skip source code files with file paths that match the given regular expression. + +.. option:: -format=<FORMAT> + + Use the specified output format. The supported formats are: "text", "html". + +.. option:: -tab-size=<TABSIZE> + + Replace tabs with spaces when preparing reports. Currently, this is + only supported for the html format. + +.. option:: -output-dir=PATH + + Specify a directory to write coverage reports into. If the directory does not + exist, it is created. When used in function view mode (i.e. when -name or + -name-regex are used to select specific functions), the report is written to + PATH/functions.EXTENSION. When used in file view mode, a report for each file + is written to PATH/REL_PATH_TO_FILE.EXTENSION. + +.. option:: -Xdemangler=<TOOL>|<TOOL-OPTION> + + Specify a symbol demangler. This can be used to make reports more + human-readable. This option can be specified multiple times to supply + arguments to the demangler (e.g. `-Xdemangler c++filt -Xdemangler -n` for C++). + The demangler is expected to read a newline-separated list of symbols from + stdin and write a newline-separated list of the same length to stdout. + +.. option:: -num-threads=N, -j=N + + Use N threads to write file reports (only applicable when -output-dir is + specified). When N=0, llvm-cov auto-detects an appropriate number of threads to + use. 
This is the default. + +.. option:: -compilation-dir=<dir> + + Directory used as a base for relative coverage mapping paths. Only applicable + when binaries have been compiled with one of `-fcoverage-prefix-map`, + `-fcoverage-compilation-dir`, or `-ffile-compilation-dir`. + +.. option:: -line-coverage-gt=<N> + + Show code coverage only for functions with line coverage greater than the + given threshold. + +.. option:: -line-coverage-lt=<N> + + Show code coverage only for functions with line coverage less than the given + threshold. + +.. option:: -region-coverage-gt=<N> + + Show code coverage only for functions with region coverage greater than the + given threshold. + +.. option:: -region-coverage-lt=<N> + + Show code coverage only for functions with region coverage less than the given + threshold. + +.. option:: -path-equivalence=<from>,<to> + + Map the paths in the coverage data to local source file paths. This allows you + to generate the coverage data on one machine, and then use llvm-cov on a + different machine where you have the same files on a different path. + +.. program:: llvm-cov report + +.. _llvm-cov-report: + +REPORT COMMAND +-------------- + +SYNOPSIS +^^^^^^^^ + +:program:`llvm-cov report` [*options*] -instr-profile *PROFILE* *BIN* [*-object BIN,...*] [[*-object BIN*]] [*SOURCES*] + +DESCRIPTION +^^^^^^^^^^^ + +The :program:`llvm-cov report` command displays a summary of the coverage of +the binaries *BIN*,... using the profile data *PROFILE*. It can optionally be +filtered to only show the coverage for the files listed in *SOURCES*. + +*BIN* may be an executable, object file, dynamic library, or archive (thin or +otherwise). + +If no source files are provided, a summary line is printed for each file in the +coverage data. If any files are provided, summaries can be shown for each +function in the listed files if the ``-show-functions`` option is enabled. + +For information on compiling programs for coverage and generating profile data, +see :ref:`llvm-cov-show`. 
+ +OPTIONS +^^^^^^^ + +.. option:: -use-color[=VALUE] + + Enable or disable color output. By default this is autodetected. + +.. option:: -arch= + + If the covered binary is a universal binary, select the architecture to use. + It is an error to specify an architecture that is not included in the + universal binary or to use an architecture that does not match a + non-universal binary. + +.. option:: -show-region-summary + + Show statistics for all regions. Defaults to true. + +.. option:: -show-branch-summary + + Show statistics for all branch conditions. Defaults to true. + +.. option:: -show-functions + + Show coverage summaries for each function. Defaults to false. + +.. option:: -show-instantiation-summary + + Show statistics for all function instantiations. Defaults to false. + +.. option:: -ignore-filename-regex= + + Skip source code files with file paths that match the given regular expression. + +.. option:: -compilation-dir= + + Directory used as a base for relative coverage mapping paths. Only applicable + when binaries have been compiled with one of `-fcoverage-prefix-map` + `-fcoverage-compilation-dir`, or `-ffile-compilation-dir`. + +.. program:: llvm-cov export + +.. _llvm-cov-export: + +EXPORT COMMAND +-------------- + +SYNOPSIS +^^^^^^^^ + +:program:`llvm-cov export` [*options*] -instr-profile *PROFILE* *BIN* [*-object BIN,...*] [[*-object BIN*]] [*SOURCES*] + +DESCRIPTION +^^^^^^^^^^^ + +The :program:`llvm-cov export` command exports coverage data of the binaries +*BIN*,... using the profile data *PROFILE* in either JSON or lcov trace file +format. + +When exporting JSON, the regions, functions, branches, expansions, and +summaries of the coverage data will be exported. When exporting an lcov trace +file, the line-based coverage, branch coverage, and summaries will be exported. + +The exported data can optionally be filtered to only export the coverage +for the files listed in *SOURCES*. 
+ +For information on compiling programs for coverage and generating profile data, +see :ref:`llvm-cov-show`. + +OPTIONS +^^^^^^^ + +.. option:: -arch= + + If the covered binary is a universal binary, select the architecture to use. + It is an error to specify an architecture that is not included in the + universal binary or to use an architecture that does not match a + non-universal binary. + +.. option:: -format= + + Use the specified output format. The supported formats are: "text" (JSON), + "lcov". + +.. option:: -summary-only + + Export only summary information for each file in the coverage data. This mode + will not export coverage information for smaller units such as individual + functions or regions. The result will contain the same information as produced + by the :program:`llvm-cov report` command, but presented in JSON or lcov + format rather than text. + +.. option:: -ignore-filename-regex= + + Skip source code files with file paths that match the given regular expression. + + .. option:: -skip-expansions + + Skip exporting macro expansion coverage data. + + .. option:: -skip-functions + + Skip exporting per-function coverage data. + + .. option:: -num-threads=N, -j=N + + Use N threads to export coverage data. When N=0, llvm-cov auto-detects an + appropriate number of threads to use. This is the default. + +.. option:: -compilation-dir= + + Directory used as a base for relative coverage mapping paths. Only applicable + when binaries have been compiled with one of `-fcoverage-prefix-map` + `-fcoverage-compilation-dir`, or `-ffile-compilation-dir`. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxfilt.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxfilt.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxfilt.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxfilt.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,82 @@ +llvm-cxxfilt - LLVM symbol name demangler +========================================= + +.. program:: llvm-cxxfilt + +SYNOPSIS +-------- + +:program:`llvm-cxxfilt` [*options*] [*mangled names...*] + +DESCRIPTION +----------- + +:program:`llvm-cxxfilt` is a symbol demangler that can be used as a replacement +for the GNU :program:`c++filt` tool. It takes a series of symbol names and +prints their demangled form on the standard output stream. If a name cannot be +demangled, it is simply printed as is. + +If no names are specified on the command-line, names are read interactively from +the standard input stream. When reading names from standard input, each input +line is split on characters that are not part of valid Itanium name manglings, +i.e. characters that are not alphanumeric, '.', '$', or '_'. Separators between +names are copied to the output as is. + +EXAMPLE +------- + +.. code-block:: console + + $ llvm-cxxfilt _Z3foov _Z3bari not_mangled + foo() + bar(int) + not_mangled + $ cat input.txt + | _Z3foov *** _Z3bari *** not_mangled | + $ llvm-cxxfilt < input.txt + | foo() *** bar(int) *** not_mangled | + +OPTIONS +------- + +.. option:: --format=, -s + + Mangling scheme to assume. Valid values are ``auto`` (default, auto-detect the + style) and ``gnu`` (assume GNU/Itanium style). + +.. option:: --help, -h + + Print a summary of command line options. + +.. option:: --no-strip-underscore, -n + + Do not strip a leading underscore. This is the default for all platforms + except Mach-O based hosts. 
+ +.. option:: --strip-underscore, -_ + + Strip a single leading underscore, if present, from each input name before + demangling. On by default on Mach-O based platforms. + +.. option:: --types, -t + + Attempt to demangle names as type names as well as function names. + +.. option:: --version + + Display the version of the :program:`llvm-cxxfilt` executable. + +.. option:: @ + + Read command-line options from response file ``. + +EXIT STATUS +----------- + +:program:`llvm-cxxfilt` returns 0 unless it encounters a usage error, in which +case a non-zero exit code is returned. + +SEE ALSO +-------- + +:manpage:`llvm-nm(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxmap.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxmap.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxmap.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-cxxmap.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,101 @@ +llvm-cxxmap - Mangled name remapping tool +========================================= + +.. program:: llvm-cxxmap + +SYNOPSIS +-------- + +:program:`llvm-cxxmap` [*options*] *symbol-file-1* *symbol-file-2* + +DESCRIPTION +----------- + +The :program:`llvm-cxxmap` tool performs fuzzy matching of C++ mangled names, +based on a file describing name components that should be considered equivalent. + +The symbol files should contain a list of C++ mangled names (one per line). +Blank lines and lines starting with ``#`` are ignored. The output is a list +of pairs of equivalent symbols, one per line, of the form + +.. code-block:: none + + + +where ```` is a symbol from *symbol-file-1* and ```` is +a symbol from *symbol-file-2*. Mappings for which the two symbols are identical +are omitted. + +OPTIONS +------- + +.. program:: llvm-cxxmap + +.. 
option:: -remapping-file=file, -r=file + + Specify a file containing a list of equivalence rules that should be used + to determine whether two symbols are equivalent. Required. + See :ref:`remapping-file`. + +.. option:: -output=file, -o=file + + Specify a file to write the list of matched names to. If unspecified, the + list will be written to stdout. + +.. option:: -Wambiguous + + Produce a warning if there are multiple equivalent (but distinct) symbols in + *symbol-file-2*. + +.. option:: -Wincomplete + + Produce a warning if *symbol-file-1* contains a symbol for which there is no + equivalent symbol in *symbol-file-2*. + +.. _remapping-file: + +REMAPPING FILE +-------------- + +The remapping file is a text file containing lines of the form + +.. code-block:: none + + fragmentkind fragment1 fragment2 + +where ``fragmentkind`` is one of ``name``, ``type``, or ``encoding``, +indicating whether the following mangled name fragments are +<`name `_>s, +<`type `_>s, or +<`encoding `_>s, +respectively. +Blank lines and lines starting with ``#`` are ignored. + +Unmangled C names can be expressed as an ``encoding`` that is a (length-prefixed) +<`source-name `_>: + +.. code-block:: none + + # C function "void foo_bar()" is remapped to C++ function "void foo::bar()". + encoding 7foo_bar _Z3foo3barv + +For convenience, built-in s such as ``St`` and ``Ss`` +are accepted as s (even though they technically are not s). + +For example, to specify that ``absl::string_view`` and ``std::string_view`` +should be treated as equivalent, the following remapping file could be used: + +.. code-block:: none + + # absl::string_view is considered equivalent to std::string_view + type N4absl11string_viewE St17basic_string_viewIcSt11char_traitsIcEE + + # std:: might be std::__1:: in libc++ or std::__cxx11:: in libstdc++ + name St St3__1 + name St St7__cxx11 + +.. note:: + + Symbol remapping is currently only supported for C++ mangled names + following the Itanium C++ ABI mangling scheme. 
This covers all C++ targets + supported by Clang other than Windows targets. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-diff.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-diff.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-diff.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-diff.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,50 @@ +llvm-diff - LLVM structural 'diff' +================================== + +.. program:: llvm-diff + +SYNOPSIS +-------- + +**llvm-diff** [*options*] *module 1* *module 2* [*global name ...*] + +DESCRIPTION +----------- + +**llvm-diff** compares the structure of two LLVM modules, primarily +focusing on differences in function definitions. Insignificant +differences, such as changes in the ordering of globals or in the +names of local values, are ignored. + +An input module will be interpreted as an assembly file if its name +ends in '.ll'; otherwise it will be read in as a bitcode file. + +If a list of global names is given, just the values with those names +are compared; otherwise, all global values are compared, and +diagnostics are produced for globals which only appear in one module +or the other. + +**llvm-diff** compares two functions by comparing their basic blocks, +beginning with the entry blocks. If the terminators seem to match, +then the corresponding successors are compared; otherwise they are +ignored. This algorithm is very sensitive to changes in control flow, +which tend to stop any downstream changes from being detected. + +**llvm-diff** is intended as a debugging tool for writers of LLVM +passes and frontends. It does not have a stable output format. + +EXIT STATUS +----------- + +If **llvm-diff** finds no differences between the modules, it will exit +with 0 and produce no output. 
Otherwise it will exit with a non-zero +value. + +BUGS +---- + +Many important differences, like changes in linkage or function +attributes, are not diagnosed. + +Changes in memory behavior (for example, coalescing loads) can cause +massive detected differences in blocks. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-dis.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-dis.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-dis.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-dis.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,54 @@ +llvm-dis - LLVM disassembler +============================ + +.. program:: llvm-dis + +SYNOPSIS +-------- + +**llvm-dis** [*options*] [*filename*] + +DESCRIPTION +----------- + +The **llvm-dis** command is the LLVM disassembler. It takes an LLVM +bitcode file and converts it into human-readable LLVM assembly language. + +If filename is omitted or specified as ``-``, **llvm-dis** reads its +input from standard input. + +If the input is being read from standard input, then **llvm-dis** +will send its output to standard output by default. Otherwise, the +output will be written to a file named after the input file, with +a ``.ll`` suffix added (any existing ``.bc`` suffix will first be +removed). You can override the choice of output file using the +**-o** option. + +OPTIONS +------- + +**-f** + + Enable binary output on terminals. Normally, **llvm-dis** will refuse to + write raw bitcode output if the output stream is a terminal. With this option, + **llvm-dis** will write raw bitcode regardless of the output device. + +**-help** + + Print a summary of command line options. + +**-o** *filename* + + Specify the output file name. If *filename* is -, then the output is sent + to standard output. 
+ +EXIT STATUS +----------- + +If **llvm-dis** succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value. + +SEE ALSO +-------- + +:manpage:`llvm-as(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-dwarfdump.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-dwarfdump.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-dwarfdump.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-dwarfdump.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,199 @@ +llvm-dwarfdump - dump and verify DWARF debug information +======================================================== + +.. program:: llvm-dwarfdump + +SYNOPSIS +-------- + +:program:`llvm-dwarfdump` [*options*] [*filename ...*] + +DESCRIPTION +----------- + +:program:`llvm-dwarfdump` parses DWARF sections in object files, +archives, and `.dSYM` bundles and prints their contents in +human-readable form. Only the .debug_info section is printed unless one of +the section-specific options or :option:`--all` is specified. + +If no input file is specified, `a.out` is used instead. If `-` is used as the +input file, :program:`llvm-dwarfdump` reads the input from its standard input +stream. + +OPTIONS +------- + +.. option:: -a, --all + + Dump all supported DWARF sections. + +.. option:: --arch= + + Dump DWARF debug information for the specified CPU architecture. + Architectures may be specified by name or by number. This + option can be specified multiple times, once for each desired + architecture. All CPU architectures will be printed by + default. + +.. option:: -c, --show-children + + Show a debug info entry's children when selectively printing with + the `=` argument of :option:`--debug-info`, or options such + as :option:`--find` or :option:`--name`. + +.. option:: --color + + Use colors in output. 
+ +.. option:: -f , --find= + + Search for the exact text in the accelerator tables + and print the matching debug information entries. + When there are no accelerator tables or the name of the DIE + you are looking for is not found in the accelerator tables, + try using the slower but more complete :option:`--name` option. + +.. option:: -F, --show-form + + Show DWARF form types after the DWARF attribute types. + +.. option:: -h, --help + + Show help and usage for this command. + +.. option:: --help-list + + Show help and usage for this command without grouping the options + into categories. + +.. option:: -i, --ignore-case + + Ignore case distinctions when using :option:`--name`. + +.. option:: -n , --name= + + Find and print all debug info entries whose name + (`DW_AT_name` attribute) is . + +.. option:: --lookup=
+ + Look up
in the debug information and print out the file, + function, block, and line table details. + +.. option:: -o + + Redirect output to a file specified by , where `-` is the + standard output stream. + +.. option:: -p, --show-parents + + Show a debug info entry's parents when selectively printing with + the `=` argument of :option:`--debug-info`, or options such + as :option:`--find` or :option:`--name`. + +.. option:: --parent-recurse-depth= + + When displaying debug info entry parents, only show them to a + maximum depth of . + +.. option:: --quiet + + Use with :option:`--verify` to not emit to `STDOUT`. + +.. option:: -r , --recurse-depth= + + When displaying debug info entries, only show children to a maximum + depth of . + +.. option:: --show-section-sizes + + Show the sizes of all debug sections, expressed in bytes. + +.. option:: --statistics + + Collect debug info quality metrics and print the results + as machine-readable single-line JSON output. The output + format is described in the section below (:ref:`stats-format`). + +.. option:: --summarize-types + + Abbreviate the description of type unit entries. + +.. option:: -x, --regex + + Treat any strings as regular expressions when searching + with :option:`--name`. If :option:`--ignore-case` is also specified, + the regular expression becomes case-insensitive. + +.. option:: -u, --uuid + + Show the UUID for each architecture. + +.. option:: --diff + + Dump the output in a format that is more friendly for comparing + DWARF output from two different files. + +.. option:: -v, --verbose + + Display verbose information when dumping. This can help to debug + DWARF issues. + +.. option:: --verify + + Verify the structure of the DWARF information by verifying the + compile unit chains, DIE relationships graph, address + ranges, and more. + +.. option:: --version + + Display the version of the tool. + +.. 
option:: --debug-abbrev, --debug-addr, --debug-aranges, --debug-cu-index, --debug-frame[=], --debug-gnu-pubnames, --debug-gnu-pubtypes, --debug-info [=], --debug-line [=], --debug-line-str, --debug-loc [=], --debug-loclists [=], --debug-macro, --debug-names, --debug-pubnames, --debug-pubtypes, --debug-ranges, --debug-rnglists, --debug-str, --debug-str-offsets, --debug-tu-index, --debug-types [=], --eh-frame [=], --gdb-index, --apple-names, --apple-types, --apple-namespaces, --apple-objc + + Dump the specified DWARF section by name. Only the + `.debug_info` section is shown by default. Some entries + support adding an `=` as a way to provide an + optional offset of the exact entry to dump within the + respective section. When an offset is provided, only the + entry at that offset will be dumped, else the entire + section will be dumped. + +.. option:: @ + + Read command-line options from ``. + +.. _stats-format: + +FORMAT OF STATISTICS OUTPUT +--------------------------- + +The :option:`--statistics` option generates single-line JSON output +representing quality metrics of the processed debug info. These metrics are +useful to compare changes between two compilers, particularly for judging +the effect that a change to the compiler has on the debug info quality. + +The output is formatted as key-value pairs. The first pair contains a version +number. The following naming scheme is used for the keys: + + - `variables` ==> local variables and parameters + - `local vars` ==> local variables + - `params` ==> formal parameters + +For aggregated values, the following keys are used: + + - `sum_of_all_variables(...)` ==> the sum applied to all variables + - `#bytes` ==> the number of bytes + - `#variables - entry values ...` ==> the number of variables excluding + the entry values etc. + +EXIT STATUS +----------- + +:program:`llvm-dwarfdump` returns 0 if the input files were parsed and dumped +successfully. Otherwise, it returns 1.
+ +SEE ALSO +-------- + +:manpage:`dsymutil(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-exegesis.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-exegesis.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-exegesis.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-exegesis.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,311 @@ +llvm-exegesis - LLVM Machine Instruction Benchmark +================================================== + +.. program:: llvm-exegesis + +SYNOPSIS +-------- + +:program:`llvm-exegesis` [*options*] + +DESCRIPTION +----------- + +:program:`llvm-exegesis` is a benchmarking tool that uses information available +in LLVM to measure host machine instruction characteristics like latency, +throughput, or port decomposition. + +Given an LLVM opcode name and a benchmarking mode, :program:`llvm-exegesis` +generates a code snippet that makes execution as serial (resp. as parallel) as +possible so that we can measure the latency (resp. inverse throughput/uop decomposition) +of the instruction. +The code snippet is jitted and executed on the host subtarget. The time taken +(resp. resource usage) is measured using hardware performance counters. The +result is printed out as YAML to the standard output. + +The main goal of this tool is to automatically (in)validate the LLVM's TableDef +scheduling models. To that end, we also provide analysis of the results. + +:program:`llvm-exegesis` can also benchmark arbitrary user-provided code +snippets. + +EXAMPLE 1: benchmarking instructions +------------------------------------ + +Assume you have an X86-64 machine. To measure the latency of a single +instruction, run: + +.. 
code-block:: bash + + $ llvm-exegesis -mode=latency -opcode-name=ADD64rr + +Measuring the uop decomposition or inverse throughput of an instruction works similarly: + +.. code-block:: bash + + $ llvm-exegesis -mode=uops -opcode-name=ADD64rr + $ llvm-exegesis -mode=inverse_throughput -opcode-name=ADD64rr + + +The output is a YAML document (the default is to write to stdout, but you can +redirect the output to a file using `-benchmarks-file`): + +.. code-block:: none + + --- + key: + opcode_name: ADD64rr + mode: latency + config: '' + cpu_name: haswell + llvm_triple: x86_64-unknown-linux-gnu + num_repetitions: 10000 + measurements: + - { key: latency, value: 1.0058, debug_string: '' } + error: '' + info: 'explicit self cycles, selecting one aliasing configuration. + Snippet: + ADD64rr R8, R8, R10 + ' + ... + +To measure the latency of all instructions for the host architecture, run: + +.. code-block:: bash + + $ llvm-exegesis -mode=latency -opcode-index=-1 + + +EXAMPLE 2: benchmarking a custom code snippet +--------------------------------------------- + +To measure the latency/uops of a custom piece of code, you can specify the +`snippets-file` option (`-` reads from standard input). + +.. code-block:: bash + + $ echo "vzeroupper" | llvm-exegesis -mode=uops -snippets-file=- + +Real-life code snippets typically depend on registers or memory. +:program:`llvm-exegesis` checks the liveliness of registers (i.e. any register +use has a corresponding def or is a "live in"). If your code depends on the +value of some registers, you have two options: + +- Mark the register as requiring a definition. :program:`llvm-exegesis` will + automatically assign a value to the register. This can be done using the + directive `LLVM-EXEGESIS-DEFREG `, where `` + is a bit pattern used to fill ``. If `` is smaller than + the register width, it will be sign-extended. +- Mark the register as a "live in". 
:program:`llvm-exegesis` will benchmark + using whatever value was in this registers on entry. This can be done using + the directive `LLVM-EXEGESIS-LIVEIN `. + +For example, the following code snippet depends on the values of XMM1 (which +will be set by the tool) and the memory buffer passed in RDI (live in). + +.. code-block:: none + + # LLVM-EXEGESIS-LIVEIN RDI + # LLVM-EXEGESIS-DEFREG XMM1 42 + vmulps (%rdi), %xmm1, %xmm2 + vhaddps %xmm2, %xmm2, %xmm3 + addq $0x10, %rdi + + +EXAMPLE 3: analysis +------------------- + +Assuming you have a set of benchmarked instructions (either latency or uops) as +YAML in file `/tmp/benchmarks.yaml`, you can analyze the results using the +following command: + +.. code-block:: bash + + $ llvm-exegesis -mode=analysis \ + -benchmarks-file=/tmp/benchmarks.yaml \ + -analysis-clusters-output-file=/tmp/clusters.csv \ + -analysis-inconsistencies-output-file=/tmp/inconsistencies.html + +This will group the instructions into clusters with the same performance +characteristics. The clusters will be written out to `/tmp/clusters.csv` in the +following format: + +.. code-block:: none + + cluster_id,opcode_name,config,sched_class + ... + 2,ADD32ri8_DB,,WriteALU,1.00 + 2,ADD32ri_DB,,WriteALU,1.01 + 2,ADD32rr,,WriteALU,1.01 + 2,ADD32rr_DB,,WriteALU,1.00 + 2,ADD32rr_REV,,WriteALU,1.00 + 2,ADD64i32,,WriteALU,1.01 + 2,ADD64ri32,,WriteALU,1.01 + 2,MOVSX64rr32,,BSWAP32r_BSWAP64r_MOVSX64rr32,1.00 + 2,VPADDQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.02 + 2,VPSUBQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.01 + 2,ADD64ri8,,WriteALU,1.00 + 2,SETBr,,WriteSETCC,1.01 + ... + +:program:`llvm-exegesis` will also analyze the clusters to point out +inconsistencies in the scheduling information. The output is an html file. For +example, `/tmp/inconsistencies.html` will contain messages like the following : + +.. 
image:: llvm-exegesis-analysis.png + :align: center + +Note that the scheduling class names will be resolved only when +:program:`llvm-exegesis` is compiled in debug mode, else only the class id will +be shown. This does not invalidate any of the analysis results though. + +OPTIONS +------- + +.. option:: -help + + Print a summary of command line options. + +.. option:: -opcode-index= + + Specify the opcode to measure, by index. Specifying `-1` will result + in measuring every existing opcode. See example 1 for details. + Either `opcode-index`, `opcode-name` or `snippets-file` must be set. + +.. option:: -opcode-name=,,... + + Specify the opcode to measure, by name. Several opcodes can be specified as + a comma-separated list. See example 1 for details. + Either `opcode-index`, `opcode-name` or `snippets-file` must be set. + +.. option:: -snippets-file= + + Specify the custom code snippet to measure. See example 2 for details. + Either `opcode-index`, `opcode-name` or `snippets-file` must be set. + +.. option:: -mode=[latency|uops|inverse_throughput|analysis] + + Specify the run mode. Note that some modes have additional requirements and options. + + `latency` mode can make use of either RDTSC or LBR. + `latency[LBR]` is only available on X86 (at least `Skylake`). + To run in `latency` mode, a positive value must be specified + for `x86-lbr-sample-period` and `--repetition-mode=loop`. + + In `analysis` mode, you also need to specify at least one of the + `-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`. + +.. option:: -x86-lbr-sample-period= + + Specify the LBR sampling period - how many branches before we take a sample. + When a positive value is specified for this option and when the mode is `latency`, + we will use LBRs for measuring. + On choosing the "right" sampling period, a small value is preferred, but throttling + could occur if the sampling is too frequent.
A prime number should be used to + avoid consistently skipping certain blocks. + +.. option:: -repetition-mode=[duplicate|loop|min] + + Specify the repetition mode. `duplicate` will create a large, straight line + basic block with `num-repetitions` instructions (repeating the snippet + `num-repetitions`/`snippet size` times). `loop` will, optionally, duplicate the + snippet until the loop body contains at least `loop-body-size` instructions, + and then wrap the result in a loop which will execute `num-repetitions` + instructions (thus, again, repeating the snippet + `num-repetitions`/`snippet size` times). The `loop` mode, especially with loop + unrolling tends to better hide the effects of the CPU frontend on architectures + that cache decoded instructions, but consumes a register for counting + iterations. If performing an analysis over many opcodes, it may be best to + instead use the `min` mode, which will run each other mode, + and produce the minimal measured result. + +.. option:: -num-repetitions= + + Specify the target number of executed instructions. Note that the actual + repetition count of the snippet will be `num-repetitions`/`snippet size`. + Higher values lead to more accurate measurements but lengthen the benchmark. + +.. option:: -loop-body-size= + + Only effective for `-repetition-mode=[loop|min]`. + Instead of looping over the snippet directly, first duplicate it so that the + loop body contains at least this many instructions. This potentially results + in the loop body being cached in the CPU Op Cache / Loop Cache, + which may have higher throughput than the CPU decoders. + +.. option:: -max-configs-per-opcode= + + Specify the maximum configurations that can be generated for each opcode. + By default this is `1`, meaning that we assume that a single measurement is + enough to characterize an opcode.
This might not be true of all instructions: + for example, the performance characteristics of the LEA instruction on X86 + depend on the value of assigned registers and immediates. Setting a value of + `-max-configs-per-opcode` larger than `1` allows `llvm-exegesis` to explore + more configurations to discover if some register or immediate assignments + lead to different performance characteristics. + + +.. option:: -benchmarks-file= + + File to read (`analysis` mode) or write (`latency`/`uops`/`inverse_throughput` + modes) benchmark results. "-" uses stdin/stdout. + +.. option:: -analysis-clusters-output-file= + + If provided, write the analysis clusters as CSV to this file. "-" prints to + stdout. By default, this analysis is not run. + +.. option:: -analysis-inconsistencies-output-file= + + If non-empty, write inconsistencies found during analysis to this file. `-` + prints to stdout. By default, this analysis is not run. + +.. option:: -analysis-clustering=[dbscan,naive] + + Specify the clustering algorithm to use. By default DBSCAN will be used. + Naive clustering algorithm is better for doing further work on the + `-analysis-inconsistencies-output-file=` output, it will create one cluster + per opcode, and check that the cluster is stable (all points are neighbours). + +.. option:: -analysis-numpoints= + + Specify the numPoints parameters to be used for DBSCAN clustering + (`analysis` mode, DBSCAN only). + +.. option:: -analysis-clustering-epsilon= + + Specify the epsilon parameter used for clustering of benchmark points + (`analysis` mode). + +.. option:: -analysis-inconsistency-epsilon= + + Specify the epsilon parameter used for detection of when the cluster + is different from the LLVM schedule profile values (`analysis` mode). + +..
option:: -analysis-display-unstable-clusters + + If there is more than one benchmark for an opcode, said benchmarks may end up + not being clustered into the same cluster if the measured performance + characteristics are different. By default all such opcodes are filtered out. + This flag will instead show only such unstable opcodes. + +.. option:: -ignore-invalid-sched-class=false + + If set, ignore instructions that do not have a sched class (class idx = 0). + +.. option:: -mcpu= + + If set, measure the cpu characteristics using the counters for this CPU. This + is useful when creating new sched models (the host CPU is unknown to LLVM). + +.. option:: --dump-object-to-disk=true + + By default, llvm-exegesis will dump the generated code to a temporary file to + enable code inspection. You may disable it to speed up the execution and save + disk space. + +EXIT STATUS +----------- + +:program:`llvm-exegesis` returns 0 on success. Otherwise, an error message is +printed to standard error, and the tool returns a non 0 value. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-extract.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-extract.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-extract.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-extract.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,110 @@ +llvm-extract - extract a function from an LLVM module +===================================================== + +.. program:: llvm-extract + +SYNOPSIS +-------- + +:program:`llvm-extract` [*options*] **--func** *function-name* [*filename*] + +DESCRIPTION +----------- + +The :program:`llvm-extract` command takes the name of a function and extracts +it from the specified LLVM bitcode file.
It is primarily used as a debugging +tool to reduce test cases from larger programs that are triggering a bug. + +In addition to extracting the bitcode of the specified function, +:program:`llvm-extract` will also remove unreachable global variables, +prototypes, and unused types. + +The :program:`llvm-extract` command reads its input from standard input if +filename is omitted or if filename is ``-``. The output is always written to +standard output, unless the **-o** option is specified (see below). + +OPTIONS +------- + +**--alias** *alias-name* + + Extract the alias named *alias-name* from the LLVM bitcode. May be + specified multiple times to extract multiple aliases at once. + +**--ralias** *alias-regular-expr* + + Extract the alias matching *alias-regular-expr* from the LLVM bitcode. + All aliases matching the regular expression will be extracted. May be + specified multiple times. + +**--bb** *basic-block-specifier* + + Extract basic block(s) specified in *basic-block-specifier*. May be + specified multiple times. Each specifier pair will create + a function. If multiple basic blocks are specified in one pair, the first + block in the sequence should dominate the rest. + +**--delete** + + Delete specified Globals from Module. + +**-f** + + Enable binary output on terminals. Normally, :program:`llvm-extract` will + refuse to write raw bitcode output if the output stream is a terminal. With + this option, :program:`llvm-extract` will write raw bitcode regardless of the + output device. + +**--func** *function-name* + + Extract the function named *function-name* from the LLVM bitcode. May be + specified multiple times to extract multiple functions at once. + +**--rfunc** *function-regular-expr* + + Extract the function(s) matching *function-regular-expr* from the LLVM bitcode. + All functions matching the regular expression will be extracted. May be + specified multiple times.
+ +**--glob** *global-name* + + Extract the global variable named *global-name* from the LLVM bitcode. May be + specified multiple times to extract multiple global variables at once. + +**--rglob** *glob-regular-expr* + + Extract the global variable(s) matching *glob-regular-expr* from the LLVM + bitcode. All global variables matching the regular expression will be + extracted. May be specified multiple times. + +**--keep-const-init** + + Preserve the values of constant globals. + +**--recursive** + + Recursively extract all called functions. + +**-help** + + Print a summary of command line options. + +**-o** *filename* + + Specify the output filename. If filename is "-" (the default), then + :program:`llvm-extract` sends its output to standard output. + +**-S** + + Write output in LLVM intermediate language (instead of bitcode). + +EXIT STATUS +----------- + +If :program:`llvm-extract` succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value. + +SEE ALSO +-------- + +:manpage:`bugpoint(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-install-name-tool.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-install-name-tool.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-install-name-tool.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-install-name-tool.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,84 @@ +llvm-install-name-tool - LLVM tool for manipulating install-names and rpaths +============================================================================ + +..
program:: llvm-install-name-tool + +SYNOPSIS +-------- + +:program:`llvm-install-name-tool` [*options*] *input* + +DESCRIPTION +----------- + +:program:`llvm-install-name-tool` is a tool to manipulate dynamic shared library +install names and rpaths listed in a Mach-O binary. + +For most scenarios, it works as a drop-in replacement for Apple's +:program:`install_name_tool`. + +OPTIONS +-------- +At least one of the following options is required, and some options can be +combined with other options. Options :option:`-add_rpath`, :option:`-delete_rpath`, +and :option:`-rpath` can be combined in an invocation only if they do not share +the same `<rpath>` value. + +.. option:: -add_rpath <rpath> + + Add an rpath named ``<rpath>`` to the specified binary. Can be specified multiple + times to add multiple rpaths. Throws an error if ``<rpath>`` is already listed in + the binary. + +.. option:: -change <old_install_name> <new_install_name> + + Change an install name ``<old_install_name>`` to ``<new_install_name>`` in the + specified binary. Can be specified multiple times to change multiple dependent shared + library install names. Option is ignored if ``<old_install_name>`` is not listed + in the specified binary. + +.. option:: -delete_rpath <rpath> + + Delete an rpath named ``<rpath>`` from the specified binary. Can be specified multiple + times to delete multiple rpaths. Throws an error if ``<rpath>`` is not listed in + the binary. + +.. option:: -delete_all_rpaths + + Deletes all rpaths from the binary. + +.. option:: --help, -h + + Print a summary of command line options. + +.. option:: -id <name> + + Change shared library's identification name under LC_ID_DYLIB to ``<name>`` in the + specified binary. If specified multiple times, only the last :option:`-id` option is + selected. Option is ignored if the specified Mach-O binary is not a dynamic shared library. + +.. option:: -rpath <old_rpath> <new_rpath> + + Change an rpath named ``<old_rpath>`` to ``<new_rpath>`` in the specified binary. Can be specified + multiple times to change multiple rpaths. Throws an error if ``<old_rpath>`` is not listed + in the binary or ``<new_rpath>`` is already listed in the binary. + +.. 
option:: --version, -V + + Display the version of the :program:`llvm-install-name-tool` executable. + +EXIT STATUS +----------- + +:program:`llvm-install-name-tool` exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0. + +BUGS +---- + +To report bugs, please visit <https://bugs.llvm.org/>. + +SEE ALSO +-------- + +:manpage:`llvm-objcopy(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-lib.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-lib.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-lib.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-lib.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,29 @@ +llvm-lib - LLVM lib.exe compatible library tool +=============================================== + +.. program:: llvm-lib + +SYNOPSIS +-------- + +**llvm-lib** [/libpath:<path>] [/out:<output>] [/llvmlibthin] +[/ignore] [/machine] [/nologo] [files...] + +DESCRIPTION +----------- + +The **llvm-lib** command is intended to be a ``lib.exe`` compatible +tool. See https://msdn.microsoft.com/en-us/library/7ykb2k5f for the +general description. + +**llvm-lib** has the following extensions: + +* Bitcode files in symbol tables. + **llvm-lib** includes symbols from both bitcode files and regular + object files in the symbol table. + +* Creating thin archives. + The /llvmlibthin option causes **llvm-lib** to create thin archives + that contain only the symbol table and the header for the various + members. These files are much smaller, but are not compatible with + link.exe (lld can handle them). 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-libtool-darwin.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-libtool-darwin.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-libtool-darwin.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-libtool-darwin.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,105 @@ +llvm-libtool-darwin - LLVM tool for creating libraries for Darwin +================================================================= + +.. program:: llvm-libtool-darwin + +SYNOPSIS +-------- + +:program:`llvm-libtool-darwin` [*options*] ** + +DESCRIPTION +----------- + +:program:`llvm-libtool-darwin` is a tool for creating static and dynamic +libraries for Darwin. + +For most scenarios, it works as a drop-in replacement for cctools' +:program:`libtool`. + +OPTIONS +-------- +:program:`llvm-libtool-darwin` supports the following options: + +.. option:: -arch_only + + Build a static library only for the specified `` and ignore all + other architectures in the files. + +.. option:: -color + + Use colors in output. + +.. option:: -D + + Use zero for timestamps and UIDs/GIDs. This is set by default. + +.. option:: -filelist + + Read input file names from ``. File names are specified in `` + one per line, separated only by newlines. Whitespace on a line is assumed + to be part of the filename. If the directory name, `dirname`, is also + specified then it is prepended to each file name in the ``. + +.. option:: -h, -help + + Show help and usage for this command. + +.. option:: -help-list + + Show help and usage for this command without grouping the options + into categories. + +.. option:: -l + + Searches for the library libx.a in the library search path. If the string `` + ends with '.o', then the library 'x' is searched for without prepending 'lib' + or appending '.a'. 
If the library is found, it is added to the list of input + files. Otherwise, an error is raised. + +.. option:: -L + + Adds `` to the list of directories in which to search for libraries. The + directories are searched in the order in which they are specified with + :option:`-L` and before the default search path. The default search path + includes directories `/lib`, `/usr/lib` and `/usr/local/lib`. + +.. option:: -no_warning_for_no_symbols + + Do not warn about files that have no symbols. + +.. option:: -o + + Specify the output file name. Must be specified exactly once. + +.. option:: -static + + Produces a static library from the input files. + +.. option:: -U + + Use actual timestamps and UIDs/GIDs. + +.. option:: -V + + Display the version of this program and perform any operation specified. + +.. option:: -version + + Display the version of this program and exit immediately. + +EXIT STATUS +----------- + +:program:`llvm-libtool-darwin` exits with a non-zero exit code if there is an error. +Otherwise, it exits with code 0. + +BUGS +---- + +To report bugs, please visit . + +SEE ALSO +-------- + +:manpage:`llvm-ar(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-link.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-link.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-link.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-link.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,56 @@ +llvm-link - LLVM bitcode linker +=============================== + +.. program:: llvm-link + +SYNOPSIS +-------- + +:program:`llvm-link` [*options*] *filename ...* + +DESCRIPTION +----------- + +:program:`llvm-link` takes several LLVM bitcode files and links them together +into a single LLVM bitcode file. 
It writes the output file to standard output, +unless the :option:`-o` option is used to specify a filename. + +OPTIONS +------- + +.. option:: -f + + Enable binary output on terminals. Normally, :program:`llvm-link` will refuse + to write raw bitcode output if the output stream is a terminal. With this + option, :program:`llvm-link` will write raw bitcode regardless of the output + device. + +.. option:: -o filename + + Specify the output file name. If ``filename`` is "``-``", then + :program:`llvm-link` will write its output to standard output. + +.. option:: -S + + Write output in LLVM intermediate language (instead of bitcode). + +.. option:: -d + + If specified, :program:`llvm-link` prints a human-readable version of the + output bitcode file to standard error. + +.. option:: -help + + Print a summary of command line options. + +.. option:: -v + + Verbose mode. Print information about what :program:`llvm-link` is doing. + This typically includes a message for each bitcode file linked in and for each + library found. + +EXIT STATUS +----------- + +If :program:`llvm-link` succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-lipo.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-lipo.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-lipo.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-lipo.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,73 @@ +llvm-lipo - LLVM tool for manipulating universal binaries +========================================================= + +.. 
program:: llvm-lipo + +SYNOPSIS +-------- + +:program:`llvm-lipo` [*filenames...*] [*options*] + +DESCRIPTION +----------- +:program:`llvm-lipo` can create universal binaries from Mach-O files, extract regular object files from universal binaries, and display architecture information about both universal and regular files. + +COMMANDS +-------- +:program:`llvm-lipo` supports the following mutually exclusive commands: + +.. option:: -help, -h + + Display usage information and exit. + +.. option:: -version + + Display the version of this program. + +.. option:: -verify_arch [ ...] + + Take a single input file and verify the specified architectures are present in the file. + If so then exit with a status of 0 else exit with a status of 1. + +.. option:: -archs + + Take a single input file and display the architectures present in the file. + Each architecture is separated by a single whitespace. + Unknown architectures are displayed as unknown(CPUtype,CPUsubtype). + +.. option:: -info + + Take at least one input file and display the descriptions of each file. + The descriptions include the filename and architecture types separated by whitespace. + Universal binaries are grouped together first, followed by thin files. + Architectures in the fat file: are: + Non-fat file: is architecture: + +.. option:: -thin + + Take a single universal binary input file and the thin flag followed by an architecture type. + Require the output flag to be specified, and output a thin binary of the specified architecture. + +.. option:: -create + + Take at least one input file and require the output flag to be specified. + Output a universal binary combining the input files. + +.. option:: -replace + + Take a single universal binary input file and require the output flag to be specified. + The replace flag is followed by an architecture type, and a thin input file. 
+ Output a universal binary with the specified architecture slice in the + universal binary input replaced with the contents of the thin input file. + +.. option:: -segalign + + Additional flag that can be specified with create and replace. + The segalign flag is followed by an architecture type, and an alignment. + The alignment is a hexadecimal number that is a power of 2. + Output a file in which the slice with the specified architecture has the specified alignment. + +BUGS +---- + +To report bugs, please visit . diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-locstats.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-locstats.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-locstats.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-locstats.rst.txt 2021-07-09 07:05:00.000000000 +0000 @@ -0,0 +1,118 @@ +llvm-locstats - calculate statistics on DWARF debug location +============================================================ + +.. program:: llvm-locstats + +SYNOPSIS +-------- + +:program:`llvm-locstats` [*options*] [*filename*] + +DESCRIPTION +----------- + +:program:`llvm-locstats` works like a wrapper around :program:`llvm-dwarfdump`. +It parses :program:`llvm-dwarfdump` statistics regarding debug location by +pretty printing it in a more human readable way. + +The line 0% shows the number and the percentage of DIEs with no location +information, but the line 100% shows the information for DIEs where there is +location information in all code section bytes (where the variable or parameter +is in the scope). The line [50%,60%) shows the number and the percentage of DIEs +where the location information is between 50 and 60 percentage of its scope +covered. + +OPTIONS +------- + +.. option:: --only-variables + + calculate the location statistics only for local variables + +.. 
option:: --only-formal-parameters + + calculate the location statistics only for formal parameters + +.. option:: --ignore-debug-entry-values + + ignore the location statistics on locations containing the + debug entry values DWARF operation + +.. option:: --draw-plot + + make histogram of location buckets generated (requires + matplotlib) + +.. option:: --compare + + compare the debug location coverage on two files provided, and draw + a plot showing the difference (requires matplotlib) + +EXIT STATUS +----------- + +:program:`llvm-locstats` returns 0 if the input files were parsed +successfully. Otherwise, it returns 1. + +EXAMPLE 1 +-------------- + +Pretty print the location coverage on the standard output. + +.. code-block:: none + + llvm-locstats a.out + + ================================================= + Debug Location Statistics + ================================================= + cov% samples percentage(~) + ------------------------------------------------- + 0% 1 16% + (0%,10%) 0 0% + [10%,20%) 0 0% + [20%,30%) 0 0% + [30%,40%) 0 0% + [40%,50%) 0 0% + [50%,60%) 1 16% + [60%,70%) 0 0% + [70%,80%) 0 0% + [80%,90%) 1 16% + [90%,100%) 0 0% + 100% 3 50% + ================================================= + -the number of debug variables processed: 6 + -PC ranges covered: 81% + ------------------------------------------------- + -total availability: 83% + ================================================= + +EXAMPLE 2 +-------------- + +Generate a plot as an image file. + +.. code-block:: none + + llvm-locstats --draw-plot file1.out + +.. image:: locstats-draw-plot.png + :align: center + +EXAMPLE 3 +-------------- + +Generate a plot as an image file showing the difference in the debug location +coverage. + +.. code-block:: none + + llvm-locstats --compare file1.out file1.withentryvals.out + +.. 
image:: locstats-compare.png + :align: center + +SEE ALSO +-------- + +:manpage:`llvm-dwarfdump(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-mca.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-mca.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-mca.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-mca.rst.txt 2021-09-14 14:09:01.000000000 +0000 @@ -0,0 +1,1046 @@ +llvm-mca - LLVM Machine Code Analyzer +===================================== + +.. program:: llvm-mca + +SYNOPSIS +-------- + +:program:`llvm-mca` [*options*] [input] + +DESCRIPTION +----------- + +:program:`llvm-mca` is a performance analysis tool that uses information +available in LLVM (e.g. scheduling models) to statically measure the performance +of machine code in a specific CPU. + +Performance is measured in terms of throughput as well as processor resource +consumption. The tool currently works for processors with a backend for which +there is a scheduling model available in LLVM. + +The main goal of this tool is not just to predict the performance of the code +when run on the target, but also help with diagnosing potential performance +issues. + +Given an assembly code sequence, :program:`llvm-mca` estimates the Instructions +Per Cycle (IPC), as well as hardware resource pressure. The analysis and +reporting style were inspired by the IACA tool from Intel. + +For example, you can compile code with clang, output assembly, and pipe it +directly into :program:`llvm-mca` for analysis: + +.. code-block:: bash + + $ clang foo.c -O2 -target x86_64-unknown-unknown -S -o - | llvm-mca -mcpu=btver2 + +Or for Intel syntax: + +.. 
code-block:: bash + + $ clang foo.c -O2 -target x86_64-unknown-unknown -mllvm -x86-asm-syntax=intel -S -o - | llvm-mca -mcpu=btver2 + +(:program:`llvm-mca` detects Intel syntax by the presence of an `.intel_syntax` +directive at the beginning of the input. By default its output syntax matches +that of its input.) + +Scheduling models are not just used to compute instruction latencies and +throughput, but also to understand what processor resources are available +and how to simulate them. + +By design, the quality of the analysis conducted by :program:`llvm-mca` is +inevitably affected by the quality of the scheduling models in LLVM. + +If you see that the performance report is not accurate for a processor, +please `file a bug `_ +against the appropriate backend. + +OPTIONS +------- + +If ``input`` is "``-``" or omitted, :program:`llvm-mca` reads from standard +input. Otherwise, it will read from the specified filename. + +If the :option:`-o` option is omitted, then :program:`llvm-mca` will send its output +to standard output if the input is from standard input. If the :option:`-o` +option specifies "``-``", then the output will also be sent to standard output. + + +.. option:: -help + + Print a summary of command line options. + +.. option:: -o + + Use ```` as the output filename. See the summary above for more + details. + +.. option:: -mtriple= + + Specify a target triple string. + +.. option:: -march= + + Specify the architecture for which to analyze the code. It defaults to the + host default target. + +.. option:: -mcpu= + + Specify the processor for which to analyze the code. By default, the cpu name + is autodetected from the host. + +.. option:: -output-asm-variant= + + Specify the output assembly variant for the report generated by the tool. + On x86, possible values are [0, 1]. A value of 0 (vic. 1) for this flag enables + the AT&T (vic. Intel) assembly format for the code printed out by the tool in + the analysis report. + +.. 
option:: -print-imm-hex + + Prefer hex format for numeric literals in the output assembly printed as part + of the report. + +.. option:: -dispatch= + + Specify a different dispatch width for the processor. The dispatch width + defaults to field 'IssueWidth' in the processor scheduling model. If width is + zero, then the default dispatch width is used. + +.. option:: -register-file-size= + + Specify the size of the register file. When specified, this flag limits how + many physical registers are available for register renaming purposes. A value + of zero for this flag means "unlimited number of physical registers". + +.. option:: -iterations= + + Specify the number of iterations to run. If this flag is set to 0, then the + tool sets the number of iterations to a default value (i.e. 100). + +.. option:: -noalias= + + If set, the tool assumes that loads and stores don't alias. This is the + default behavior. + +.. option:: -lqueue= + + Specify the size of the load queue in the load/store unit emulated by the tool. + By default, the tool assumes an unbound number of entries in the load queue. + A value of zero for this flag is ignored, and the default load queue size is + used instead. + +.. option:: -squeue= + + Specify the size of the store queue in the load/store unit emulated by the + tool. By default, the tool assumes an unbound number of entries in the store + queue. A value of zero for this flag is ignored, and the default store queue + size is used instead. + +.. option:: -timeline + + Enable the timeline view. + +.. option:: -timeline-max-iterations= + + Limit the number of iterations to print in the timeline view. By default, the + timeline view prints information for up to 10 iterations. + +.. option:: -timeline-max-cycles= + + Limit the number of cycles in the timeline view, or use 0 for no limit. By + default, the number of cycles is set to 80. + +.. option:: -resource-pressure + + Enable the resource pressure view. This is enabled by default. + +.. 
option:: -register-file-stats + + Enable register file usage statistics. + +.. option:: -dispatch-stats + + Enable extra dispatch statistics. This view collects and analyzes instruction + dispatch events, as well as static/dynamic dispatch stall events. This view + is disabled by default. + +.. option:: -scheduler-stats + + Enable extra scheduler statistics. This view collects and analyzes instruction + issue events. This view is disabled by default. + +.. option:: -retire-stats + + Enable extra retire control unit statistics. This view is disabled by default. + +.. option:: -instruction-info + + Enable the instruction info view. This is enabled by default. + +.. option:: -show-encoding + + Enable the printing of instruction encodings within the instruction info view. + +.. option:: -all-stats + + Print all hardware statistics. This enables extra statistics related to the + dispatch logic, the hardware schedulers, the register file(s), and the retire + control unit. This option is disabled by default. + +.. option:: -all-views + + Enable all the views. + +.. option:: -instruction-tables + + Prints resource pressure information based on the static information + available from the processor model. This differs from the resource pressure + view because it doesn't require that the code is simulated. It instead prints + the theoretical uniform distribution of resource pressure for every + instruction in sequence. + +.. option:: -bottleneck-analysis + + Print information about bottlenecks that affect the throughput. This analysis + can be expensive, and it is disabled by default. Bottlenecks are highlighted + in the summary view. Bottleneck analysis is currently not supported for + processors with an in-order backend. + +.. option:: -json + + Print the requested views in valid JSON format. The instructions and the + processor resources are printed as members of special top level JSON objects. + The individual views refer to them by index. 
However, not all views are + currently supported. For example, the report from the bottleneck analysis is + not printed out in JSON. All the default views are currently supported. + +.. option:: -disable-cb + + Force usage of the generic CustomBehaviour and InstrPostProcess classes rather + than using the target specific implementation. The generic classes never + detect any custom hazards or make any post processing modifications to + instructions. + + +EXIT STATUS +----------- + +:program:`llvm-mca` returns 0 on success. Otherwise, an error message is printed +to standard error, and the tool returns 1. + +USING MARKERS TO ANALYZE SPECIFIC CODE BLOCKS +--------------------------------------------- +:program:`llvm-mca` allows for the optional usage of special code comments to +mark regions of the assembly code to be analyzed. A comment starting with +substring ``LLVM-MCA-BEGIN`` marks the beginning of a code region. A comment +starting with substring ``LLVM-MCA-END`` marks the end of a code region. For +example: + +.. code-block:: none + + # LLVM-MCA-BEGIN + ... + # LLVM-MCA-END + +If no user-defined region is specified, then :program:`llvm-mca` assumes a +default region which contains every instruction in the input file. Every region +is analyzed in isolation, and the final performance report is the union of all +the reports generated for every code region. + +Code regions can have names. For example: + +.. code-block:: none + + # LLVM-MCA-BEGIN A simple example + add %eax, %eax + # LLVM-MCA-END + +The code from the example above defines a region named "A simple example" with a +single instruction in it. Note how the region name doesn't have to be repeated +in the ``LLVM-MCA-END`` directive. In the absence of overlapping regions, +an anonymous ``LLVM-MCA-END`` directive always ends the currently active user +defined region. + +Example of nesting regions: + +.. 
code-block:: none + + # LLVM-MCA-BEGIN foo + add %eax, %edx + # LLVM-MCA-BEGIN bar + sub %eax, %edx + # LLVM-MCA-END bar + # LLVM-MCA-END foo + +Example of overlapping regions: + +.. code-block:: none + + # LLVM-MCA-BEGIN foo + add %eax, %edx + # LLVM-MCA-BEGIN bar + sub %eax, %edx + # LLVM-MCA-END foo + add %eax, %edx + # LLVM-MCA-END bar + +Note that multiple anonymous regions cannot overlap. Also, overlapping regions +cannot have the same name. + +There is no support for marking regions from high-level source code, like C or +C++. As a workaround, inline assembly directives may be used: + +.. code-block:: c++ + + int foo(int a, int b) { + __asm volatile("# LLVM-MCA-BEGIN foo"); + a += 42; + __asm volatile("# LLVM-MCA-END"); + a *= b; + return a; + } + +However, this interferes with optimizations like loop vectorization and may have +an impact on the code generated. This is because the ``__asm`` statements are +seen as real code having important side effects, which limits how the code +around them can be transformed. If users want to make use of inline assembly +to emit markers, then the recommendation is to always verify that the output +assembly is equivalent to the assembly generated in the absence of markers. +The `Clang options to emit optimization reports <https://clang.llvm.org/docs/UsersManual.html#options-to-emit-optimization-reports>`_ +can also help in detecting missed optimizations. + +HOW LLVM-MCA WORKS +------------------ + +:program:`llvm-mca` takes assembly code as input. The assembly code is parsed +into a sequence of MCInst with the help of the existing LLVM target assembly +parsers. The parsed sequence of MCInst is then analyzed by a ``Pipeline`` module +to generate a performance report. + +The Pipeline module simulates the execution of the machine code sequence in a +loop of iterations (default is 100). During this process, the pipeline collects +a number of execution related statistics. At the end of this process, the +pipeline generates and prints a report from the collected statistics. 
+ +Here is an example of a performance report generated by the tool for a +dot-product of two packed float vectors of four elements. The analysis is +conducted for target x86, cpu btver2. The following result can be produced via +the following command using the example located at +``test/tools/llvm-mca/X86/BtVer2/dot-product.s``: + +.. code-block:: bash + + $ llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=300 dot-product.s + +.. code-block:: none + + Iterations: 300 + Instructions: 900 + Total Cycles: 610 + Total uOps: 900 + + Dispatch Width: 2 + uOps Per Cycle: 1.48 + IPC: 1.48 + Block RThroughput: 2.0 + + + Instruction Info: + [1]: #uOps + [2]: Latency + [3]: RThroughput + [4]: MayLoad + [5]: MayStore + [6]: HasSideEffects (U) + + [1] [2] [3] [4] [5] [6] Instructions: + 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 + 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 + 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 + + + Resources: + [0] - JALU0 + [1] - JALU1 + [2] - JDiv + [3] - JFPA + [4] - JFPM + [5] - JFPU0 + [6] - JFPU1 + [7] - JLAGU + [8] - JMul + [9] - JSAGU + [10] - JSTC + [11] - JVALU0 + [12] - JVALU1 + [13] - JVIMUL + + + Resource pressure per iteration: + [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] + - - - 2.00 1.00 2.00 1.00 - - - - - - - + + Resource pressure by instruction: + [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: + - - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm1, %xmm2 + - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm2, %xmm2, %xmm3 + - - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4 + +According to this report, the dot-product kernel has been executed 300 times, +for a total of 900 simulated instructions. The total number of simulated micro +opcodes (uOps) is also 900. + +The report is structured in three main sections. The first section collects a +few performance numbers; the goal of this section is to give a very quick +overview of the performance throughput. 
Important performance indicators are +**IPC**, **uOps Per Cycle**, and **Block RThroughput** (Block Reciprocal +Throughput). + +Field *DispatchWidth* is the maximum number of micro opcodes that are dispatched +to the out-of-order backend every simulated cycle. For processors with an +in-order backend, *DispatchWidth* is the maximum number of micro opcodes issued +to the backend every simulated cycle. + +IPC is computed by dividing the total number of simulated instructions by the total +number of cycles. + +Field *Block RThroughput* is the reciprocal of the block throughput. Block +throughput is a theoretical quantity computed as the maximum number of blocks +(i.e. iterations) that can be executed per simulated clock cycle in the absence +of loop-carried dependencies. Block throughput is bounded from above by the +dispatch rate, and the availability of hardware resources. + +In the absence of loop-carried data dependencies, the observed IPC tends to a +theoretical maximum which can be computed by dividing the number of instructions +of a single iteration by the `Block RThroughput`. + +Field 'uOps Per Cycle' is computed by dividing the total number of simulated micro +opcodes by the total number of cycles. A delta between Dispatch Width and this +field is an indicator of a performance issue. In the absence of loop-carried +data dependencies, the observed 'uOps Per Cycle' should tend to a theoretical +maximum throughput which can be computed by dividing the number of uOps of a +single iteration by the `Block RThroughput`. + +Field *uOps Per Cycle* is bounded from above by the dispatch width. That is +because the dispatch width limits the maximum size of a dispatch group. Both IPC +and 'uOps Per Cycle' are limited by the amount of hardware parallelism. The +availability of hardware resources affects the resource pressure distribution, +and it limits the number of instructions that can be executed in parallel every +cycle. 
A delta between Dispatch Width and the theoretical maximum uOps per +Cycle (computed by dividing the number of uOps of a single iteration by the +`Block RThroughput`) is an indicator of a performance bottleneck caused by the +lack of hardware resources. +In general, the lower the Block RThroughput, the better. + +In this example, ``uOps per iteration/Block RThroughput`` is 1.50. Since there +are no loop-carried dependencies, the observed `uOps Per Cycle` is expected to +approach 1.50 when the number of iterations tends to infinity. The delta between +the Dispatch Width (2.00), and the theoretical maximum uOp throughput (1.50) is +an indicator of a performance bottleneck caused by the lack of hardware +resources, and the *Resource pressure view* can help to identify the problematic +resource usage. + +The second section of the report is the `instruction info view`. It shows the +latency and reciprocal throughput of every instruction in the sequence. It also +reports extra information related to the number of micro opcodes, and opcode +properties (i.e., 'MayLoad', 'MayStore', and 'HasSideEffects'). + +Field *RThroughput* is the reciprocal of the instruction throughput. Throughput +is computed as the maximum number of instructions of a same type that can be +executed per clock cycle in the absence of operand dependencies. In this +example, the reciprocal throughput of a vector float multiply is 1 +cycles/instruction. That is because the FP multiplier JFPM is only available +from pipeline JFPU1. + +Instruction encodings are displayed within the instruction info view when flag +`-show-encoding` is specified. + +Below is an example of `-show-encoding` output for the dot-product kernel: + +.. 
code-block:: none + + Instruction Info: + [1]: #uOps + [2]: Latency + [3]: RThroughput + [4]: MayLoad + [5]: MayStore + [6]: HasSideEffects (U) + [7]: Encoding Size + + [1] [2] [3] [4] [5] [6] [7] Encodings: Instructions: + 1 2 1.00 4 c5 f0 59 d0 vmulps %xmm0, %xmm1, %xmm2 + 1 4 1.00 4 c5 eb 7c da vhaddps %xmm2, %xmm2, %xmm3 + 1 4 1.00 4 c5 e3 7c e3 vhaddps %xmm3, %xmm3, %xmm4 + +The `Encoding Size` column shows the size in bytes of instructions. The +`Encodings` column shows the actual instruction encodings (byte sequences in +hex). + +The third section is the *Resource pressure view*. This view reports +the average number of resource cycles consumed every iteration by instructions +for every processor resource unit available on the target. Information is +structured in two tables. The first table reports the number of resource cycles +spent on average every iteration. The second table correlates the resource +cycles to the machine instruction in the sequence. For example, every iteration +of the instruction vmulps always executes on resource unit [6] +(JFPU1 - floating point pipeline #1), consuming an average of 1 resource cycle +per iteration. Note that on AMD Jaguar, vector floating-point multiply can +only be issued to pipeline JFPU1, while horizontal floating-point additions can +only be issued to pipeline JFPU0. + +The resource pressure view helps with identifying bottlenecks caused by high +usage of specific hardware resources. Situations with resource pressure mainly +concentrated on a few resources should, in general, be avoided. Ideally, +pressure should be uniformly distributed between multiple resources. + +Timeline View +^^^^^^^^^^^^^ +The timeline view produces a detailed report of each instruction's state +transitions through an instruction pipeline. This view is enabled by the +command line option ``-timeline``. As instructions transition through the +various stages of the pipeline, their states are depicted in the view report. 
+These states are represented by the following characters: + +* D : Instruction dispatched. +* e : Instruction executing. +* E : Instruction executed. +* R : Instruction retired. +* = : Instruction already dispatched, waiting to be executed. +* \- : Instruction executed, waiting to be retired. + +Below is the timeline view for a subset of the dot-product example located in +``test/tools/llvm-mca/X86/BtVer2/dot-product.s`` and processed by +:program:`llvm-mca` using the following command: + +.. code-block:: bash + + $ llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=3 -timeline dot-product.s + +.. code-block:: none + + Timeline view: + 012345 + Index 0123456789 + + [0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2 + [0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3 + [0,2] .D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 + [1,0] .DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 + [1,1] . D=eeeE---R . vhaddps %xmm2, %xmm2, %xmm3 + [1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 + [2,0] . DeeE-----R . vmulps %xmm0, %xmm1, %xmm2 + [2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3 + [2,2] . D======eeeER vhaddps %xmm3, %xmm3, %xmm4 + + + Average Wait times (based on the timeline view): + [0]: Executions + [1]: Average time spent waiting in a scheduler's queue + [2]: Average time spent waiting in a scheduler's queue while ready + [3]: Average time elapsed from WB until retire stage + + [0] [1] [2] [3] + 0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2 + 1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3 + 2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 + 3 3.3 0.5 1.4 + +The timeline view is interesting because it shows instruction state changes +during execution. It also gives an idea of how the tool processes instructions +executed on the target, and how their timing information might be calculated. + +The timeline view is structured in two tables. 
The first table shows +instructions changing state over time (measured in cycles); the second table +(named *Average Wait times*) reports useful timing statistics, which should +help diagnose performance bottlenecks caused by long data dependencies and +sub-optimal usage of hardware resources. + +An instruction in the timeline view is identified by a pair of indices, where +the first index identifies an iteration, and the second index is the +instruction index (i.e., where it appears in the code sequence). Since this +example was generated using 3 iterations: ``-iterations=3``, the iteration +indices range from 0-2 inclusively. + +Excluding the first and last column, the remaining columns are in cycles. +Cycles are numbered sequentially starting from 0. + +From the example output above, we know the following: + +* Instruction [1,0] was dispatched at cycle 1. +* Instruction [1,0] started executing at cycle 2. +* Instruction [1,0] reached the write back stage at cycle 4. +* Instruction [1,0] was retired at cycle 10. + +Instruction [1,0] (i.e., vmulps from iteration #1) does not have to wait in the +scheduler's queue for the operands to become available. By the time vmulps is +dispatched, operands are already available, and pipeline JFPU1 is ready to +serve another instruction. So the instruction can be immediately issued on the +JFPU1 pipeline. That is demonstrated by the fact that the instruction only +spent 1cy in the scheduler's queue. + +There is a gap of 5 cycles between the write-back stage and the retire event. +That is because instructions must retire in program order, so [1,0] has to wait +for [0,2] to be retired first (i.e., it has to wait until cycle 10). + +In the example, all instructions are in a RAW (Read After Write) dependency +chain. Register %xmm2 written by vmulps is immediately used by the first +vhaddps, and register %xmm3 written by the first vhaddps is used by the second +vhaddps. 
Long data dependencies negatively impact the ILP (Instruction Level +Parallelism). + +In the dot-product example, there are anti-dependencies introduced by +instructions from different iterations. However, those dependencies can be +removed at register renaming stage (at the cost of allocating register aliases, +and therefore consuming physical registers). + +Table *Average Wait times* helps diagnose performance issues that are caused by +the presence of long latency instructions and potentially long data dependencies +which may limit the ILP. Last row, ``<total>``, shows a global average over all +instructions measured. Note that :program:`llvm-mca`, by default, assumes at +least 1cy between the dispatch event and the issue event. + +When the performance is limited by data dependencies and/or long latency +instructions, the number of cycles spent while in the *ready* state is expected +to be very small when compared with the total number of cycles spent in the +scheduler's queue. The difference between the two counters is a good indicator +of how large of an impact data dependencies had on the execution of the +instructions. When performance is mostly limited by the lack of hardware +resources, the delta between the two counters is small. However, the number of +cycles spent in the queue tends to be larger (i.e., more than 1-3cy), +especially when compared to other low latency instructions. + +Bottleneck Analysis +^^^^^^^^^^^^^^^^^^^ +The ``-bottleneck-analysis`` command line option enables the analysis of +performance bottlenecks. + +This analysis is potentially expensive. It attempts to correlate increases in +backend pressure (caused by pipeline resource pressure and data dependencies) to +dynamic dispatch stalls. + +Below is an example of ``-bottleneck-analysis`` output generated by +:program:`llvm-mca` for 500 iterations of the dot-product example on btver2. + +.. 
code-block:: none + + + Cycles with backend pressure increase [ 48.07% ] + Throughput Bottlenecks: + Resource Pressure [ 47.77% ] + - JFPA [ 47.77% ] + - JFPU0 [ 47.77% ] + Data Dependencies: [ 0.30% ] + - Register Dependencies [ 0.30% ] + - Memory Dependencies [ 0.00% ] + + Critical sequence based on the simulation: + + Instruction Dependency Information + +----< 2. vhaddps %xmm3, %xmm3, %xmm4 + | + | < loop carried > + | + | 0. vmulps %xmm0, %xmm1, %xmm2 + +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 74% ] + +----> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3 + | + | < loop carried > + | + +----> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 74% ] + + +According to the analysis, throughput is limited by resource pressure and not by +data dependencies. The analysis observed increases in backend pressure during +48.07% of the simulated run. Almost all those pressure increase events were +caused by contention on processor resources JFPA/JFPU0. + +The `critical sequence` is the most expensive sequence of instructions according +to the simulation. It is annotated to provide extra information about critical +register dependencies and resource interferences between instructions. + +Instructions from the critical sequence are expected to significantly impact +performance. By construction, the accuracy of this analysis is strongly +dependent on the simulation and (as always) by the quality of the processor +model in llvm. + +Bottleneck analysis is currently not supported for processors with an in-order +backend. + +Extra Statistics to Further Diagnose Performance Issues +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``-all-stats`` command line option enables extra statistics and performance +counters for the dispatch logic, the reorder buffer, the retire control unit, +and the register file. 
+ +Below is an example of ``-all-stats`` output generated by :program:`llvm-mca` +for 300 iterations of the dot-product example discussed in the previous +sections. + +.. code-block:: none + + Dynamic Dispatch Stall Cycles: + RAT - Register unavailable: 0 + RCU - Retire tokens unavailable: 0 + SCHEDQ - Scheduler full: 272 (44.6%) + LQ - Load queue full: 0 + SQ - Store queue full: 0 + GROUP - Static restrictions on the dispatch group: 0 + + + Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: + [# dispatched], [# cycles] + 0, 24 (3.9%) + 1, 272 (44.6%) + 2, 314 (51.5%) + + + Schedulers - number of cycles where we saw N micro opcodes issued: + [# issued], [# cycles] + 0, 7 (1.1%) + 1, 306 (50.2%) + 2, 297 (48.7%) + + Scheduler's queue usage: + [1] Resource name. + [2] Average number of used buffer entries. + [3] Maximum number of used buffer entries. + [4] Total number of buffer entries. + + [1] [2] [3] [4] + JALU01 0 0 20 + JFPU01 17 18 18 + JLSAGU 0 0 12 + + + Retire Control Unit - number of cycles where we saw N instructions retired: + [# retired], [# cycles] + 0, 109 (17.9%) + 1, 102 (16.7%) + 2, 399 (65.4%) + + Total ROB Entries: 64 + Max Used ROB Entries: 35 ( 54.7% ) + Average Used ROB Entries per cy: 32 ( 50.0% ) + + + Register File statistics: + Total number of mappings created: 900 + Max number of mappings used: 35 + + * Register File #1 -- JFpuPRF: + Number of physical registers: 72 + Total number of mappings created: 900 + Max number of mappings used: 35 + + * Register File #2 -- JIntegerPRF: + Number of physical registers: 64 + Total number of mappings created: 0 + Max number of mappings used: 0 + +If we look at the *Dynamic Dispatch Stall Cycles* table, we see the counter for +SCHEDQ reports 272 cycles. This counter is incremented every time the dispatch +logic is unable to dispatch a full group because the scheduler's queue is full. 
+ +Looking at the *Dispatch Logic* table, we see that the pipeline was only able to +dispatch two micro opcodes 51.5% of the time. The dispatch group was limited to +one micro opcode 44.6% of the cycles, which corresponds to 272 cycles. The +dispatch statistics are displayed by either using the command option +``-all-stats`` or ``-dispatch-stats``. + +The next table, *Schedulers*, presents a histogram displaying a count, +representing the number of micro opcodes issued on some number of cycles. In +this case, of the 610 simulated cycles, single opcodes were issued 306 times +(50.2%) and there were 7 cycles where no opcodes were issued. + +The *Scheduler's queue usage* table shows the average and maximum number of +buffer entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01 +reached its maximum (18 of 18 queue entries). Note that AMD Jaguar implements +three schedulers: + +* JALU01 - A scheduler for ALU instructions. +* JFPU01 - A scheduler for floating point operations. +* JLSAGU - A scheduler for address generation. + +The dot-product is a kernel of three floating point instructions (a vector +multiply followed by two horizontal adds). That explains why only the floating +point scheduler appears to be used. + +A full scheduler queue is either caused by data dependency chains or by a +sub-optimal usage of hardware resources. Sometimes, resource pressure can be +mitigated by rewriting the kernel using different instructions that consume +different scheduler resources. Schedulers with a small queue are less resilient +to bottlenecks caused by the presence of long data dependencies. The scheduler +statistics are displayed by using the command option ``-all-stats`` or +``-scheduler-stats``. + +The next table, *Retire Control Unit*, presents a histogram displaying a count, +representing the number of instructions retired on some number of cycles. 
In +this case, of the 610 simulated cycles, two instructions were retired during the +same cycle 399 times (65.4%) and there were 109 cycles where no instructions +were retired. The retire statistics are displayed by using the command option +``-all-stats`` or ``-retire-stats``. + +The last table presented is *Register File statistics*. Each physical register +file (PRF) used by the pipeline is presented in this table. In the case of AMD +Jaguar, there are two register files, one for floating-point registers (JFpuPRF) +and one for integer registers (JIntegerPRF). The table shows that of the 900 +instructions processed, there were 900 mappings created. Since this dot-product +example utilized only floating point registers, the JFpuPRF was responsible for +creating the 900 mappings. However, we see that the pipeline only used a +maximum of 35 of 72 available register slots at any given time. We can conclude +that the floating point PRF was the only register file used for the example, and +that it was never resource constrained. The register file statistics are +displayed by using the command option ``-all-stats`` or +``-register-file-stats``. + +In this example, we can conclude that the IPC is mostly limited by data +dependencies, and not by resource pressure. + +Instruction Flow +^^^^^^^^^^^^^^^^ +This section describes the instruction flow through the default pipeline of +:program:`llvm-mca`, as well as the functional units involved in the process. + +The default pipeline implements the following sequence of stages used to +process instructions. + +* Dispatch (Instruction is dispatched to the schedulers). +* Issue (Instruction is issued to the processor pipelines). +* Write Back (Instruction is executed, and results are written back). +* Retire (Instruction is retired; writes are architecturally committed). + +The in-order pipeline implements the following sequence of stages: + +* InOrderIssue (Instruction is issued to the processor pipelines). 
+* Retire (Instruction is retired; writes are architecturally committed). + +:program:`llvm-mca` assumes that instructions have all been decoded and placed +into a queue before the simulation starts. Therefore, the instruction fetch and +decode stages are not modeled. Performance bottlenecks in the frontend are not +diagnosed. Also, :program:`llvm-mca` does not model branch prediction. + +Instruction Dispatch +"""""""""""""""""""" +During the dispatch stage, instructions are picked in program order from a +queue of already decoded instructions, and dispatched in groups to the +simulated hardware schedulers. + +The size of a dispatch group depends on the availability of the simulated +hardware resources. The processor dispatch width defaults to the value +of the ``IssueWidth`` in LLVM's scheduling model. + +An instruction can be dispatched if: + +* The size of the dispatch group is smaller than the processor's dispatch width. +* There are enough entries in the reorder buffer. +* There are enough physical registers to do register renaming. +* The schedulers are not full. + +Scheduling models can optionally specify which register files are available on +the processor. :program:`llvm-mca` uses that information to initialize register +file descriptors. Users can limit the number of physical registers that are +globally available for register renaming by using the command option +``-register-file-size``. A value of zero for this option means *unbounded*. By +knowing how many registers are available for renaming, the tool can predict +dispatch stalls caused by the lack of physical registers. + +The number of reorder buffer entries consumed by an instruction depends on the +number of micro-opcodes specified for that instruction by the target scheduling +model. The reorder buffer is responsible for tracking the progress of +instructions that are "in-flight", and retiring them in program order. 
The +number of entries in the reorder buffer defaults to the value specified by field +`MicroOpBufferSize` in the target scheduling model. + +Instructions that are dispatched to the schedulers consume scheduler buffer +entries. :program:`llvm-mca` queries the scheduling model to determine the set +of buffered resources consumed by an instruction. Buffered resources are +treated like scheduler resources. + +Instruction Issue +""""""""""""""""" +Each processor scheduler implements a buffer of instructions. An instruction +has to wait in the scheduler's buffer until input register operands become +available. Only at that point does the instruction become eligible for +execution and may be issued (potentially out-of-order) for execution. +Instruction latencies are computed by :program:`llvm-mca` with the help of the +scheduling model. + +:program:`llvm-mca`'s scheduler is designed to simulate multiple processor +schedulers. The scheduler is responsible for tracking data dependencies, and +dynamically selecting which processor resources are consumed by instructions. +It delegates the management of processor resource units and resource groups to a +resource manager. The resource manager is responsible for selecting resource +units that are consumed by instructions. For example, if an instruction +consumes 1cy of a resource group, the resource manager selects one of the +available units from the group; by default, the resource manager uses a +round-robin selector to guarantee that resource usage is uniformly distributed +between all units of a group. + +:program:`llvm-mca`'s scheduler internally groups instructions into three sets: + +* WaitSet: a set of instructions whose operands are not ready. +* ReadySet: a set of instructions ready to execute. +* IssuedSet: a set of instructions executing. + +Depending on operand availability, instructions that are dispatched to the +scheduler are either placed into the WaitSet or into the ReadySet. 
+ +Every cycle, the scheduler checks if instructions can be moved from the WaitSet +to the ReadySet, and if instructions from the ReadySet can be issued to the +underlying pipelines. The algorithm prioritizes older instructions over younger +instructions. + +Write-Back and Retire Stage +""""""""""""""""""""""""""" +Issued instructions are moved from the ReadySet to the IssuedSet. There, +instructions wait until they reach the write-back stage. At that point, they +get removed from the queue and the retire control unit is notified. + +When instructions are executed, the retire control unit flags the instruction as +"ready to retire." + +Instructions are retired in program order. The register file is notified of the +retirement so that it can free the physical registers that were allocated for +the instruction during the register renaming stage. + +Load/Store Unit and Memory Consistency Model +"""""""""""""""""""""""""""""""""""""""""""" +To simulate an out-of-order execution of memory operations, :program:`llvm-mca` +utilizes a simulated load/store unit (LSUnit) to simulate the speculative +execution of loads and stores. + +Each load (or store) consumes an entry in the load (or store) queue. Users can +specify flags ``-lqueue`` and ``-squeue`` to limit the number of entries in the +load and store queues respectively. The queues are unbounded by default. + +The LSUnit implements a relaxed consistency model for memory loads and stores. +The rules are: + +1. A younger load is allowed to pass an older load only if there are no + intervening stores or barriers between the two loads. +2. A younger load is allowed to pass an older store provided that the load does + not alias with the store. +3. A younger store is not allowed to pass an older store. +4. A younger store is not allowed to pass an older load. + +By default, the LSUnit optimistically assumes that loads do not alias +(`-noalias=true`) store operations. 
Under this assumption, younger loads are +always allowed to pass older stores. Essentially, the LSUnit does not attempt +to run any alias analysis to predict when loads and stores do not alias with +each other. + +Note that, in the case of write-combining memory, rule 3 could be relaxed to +allow reordering of non-aliasing store operations. That being said, at the +moment, there is no way to further relax the memory model (``-noalias`` is the +only option). Essentially, there is no option to specify a different memory +type (e.g., write-back, write-combining, write-through; etc.) and consequently +to weaken, or strengthen, the memory model. + +Other limitations are: + +* The LSUnit does not know when store-to-load forwarding may occur. +* The LSUnit does not know anything about cache hierarchy and memory types. +* The LSUnit does not know how to identify serializing operations and memory + fences. + +The LSUnit does not attempt to predict if a load or store hits or misses the L1 +cache. It only knows if an instruction "MayLoad" and/or "MayStore." For +loads, the scheduling model provides an "optimistic" load-to-use latency (which +usually matches the load-to-use latency for when there is a hit in the L1D). + +:program:`llvm-mca` does not know about serializing operations or memory-barrier +like instructions. The LSUnit conservatively assumes that an instruction which +has both "MayLoad" and unmodeled side effects behaves like a "soft" +load-barrier. That means, it serializes loads without forcing a flush of the +load queue. Similarly, instructions that "MayStore" and have unmodeled side +effects are treated like store barriers. A full memory barrier is a "MayLoad" +and "MayStore" instruction with unmodeled side effects. This is inaccurate, but +it is the best that we can do at the moment with the current information +available in LLVM. + +A load/store barrier consumes one entry of the load/store queue. A load/store +barrier enforces ordering of loads/stores. 
A younger load cannot pass a load +barrier. Also, a younger store cannot pass a store barrier. A younger load +has to wait for the memory/load barrier to execute. A load/store barrier is +"executed" when it becomes the oldest entry in the load/store queue(s). That +also means, by construction, all of the older loads/stores have been executed. + +In conclusion, the full set of load/store consistency rules are: + +#. A store may not pass a previous store. +#. A store may not pass a previous load (regardless of ``-noalias``). +#. A store has to wait until an older store barrier is fully executed. +#. A load may pass a previous load. +#. A load may not pass a previous store unless ``-noalias`` is set. +#. A load has to wait until an older load barrier is fully executed. + +In-order Issue and Execute +"""""""""""""""""""""""""""""""""""" +In-order processors are modelled as a single ``InOrderIssueStage`` stage. It +bypasses Dispatch, Scheduler and Load/Store unit. Instructions are issued as +soon as their operand registers are available and resource requirements are +met. Multiple instructions can be issued in one cycle according to the value of +the ``IssueWidth`` parameter in LLVM's scheduling model. + +Once issued, an instruction is moved to ``IssuedInst`` set until it is ready to +retire. :program:`llvm-mca` ensures that writes are committed in-order. However, +an instruction is allowed to commit writes and retire out-of-order if +``RetireOOO`` property is true for at least one of its writes. + +Custom Behaviour +"""""""""""""""""""""""""""""""""""" +Due to certain instructions not being expressed perfectly within their +scheduling model, :program:`llvm-mca` isn't always able to simulate them +perfectly. Modifying the scheduling model isn't always a viable +option though (maybe because the instruction is modeled incorrectly on +purpose or the instruction's behaviour is quite complex). 
The +CustomBehaviour class can be used in these cases to enforce proper +instruction modeling (often by customizing data dependencies and detecting +hazards that :program:`llvm-mca` has no way of knowing about). + +:program:`llvm-mca` comes with one generic and multiple target specific +CustomBehaviour classes. The generic class will be used if the ``-disable-cb`` +flag is used or if a target specific CustomBehaviour class doesn't exist for +that target. (The generic class does nothing.) Currently, the CustomBehaviour +class is only a part of the in-order pipeline, but there are plans to add it +to the out-of-order pipeline in the future. + +CustomBehaviour's main method is `checkCustomHazard()` which uses the +current instruction and a list of all instructions still executing within +the pipeline to determine if the current instruction should be dispatched. +As output, the method returns an integer representing the number of cycles +that the current instruction must stall for (this can be an underestimate +if you don't know the exact number and a value of 0 represents no stall). + +If you'd like to add a CustomBehaviour class for a target that doesn't +already have one, refer to an existing implementation to see how to set it +up. The classes are implemented within the target specific backend (for +example `/llvm/lib/Target/AMDGPU/MCA/`) so that they can access backend symbols. + +Custom Views +"""""""""""""""""""""""""""""""""""" +:program:`llvm-mca` comes with several Views such as the Timeline View and +Summary View. These Views are generic and can work with most (if not all) +targets. If you wish to add a new View to :program:`llvm-mca` and it does not +require any backend functionality that is not already exposed through MC layer +classes (MCSubtargetInfo, MCInstrInfo, etc.), please add it to the +`/tools/llvm-mca/View/` directory. 
However, if your new View is target specific +AND requires unexposed backend symbols or functionality, you can define it in +the `/lib/Target/<arch>/MCA/` directory. + +To enable this target specific View, you will have to use this target's +CustomBehaviour class to override the `CustomBehaviour::getViews()` methods. +There are 3 variations of these methods based on where you want your View to +appear in the output: `getStartViews()`, `getPostInstrInfoViews()`, and +`getEndViews()`. These methods return a vector of Views so you will want to +return a vector containing all of the target specific Views for the target in +question. + +Because these target specific (and backend dependent) Views require the +`CustomBehaviour::getViews()` variants, these Views will not be enabled if +the `-disable-cb` flag is used. + +Enabling these custom Views does not affect the non-custom (generic) Views. +Continue to use the usual command line arguments to enable / disable those +Views. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-nm.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-nm.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-nm.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-nm.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,285 @@ +llvm-nm - list LLVM bitcode and object file's symbol table +========================================================== + +.. program:: llvm-nm + +SYNOPSIS +-------- + +:program:`llvm-nm` [*options*] [*filenames...*] + +DESCRIPTION +----------- + +The :program:`llvm-nm` utility lists the names of symbols from LLVM bitcode +files, object files, and archives. Each symbol is listed along with some simple +information about its provenance. If no filename is specified, *a.out* is used +as the input. 
If *-* is used as a filename, :program:`llvm-nm` will read a file +from its standard input stream. + +:program:`llvm-nm`'s default output format is the traditional BSD :program:`nm` +output format. Each such output record consists of an (optional) 8-digit +hexadecimal address, followed by a type code character, followed by a name, for +each symbol. One record is printed per line; fields are separated by spaces. +When the address is omitted, it is replaced by 8 spaces. + +The supported type code characters are as follows. Where both lower and +upper-case characters are listed for the same meaning, a lower-case character +represents a local symbol, whilst an upper-case character represents a global +(external) symbol: + +a, A + + Absolute symbol. + +b, B + + Uninitialized data (bss) object. + +C + + Common symbol. Multiple definitions link together into one definition. + +d, D + + Writable data object. + +i, I + + COFF: .idata symbol or symbol in a section with IMAGE_SCN_LNK_INFO set. + +n + + ELF: local symbol from non-alloc section. + + COFF: debug symbol. + +N + + ELF: debug section symbol, or global symbol from non-alloc section. + +s, S + + COFF: section symbol. + + Mach-O: absolute symbol or symbol from a section other than __TEXT_EXEC __text, + __TEXT __text, __DATA __data, or __DATA __bss. + +r, R + + Read-only data object. + +t, T + + Code (text) object. + +u + + ELF: GNU unique symbol. + +U + + Named object is undefined in this file. + +v + + ELF: Undefined weak object. It is not a link failure if the object is not + defined. + +V + + ELF: Defined weak object symbol. This definition will only be used if no + regular definitions exist in a link. If multiple weak definitions and no + regular definitions exist, one of the weak definitions will be used. + +w + + Undefined weak symbol other than an ELF object symbol. It is not a link failure + if the symbol is not defined. + +W + + Defined weak symbol other than an ELF object symbol. 
This definition will only + be used if no regular definitions exist in a link. If multiple weak definitions + and no regular definitions exist, one of the weak definitions will be used. + +\- + + Mach-O: N_STAB symbol. + +? + + Something unrecognizable. + +Because LLVM bitcode files typically contain objects that are not considered to +have addresses until they are linked into an executable image or dynamically +compiled "just-in-time", :program:`llvm-nm` does not print an address for any +symbol in an LLVM bitcode file, even symbols which are defined in the bitcode +file. + +OPTIONS +------- + +.. program:: llvm-nm + +.. option:: -B + + Use BSD output format. Alias for ``--format=bsd``. + +.. option:: --debug-syms, -a + + Show all symbols, even those usually suppressed. + +.. option:: --defined-only + + Print only symbols defined in this file. + +.. option:: --demangle, -C + + Demangle symbol names. + +.. option:: --dynamic, -D + + Display dynamic symbols instead of normal symbols. + +.. option:: --extern-only, -g + + Print only symbols whose definitions are external; that is, accessible from + other files. + +.. option:: --format=<format>, -f + + Select an output format; *format* may be *sysv*, *posix*, *darwin*, *bsd* or + *just-symbols*. + The default is *bsd*. + +.. option:: --help, -h + + Print a summary of command-line options and their meanings. + +.. option:: -j + + Print just the symbol names. Alias for ``--format=just-symbols``. + +.. option:: -m + + Use Darwin format. Alias for ``--format=darwin``. + +.. option:: --no-demangle + + Don't demangle symbol names. This is the default. + +.. option:: --no-llvm-bc + + Disable the LLVM bitcode reader. + +.. option:: --no-sort, -p + + Show symbols in the order encountered. + +.. option:: --no-weak + + Don't print weak symbols. + +.. option:: --numeric-sort, -n, -v + + Sort symbols by address. + +.. option:: --portability, -P + + Use POSIX.2 output format. Alias for ``--format=posix``. + +.. 
option:: --print-armap + + Print the archive symbol table, in addition to the symbols. + +.. option:: --print-file-name, -A, -o + + Precede each symbol with the file it came from. + +.. option:: --print-size, -S + + Show symbol size as well as address (not applicable for Mach-O). + +.. option:: --quiet + + Suppress 'no symbols' diagnostic. + +.. option:: --radix=, -t + + Specify the radix of the symbol address(es). Values accepted are *d* (decimal), + *x* (hexadecimal) and *o* (octal). + +.. option:: --reverse-sort, -r + + Sort symbols in reverse order. + +.. option:: --size-sort + + Sort symbols by size. + +.. option:: --special-syms + + Do not filter special symbols from the output. + +.. option:: --undefined-only, -u + + Print only undefined symbols. + +.. option:: --version, -V + + Display the version of the :program:`llvm-nm` executable, then exit. Does not + stack with other commands. + +.. option:: @ + + Read command-line options from response file ``. + +MACH-O SPECIFIC OPTIONS +----------------------- + +.. option:: --add-dyldinfo + + Add symbols from the dyldinfo, if they are not already in the symbol table. + This is the default. + +.. option:: --add-inlinedinfo + + Add symbols from the inlined libraries, TBD file inputs only. + +.. option:: --arch= + + Dump the symbols from the specified architecture(s). + +.. option:: --dyldinfo-only + + Dump only symbols from the dyldinfo. + +.. option:: --no-dyldinfo + + Do not add any symbols from the dyldinfo. + +.. option:: -s
+ + Dump only symbols from this segment and section name. + +.. option:: -x + + Print symbol entry in hex. + +BUGS +---- + + * :program:`llvm-nm` does not support the full set of arguments that GNU + :program:`nm` does. + +EXIT STATUS +----------- + +:program:`llvm-nm` exits with an exit code of zero. + +SEE ALSO +-------- + +:manpage:`llvm-ar(1)`, :manpage:`llvm-objdump(1)`, :manpage:`llvm-readelf(1)`, +:manpage:`llvm-readobj(1)` diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-objcopy.rst.txt llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-objcopy.rst.txt --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_sources/CommandGuide/llvm-objcopy.rst.txt 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_sources/CommandGuide/llvm-objcopy.rst.txt 2021-07-19 12:17:50.000000000 +0000 @@ -0,0 +1,549 @@ +llvm-objcopy - object copying and editing tool +============================================== + +.. program:: llvm-objcopy + +SYNOPSIS +-------- + +:program:`llvm-objcopy` [*options*] *input* [*output*] + +DESCRIPTION +----------- + +:program:`llvm-objcopy` is a tool to copy and manipulate objects. In basic +usage, it makes a semantic copy of the input to the output. If any options are +specified, the output may be modified along the way, e.g. by removing sections. + +If no output file is specified, the input file is modified in-place. If "-" is +specified for the input file, the input is read from the program's standard +input stream. If "-" is specified for the output file, the output is written to +the standard output stream of the program. + +If the input is an archive, any requested operations will be applied to each +archive member individually. + +The tool is still in active development, but in most scenarios it works as a +drop-in replacement for GNU's :program:`objcopy`. 
+ +GENERIC AND CROSS-PLATFORM OPTIONS +---------------------------------- + +The following options are either agnostic of the file format, or apply to +multiple file formats. + +.. option:: --add-gnu-debuglink + + Add a .gnu_debuglink section for ```` to the output. + +.. option:: --add-section + + Add a section named ``
`` with the contents of ``<file>`` to the + output. For ELF objects the section will be of type `SHT_NOTE`, if the name + starts with ".note". Otherwise, it will have type `SHT_PROGBITS`. Can be + specified multiple times to add multiple sections. + + For MachO objects, ``
`` must be formatted as + ``,
``. + +.. option:: --binary-architecture , -B + + Ignored for compatibility. + +.. option:: --disable-deterministic-archives, -U + + Use real values for UIDs, GIDs and timestamps when updating archive member + headers. + +.. option:: --discard-all, -x + + Remove most local symbols from the output. Different file formats may limit + this to a subset of the local symbols. For example, file and section symbols in + ELF objects will not be discarded. Additionally, remove all debug sections. + +.. option:: --dump-section
= + + Dump the contents of section ``
`` into the file ``<file>``. Can be + specified multiple times to dump multiple sections to different files. + ``<file>`` is unrelated to the input and output files provided to + :program:`llvm-objcopy` and as such the normal copying and editing + operations will still be performed. No operations are performed on the sections + prior to dumping them. + + For MachO objects, ``
`` must be formatted as + ``,
``. + +.. option:: --enable-deterministic-archives, -D + + Enable deterministic mode when copying archives, i.e. use 0 for archive member + header UIDs, GIDs and timestamp fields. On by default. + +.. option:: --help, -h + + Print a summary of command line options. + +.. option:: --only-keep-debug + + Produce a debug file as the output that only preserves contents of sections + useful for debugging purposes. + + For ELF objects, this removes the contents of `SHF_ALLOC` sections that are not + `SHT_NOTE` by making them `SHT_NOBITS` and shrinking the program headers where + possible. + +.. option:: --only-section
, -j + + Remove all sections from the output, except for sections named ``
``. + Can be specified multiple times to keep multiple sections. + + For MachO objects, ``
`` must be formatted as + ``,
``. + +.. option:: --redefine-sym = + + Rename symbols called ```` to ```` in the output. Can be specified + multiple times to rename multiple symbols. + +.. option:: --redefine-syms + + Rename symbols in the output as described in the file ````. In the + file, each line represents a single symbol to rename, with the old name and new + name separated by whitespace. Leading and trailing whitespace is ignored, as is + anything following a '#'. Can be specified multiple times to read names from + multiple files. + +.. option:: --regex + + If specified, symbol and section names specified by other switches are treated + as extended POSIX regular expression patterns. + +.. option:: --remove-section
, -R + + Remove the specified section from the output. Can be specified multiple times + to remove multiple sections simultaneously. + + For MachO objects, ``
`` must be formatted as + ``,
``. + +.. option:: --set-section-alignment
= + + Set the alignment of section ``
`` to ``<align>``. Can be specified + multiple times to update multiple sections. + +.. option:: --set-section-flags
=[,,...] + + Set section properties in the output of section ``
`` based on the + specified ```` values. Can be specified multiple times to update multiple + sections. + + Supported flag names are `alloc`, `load`, `noload`, `readonly`, `exclude`, + `debug`, `code`, `data`, `rom`, `share`, `contents`, `merge` and `strings`. Not + all flags are meaningful for all object file formats. + + For ELF objects, the flags have the following effects: + + - `alloc` = add the `SHF_ALLOC` flag. + - `load` = if the section has `SHT_NOBITS` type, mark it as a `SHT_PROGBITS` + section. + - `readonly` = if this flag is not specified, add the `SHF_WRITE` flag. + - `exclude` = add the `SHF_EXCLUDE` flag. + - `code` = add the `SHF_EXECINSTR` flag. + - `merge` = add the `SHF_MERGE` flag. + - `strings` = add the `SHF_STRINGS` flag. + - `contents` = if the section has `SHT_NOBITS` type, mark it as a `SHT_PROGBITS` + section. + + For COFF objects, the flags have the following effects: + + - `alloc` = add the `IMAGE_SCN_CNT_UNINITIALIZED_DATA` and `IMAGE_SCN_MEM_READ` + flags, unless the `load` flag is specified. + - `noload` = add the `IMAGE_SCN_LNK_REMOVE` and `IMAGE_SCN_MEM_READ` flags. + - `readonly` = if this flag is not specified, add the `IMAGE_SCN_MEM_WRITE` + flag. + - `exclude` = add the `IMAGE_SCN_LNK_REMOVE` and `IMAGE_SCN_MEM_READ` flags. + - `debug` = add the `IMAGE_SCN_CNT_INITIALIZED_DATA`, + `IMAGE_SCN_MEM_DISCARDABLE` and `IMAGE_SCN_MEM_READ` flags. + - `code` = add the `IMAGE_SCN_CNT_CODE`, `IMAGE_SCN_MEM_EXECUTE` and + `IMAGE_SCN_MEM_READ` flags. + - `data` = add the `IMAGE_SCN_CNT_INITIALIZED_DATA` and `IMAGE_SCN_MEM_READ` + flags. + - `share` = add the `IMAGE_SCN_MEM_SHARED` and `IMAGE_SCN_MEM_READ` flags. + +.. option:: --strip-all-gnu + + Remove all symbols, debug sections and relocations from the output. This option + is equivalent to GNU :program:`objcopy`'s ``--strip-all`` switch. + +.. 
option:: --strip-all, -S + + For ELF objects, remove from the output all symbols and non-alloc sections not + within segments, except for .gnu.warning, .ARM.attribute sections and the + section name table. + + For COFF and Mach-O objects, remove all symbols, debug sections, and + relocations from the output. + +.. option:: --strip-debug, -g + + Remove all debug sections from the output. + +.. option:: --strip-symbol , -N + + Remove all symbols named ```` from the output. Can be specified + multiple times to remove multiple symbols. + +.. option:: --strip-symbols + + Remove all symbols whose names appear in the file ````, from the + output. In the file, each line represents a single symbol name, with leading + and trailing whitespace ignored, as is anything following a '#'. Can be + specified multiple times to read names from multiple files. + +.. option:: --strip-unneeded-symbol + + Remove from the output all symbols named ```` that are local or + undefined and are not required by any relocation. + +.. option:: --strip-unneeded-symbols + + Remove all symbols whose names appear in the file ````, from the + output, if they are local or undefined and are not required by any relocation. + In the file, each line represents a single symbol name, with leading and + trailing whitespace ignored, as is anything following a '#'. Can be specified + multiple times to read names from multiple files. + +.. option:: --strip-unneeded + + Remove from the output all local or undefined symbols that are not required by + relocations. Also remove all debug sections. + +.. option:: --version, -V + + Display the version of the :program:`llvm-objcopy` executable. + +.. option:: --wildcard, -w + + Allow wildcard syntax for symbol-related flags. On by default for + section-related flags. Incompatible with --regex. 
+ + Wildcard syntax allows the following special symbols: + + ====================== ========================= ================== + Character Meaning Equivalent + ====================== ========================= ================== + ``*`` Any number of characters ``.*`` + ``?`` Any single character ``.`` + ``\`` Escape the next character ``\`` + ``[a-z]`` Character class ``[a-z]`` + ``[!a-z]``, ``[^a-z]`` Negated character class ``[^a-z]`` + ====================== ========================= ================== + + Additionally, starting a wildcard with '!' will prevent a match, even if + another flag matches. For example ``-w -N '*' -N '!x'`` will strip all symbols + except for ``x``. + + The order of wildcards does not matter. For example, ``-w -N '*' -N '!x'`` is + the same as ``-w -N '!x' -N '*'``. + +.. option:: @ + + Read command-line options and commands from response file ``. + +ELF-SPECIFIC OPTIONS +-------------------- + +The following options are implemented only for ELF objects. If used with other +objects, :program:`llvm-objcopy` will either emit an error or silently ignore +them. + +.. option:: --add-symbol =[
:][,] + + Add a new symbol called ```` to the output symbol table, in the section + named ``
``, with value ``<value>``. If ``
`` is not specified, + the symbol is added as an absolute symbol. The ```` affect the symbol + properties. Accepted values are: + + - `global` = the symbol will have global binding. + - `local` = the symbol will have local binding. + - `weak` = the symbol will have weak binding. + - `default` = the symbol will have default visibility. + - `hidden` = the symbol will have hidden visibility. + - `protected` = the symbol will have protected visibility. + - `file` = the symbol will be an `STT_FILE` symbol. + - `section` = the symbol will be an `STT_SECTION` symbol. + - `object` = the symbol will be an `STT_OBJECT` symbol. + - `function` = the symbol will be an `STT_FUNC` symbol. + - `indirect-function` = the symbol will be an `STT_GNU_IFUNC` symbol. + + Additionally, the following flags are accepted but ignored: `debug`, + `constructor`, `warning`, `indirect`, `synthetic`, `unique-object`, `before`. + + Can be specified multiple times to add multiple symbols. + +.. option:: --allow-broken-links + + Allow :program:`llvm-objcopy` to remove sections even if it would leave invalid + section references. Any invalid sh_link fields will be set to zero. + +.. option:: --change-start , --adjust-start + + Add ```` to the program's start address. Can be specified multiple + times, in which case the values will be applied cumulatively. + +.. option:: --compress-debug-sections [ + + + + + + + + +
+
+
+
+ +
+

Speculative Load Hardening

+
+

A Spectre Variant #1 Mitigation Technique

+

Author: Chandler Carruth - chandlerc@google.com

+
+
+

Problem Statement

+

Recently, Google Project Zero and other researchers have found information leak +vulnerabilities by exploiting speculative execution in modern CPUs. These +exploits are currently broken down into three variants:

+
    +
  • GPZ Variant #1 (a.k.a. Spectre Variant #1): Bounds check (or predicate) bypass

  • +
  • GPZ Variant #2 (a.k.a. Spectre Variant #2): Branch target injection

  • +
  • GPZ Variant #3 (a.k.a. Meltdown): Rogue data cache load

  • +
+

For more details, see the Google Project Zero blog post and the Spectre research +paper:

+
    +
  • https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html

  • +
  • https://spectreattack.com/spectre.pdf

  • +
+

The core problem of GPZ Variant #1 is that speculative execution uses branch +prediction to select the path of instructions speculatively executed. This path +is speculatively executed with the available data, and may load from memory and +leak the loaded values through various side channels that survive even when the +speculative execution is unwound due to being incorrect. Mispredicted paths can +cause code to be executed with data inputs that never occur in correct +executions, making checks against malicious inputs ineffective and allowing +attackers to use malicious data inputs to leak secret data. Here is an example, +extracted and simplified from the Project Zero paper:

+
struct array {
+  unsigned long length;
+  unsigned char data[];
+};
+struct array *arr1 = ...; // small array
+struct array *arr2 = ...; // array of size 0x400
+unsigned long untrusted_offset_from_caller = ...;
+if (untrusted_offset_from_caller < arr1->length) {
+  unsigned char value = arr1->data[untrusted_offset_from_caller];
+  unsigned long index2 = ((value&1)*0x100)+0x200;
+  unsigned char value2 = arr2->data[index2];
+}
+
+
+

The key of the attack is to call this with untrusted_offset_from_caller that +is far outside of the bounds when the branch predictor will predict that it +will be in-bounds. In that case, the body of the if will be executed +speculatively, and may read secret data into value and leak it via a +cache-timing side channel when a dependent access is made to populate value2.

+
+
+

High Level Mitigation Approach

+

While several approaches are being actively pursued to mitigate specific +branches and/or loads inside especially risky software (most notably various OS +kernels), these approaches require manual and/or static analysis aided auditing +of code and explicit source changes to apply the mitigation. They are unlikely +to scale well to large applications. We are proposing a comprehensive +mitigation approach that would apply automatically across an entire program +rather than through manual changes to the code. While this is likely to have a +high performance cost, some applications may be in a good position to take this +performance / security tradeoff.

+

The specific technique we propose is to cause loads to be checked using +branchless code to ensure that they are executing along a valid control flow +path. Consider the following C-pseudo-code representing the core idea of a +predicate guarding potentially invalid loads:

+
void leak(int data);
+void example(int* pointer1, int* pointer2) {
+  if (condition) {
+    // ... lots of code ...
+    leak(*pointer1);
+  } else {
+    // ... more code ...
+    leak(*pointer2);
+  }
+}
+
+
+

This would get transformed into something resembling the following:

+
uintptr_t all_ones_mask = std::numeric_limits<uintptr_t>::max();
+uintptr_t all_zeros_mask = 0;
+void leak(int data);
+void example(int* pointer1, int* pointer2) {
+  uintptr_t predicate_state = all_ones_mask;
+  if (condition) {
+    // Assuming ?: is implemented using branchless logic...
+    predicate_state = !condition ? all_zeros_mask : predicate_state;
+    // ... lots of code ...
+    //
+    // Harden the pointer so it can't be loaded
+    pointer1 &= predicate_state;
+    leak(*pointer1);
+  } else {
+    predicate_state = condition ? all_zeros_mask : predicate_state;
+    // ... more code ...
+    //
+    // Alternative: Harden the loaded value
+    int value2 = *pointer2 & predicate_state;
+    leak(value2);
+  }
+}
+
+
+

The result should be that if the if (condition) { branch is mis-predicted, +there is a data dependency on the condition used to zero out any pointers +prior to loading through them or to zero out all of the loaded bits. Even +though this code pattern may still execute speculatively, invalid speculative +executions are prevented from leaking secret data from memory (but note that +this data might still be loaded in safe ways, and some regions of memory are +required to not hold secrets, see below for detailed limitations). This +approach only requires the underlying hardware have a way to implement a +branchless and unpredicted conditional update of a register’s value. All modern +architectures have support for this, and in fact such support is necessary to +correctly implement constant time cryptographic primitives.

+

Crucial properties of this approach:

+
    +
  • It is not preventing any particular side-channel from working. This is +important as there are an unknown number of potential side channels and we +expect to continue discovering more. Instead, it prevents the observation of +secret data in the first place.

  • +
  • It accumulates the predicate state, protecting even in the face of nested +correctly predicted control flows.

  • +
  • It passes this predicate state across function boundaries to provide +interprocedural protection.

  • +
  • When hardening the address of a load, it uses a destructive or +non-reversible modification of the address to prevent an attacker from +reversing the check using attacker-controlled inputs.

  • +
  • It does not completely block speculative execution, and merely prevents +mis-speculated paths from leaking secrets from memory (and stalls +speculation until this can be determined).

  • +
  • It is completely general and makes no fundamental assumptions about the +underlying architecture other than the ability to do branchless conditional +data updates and a lack of value prediction.

  • +
  • It does not require programmers to identify all possible secret data using +static source code annotations or code vulnerable to a variant #1 style +attack.

  • +
+

Limitations of this approach:

+
    +
  • It requires re-compiling source code to insert hardening instruction +sequences. Only software compiled in this mode is protected.

  • +
  • The performance is heavily dependent on a particular architecture’s +implementation strategy. We outline a potential x86 implementation below and +characterize its performance.

  • +
  • It does not defend against secret data already loaded from memory and +residing in registers or leaked through other side-channels in +non-speculative execution. Code dealing with this, e.g., cryptographic +routines, already uses constant-time algorithms and code to prevent +side-channels. Such code should also scrub registers of secret data following +these +guidelines.

  • +
  • To achieve reasonable performance, many loads may not be checked, such as +those with compile-time fixed addresses. This primarily consists of accesses +at compile-time constant offsets of global and local variables. Code which +needs this protection and intentionally stores secret data must ensure the +memory regions used for secret data are necessarily dynamic mappings or heap +allocations. This is an area which can be tuned to provide more comprehensive +protection at the cost of performance.

  • +
  • Hardened loads may still load data from +valid addresses, just not from attacker-controlled addresses. To prevent these +from reading secret data, the low 2 GB of the address space and the 2 GB above and +below any executable pages should be protected.

  • +
+

Credit:

+
    +
  • The core idea of tracing misspeculation through data and marking pointers to +block misspeculated loads was developed as part of a HACS 2018 discussion +between Chandler Carruth, Paul Kocher, Thomas Pornin, and several other +individuals.

  • +
  • Core idea of masking out loaded bits was part of the original mitigation +suggested by Jann Horn when these attacks were reported.

  • +
+
+

Indirect Branches, Calls, and Returns

+

It is possible to attack control flow other than conditional branches with +variant #1 style mispredictions.

+
    +
  • A prediction towards a hot call target of a virtual method can lead to it +being speculatively executed when an unexpected type is used (often called +“type confusion”).

  • +
  • A hot case may be speculatively executed due to prediction instead of the +correct case for a switch statement implemented as a jump table.

  • +
  • A hot common return address may be predicted incorrectly when returning from +a function.

  • +
+

These code patterns are also vulnerable to Spectre variant #2, and as such are +best mitigated with a +retpoline on x86 platforms. +When a mitigation technique like retpoline is used, speculation simply cannot +proceed through an indirect control flow edge (or it cannot be mispredicted in +the case of a filled RSB) and so it is also protected from variant #1 style +attacks. However, some architectures, micro-architectures, or vendors do not +employ the retpoline mitigation, and on future x86 hardware (both Intel and +AMD) it is expected to become unnecessary due to hardware-based mitigation.

+

When not using a retpoline, these edges will need independent protection from +variant #1 style attacks. The analogous approach to that used for conditional +control flow should work:

+
uintptr_t all_ones_mask = std::numeric_limits<uintptr_t>::max();
+uintptr_t all_zeros_mask = 0;
+void leak(int data);
+void example(int* pointer1, int* pointer2) {
+  uintptr_t predicate_state = all_ones_mask;
+  switch (condition) {
+  case 0:
+    // Assuming ?: is implemented using branchless logic...
+    predicate_state = (condition != 0) ? all_zeros_mask : predicate_state;
+    // ... lots of code ...
+    //
+    // Harden the pointer so it can't be loaded
+    pointer1 &= predicate_state;
+    leak(*pointer1);
+    break;
+
+  case 1:
+    predicate_state = (condition != 1) ? all_zeros_mask : predicate_state;
+    // ... more code ...
+    //
+    // Alternative: Harden the loaded value
+    int value2 = *pointer2 & predicate_state;
+    leak(value2);
+    break;
+
+    // ...
+  }
+}
+
+
+

The core idea remains the same: validate the control flow using data-flow and +use that validation to check that loads cannot leak information along +misspeculated paths. Typically this involves passing the desired target of such +control flow across the edge and checking that it is correct afterwards. Note +that while it is tempting to think that this mitigates variant #2 attacks, it +does not. Those attacks go to arbitrary gadgets that don’t include the checks.

+
+
+

Variant #1.1 and #1.2 attacks: “Bounds Check Bypass Store”

+

Beyond the core variant #1 attack, there are techniques to extend this attack. +The primary technique is known as “Bounds Check Bypass Store” and is discussed +in this research paper: https://people.csail.mit.edu/vlk/spectre11.pdf

+

We will analyze these two variants independently. First, variant #1.1 works by +speculatively storing over the return address after a bounds check bypass. This +speculative store then ends up being used by the CPU during speculative +execution of the return, potentially directing speculative execution to +arbitrary gadgets in the binary. Let’s look at an example.

+
unsigned char local_buffer[4];
+unsigned char *untrusted_data_from_caller = ...;
+unsigned long untrusted_size_from_caller = ...;
+if (untrusted_size_from_caller < sizeof(local_buffer)) {
+  // Speculative execution enters here with a too-large size.
+  memcpy(local_buffer, untrusted_data_from_caller,
+         untrusted_size_from_caller);
+  // The stack has now been smashed, writing an attacker-controlled
+  // address over the return address.
+  minor_processing(local_buffer);
+  return;
+  // Control will speculate to the attacker-written address.
+}
+
+
+

However, this can be mitigated by hardening the load of the return address just +like any other load. This is sometimes complicated because x86, for example, +implicitly loads the return address off the stack. However, the +implementation technique below is specifically designed to mitigate this +implicit load by using the stack pointer to communicate misspeculation between +functions. This additionally causes a misspeculation to have an invalid stack +pointer and never be able to read the speculatively stored return address. See +the detailed discussion below.

+

For variant #1.2, the attacker speculatively stores into the vtable or jump +table used to implement an indirect call or indirect jump. Because this is +speculative, this will often be possible even when these are stored in +read-only pages. For example:

+
class FancyObject : public BaseObject {
+public:
+  void DoSomething() override;
+};
+void f(unsigned long attacker_offset, unsigned long attacker_data) {
+  FancyObject object = getMyObject();
+  unsigned long *arr[4] = getFourDataPointers();
+  if (attacker_offset < 4) {
+    // We have bypassed the bounds check speculatively.
+    unsigned long *data = arr[attacker_offset];
+    // Now we have computed a pointer inside of `object`, the vptr.
+    *data = attacker_data;
+    // The vptr points to the virtual table and we speculatively clobber that.
+    g(object); // Hand the object to some other routine.
+  }
+}
+// In another file, we call a method on the object.
+void g(BaseObject &object) {
+  object.DoSomething();
+  // This speculatively calls the address stored over the vtable.
+}
+
+
+

Mitigating this requires hardening loads from these locations, or mitigating +the indirect call or indirect jump. Any of these are sufficient to block the +call or jump from using a speculatively stored value that has been read back.

+

For both of these, using retpolines would be equally sufficient. One possible +hybrid approach is to use retpolines for indirect call and jump, while relying +on SLH to mitigate returns.

+

Another approach that is sufficient for both of these is to harden all of the +speculative stores. However, as most stores aren’t interesting and don’t +inherently leak data, this is expected to be prohibitively expensive given the +attack it is defending against.

+
+
+
+

Implementation Details

+

There are a number of complex details impacting the implementation of this +technique, both on a particular architecture and within a particular compiler. +We discuss proposed implementation techniques for the x86 architecture and the +LLVM compiler. These are primarily to serve as an example, as other +implementation techniques are very possible.

+
+

x86 Implementation Details

+

On the x86 platform we break down the implementation into three core +components: accumulating the predicate state through the control flow graph, +checking the loads, and checking control transfers between procedures.

+
+

Accumulating Predicate State

+

Consider baseline x86 instructions like the following, which test three +conditions and if all pass, loads data from memory and potentially leaks it +through some side channel:

+
# %bb.0:                                # %entry
+        pushq   %rax
+        testl   %edi, %edi
+        jne     .LBB0_4
+# %bb.1:                                # %then1
+        testl   %esi, %esi
+        jne     .LBB0_4
+# %bb.2:                                # %then2
+        testl   %edx, %edx
+        je      .LBB0_3
+.LBB0_4:                                # %exit
+        popq    %rax
+        retq
+.LBB0_3:                                # %danger
+        movl    (%rcx), %edi
+        callq   leak
+        popq    %rax
+        retq
+
+
+

When we go to speculatively execute the load, we want to know whether any of +the dynamically executed predicates have been misspeculated. To track that, +along each conditional edge, we need to track the data which would allow that +edge to be taken. On x86, this data is stored in the flags register used by the +conditional jump instruction. Along both edges after this fork in control flow, +the flags register remains alive and contains data that we can use to build up +our accumulated predicate state. We accumulate it using the x86 conditional +move instruction which also reads the flag registers where the state resides. +These conditional move instructions are known to not be predicted on any x86 +processors, making them immune to misprediction that could reintroduce the +vulnerability. When we insert the conditional moves, the code ends up looking +like the following:

+
# %bb.0:                                # %entry
+        pushq   %rax
+        xorl    %eax, %eax              # Zero out initial predicate state.
+        movq    $-1, %r8                # Put all-ones mask into a register.
+        testl   %edi, %edi
+        jne     .LBB0_1
+# %bb.2:                                # %then1
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        testl   %esi, %esi
+        jne     .LBB0_1
+# %bb.3:                                # %then2
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        testl   %edx, %edx
+        je      .LBB0_4
+.LBB0_1:
+        cmoveq  %r8, %rax               # Conditionally update predicate state.
+        popq    %rax
+        retq
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        ...
+
+
+

Here we create the “empty” or “correct execution” predicate state by zeroing +%rax, and we create a constant “incorrect execution” predicate value by +putting -1 into %r8. Then, along each edge coming out of a conditional +branch we do a conditional move that in a correct execution will be a no-op, +but if misspeculated, will replace %rax with the value of %r8. +Misspeculating any one of the three predicates will cause %rax to hold the +“incorrect execution” value from %r8 as we preserve incoming values when +execution is correct rather than overwriting them.

+

We now have a value in %rax in each basic block that indicates if at some +point previously a predicate was mispredicted. And we have arranged for that +value to be particularly effective when used below to harden loads.

+
+
Indirect Call, Branch, and Return Predicates
+

There is no analogous flag to use when tracing indirect calls, branches, and +returns. The predicate state must be accumulated through some other means. +Fundamentally, this is the reverse of the problem posed in CFI: we need to +check where we came from rather than where we are going. For function-local +jump tables, this is easily arranged by testing the input to the jump table +within each destination (not yet implemented, use retpolines):

+
        pushq   %rax
+        xorl    %eax, %eax              # Zero out initial predicate state.
+        movq    $-1, %r8                # Put all-ones mask into a register.
+        jmpq    *.LJTI0_0(,%rdi,8)      # Indirect jump through table.
+.LBB0_2:                                # %sw.bb
+        testq   $0, %rdi                # Validate index used for jump table.
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        ...
+        jmp     _Z4leaki                # TAILCALL
+
+.LBB0_3:                                # %sw.bb1
+        testq   $1, %rdi                # Validate index used for jump table.
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        ...
+        jmp     _Z4leaki                # TAILCALL
+
+.LBB0_5:                                # %sw.bb10
+        testq   $2, %rdi                # Validate index used for jump table.
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        ...
+        jmp     _Z4leaki                # TAILCALL
+        ...
+
+        .section        .rodata,"a",@progbits
+        .p2align        3
+.LJTI0_0:
+        .quad   .LBB0_2
+        .quad   .LBB0_3
+        .quad   .LBB0_5
+        ...
+
+
+

Returns have a simple mitigation technique on x86-64 (or other ABIs which have +what is called a “red zone” region beyond the end of the stack). This region is +guaranteed to be preserved across interrupts and context switches, making the +return address used in returning to the current code remain on the stack and +valid to read. We can emit code in the caller to verify that a return edge was +not mispredicted:

+
        callq   other_function
+return_addr:
+        testq   -8(%rsp), return_addr   # Validate return address.
+        cmovneq %r8, %rax               # Update predicate state.
+
+
+

For an ABI without a “red zone” (and thus unable to read the return address +from the stack), we can compute the expected return address prior to the call +into a register preserved across the call and use that similarly to the above.

+

Indirect calls (and returns in the absence of a red zone ABI) pose the most +significant challenge to propagate. The simplest technique would be to define a +new ABI such that the intended call target is passed into the called function +and checked in the entry. Unfortunately, new ABIs are quite expensive to deploy +in C and C++. While the target function could be passed in TLS, we would still +require complex logic to handle a mixture of functions compiled with and +without this extra logic (essentially, making the ABI backwards compatible). +Currently, we suggest using retpolines here and will continue to investigate +ways of mitigating this.

+
+
+
Optimizations, Alternatives, and Tradeoffs
+

Merely accumulating predicate state involves significant cost. There are +several key optimizations we employ to minimize this and various alternatives +that present different tradeoffs in the generated code.

+

First, we work to reduce the number of instructions used to track the state:

+
    +
  • Rather than inserting a cmovCC instruction along every conditional edge in +the original program, we track each set of condition flags we need to capture +prior to entering each basic block and reuse a common cmovCC sequence for +those.

    +
      +
    • We could further reuse suffixes when there are multiple cmovCC +instructions required to capture the set of flags. Currently this is +believed to not be worth the cost as paired flags are relatively rare and +suffixes of them are exceedingly rare.

    • +
    +
  • +
  • A common pattern in x86 is to have multiple conditional jump instructions +that use the same flags but handle different conditions. Naively, we could +consider each fallthrough between them an “edge” but this causes a much more +complex control flow graph. Instead, we accumulate the set of conditions +necessary for fallthrough and use a sequence of cmovCC instructions in a +single fallthrough edge to track it.

  • +
+

Second, we trade register pressure for simpler cmovCC instructions by +allocating a register for the “bad” state. We could read that value from memory +as part of the conditional move instruction, however, this creates more +micro-ops and requires the load-store unit to be involved. Currently, we place +the value into a virtual register and allow the register allocator to decide +when the register pressure is sufficient to make it worth spilling to memory +and reloading.

+
+
+
+

Hardening Loads

+

Once we have the predicate accumulated into a special value for correct vs. +misspeculated, we need to apply this to loads in a way that ensures they do not +leak secret data. There are two primary techniques for this: we can either +harden the loaded value to prevent observation, or we can harden the address +itself to prevent the load from occurring. These have significantly different +performance tradeoffs.

+
+
Hardening loaded values
+

The most appealing way to harden loads is to mask out all of the bits loaded. +The key requirement is that for each bit loaded, along the misspeculated path +that bit is always fixed at either 0 or 1 regardless of the value of the bit +loaded. The most obvious implementation uses either an and instruction with +an all-zero mask along misspeculated paths and an all-one mask along correct +paths, or an or instruction with an all-one mask along misspeculated paths +and an all-zero mask along correct paths. Other options, such as multiplying +by zero or using multiple shift instructions, are less appealing. For reasons we +elaborate on below, we end up suggesting you use or with an all-ones mask, +making the x86 instruction sequence look like the following:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        movl    (%rsi), %edi            # Load potentially secret data from %rsi.
+        orl     %eax, %edi
+
+
+

Other useful patterns may be to fold the load into the or instruction itself +at the cost of a register-to-register copy.

+

There are some challenges with deploying this approach:

+
    +
  1. Many loads on x86 are folded into other instructions. Separating them would +add very significant and costly register pressure with prohibitive +performance cost.

  2. +
  3. Loads may not target a general purpose register requiring extra instructions +to map the state value into the correct register class, and potentially more +expensive instructions to mask the value in some way.

  4. +
  5. The flags registers on x86 are very likely to be live, and challenging to +preserve cheaply.

  6. +
  7. There are many more values loaded than pointers & indices used for loads. As +a consequence, hardening the result of a load requires substantially more +instructions than hardening the address of the load (see below).

  8. +
+

Despite these challenges, hardening the result of the load critically allows +the load to proceed and thus has dramatically less impact on the total +speculative / out-of-order potential of the execution. There are also several +interesting techniques to try and mitigate these challenges and make hardening +the results of loads viable in at least some cases. However, we generally +expect to fall back when unprofitable from hardening the loaded value to the +next approach of hardening the address itself.

+
+
Loads folded into data-invariant operations can be hardened after the operation
+

The first key to making this feasible is to recognize that many operations on +x86 are “data-invariant”. That is, they have no (known) observable behavior +differences due to the particular input data. These instructions are often used +when implementing cryptographic primitives dealing with private key data +because they are not believed to provide any side-channels. Similarly, we can +defer hardening until after them as they will not in-and-of-themselves +introduce a speculative execution side-channel. This results in code sequences +that look like:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        addl    (%rsi), %edi            # Load and accumulate without leaking.
+        orl     %eax, %edi
+
+
+

While an addition happens to the loaded (potentially secret) value, that +doesn’t leak any data and we then immediately harden it.

+
+
+
Hardening of loaded values deferred down the data-invariant expression graph
+

We can generalize the previous idea and sink the hardening down the expression +graph across as many data-invariant operations as desirable. This can use very +conservative rules for whether something is data-invariant. The primary goal +should be to handle multiple loads with a single hardening instruction:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        addl    (%rsi), %edi            # Load and accumulate without leaking.
+        addl    4(%rsi), %edi           # Continue without leaking.
+        addl    8(%rsi), %edi
+        orl     %eax, %edi              # Mask out bits from all three loads.
+
+
+
+
+
Preserving the flags while hardening loaded values on Haswell, Zen, and newer processors
+

Sadly, there are no useful instructions on x86 that apply a mask to all 64 bits +without touching the flag registers. However, we can harden loaded values that +are narrower than a word (fewer than 32-bits on 32-bit systems and fewer than +64-bits on 64-bit systems) by zero-extending the value to the full word size +and then shifting right by at least the number of original bits using the BMI2 +shrx instruction:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        addl    (%rsi), %edi            # Load and accumulate 32 bits of data.
+        shrxq   %rax, %rdi, %rdi        # Shift out all 32 bits loaded.
+
+
+

Because on x86 the zero-extend is free, this can efficiently harden the loaded +value.

+
+
+
+
Hardening the address of the load
+

When hardening the loaded value is inapplicable, most often because the +instruction directly leaks information (like cmp or jmpq), we switch to +hardening the address of the load instead of the loaded value. This avoids +increasing register pressure by unfolding the load or paying some other high +cost.

+

To understand how this works in practice, we need to examine the exact +semantics of the x86 addressing mode which, in its fully general form, looks +like offset(%base,%index,scale). Here %base and %index are 64-bit +registers that can potentially be any value, and may be attacker controlled, +and scale and offset are fixed immediate values. scale must be 1, 2, +4, or 8, and offset can be any 32-bit sign extended value. The exact +computation performed to find the address is then: %base + (scale * %index) + offset under 64-bit 2’s complement modular arithmetic.

+

One issue with this approach is that, after hardening, the %base + (scale * %index) subexpression will compute a value near zero (-1 + (scale * -1)) and +then a large, positive offset will index into memory within the first two +gigabytes of address space. While these offsets are not attacker controlled, +the attacker could choose to attack a load which happens to have the desired +offset and then successfully read memory in that region. This significantly +raises the burden on the attacker and limits the scope of attack but does not +eliminate it. To fully close the attack we must work with the operating system +to preclude mapping memory in the low two gigabytes of address space.

+
+
64-bit load checking instructions
+

We can use the following instruction sequences to check loads. We set up %r8 +in these examples to hold the special value of -1 which will be cmoved over +%rax in misspeculated paths.

+

Single register addressing mode:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        orq     %rax, %rsi              # Mask the pointer if misspeculating.
+        movl    (%rsi), %edi
+
+
+

Two register addressing mode:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        orq     %rax, %rsi              # Mask the pointer if misspeculating.
+        orq     %rax, %rcx              # Mask the index if misspeculating.
+        movl    (%rsi,%rcx), %edi
+
+
+

This will result in a negative address near zero or in offset wrapping the +address space back to a small positive address. Small, negative addresses will +fault in user-mode for most operating systems, but targets which need the high +address space to be user accessible may need to adjust the exact sequence used +above. Additionally, the low addresses will need to be marked unreadable by the +OS to fully harden the load.

+
+
+
RIP-relative addressing is even easier to break
+

There is a common addressing mode idiom that is substantially harder to check: +addressing relative to the instruction pointer. We cannot change the value of +the instruction pointer register and so we have the harder problem of forcing +%base + scale * %index + offset to be an invalid address, by only changing +%index. The only advantage we have is that the attacker also cannot modify +%base. If we use the fast instruction sequence above, but only apply it to +the index, we will always access %rip + (scale * -1) + offset. If the +attacker can find a load for which this address happens to point to secret +data, then they can reach it. However, the loader and base libraries can also +simply refuse to map the heap, data segments, or stack within 2 GB of any of the +text in the program, much like it can reserve the low 2 GB of address space.

+
+
+
The flag registers again make everything hard
+

Unfortunately, the technique of using orq-instructions has a serious flaw on +x86. The very thing that makes it easy to accumulate state, the flag registers +containing predicates, causes serious problems here because they may be alive +and used by the loading instruction or subsequent instructions. On x86, the +orq instruction sets the flags and will override anything already there. +This makes inserting them into the instruction stream very hazardous. +Unfortunately, unlike when hardening the loaded value, we have no fallback here +and so we must have a fully general approach available.

+

The first thing we must do when generating these sequences is try to analyze +the surrounding code to prove that the flags are not in fact alive or being +used. Typically, it has been set by some other instruction which just happens +to set the flags register (much like ours!) with no actual dependency. In those +cases, it is safe to directly insert these instructions. Alternatively we may +be able to move them earlier to avoid clobbering the used value.

+

However, this may ultimately be impossible. In that case, we need to preserve +the flags around these instructions:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        pushfq
+        orq     %rax, %rcx              # Mask the pointer if misspeculating.
+        orq     %rax, %rdx              # Mask the index if misspeculating.
+        popfq
+        movl    (%rcx,%rdx), %edi
+
+
+

Using the pushf and popf instructions saves the flags register around our +inserted code, but comes at a high cost. First, we must store the flags to the +stack and reload them. Second, this causes the stack pointer to be adjusted +dynamically, requiring a frame pointer be used for referring to temporaries +spilled to the stack, etc.

+

On newer x86 processors we can use the lahf and sahf instructions to save +all of the flags besides the overflow flag in a register rather than on the +stack. We can then use seto and add to save and restore the overflow flag +in a register. Combined, this will save and restore flags in the same manner as +above but using two registers rather than the stack. That is still very +expensive if slightly less expensive than pushf and popf in most cases.

+
+
+
A flag-less alternative on Haswell, Zen and newer processors
+

Starting with the BMI2 x86 instruction set extensions available on Haswell and +Zen processors, there is an instruction for shifting that does not set any +flags: shrx. We can use this and the lea instruction to implement analogous +code sequences to the above ones. However, these are still very marginally +slower, as there are fewer ports able to dispatch shift instructions in most +modern x86 processors than there are for or instructions.

+

Fast, single register addressing mode:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        shrxq   %rax, %rsi, %rsi        # Shift away bits if misspeculating.
+        movl    (%rsi), %edi
+
+
+

This will collapse the register to zero or one, and cause everything but the offset +in the addressing mode to be less than or equal to 9. This means the full +address can only be guaranteed to be less than (1 << 31) + 9. The OS may wish +to protect an extra page of the low address space to account for this.

+
+
+
+
Optimizations
+

A very large portion of the cost for this approach comes from checking loads in +this way, so it is important to work to optimize this. However, beyond making +the instruction sequences to apply the checks efficient (for example by +avoiding pushfq and popfq sequences), the only significant optimization is +to check fewer loads without introducing a vulnerability. We apply several +techniques to accomplish that.

+
+
Don’t check loads from compile-time constant stack offsets
+

We implement this optimization on x86 by skipping the checking of loads which +use a fixed frame pointer offset.

+

The result of this optimization is that patterns like reloading a spilled +register or accessing a global field don’t get checked. This is a very +significant performance win.

+
+
+
Don’t check dependent loads
+

A core part of why this mitigation strategy works is that it establishes a +data-flow check on the loaded address. However, this means that if the address +itself was already loaded using a checked load, there is no need to check a +dependent load provided it is within the same basic block as the checked load, +and therefore has no additional predicates guarding it. Consider code like the +following:

+
        ...
+
+.LBB0_4:                                # %danger
+        movq    (%rcx), %rdi
+        movl    (%rdi), %edx
+
+
+

This will get transformed into:

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        orq     %rax, %rcx              # Mask the pointer if misspeculating.
+        movq    (%rcx), %rdi            # Hardened load.
+        movl    (%rdi), %edx            # Unhardened load due to dependent addr.
+
+
+

This doesn’t check the load through %rdi as that pointer is dependent on a +checked load already.

+
+
+
Protect large, load-heavy blocks with a single lfence
+

It may be worth using a single lfence instruction at the start of a block +which begins with a (very) large number of loads that require independent +protection and which require hardening the address of the load. However, this +is unlikely to be profitable in practice. The latency hit of the hardening +would need to exceed that of an lfence when correctly speculatively +executed. But in that case, the lfence cost is a complete loss of speculative +execution (at a minimum). So far, the evidence we have of the performance cost +of using lfence indicates few if any hot code patterns where this trade off +would make sense.

+
+
+
Tempting optimizations that break the security model
+

Several optimizations were considered which didn’t pan out due to failure to +uphold the security model. One in particular is worth discussing as many others +will reduce to it.

+

We wondered whether only the first load in a basic block could be checked. If +the check works as intended, it forms an invalid pointer that doesn’t even +virtual-address translate in the hardware. It should fault very early on in its +processing. Maybe that would stop things in time for the misspeculated path to +fail to leak any secrets. This doesn’t end up working because the processor is +fundamentally out-of-order, even in its speculative domain. As a consequence, +the attacker could cause the initial address computation itself to stall and +allow an arbitrary number of unrelated loads (including attacked loads of +secret data) to pass through.

+
+
+
+
+

Interprocedural Checking

+

Modern x86 processors may speculate into called functions and out of functions +to their return address. As a consequence, we need a way to check loads that +occur after a misspeculated predicate but where the load and the misspeculated +predicate are in different functions. In essence, we need some interprocedural +generalization of the predicate state tracking. A primary challenge to passing +the predicate state between functions is that we would like to not require a +change to the ABI or calling convention in order to make this mitigation more +deployable, and further would like code mitigated in this way to be easily +mixed with code not mitigated in this way and without completely losing the +value of the mitigation.

+
+
Embed the predicate state into the high bit(s) of the stack pointer
+

We can use the same technique that allows hardening pointers to pass the +predicate state into and out of functions. The stack pointer is trivially +passed between functions and we can test for it having the high bits set to +detect when it has been marked due to misspeculation. The callsite instruction +sequence looks like (assuming a misspeculated state value of -1):

+
        ...
+
+.LBB0_4:                                # %danger
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        shlq    $47, %rax
+        orq     %rax, %rsp
+        callq   other_function
+        movq    %rsp, %rax
+        sarq    $63, %rax               # Sign extend the high bit to all bits.
+
+
+

This first puts the predicate state into the high bits of %rsp before calling +the function and then reads it back out of high bits of %rsp afterward. When +correctly executing (speculatively or not), these are all no-ops. When +misspeculating, the stack pointer will end up negative. We arrange for it to +remain a canonical address, but otherwise leave the low bits alone to allow +stack adjustments to proceed normally without disrupting this. Within the +called function, we can extract this predicate state and then reset it on +return:

+
other_function:
+        # prolog
+        callq   other_function
+        movq    %rsp, %rax
+        sarq    $63, %rax               # Sign extend the high bit to all bits.
+        # ...
+
+.LBB0_N:
+        cmovneq %r8, %rax               # Conditionally update predicate state.
+        shlq    $47, %rax
+        orq     %rax, %rsp
+        retq
+
+
+

This approach is effective when all code is mitigated in this fashion, and can +even survive very limited reaches into unmitigated code (the state will +round-trip in and back out of an unmitigated function, it just won’t be +updated). But it does have some limitations. There is a cost to merging the +state into %rsp and it doesn’t insulate mitigated code from misspeculation in +an unmitigated caller.

+

There is also an advantage to using this form of interprocedural mitigation: by +forming these invalid stack pointer addresses we can prevent speculative +returns from successfully reading speculatively written values to the actual +stack. This works first by forming a data-dependency between computing the +address of the return address on the stack and our predicate state. And even +when satisfied, if a misprediction causes the state to be poisoned the +resulting stack pointer will be invalid.

+
+
+
Rewrite API of internal functions to directly propagate predicate state
+

(Not yet implemented.)

+

We have the option with internal functions to directly adjust their API to +accept the predicate as an argument and return it. This is likely to be +marginally cheaper than embedding into %rsp for entering functions.

+
+
+
Use lfence to guard function transitions
+

An lfence instruction can be used to prevent subsequent loads from +speculatively executing until all prior mispredicted predicates have resolved. +We can use this broader barrier to speculative loads executing between +functions. We emit it in the entry block to handle calls, and prior to each +return. This approach also has the advantage of providing the strongest degree +of mitigation when mixed with unmitigated code by halting all misspeculation +entering a function which is mitigated, regardless of what occurred in the +caller. However, such a mixture is inherently more risky. Whether this kind of +mixture is a sufficient mitigation requires careful analysis.

+

Unfortunately, experimental results indicate that the performance overhead of +this approach is very high for certain patterns of code. A classic example is +any form of recursive evaluation engine. The hot, rapid call and return +sequences exhibit dramatic performance loss when mitigated with lfence. This +component alone can regress performance by 2x or more, making it an unpleasant +tradeoff even when only used in a mixture of code.

+
+
+
Use an internal TLS location to pass predicate state
+

We can define a special thread-local value to hold the predicate state between +functions. This avoids direct ABI implications by using a side channel between +callers and callees to communicate the predicate state. It also allows implicit +zero-initialization of the state, which allows non-checked code to be the first +code executed.

+

However, this requires a load from TLS in the entry block, a store to TLS +before every call and every ret, and a load from TLS after every call. As a +consequence it is expected to be substantially more expensive even than using +%rsp and potentially lfence within the function entry block.

+
+
+
Define a new ABI and/or calling convention
+

We could define a new ABI and/or calling convention to explicitly pass the +predicate state in and out of functions. This may be interesting if none of the +alternatives have adequate performance, but it makes deployment and adoption +dramatically more complex, and potentially infeasible.

+
+
+
+
+
+

High-Level Alternative Mitigation Strategies

+

There are completely different alternative approaches to mitigating variant 1 +attacks. Most +discussion so far focuses on mitigating +specific known attackable components in the Linux kernel (or other kernels) by +manually rewriting the code to contain an instruction sequence that is not +vulnerable. For x86 systems this is done by either injecting an lfence +instruction along the code path which would leak data if executed speculatively +or by rewriting memory accesses to have branch-less masking to a known safe +region. On Intel systems, lfence will prevent the speculative load of secret +data. +On AMD systems lfence is currently a no-op, but can be made +dispatch-serializing by setting an MSR, and thus preclude misspeculation of the +code path (mitigation G-2 + +V1-1).

+

However, this relies on finding and enumerating all possible points in code +which could be attacked to leak information. While in some cases static +analysis is effective at doing this at scale, in many cases it still relies on +human judgement to evaluate whether code might be vulnerable. Especially for +software systems which receive less detailed scrutiny but remain sensitive to +these attacks, this seems like an impractical security model. We need an +automatic and systematic mitigation strategy.

+
+

Automatic lfence on Conditional Edges

+

A natural way to scale up the existing hand-coded mitigations is simply to +inject an lfence instruction into both the target and fallthrough +destinations of every conditional branch. This ensures that no predicate or +bounds check can be bypassed speculatively. However, the performance overhead +of this approach is, simply put, catastrophic. Yet it remains the only truly +“secure by default” approach known prior to this effort and serves as the +baseline for performance.

+

One attempt to address the performance overhead of this and make it more +realistic to deploy is MSVC’s /Qspectre +switch. +Their technique is to use static analysis within the compiler to only insert +lfence instructions into conditional edges at risk of attack. However, +initial +analysis +has shown that this approach is incomplete and only catches a small and limited +subset of attackable patterns which happen to resemble very closely the initial +proofs of concept. As such, while its performance is acceptable, it does not +appear to be an adequate systematic mitigation.

+
+
+
+

Performance Overhead

+

The performance overhead of this style of comprehensive mitigation is very +high. However, it compares very favorably with previously recommended +approaches such as the lfence instruction. Just as users can restrict the +scope of lfence to control its performance impact, this mitigation technique +could be restricted in scope as well.

+

However, it is important to understand what it would cost to get a fully +mitigated baseline. Here we assume targeting a Haswell (or newer) processor and +using all of the tricks to improve performance (so leaves the low 2gb +unprotected and +/- 2gb surrounding any PC in the program). We ran both +Google’s microbenchmark suite and a large highly-tuned server built using +ThinLTO and PGO. All were built with -march=haswell to give access to BMI2 +instructions, and benchmarks were run on large Haswell servers. We collected +data both with an lfence-based mitigation and load hardening as presented +here. The summary is that mitigating with load hardening is 1.77x faster than +mitigating with lfence, and the overhead of load hardening compared to a +normal program is likely between a 10% overhead and a 50% overhead with most +large applications seeing a 30% overhead or less.

+

| Benchmark | lfence | Load Hardening | Mitigated Speedup | +| -------------------------------------- | -------: | -------------: | ----------------: | +| Google microbenchmark suite | -74.8% | -36.4% | 2.5x | +| Large server QPS (using ThinLTO & PGO) | -62% | -29% | 1.8x |

+

Below is a visualization of the microbenchmark suite results which helps show +the distribution of results that is somewhat lost in the summary. The y-axis is +a log-scale speedup ratio of load hardening relative to lfence (up -> faster +-> better). Each box-and-whiskers represents one microbenchmark which may have +many different metrics measured. The red line marks the median, the box marks +the first and third quartiles, and the whiskers mark the min and max.

+

Microbenchmark result visualization

+

We don’t yet have benchmark data on SPEC or the LLVM test suite, but we can +work on getting that. Still, the above should give a pretty clear +characterization of the performance, and specific benchmarks are unlikely to +reveal especially interesting properties.

+
+

Future Work: Fine Grained Control and API-Integration

+

The performance overhead of this technique is likely to be very significant and +something users wish to control or reduce. There are interesting options here +that impact the implementation strategy used.

+

One particularly appealing option is to allow both opt-in and opt-out of this +mitigation at reasonably fine granularity such as on a per-function basis, +including intelligent handling of inlining decisions – protected code can be +prevented from inlining into unprotected code, and unprotected code will become +protected when inlined into protected code. For systems where only a limited +set of code is reachable by externally controlled inputs, it may be possible to +limit the scope of mitigation through such mechanisms without compromising the +application’s overall security. The performance impact may also be focused in a +few key functions that can be hand-mitigated in ways that have lower +performance overhead while the remainder of the application receives automatic +protection.

+

For both limiting the scope of mitigation or manually mitigating hot functions, +there needs to be some support for mixing mitigated and unmitigated code +without completely defeating the mitigation. For the first use case, it would +be particularly desirable that mitigated code remains safe when being called +during misspeculation from unmitigated code.

+

For the second use case, it may be important to connect the automatic +mitigation technique to explicit mitigation APIs such as what is described in +http://wg21.link/p0928 (or any other eventual API) so that there is a clean way +to switch from automatic to manual mitigation without immediately exposing a +hole. However, the design for how to do this is hard to come up with until the +APIs are better established. We will revisit this as those APIs mature.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SphinxQuickstartTemplate.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SphinxQuickstartTemplate.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SphinxQuickstartTemplate.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SphinxQuickstartTemplate.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,316 @@ + + + + + + + + + Sphinx Quickstart Template — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Sphinx Quickstart Template

+

This article is intended to take someone in the state of “I want to write documentation and get it added to LLVM’s docs” and help them start writing documentation as fast as possible and with as little nonsense as possible.

+ +
+

Overview

+

LLVM documentation is written in reStructuredText, a markup syntax similar to markdown (but much more powerful). The LLVM documentation site itself uses Sphinx, a documentation generator originally written for Python documentation.

+
+
+

How to use this template

+

This article is located in docs/SphinxQuickstartTemplate.rst. To use it as a template, make a copy and open it in a text editor. You can then write your docs, and then send the new article to llvm-commits for review.

+

To view the reStructuredText source file for this article, click Show Source on the right sidebar.

+
+
+

Authoring Guidelines

+

Focus on content. It is easy to fix the Sphinx (reStructuredText) syntax +later if necessary, although reStructuredText tries to imitate common +plain-text conventions so it should be quite natural. A basic knowledge of +reStructuredText syntax is useful when writing the document, so the last +~half of this document (starting with Example Section) gives examples +which should cover 99% of use cases.

+

Let me say that again: focus on content. But if you really need to verify +Sphinx’s output, see docs/README.txt for information. Once you have finished with the content, please send the .rst file to +llvm-commits for review.

+
+

Creating New Articles

+

Before creating a new article, consider the following questions:

+
    +
  1. Why would I want to read this document?

  2. +
  3. What should I know to be able to follow along with this document?

  4. +
  5. What will I have learned by the end of this document?

  6. +
+

A standard best practice is to make your articles task-oriented. You generally should not be writing documentation that isn’t based around “how to” do something +unless there’s already an existing “how to” article for the topic you’re documenting. The reason for this is that without a “how to” article to read first, it might be difficult for +someone unfamiliar with the topic to understand a more advanced, conceptual article.

+

When creating a task-oriented article, follow existing LLVM articles by giving it a filename that starts with HowTo*.rst. This format is usually the easiest for another person to understand and also the most useful.

+

Focus on content (yes, I had to say it again).

+

The rest of this document shows example reStructuredText markup constructs +that are meant to be read by you in your text editor after you have copied +this file into a new file for the documentation you are about to write.

+
+
+
+

Example Section

+

An article can contain one or more sections (i.e., headings). Sections (like Example Section above) help give your document its +structure. Use the same kind of adornments (e.g. ====== vs. ------) +as are used in this document. The adornment must be the same length as the +text above it. For Vim users, variations of yypVr= might be handy.

+
+

Example Nested Subsection

+

Subsections can also be nested beneath other subsections. For more information on sections, see Sphinx’s reStructuredText Primer.

+
+
+
+

Text Formatting

+

Text can be emphasized, bold, or monospace.

+

To create a new paragraph, simply insert a blank line.

+
+ +
+

Lists

+

reStructuredText allows you to create ordered lists…

+
    +
  1. A list starting with #. will be automatically numbered.

  2. +
  3. This is a second list element.

    +
      +
    1. Use indentation to create nested lists.

    2. +
    +
  4. +
+

…as well as unordered lists:

+
    +
  • Stuff.

    +
      +
    • Deeper stuff.

    • +
    +
  • +
  • More stuff.

  • +
+
+
+

Code Blocks

+

You can make blocks of code like this:

+
int main() {
+  return 0;
+}
+
+
+

For a shell session, use a console code block (some existing docs use +bash):

+
$ echo "Goodbye cruel world!"
+$ rm -rf /
+
+
+

If you need to show LLVM IR use the llvm code block.

+
define i32 @test1() {
+entry:
+  ret i32 0
+}
+
+
+

Some other common code blocks you might need are c, objc, make, +and cmake. If you need something beyond that, you can look at the full +list of supported code blocks.

+

However, don’t waste time fiddling with syntax highlighting when you could +be adding meaningful content. When in doubt, show preformatted text +without any syntax highlighting like this:

+
                      .
+                       +:.
+                   ..:: ::
+                .++:+:: ::+:.:.
+               .:+           :
+        ::.::..::            .+.
+      ..:+    ::              :
+......+:.                    ..
+      :++.    ..              :
+        .+:::+::              :
+        ..   . .+            ::
+                 +.:      .::+.
+                  ...+. .: .
+                     .++:..
+                      ...
+
+
+
+
+

Generating the documentation

+

You can generate the HTML documentation from the sources locally if you want to +see what they would look like. In addition to the normal +build tools +you need to install Sphinx and the +recommonmark extension.

+

On Debian you can install these with:

+
sudo apt install -y sphinx-doc python-recommonmark-doc
+
+
+

On Ubuntu use pip to get an up-to-date version of recommonmark:

+
sudo pip install sphinx recommonmark
+
+
+

Then run cmake to build the documentation inside the llvm-project checkout:

+
mkdir build
+cd build
+cmake -DLLVM_ENABLE_SPHINX=On ../llvm
+cmake --build . --target docs-llvm-html
+
+
+

In case you already have the CMake build set up and want to reuse that, +just set the CMake variable LLVM_ENABLE_SPHINX=On.

+

After that you can find the generated documentation in the build/docs/html +folder.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/StackMaps.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/StackMaps.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/StackMaps.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/StackMaps.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,635 @@ + + + + + + + + + Stack maps and patch points in LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Stack maps and patch points in LLVM

+ +
+

Definitions

+

In this document we refer to the “runtime” collectively as all +components that serve as the LLVM client, including the LLVM IR +generator, object code consumer, and code patcher.

+

A stack map records the location of live values at a particular +instruction address. These live values do not refer to all the +LLVM values live across the stack map. Instead, they are only the +values that the runtime requires to be live at this point. For +example, they may be the values the runtime will need to resume +program execution at that point independent of the compiled function +containing the stack map.

+

LLVM emits stack map data into the object code within a designated +Stack Map Section. This stack map data contains a record for +each stack map. The record stores the stack map’s instruction address +and contains an entry for each mapped value. Each entry encodes a +value’s location as a register, stack offset, or constant.

+

A patch point is an instruction address at which space is reserved for +patching a new instruction sequence at run time. Patch points look +much like calls to LLVM. They take arguments that follow a calling +convention and may return a value. They also imply stack map +generation, which allows the runtime to locate the patchpoint and +find the location of live values at that point.

+
+
+

Motivation

+

This functionality is currently experimental but is potentially useful +in a variety of settings, the most obvious being a runtime (JIT) +compiler. Example applications of the patchpoint intrinsics are +implementing an inline call cache for polymorphic method dispatch or +optimizing the retrieval of properties in dynamically typed languages +such as JavaScript.

+

The intrinsics documented here are currently used by the JavaScript +compiler within the open source WebKit project, see the FTL JIT, but they are designed to be +used whenever stack maps or code patching are needed. Because the +intrinsics have experimental status, compatibility across LLVM +releases is not guaranteed.

+

The stack map functionality described in this document is separate +from the functionality described in +Computing stack maps. GCFunctionMetadata provides the location of +pointers into a collected heap captured by the GCRoot intrinsic, +which can also be considered a “stack map”. Unlike the stack maps +defined above, the GCFunctionMetadata stack map interface does not +provide a way to associate live register values of arbitrary type with +an instruction address, nor does it specify a format for the resulting +stack map. The stack maps described here could potentially provide +richer information to a garbage collecting runtime, but that usage +will not be discussed in this document.

+
+
+

Intrinsics

+

The following two kinds of intrinsics can be used to implement stack +maps and patch points: llvm.experimental.stackmap and +llvm.experimental.patchpoint. Both kinds of intrinsics generate a +stack map record, and they both allow some form of code patching. They +can be used independently (i.e. llvm.experimental.patchpoint +implicitly generates a stack map without the need for an additional +call to llvm.experimental.stackmap). The choice of which to use +depends on whether it is necessary to reserve space for code patching +and whether any of the intrinsic arguments should be lowered according +to calling conventions. llvm.experimental.stackmap does not +reserve any space, nor does it expect any call arguments. If the +runtime patches code at the stack map’s address, it will destructively +overwrite the program text. This is unlike +llvm.experimental.patchpoint, which reserves space for in-place +patching without overwriting surrounding code. The +llvm.experimental.patchpoint intrinsic also lowers a specified +number of arguments according to its calling convention. This allows +patched code to make in-place function calls without marshaling.

+

Each instance of one of these intrinsics generates a stack map record +in the Stack Map Section. The record includes an ID, allowing +the runtime to uniquely identify the stack map, and the offset within +the code from the beginning of the enclosing function.

+
+

llvm.experimental.stackmap’ Intrinsic

+
+

Syntax:

+
declare void
+  @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, ...)
+
+
+
+
+

Overview:

+

The ‘llvm.experimental.stackmap’ intrinsic records the location of +specified values in the stack map without generating any code.

+
+
+

Operands:

+

The first operand is an ID to be encoded within the stack map. The +second operand is the number of shadow bytes following the +intrinsic. The variable number of operands that follow are the live +values for which locations will be recorded in the stack map.

+

To use this intrinsic as a bare-bones stack map, with no code patching +support, the number of shadow bytes can be set to zero.

+
+
+

Semantics:

+

The stack map intrinsic generates no code in place, unless nops are +needed to cover its shadow (see below). However, its offset from +function entry is stored in the stack map. This is the relative +instruction address immediately following the instructions that +precede the stack map.

+

The stack map ID allows a runtime to locate the desired stack map +record. LLVM passes this ID through directly to the stack map +record without checking uniqueness.

+

LLVM guarantees a shadow of instructions following the stack map’s +instruction offset during which neither the end of the basic block nor +another call to llvm.experimental.stackmap or +llvm.experimental.patchpoint may occur. This allows the runtime to +patch the code at this point in response to an event triggered from +outside the code. The code for instructions following the stack map +may be emitted in the stack map’s shadow, and these instructions may +be overwritten by destructive patching. Without shadow bytes, this +destructive patching could overwrite program text or data outside the +current function. We disallow overlapping stack map shadows so that +the runtime does not need to consider this corner case.

+

For example, a stack map with 8 byte shadow:

+
call void @runtime()
+call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 77, i32 8,
+                                                       i64* %ptr)
+%val = load i64* %ptr
+%add = add i64 %val, 3
+ret i64 %add
+
+
+

May require one byte of nop-padding:

+
0x00 callq _runtime
+0x05 nop                <--- stack map address
+0x06 movq (%rdi), %rax
+0x07 addq $3, %rax
+0x0a popq %rdx
+0x0b ret                <---- end of 8-byte shadow
+
+
+

Now, if the runtime needs to invalidate the compiled code, it may +patch 8 bytes of code at the stack map’s address as follows:

+
0x00 callq _runtime
+0x05 movl  $0xffff, %rax <--- patched code at stack map address
+0x0a callq *%rax         <---- end of 8-byte shadow
+
+
+

This way, after the normal call to the runtime returns, the code will +execute a patched call to a special entry point that can rebuild a +stack frame from the values located by the stack map.

+
+
+
+

llvm.experimental.patchpoint.*’ Intrinsic

+
+

Syntax:

+
declare void
+  @llvm.experimental.patchpoint.void(i64 <id>, i32 <numBytes>,
+                                     i8* <target>, i32 <numArgs>, ...)
+declare i64
+  @llvm.experimental.patchpoint.i64(i64 <id>, i32 <numBytes>,
+                                    i8* <target>, i32 <numArgs>, ...)
+
+
+
+
+

Overview:

+

The ‘llvm.experimental.patchpoint.*’ intrinsics create a function +call to the specified <target> and record the location of specified +values in the stack map.

+
+
+

Operands:

+

The first operand is an ID, the second operand is the number of bytes +reserved for the patchable region, the third operand is the target +address of a function (optionally null), and the fourth operand +specifies how many of the following variable operands are considered +function call arguments. The remaining variable number of operands are +the live values for which locations will be recorded in the stack +map.

+
+
+

Semantics:

+

The patch point intrinsic generates a stack map. It also emits a +function call to the address specified by <target> if the address +is not a constant null. The function call and its arguments are +lowered according to the calling convention specified at the +intrinsic’s callsite. Variants of the intrinsic with non-void return +type also return a value according to calling convention.

+

On PowerPC, note that <target> must be the ABI function pointer for the +intended target of the indirect call. Specifically, when compiling for the +ELF V1 ABI, <target> is the function-descriptor address normally used as +the C/C++ function-pointer representation.

+

Requesting zero patch point arguments is valid. In this case, all +variable operands are handled just like +llvm.experimental.stackmap.*. The difference is that space will +still be reserved for patching, a call will be emitted, and a return +value is allowed.

+

The locations of the arguments are not normally recorded in the stack +map because they are already fixed by the calling convention. The +remaining live values will have their location recorded, which +could be a register, stack location, or constant. A special calling +convention has been introduced for use with stack maps, anyregcc, +which forces the arguments to be loaded into registers but allows +those registers to be dynamically allocated. These argument registers +will have their register locations recorded in the stack map in +addition to the remaining live values.

+

The patch point also emits nops to cover at least <numBytes> of +instruction encoding space. Hence, the client must ensure that +<numBytes> is enough to encode a call to the target address on the +supported targets. If the call target is constant null, then there is +no minimum requirement. A zero-byte null target patchpoint is +valid.

+

The runtime may patch the code emitted for the patch point, including +the call sequence and nops. However, the runtime may not assume +anything about the code LLVM emits within the reserved space. Partial +patching is not allowed. The runtime must patch all reserved bytes, +padding with nops if necessary.

+

This example shows a patch point reserving 15 bytes, with one argument +in $rdi, and a return value in $rax per native calling convention:

+
%target = inttoptr i64 -281474976710654 to i8*
+%val = call i64 (i64, i32, ...)*
+         @llvm.experimental.patchpoint.i64(i64 78, i32 15,
+                                           i8* %target, i32 1, i64* %ptr)
+%add = add i64 %val, 3
+ret i64 %add
+
+
+

May generate:

+
0x00 movabsq $0xffff000000000002, %r11 <--- patch point address
+0x0a callq   *%r11
+0x0d nop
+0x0e nop                               <--- end of reserved 15-bytes
+0x0f addq    $0x3, %rax
+0x10 movl    %rax, 8(%rsp)
+
+
+

Note that no stack map locations will be recorded. If the patched code +sequence does not need arguments fixed to specific calling convention +registers, then the anyregcc convention may be used:

+
%val = call anyregcc @llvm.experimental.patchpoint(i64 78, i32 15,
+                                                   i8* %target, i32 1,
+                                                   i64* %ptr)
+
+
+

The stack map now indicates the location of the %ptr argument and +return value:

+
Stack Map: ID=78, Loc0=%r9 Loc1=%r8
+
+
+

The patch code sequence may now use the argument that happened to be +allocated in %r8 and return a value allocated in %r9:

+
0x00 movslq 4(%r8) %r9              <--- patched code at patch point address
+0x03 nop
+...
+0x0e nop                            <--- end of reserved 15-bytes
+0x0f addq    $0x3, %r9
+0x10 movl    %r9, 8(%rsp)
+
+
+
+
+
+
+

Stack Map Format

+

The existence of a stack map or patch point intrinsic within an LLVM +Module forces code emission to create a Stack Map Section. The +format of this section follows:

+
Header {
+  uint8  : Stack Map Version (current version is 3)
+  uint8  : Reserved (expected to be 0)
+  uint16 : Reserved (expected to be 0)
+}
+uint32 : NumFunctions
+uint32 : NumConstants
+uint32 : NumRecords
+StkSizeRecord[NumFunctions] {
+  uint64 : Function Address
+  uint64 : Stack Size
+  uint64 : Record Count
+}
+Constants[NumConstants] {
+  uint64 : LargeConstant
+}
+StkMapRecord[NumRecords] {
+  uint64 : PatchPoint ID
+  uint32 : Instruction Offset
+  uint16 : Reserved (record flags)
+  uint16 : NumLocations
+  Location[NumLocations] {
+    uint8  : Register | Direct | Indirect | Constant | ConstantIndex
+    uint8  : Reserved (expected to be 0)
+    uint16 : Location Size
+    uint16 : Dwarf RegNum
+    uint16 : Reserved (expected to be 0)
+    int32  : Offset or SmallConstant
+  }
+  uint32 : Padding (only if required to align to 8 byte)
+  uint16 : Padding
+  uint16 : NumLiveOuts
+  LiveOuts[NumLiveOuts] {
+    uint16 : Dwarf RegNum
+    uint8  : Reserved
+    uint8  : Size in Bytes
+  }
+  uint32 : Padding (only if required to align to 8 byte)
+}
+
+
+

The first byte of each location encodes a type that indicates how to +interpret the RegNum and Offset fields as follows:

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Encoding

Type

Value

Description

0x1

Register

Reg

Value in a register

0x2

Direct

Reg + Offset

Frame index value

0x3

Indirect

[Reg + Offset]

Spilled value

0x4

Constant

Offset

Small constant

0x5

ConstIndex

Constants[Offset]

Large constant

+

In the common case, a value is available in a register, and the +Offset field will be zero. Values spilled to the stack are encoded +as Indirect locations. The runtime must load those values from a +stack address, typically in the form [BP + Offset]. If an +alloca value is passed directly to a stack map intrinsic, then +LLVM may fold the frame index into the stack map as an optimization to +avoid allocating a register or stack slot. These frame indices will be +encoded as Direct locations in the form BP + Offset. LLVM may +also optimize constants by emitting them directly in the stack map, +either in the Offset of a Constant location or in the constant +pool, referred to by ConstantIndex locations.

+

At each callsite, a “liveout” register list is also recorded. These +are the registers that are live across the stackmap and therefore must +be saved by the runtime. This is an important optimization when the +patchpoint intrinsic is used with a calling convention that by default +preserves most registers as callee-save.

+

Each entry in the liveout register list contains a DWARF register +number and size in bytes. The stackmap format deliberately omits +specific subregister information. Instead the runtime must interpret +this information conservatively. For example, if the stackmap reports +one byte at %rax, then the value may be in either %al or +%ah. It doesn’t matter in practice, because the runtime will +simply save %rax. However, if the stackmap reports 16 bytes at +%ymm0, then the runtime can safely optimize by saving only +%xmm0.

+

The stack map format is a contract between an LLVM SVN revision and +the runtime. It is currently experimental and may change in the short +term, but minimizing the need to update the runtime is +important. Consequently, the stack map design is motivated by +simplicity and extensibility. Compactness of the representation is +secondary because the runtime is expected to parse the data +immediately after compiling a module and encode the information in its +own format. Since the runtime controls the allocation of sections, it +can reuse the same stack map space for multiple modules.

+

Stackmap support is currently only implemented for 64-bit +platforms. However, a 32-bit implementation should be able to use the +same format with an insignificant amount of wasted space.

+
+

Stack Map Section

+

A JIT compiler can easily access this section by providing its own +memory manager via the LLVM C API +LLVMCreateSimpleMCJITMemoryManager(). When creating the memory +manager, the JIT provides a callback: +LLVMMemoryManagerAllocateDataSectionCallback(). When LLVM creates +this section, it invokes the callback and passes the section name. The +JIT can record the in-memory address of the section at this time and +later parse it to recover the stack map data.

+

For MachO (e.g. on Darwin), the stack map section name is +“__llvm_stackmaps”. The segment name is “__LLVM_STACKMAPS”.

+

For ELF (e.g. on Linux), the stack map section name is +“.llvm_stackmaps”. The segment name is “__LLVM_STACKMAPS”.

+
+
+
+

Stack Map Usage

+

The stack map support described in this document can be used to +precisely determine the location of values at a specific position in +the code. LLVM does not maintain any mapping between those values and +any higher-level entity. The runtime must be able to interpret the +stack map record given only the ID, offset, and the order of the +locations, records, and functions, which LLVM preserves.

+

Note that this is quite different from the goal of debug information, +which is a best-effort attempt to track the location of named +variables at every instruction.

+

An important motivation for this design is to allow a runtime to +commandeer a stack frame when execution reaches an instruction address +associated with a stack map. The runtime must be able to rebuild a +stack frame and resume program execution using the information +provided by the stack map. For example, execution may resume in an +interpreter or a recompiled version of the same function.

+

This usage restricts LLVM optimization. Clearly, LLVM must not move +stores across a stack map. However, loads must also be handled +conservatively. If the load may trigger an exception, hoisting it +above a stack map could be invalid. For example, the runtime may +determine that a load is safe to execute without a type check given +the current state of the type system. If the type system changes while +some activation of the load’s function exists on the stack, the load +becomes unsafe. The runtime can prevent subsequent execution of that +load by immediately patching any stack map location that lies between +the current call site and the load (typically, the runtime would +simply patch all stack map locations to invalidate the function). If +the compiler had hoisted the load above the stack map, then the +program could crash before the runtime could take back control.

+

To enforce these semantics, stackmap and patchpoint intrinsics are +considered to potentially read and write all memory. This may limit +optimization more than some clients desire. This limitation may be +avoided by marking the call site as “readonly”. In the future we may +also allow meta-data to be added to the intrinsic call to express +aliasing, thereby allowing optimizations to hoist certain loads above +stack maps.

+
+

Direct Stack Map Entries

+

As shown in Stack Map Section, a Direct stack map location +records the address of a frame index. This address is itself the value +that the runtime requested. This differs from Indirect locations, +which refer to stack locations from which the requested values must +be loaded. Direct locations can communicate the address of an alloca, +while Indirect locations handle register spills.

+

For example:

+
entry:
+  %a = alloca i64...
+  llvm.experimental.stackmap(i64 <ID>, i32 <shadowBytes>, i64* %a)
+
+
+

The runtime can determine this alloca’s relative location on the +stack immediately after compilation, or at any time thereafter. This +differs from Register and Indirect locations, because the runtime can +only read the values in those locations when execution reaches the +instruction address of the stack map.

+

This functionality requires LLVM to treat entry-block allocas +specially when they are directly consumed by an intrinsic. (This is +the same requirement imposed by the llvm.gcroot intrinsic.) LLVM +transformations must not substitute the alloca with any intervening +value. This can be verified by the runtime simply by checking that the +stack map’s location is a Direct location type.

+
+
+
+

Supported Architectures

+

Support for StackMap generation and the related intrinsics requires +some code for each backend. Today, only a subset of LLVM’s backends +are supported. The currently supported architectures are X86_64, +PowerPC, Aarch64 and SystemZ.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/StackSafetyAnalysis.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/StackSafetyAnalysis.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/StackSafetyAnalysis.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/StackSafetyAnalysis.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,184 @@ + + + + + + + + + Stack Safety Analysis — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Stack Safety Analysis

+
+

Introduction

+

The Stack Safety Analysis determines if stack allocated variables can be +considered ‘safe’ from memory access bugs.

+

The primary purpose of the analysis is to be used by sanitizers to avoid +unnecessary instrumentation of ‘safe’ variables. SafeStack is going to be the +first user.

+

‘safe’ variables can be defined as variables that can not be used out-of-scope +(e.g. use-after-return) or accessed out of bounds. In the future it can be +extended to track other variable properties. E.g. we plan to extend +implementation with a check to make sure that variable is always initialized +before every read to optimize use-of-uninitialized-memory checks.

+
+
+

How it works

+

The analysis is implemented in two stages:

+

The intra-procedural, or ‘local’, stage performs a depth-first search inside +functions to collect all uses of each alloca, including loads/stores and uses as +arguments to functions. After this stage we know which parts of the alloca are used +by the function itself but we don’t know what happens after it is passed as +an argument to another function.

+

The inter-procedural, or ‘global’, stage resolves what happens to allocas after +they are passed as function arguments. This stage performs a depth-first search +on function calls inside a single module and propagates alloca usage through +function calls.

+

When used with ThinLTO, the global stage performs a whole program analysis over +the Module Summary Index.

+
+
+

Testing

+

The analysis is covered with lit tests.

+

We expect that users can tolerate false classification of variables as +‘unsafe’ when in fact they are ‘safe’. This may lead to inefficient code. However, we +can’t accept false ‘safe’ classification which may cause sanitizers to miss actual +bugs in instrumented code. To avoid that we want an additional validation tool.

+

AddressSanitizer may help with this validation. We can instrument all variables +as usual but additionally store stack-safe information in the +ASanStackVariableDescription. Then if AddressSanitizer detects a bug on +a ‘safe’ variable we can produce an additional report to let the user know that +probably Stack Safety Analysis failed and we should check for a bug in the +compiler.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Statepoints.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Statepoints.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Statepoints.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Statepoints.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,875 @@ + + + + + + + + + Garbage Collection Safepoints in LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Garbage Collection Safepoints in LLVM

+ +
+

Status

+

This document describes a set of extensions to LLVM to support garbage +collection. By now, these mechanisms are well proven, with a commercial Java +implementation with a fully relocating collector having shipped using them. +There are a couple of places where bugs might still linger; these are called out +below.

+

They are still listed as “experimental” to indicate that no forward or backward +compatibility guarantees are offered across versions. If your use case is such +that you need some form of forward compatibility guarantee, please raise the +issue on the llvm-dev mailing list.

+

LLVM still supports an alternate mechanism for conservative garbage collection +support using the gcroot intrinsic. The gcroot mechanism is mostly of +historical interest at this point with one exception - its implementation of +shadow stacks has been used successfully by a number of language frontends and +is still supported.

+
+
+

Overview & Core Concepts

+

To collect dead objects, garbage collectors must be able to identify +any references to objects contained within executing code, and, +depending on the collector, potentially update them. The collector +does not need this information at all points in code - that would make +the problem much harder - but only at well-defined points in the +execution known as ‘safepoints’. For most collectors, it is sufficient +to track at least one copy of each unique pointer value. However, for +a collector which wishes to relocate objects directly reachable from +running code, a higher standard is required.

+

One additional challenge is that the compiler may compute intermediate +results (“derived pointers”) which point outside of the allocation or +even into the middle of another allocation. The eventual use of this +intermediate value must yield an address within the bounds of the +allocation, but such “exterior derived pointers” may be visible to the +collector. Given this, a garbage collector can not safely rely on the +runtime value of an address to indicate the object it is associated +with. If the garbage collector wishes to move any object, the +compiler must provide a mapping, for each pointer, to an indication of +its allocation.

+

To simplify the interaction between a collector and the compiled code, +most garbage collectors are organized in terms of three abstractions: +load barriers, store barriers, and safepoints.

+
    +
  1. A load barrier is a bit of code executed immediately after the +machine load instruction, but before any use of the value loaded. +Depending on the collector, such a barrier may be needed for all +loads, merely loads of a particular type (in the original source +language), or none at all.

  2. +
  3. Analogously, a store barrier is a code fragment that runs +immediately before the machine store instruction, but after the +computation of the value stored. The most common use of a store +barrier is to update a ‘card table’ in a generational garbage +collector.

  4. +
  5. A safepoint is a location at which pointers visible to the compiled +code (i.e. currently in registers or on the stack) are allowed to +change. After the safepoint completes, the actual pointer value +may differ, but the ‘object’ (as seen by the source language) +pointed to will not.

  6. +
+
+

Note that the term ‘safepoint’ is somewhat overloaded. It refers to +both the location at which the machine state is parsable and the +coordination protocol involved in bringing application threads to a +point at which the collector can safely use that information. The +term “statepoint” as used in this document refers exclusively to the +former.

+
+

This document focuses on the last item - compiler support for +safepoints in generated code. We will assume that an outside +mechanism has decided where to place safepoints. From our +perspective, all safepoints will be function calls. To support +relocation of objects directly reachable from values in compiled code, +the collector must be able to:

+
    +
  1. identify every copy of a pointer (including copies introduced by +the compiler itself) at the safepoint,

  2. +
  3. identify which object each pointer relates to, and

  4. +
  5. potentially update each of those copies.

  6. +
+

This document describes the mechanism by which an LLVM based compiler +can provide this information to a language runtime/collector, and +ensure that all pointers can be read and updated if desired.

+
+

Abstract Machine Model

+

At a high level, LLVM has been extended to support compiling to an abstract +machine which extends the actual target with a non-integral pointer type +suitable for representing a garbage collected reference to an object. In +particular, such non-integral pointer types have no defined mapping to an +integer representation. This semantic quirk allows the runtime to pick an +integer mapping for each point in the program allowing relocations of objects +without visible effects.

+

This high level abstract machine model is used for most of the optimizer. As +a result, transform passes do not need to be extended to look through explicit +relocation sequences. Before starting code generation, we switch +representations to an explicit form. The exact location chosen for lowering +is an implementation detail.

+

Note that most of the value of the abstract machine model comes for collectors +which need to model potentially relocatable objects. For a compiler which +supports only a non-relocating collector, you may wish to consider starting +with the fully explicit form.

+

Warning: There is one currently known semantic hole in the definition of +non-integral pointers which has not been addressed upstream. To work around +this, you need to disable speculation of loads unless the memory type +(non-integral pointer vs anything else) is known to be unchanged. That is, it is +not safe to speculate a load if doing so causes a non-integral pointer value to +be loaded as any other type or vice versa. In practice, this restriction is +well isolated to isSafeToSpeculate in ValueTracking.cpp.

+
+
+

Explicit Representation

+

A frontend could directly generate this low level explicit form, but +doing so may inhibit optimization. Instead, it is recommended that +compilers with relocating collectors target the abstract machine model just +described.

+

The heart of the explicit approach is to construct (or rewrite) the IR in a +manner where the possible updates performed by the garbage collector are +explicitly visible in the IR. Doing so requires that we:

+
    +
  1. create a new SSA value for each potentially relocated pointer, and +ensure that no use of the original (non-relocated) value is +reachable after the safepoint,

  2. +
  3. specify the relocation in a way which is opaque to the compiler to +ensure that the optimizer can not introduce new uses of an +unrelocated value after a statepoint. This prevents the optimizer +from performing unsound optimizations.

  4. +
  5. record a mapping of live pointers (and the allocation they’re +associated with) for each statepoint.

  6. +
+

At the most abstract level, inserting a safepoint can be thought of as +replacing a call instruction with a call to a multiple return value +function which both calls the original target of the call, returns +its result, and returns updated values for any live pointers to +garbage collected objects.

+
+

Note that the task of identifying all live pointers to garbage +collected values, transforming the IR to expose a pointer giving the +base object for every such live pointer, and inserting all the +intrinsics correctly is explicitly out of scope for this document. +The recommended approach is to use the utility passes described below.

+
+

This abstract function call is concretely represented by a sequence of +intrinsic calls known collectively as a “statepoint relocation sequence”.

+

Let’s consider a simple call in LLVM IR:

+
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
+       gc "statepoint-example" {
+  call void ()* @foo()
+  ret i8 addrspace(1)* %obj
+}
+
+
+

Depending on our language we may need to allow a safepoint during the execution +of foo. If so, we need to let the collector update local values in the +current frame. If we don’t, we’ll be accessing a potentially invalid reference +once we eventually return from the call.

+

In this example, we need to relocate the SSA value %obj. Since we can’t +actually change the value in the SSA value %obj, we need to introduce a new +SSA value %obj.relocated which represents the potentially changed value of +%obj after the safepoint and update any following uses appropriately. The +resulting relocation sequence is:

+
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
+       gc "statepoint-example" {
+  %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj)
+  %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 7, i32 7)
+  ret i8 addrspace(1)* %obj.relocated
+}
+
+
+

Ideally, this sequence would have been represented as a M argument, N +return value function (where M is the number of values being +relocated + the original call arguments and N is the original return +value + each relocated value), but LLVM does not easily support such a +representation.

+

Instead, the statepoint intrinsic marks the actual site of the +safepoint or statepoint. The statepoint returns a token value (which +exists only at compile time). To get back the original return value +of the call, we use the gc.result intrinsic. To get the relocation +of each pointer in turn, we use the gc.relocate intrinsic with the +appropriate index. Note that both the gc.relocate and gc.result are +tied to the statepoint. The combination forms a “statepoint relocation +sequence” and represents the entirety of a parseable call or ‘statepoint’.

+

When lowered, this example would generate the following x86 assembly:

+
        .globl        test1
+        .align        16, 0x90
+        pushq %rax
+        callq foo
+.Ltmp1:
+        movq  (%rsp), %rax  # This load is redundant (oops!)
+        popq  %rdx
+        retq
+
+
+

Each of the potentially relocated values has been spilled to the +stack, and a record of that location has been recorded to the +Stack Map section. If the garbage collector +needs to update any of these pointers during the call, it knows +exactly what to change.

+

The relevant parts of the StackMap section for our example are:

+
# This describes the call site
+# Stack Maps: callsite 2882400000
+        .quad 2882400000
+        .long .Ltmp1-test1
+        .short        0
+# .. 8 entries skipped ..
+# This entry describes the spill slot which is directly addressable
+# off RSP with offset 0.  Given the value was spilled with a pushq,
+# that makes sense.
+# Stack Maps:   Loc 8: Direct RSP     [encoding: .byte 2, .byte 8, .short 7, .int 0]
+        .byte 2
+        .byte 8
+        .short        7
+        .long 0
+
+
+

This example was taken from the tests for the RewriteStatepointsForGC +utility pass. As such, its full StackMap can be easily examined with the +following command.

+
opt -rewrite-statepoints-for-gc test/Transforms/RewriteStatepointsForGC/basics.ll -S | llc -debug-only=stackmaps
+
+
+
+
+

Simplifications for Non-Relocating GCs

+

Some of the complexity in the previous example is unnecessary for a +non-relocating collector. While a non-relocating collector still needs the +information about which locations contain live references, it doesn’t need to +represent explicit relocations. As such, the previously described explicit +lowering can be simplified to remove all of the gc.relocate intrinsic +calls and leave uses in terms of the original reference value.

+

Here’s the explicit lowering for the previous example for a non-relocating +collector:

+
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
+       gc "statepoint-example" {
+  call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj)
+  ret i8 addrspace(1)* %obj
+}
+
+
+
+
+

Recording On Stack Regions

+

In addition to the explicit relocation form previously described, the +statepoint infrastructure also allows the listing of allocas within the gc +pointer list. Allocas can be listed with or without additional explicit gc +pointer values and relocations.

+

An alloca in the gc region of the statepoint operand list will cause the +address of the stack region to be listed in the stackmap for the statepoint.

+

This mechanism can be used to describe explicit spill slots if desired. It +then becomes the generator’s responsibility to ensure that values are +spill/filled to/from the alloca as needed on either side of the safepoint. +Note that there is no way to indicate a corresponding base pointer for such +an explicitly specified spill slot, so usage is restricted to values for +which the associated collector can derive the object base from the pointer +itself.

+

This mechanism can be used to describe on stack objects containing +references provided that the collector can map from the location on the +stack to a heap map describing the internal layout of the references the +collector needs to process.

+

WARNING: At the moment, this alternate form is not well exercised. It is +recommended to use this with caution and expect to have to fix a few bugs. +In particular, the RewriteStatepointsForGC utility pass does not do +anything for allocas today.

+
+
+

Base & Derived Pointers

+

A “base pointer” is one which points to the starting address of an allocation +(object). A “derived pointer” is one which is offset from a base pointer by +some amount. When relocating objects, a garbage collector needs to be able +to relocate each derived pointer associated with an allocation to the same +offset from the new address.

+

“Interior derived pointers” remain within the bounds of the allocation +they’re associated with. As a result, the base object can be found at +runtime provided the bounds of allocations are known to the runtime system.

+

“Exterior derived pointers” are outside the bounds of the associated object; +they may even fall within another allocation’s address range. As a result, +there is no way for a garbage collector to determine which allocation they +are associated with at runtime and compiler support is needed.

+

The gc.relocate intrinsic supports an explicit operand for describing the +allocation associated with a derived pointer. This operand is frequently +referred to as the base operand, but does not strictly speaking have to be +a base pointer, but it does need to lie within the bounds of the associated +allocation. Some collectors may require that the operand be an actual base +pointer rather than merely an internal derived pointer. Note that during +lowering both the base and derived pointer operands are required to be live +over the associated call safepoint even if the base is otherwise unused +afterwards.

+

If we extend our previous example to include a pointless derived pointer, +we get:

+
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
+       gc "statepoint-example" {
+  %gep = getelementptr i8, i8 addrspace(1)* %obj, i64 20000
+  %token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj, i8 addrspace(1)* %gep)
+  %obj.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 7)
+  %gep.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 8)
+  %p = getelementptr i8, i8 addrspace(1)* %gep, i64 -20000
+  ret i8 addrspace(1)* %p
+}
+
+
+

Note that in this example %p and %obj.relocated are the same address and we +could replace one with the other, potentially removing the derived pointer +from the live set at the safepoint entirely.

+
+
+

GC Transitions

+

As a practical consideration, many garbage-collected systems allow code that is +collector-aware (“managed code”) to call code that is not collector-aware +(“unmanaged code”). It is common that such calls must also be safepoints, since +it is desirable to allow the collector to run during the execution of +unmanaged code. Furthermore, it is common that coordinating the transition from +managed to unmanaged code requires extra code generation at the call site to +inform the collector of the transition. In order to support these needs, a +statepoint may be marked as a GC transition, and data that is necessary to +perform the transition (if any) may be provided as additional arguments to the +statepoint.

+
+

Note that although in many cases statepoints may be inferred to be GC +transitions based on the function symbols involved (e.g. a call from a +function with GC strategy “foo” to a function with GC strategy “bar”), +indirect calls that are also GC transitions must also be supported. This +requirement is the driving force behind the decision to require that GC +transitions are explicitly marked.

+
+

Let’s revisit the sample given above, this time treating the call to @foo +as a GC transition. Depending on our target, the transition code may need to +access some extra state in order to inform the collector of the transition. +Let’s assume a hypothetical GC–somewhat unimaginatively named “hypothetical-gc” +–that requires that a TLS variable must be written to before and after a call +to unmanaged code. The resulting relocation sequence is:

+
@flag = thread_local global i32 0, align 4
+
+define i8 addrspace(1)* @test1(i8 addrspace(1) *%obj)
+       gc "hypothetical-gc" {
+
+  %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 1, i32* @Flag, i32 0, i8 addrspace(1)* %obj)
+  %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 7, i32 7)
+  ret i8 addrspace(1)* %obj.relocated
+}
+
+
+

During lowering, this will result in an instruction selection DAG that looks +something like:

+
CALLSEQ_START
+...
+GC_TRANSITION_START (lowered i32 *@Flag), SRCVALUE i32* Flag
+STATEPOINT
+GC_TRANSITION_END (lowered i32 *@Flag), SRCVALUE i32 *Flag
+...
+CALLSEQ_END
+
+
+

In order to generate the necessary transition code, the backend for each target +supported by “hypothetical-gc” must be modified to lower GC_TRANSITION_START +and GC_TRANSITION_END nodes appropriately when the “hypothetical-gc” +strategy is in use for a particular function. Assuming that such lowering has +been added for X86, the generated assembly would be:

+
        .globl        test1
+        .align        16, 0x90
+        pushq %rax
+        movl $1, %fs:Flag@TPOFF
+        callq foo
+        movl $0, %fs:Flag@TPOFF
+.Ltmp1:
+        movq  (%rsp), %rax  # This load is redundant (oops!)
+        popq  %rdx
+        retq
+
+
+

Note that the design as presented above is not fully implemented: in particular, +strategy-specific lowering is not present, and all GC transitions are emitted as +a single no-op before and after the call instruction. These no-ops are often +removed by the backend during dead machine instruction elimination.

+

Before the abstract machine model is lowered to the explicit statepoint model +of relocations by the RewriteStatepointsForGC pass it is possible for +any derived pointer to get its base pointer and offset from the base pointer +by using the gc.get.pointer.base and the gc.get.pointer.offset +intrinsics respectively. These intrinsics are inlined by the +RewriteStatepointsForGC pass and must not be used after this pass.

+
+
+
+

Stack Map Format

+

Locations for each pointer value which may need to be read and/or updated by +the runtime or collector are provided in a separate section of the +generated object file as specified in the PatchPoint documentation. +This special section is encoded per the +Stack Map format.

+

The general expectation is that a JIT compiler will parse and discard this +format; it is not particularly memory efficient. If you need an alternate +format (e.g. for an ahead of time compiler), see the discussion under +the open work items below.

+

Each statepoint generates the following Locations:

+
    +
  • Constant which describes the calling convention of the call target. This +constant is a valid calling convention identifier for +the version of LLVM used to generate the stackmap. No additional compatibility +guarantees are made for this constant over what LLVM provides elsewhere w.r.t. +these identifiers.

  • +
  • Constant which describes the flags passed to the statepoint intrinsic

  • +
  • Constant which describes number of following deopt Locations (not +operands). Will be 0 if no “deopt” bundle is provided.

  • +
  • Variable number of Locations, one for each deopt parameter listed in the +“deopt” operand bundle. At the moment, only deopt parameters with a bitwidth +of 64 bits or less are supported. Values of a type larger than 64 bits can be +specified and reported only if a) the value is constant at the call site, and +b) the constant can be represented with less than 64 bits (assuming zero +extension to the original bitwidth).

  • +
  • Variable number of relocation records, each of which consists of +exactly two Locations. Relocation records are described in detail +below.

  • +
+

Each relocation record provides sufficient information for a collector to +relocate one or more derived pointers. Each record consists of a pair of +Locations. The second element in the record represents the pointer (or +pointers) which need to be updated. The first element in the record provides a +pointer to the base of the object with which the pointer(s) being relocated is +associated. This information is required for handling generalized derived +pointers since a pointer may be outside the bounds of the original allocation, +but still needs to be relocated with the allocation. Additionally:

+
    +
  • It is guaranteed that the base pointer must also appear explicitly as a +relocation pair if used after the statepoint.

  • +
  • There may be fewer relocation records than gc parameters in the IR +statepoint. Each unique pair will occur at least once; duplicates +are possible.

  • +
  • The Locations within each record may either be of pointer size or a +multiple of pointer size. In the latter case, the record must be +interpreted as describing a sequence of pointers and their corresponding +base pointers. If the Location is of size N x sizeof(pointer), then +there will be N records of one pointer each contained within the Location. +Both Locations in a pair can be assumed to be of the same size.

  • +
+

Note that the Locations used in each section may describe the same +physical location, e.g. a stack slot may appear as a deopt location, +a gc base pointer, and a gc derived pointer.

+

The LiveOut section of the StkMapRecord will be empty for a statepoint +record.

+
+
+

Safepoint Semantics & Verification

+

The fundamental correctness property for the compiled code’s +correctness w.r.t. the garbage collector is a dynamic one. It must be +the case that there is no dynamic trace such that an operation +involving a potentially relocated pointer is observably-after a +safepoint which could relocate it. ‘observably-after’ in this usage +means that an outside observer could observe this sequence of events +in a way which precludes the operation being performed before the +safepoint.

+

To understand why this ‘observably-after’ property is required, +consider a null comparison performed on the original copy of a +relocated pointer. Assuming that control flow follows the safepoint, +there is no way to observe externally whether the null comparison is +performed before or after the safepoint. (Remember, the original +Value is unmodified by the safepoint.) The compiler is free to make +either scheduling choice.

+

The actual correctness property implemented is slightly stronger than +this. We require that there be no static path on which a +potentially relocated pointer is ‘observably-after’ it may have been +relocated. This is slightly stronger than is strictly necessary (and +thus may disallow some otherwise valid programs), but greatly +simplifies reasoning about correctness of the compiled code.

+

By construction, this property will be upheld by the optimizer if +correctly established in the source IR. This is a key invariant of +the design.

+

The existing IR Verifier pass has been extended to check most of the +local restrictions on the intrinsics mentioned in their respective +documentation. The current implementation in LLVM does not check the +key relocation invariant, but there is ongoing work on developing such +a verifier. Please ask on llvm-dev if you’re interested in +experimenting with the current version.

+
+
+

Utility Passes for Safepoint Insertion

+
+

RewriteStatepointsForGC

+

The pass RewriteStatepointsForGC transforms a function’s IR to lower from the +abstract machine model described above to the explicit statepoint model of +relocations. To do this, it replaces all calls or invokes of functions which +might contain a safepoint poll with a gc.statepoint and associated full +relocation sequence, including all required gc.relocates.

+

Note that by default, this pass only runs for the “statepoint-example” or +“core-clr” gc strategies. You will need to add your custom strategy to this +list or use one of the predefined ones.

+

As an example, given this code:

+
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
+       gc "statepoint-example" {
+  call void @foo()
+  ret i8 addrspace(1)* %obj
+}
+
+
+

The pass would produce this IR:

+
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj)
+       gc "statepoint-example" {
+  %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj)
+  %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 12, i32 12)
+  ret i8 addrspace(1)* %obj.relocated
+}
+
+
+

In the above examples, the addrspace(1) marker on the pointers is the mechanism +that the statepoint-example GC strategy uses to distinguish references from +non references. The pass assumes that all addrspace(1) pointers are non-integral +pointer types. Address space 1 is not globally reserved for this purpose.

+

This pass can be used as a utility function by a language frontend that doesn’t +want to manually reason about liveness, base pointers, or relocation when +constructing IR. As currently implemented, RewriteStatepointsForGC must be +run after SSA construction (i.e. mem2reg).

+

RewriteStatepointsForGC will ensure that appropriate base pointers are listed +for every relocation created. It will do so by duplicating code as needed to +propagate the base pointer associated with each pointer being relocated to +the appropriate safepoints. The implementation assumes that the following +IR constructs produce base pointers: loads from the heap, addresses of global +variables, function arguments, function return values. Constant pointers (such +as null) are also assumed to be base pointers. In practice, this constraint +can be relaxed to producing interior derived pointers provided the target +collector can find the associated allocation from an arbitrary interior +derived pointer.

+

By default RewriteStatepointsForGC passes in 0xABCDEF00 as the statepoint +ID and 0 as the number of patchable bytes to the newly constructed +gc.statepoint. These values can be configured on a per-callsite +basis using the attributes "statepoint-id" and +"statepoint-num-patch-bytes". If a call site is marked with a +"statepoint-id" function attribute and its value is a positive +integer (represented as a string), then that value is used as the ID +of the newly constructed gc.statepoint. If a call site is marked +with a "statepoint-num-patch-bytes" function attribute and its +value is a positive integer, then that value is used as the ‘num patch +bytes’ parameter of the newly constructed gc.statepoint. The +"statepoint-id" and "statepoint-num-patch-bytes" attributes +are not propagated to the gc.statepoint call or invoke if they +could be successfully parsed.

+

In practice, RewriteStatepointsForGC should be run much later in the pass +pipeline, after most optimization is already done. This helps to improve +the quality of the generated code when compiled with garbage collection support.

+
+
+

RewriteStatepointsForGC intrinsic lowering

+

As a part of lowering to the explicit model of relocations +RewriteStatepointsForGC performs GC specific lowering for the following +intrinsics:

+
    +
  • gc.get.pointer.base

  • +
  • gc.get.pointer.offset

  • +
  • llvm.memcpy.element.unordered.atomic.*

  • +
  • llvm.memmove.element.unordered.atomic.*

  • +
+

There are two possible lowerings for the memcpy and memmove operations: +GC leaf lowering and GC parseable lowering. If a call is explicitly marked with +“gc-leaf-function” attribute the call is lowered to a GC leaf call to +‘__llvm_memcpy_element_unordered_atomic_*’ or +‘__llvm_memmove_element_unordered_atomic_*’ symbol. Such a call can not +take a safepoint. Otherwise, the call is made GC parseable by wrapping the +call into a statepoint. This makes it possible to take a safepoint during +copy operation. Note that a GC parseable copy operation is not required to +take a safepoint. For example, a short copy operation may be performed without +taking a safepoint.

+

GC parseable calls to ‘llvm.memcpy.element.unordered.atomic.*’, +‘llvm.memmove.element.unordered.atomic.*’ intrinsics are lowered to calls +to ‘__llvm_memcpy_element_unordered_atomic_safepoint_*’, +‘__llvm_memmove_element_unordered_atomic_safepoint_*’ symbols respectively. +This way the runtime can provide implementations of copy operations with and +without safepoints.

+

GC parseable lowering also involves adjusting the arguments for the call. +Memcpy and memmove intrinsics take derived pointers as source and destination +arguments. If a copy operation takes a safepoint it might need to relocate the +underlying source and destination objects. This requires the corresponding base +pointers to be available in the copy operation. In order to make the base +pointers available RewriteStatepointsForGC replaces derived pointers with base +pointer and offset pairs. For example:

+
declare void @__llvm_memcpy_element_unordered_atomic_safepoint_1(
+  i8 addrspace(1)*  %dest_base, i64 %dest_offset,
+  i8 addrspace(1)*  %src_base, i64 %src_offset,
+  i64 %length)
+
+
+
+
+

PlaceSafepoints

+

The pass PlaceSafepoints inserts safepoint polls sufficient to ensure running +code checks for a safepoint request in a timely manner. This pass is expected +to be run before RewriteStatepointsForGC and thus does not produce full +relocation sequences.

+

As an example, given input IR of the following:

+
define void @test() gc "statepoint-example" {
+  call void @foo()
+  ret void
+}
+
+declare void @do_safepoint()
+define void @gc.safepoint_poll() {
+  call void @do_safepoint()
+  ret void
+}
+
+
+

This pass would produce the following IR:

+
define void @test() gc "statepoint-example" {
+  call void @do_safepoint()
+  call void @foo()
+  ret void
+}
+
+
+

In this case, we’ve added an (unconditional) entry safepoint poll. Note that +despite appearances, the entry poll is not necessarily redundant. We’d have to +know that foo and test were not mutually recursive for the poll to be +redundant. In practice, you’d probably want your poll definition to contain +a conditional branch of some form.

+

At the moment, PlaceSafepoints can insert safepoint polls at method entry and +loop backedge locations. Extending this to work with return polls would be +straightforward if desired.

+

PlaceSafepoints includes a number of optimizations to avoid placing safepoint +polls at particular sites unless needed to ensure timely execution of a poll +under normal conditions. PlaceSafepoints does not attempt to ensure timely +execution of a poll under worst case conditions such as heavy system paging.

+

The implementation of a safepoint poll action is specified by looking up a +function of the name gc.safepoint_poll in the containing Module. The body +of this function is inserted at each poll site desired. While calls or invokes +inside this method are transformed to gc.statepoints, recursive poll +insertion is not performed.

+

This pass is useful for any language frontend which only has to support +garbage collection semantics at safepoints. If you need other abstract +frame information at safepoints (e.g. for deoptimization or introspection), +you can insert safepoint polls in the frontend. If you have the latter case, +please ask on llvm-dev for suggestions. There’s been a good amount of work +done on making such a scheme work well in practice which is not yet documented +here.

+
+
+
+

Supported Architectures

+

Support for statepoint generation requires some code for each backend. +Today, only X86_64 is supported.

+
+
+

Limitations and Half Baked Ideas

+
+

Mixing References and Raw Pointers

+

Languages which allow unmanaged pointers to garbage collected +objects (e.g. passing a pointer to an object to a C routine) are not yet supported in the abstract +machine model. At the moment, the best idea on how to approach this +involves an intrinsic or opaque function which hides the connection between +the reference value and the raw pointer. The problem is that having a +ptrtoint or inttoptr cast (which is common for such use cases) breaks the +rules used for inferring base pointers for arbitrary references when +lowering out of the abstract model to the explicit physical model. Note +that a frontend which lowers directly to the physical model doesn’t have +any problems here.

+
+
+

Objects on the Stack

+

As noted above, the explicit lowering supports objects allocated on the +stack provided the collector can find a heap map given the stack address.

+

The missing pieces are a) integration with rewriting (RS4GC) from the +abstract machine model and b) support for optionally decomposing on stack +objects so as not to require heap maps for them. The latter is required +for ease of integration with some collectors.

+
+
+

Lowering Quality and Representation Overhead

+

The current statepoint lowering is known to be somewhat poor. In the very +long term, we’d like to integrate statepoints with the register allocator; +in the near term this is unlikely to happen. We’ve found the quality of +lowering to be relatively unimportant as hot-statepoints are almost always +inliner bugs.

+

Concerns have been raised that the statepoint representation results in a +large amount of IR being produced for some examples and that this +contributes to higher than expected memory usage and compile times. There are +no immediate plans to make changes due to this, but alternate models may be +explored in the future.

+
+
+

Relocations Along Exceptional Edges

+

Relocations along exceptional paths are currently broken in ToT. In +particular, there is currently no way to represent a rethrow on a path which +also has relocations. See this llvm-dev discussion for more +detail.

+
+
+

Support for alternate stackmap formats

+

For some use cases, it is +desirable to directly encode a final memory efficient stackmap format for +use by the runtime. This is particularly relevant for ahead of time +compilers which wish to directly link object files without the need for +post processing of each individual object file. While not implemented +today for statepoints, there is precedent for a GCStrategy to be able to +select a custom GCMetadataPrinter for this purpose. Patches to enable +this functionality upstream are welcome.

+
+
+
+

Bugs and Enhancements

+

Currently known bugs and enhancements under consideration can be +tracked by performing a bugzilla search +for [Statepoint] in the summary field. When filing new bugs, please +use this tag so that interested parties see the newly filed bug. As +with most LLVM features, design discussions take place on llvm-dev, and patches +should be sent to llvm-commits for review.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/basic.css llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/basic.css --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/basic.css 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/basic.css 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,861 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 
80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li div.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + 
+table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 450px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a.brackets:before, +span.brackets > a:before{ + content: "["; +} + +a.brackets:after, +span.brackets > a:after { + content: "]"; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 
40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +table.footnote td, table.footnote th { + border: 0 !important; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: 
none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +dl.footnote > dt, +dl.citation > dt { + float: left; + margin-right: 0.5em; +} + +dl.footnote > dd, +dl.citation > dd { + 
margin-bottom: 0em; +} + +dl.footnote > dd:after, +dl.citation > dd:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dt:after { + content: ":"; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0.5em; + content: ":"; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: 
none; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.doctest > div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.descname { + background-color: transparent; + font-weight: bold; + font-size: 1.2em; +} + +code.descclassname { + background-color: transparent; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- 
printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file Binary files /tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/contents.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/contents.png differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/doctools.js llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/doctools.js --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/doctools.js 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/doctools.js 2021-04-10 03:24:58.000000000 +0000 @@ -0,0 +1,321 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for all documentation. + * + * :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ *
+ */
+
+/**
+ * select a different prefix for underscore
+ */
+$u = _.noConflict();
+
+/**
+ * make the code below compatible with browsers without
+ * an installed firebug like debugger
+if (!window.console || !console.firebug) {
+  var names = ["log", "debug", "info", "warn", "error", "assert", "dir",
+    "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace",
+    "profile", "profileEnd"];
+  window.console = {};
+  for (var i = 0; i < names.length; ++i)
+    window.console[names[i]] = function() {};
+}
+ */
+
+/**
+ * small helper function to urldecode strings
+ *
+ * Falsy input (undefined, null, '') is returned unchanged; otherwise '+'
+ * is turned into a space before decodeURIComponent is applied.
+ *
+ * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL
+ */
+jQuery.urldecode = function(x) {
+  if (!x) {
+    return x
+  }
+  return decodeURIComponent(x.replace(/\+/g, ' '));
+};
+
+/**
+ * small helper function to urlencode strings
+ */
+jQuery.urlencode = encodeURIComponent;
+
+/**
+ * This function returns the parsed url parameters of the
+ * current request. Multiple values per key are supported,
+ * it will always return arrays of strings for the value parts.
+ *
+ * s: optional query string; defaults to document.location.search.
+ *    Everything up to and including the first '?' is ignored.
+ * returns: object mapping each decoded key to an array of decoded values
+ *    (a key without '=' yields an undefined value entry).
+ */
+jQuery.getQueryParameters = function(s) {
+  if (typeof s === 'undefined')
+    s = document.location.search;
+  var parts = s.substr(s.indexOf('?') + 1).split('&');
+  var result = {};
+  for (var i = 0; i < parts.length; i++) {
+    var tmp = parts[i].split('=', 2);
+    var key = jQuery.urldecode(tmp[0]);
+    var value = jQuery.urldecode(tmp[1]);
+    // Only keys we stored ourselves count as "already seen". A plain
+    // `key in result` also matches inherited names such as "constructor"
+    // or "toString" and would then call .push on a function and throw.
+    if (Object.prototype.hasOwnProperty.call(result, key))
+      result[key].push(value);
+    else
+      result[key] = [value];
+  }
+  return result;
+};
+
+/**
+ * highlight a given string on a jquery object by wrapping it in
+ * span elements with the given class name.
+ */
+jQuery.fn.highlightText = function(text, className) {
+  // Recursive worker: visits `node` and its descendants, wrapping the first
+  // match of `text` in each text node. SVG background rects cannot be
+  // inserted while walking, so they are queued in `addItems`.
+  function highlight(node, addItems) {
+    // nodeType 3 is a DOM text node.
+    if (node.nodeType === 3) {
+      var val = node.nodeValue;
+      // The node value is lower-cased but `text` is used as given, so the
+      // match is only case-insensitive when `text` is already lower case.
+      var pos = val.toLowerCase().indexOf(text);
+      // Skip text whose parent is already highlighted or opted out.
+      if (pos >= 0 &&
+          !jQuery(node.parentNode).hasClass(className) &&
+          !jQuery(node.parentNode).hasClass("nohighlight")) {
+        var span;
+        // True when the nearest of body/svg/foreignObject ancestors is <svg>.
+        var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg");
+        if (isInSVG) {
+          // Inside SVG the match is wrapped in a <tspan>; the class goes on
+          // the <rect> created below instead.
+          span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
+        } else {
+          span = document.createElement("span");
+          span.className = className;
+        }
+        // Split the text node in three: [before][span: match][after].
+        span.appendChild(document.createTextNode(val.substr(pos, text.length)));
+        // The inner insertBefore adds the "after" text node and returns it,
+        // so the span is then inserted directly in front of it.
+        node.parentNode.insertBefore(span, node.parentNode.insertBefore(
+          document.createTextNode(val.substr(pos + text.length)),
+          node.nextSibling));
+        // The original node keeps only the text before the match.
+        node.nodeValue = val.substr(0, pos);
+        if (isInSVG) {
+          // Build a <rect> with the parent's bounding box and carrying
+          // `className`; it is inserted later, before the parent.
+          var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
+          var bbox = node.parentElement.getBBox();
+          rect.x.baseVal.value = bbox.x;
+          rect.y.baseVal.value = bbox.y;
+          rect.width.baseVal.value = bbox.width;
+          rect.height.baseVal.value = bbox.height;
+          rect.setAttribute('class', className);
+          addItems.push({
+              "parent": node.parentNode,
+              "target": rect});
+        }
+      }
+    }
+    // Do not descend into form controls.
+    else if (!jQuery(node).is("button, select, textarea")) {
+      jQuery.each(node.childNodes, function() {
+        highlight(this, addItems);
+      });
+    }
+  }
+  var addItems = [];
+  var result = this.each(function() {
+    highlight(this, addItems);
+  });
+  // Insert the queued SVG rects now that the traversal has finished.
+  for (var i = 0; i < addItems.length; ++i) {
+    jQuery(addItems[i].parent).before(addItems[i].target);
+  }
+  // `result` is whatever this.each() returned, so calls can be chained.
+  return result;
+};
+
+/*
+ * backward compatibility for jQuery.browser
+ * This will be supported until firefox bug is fixed.
+ */
+if (!jQuery.browser) {
+  // Parse a user-agent string into { browser, version }. The first matching
+  // pattern wins; no match yields browser "" and version "0".
+  jQuery.uaMatch = function(ua) {
+    ua = ua.toLowerCase();
+
+    var match = /(chrome)[ \/]([\w.]+)/.exec(ua) ||
+      /(webkit)[ \/]([\w.]+)/.exec(ua) ||
+      /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) ||
+      /(msie) ([\w.]+)/.exec(ua) ||
+      ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) ||
+      [];
+
+    return {
+      browser: match[ 1 ] || "",
+      version: match[ 2 ] || "0"
+    };
+  };
+  jQuery.browser = {};
+  jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true;
+}
+
+/**
+ * Small JavaScript module for the documentation.
+ */
+var Documentation = {
+
+  /**
+   * entry point, called once the document is ready
+   */
+  init : function() {
+    this.fixFirefoxAnchorBug();
+    this.highlightSearchWords();
+    this.initIndexTable();
+    if (DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) {
+      this.initOnKeyListeners();
+    }
+  },
+
+  /**
+   * i18n support
+   */
+  TRANSLATIONS : {},
+  PLURAL_EXPR : function(n) { return n === 1 ? 0 : 1; },
+  LOCALE : 'unknown',
+
+  // gettext and ngettext don't access this so that the functions
+  // can safely bound to a different name (_ = Documentation.gettext)
+  gettext : function(string) {
+    var translated = Documentation.TRANSLATIONS[string];
+    if (typeof translated === 'undefined')
+      return string;
+    return (typeof translated === 'string') ? translated : translated[0];
+  },
+
+  /**
+   * plural-aware lookup: returns the catalog form selected by PLURAL_EXPR(n),
+   * or the untranslated singular/plural when the catalog has no entry
+   */
+  ngettext : function(singular, plural, n) {
+    var translated = Documentation.TRANSLATIONS[singular];
+    if (typeof translated === 'undefined')
+      return (n == 1) ? singular : plural;
+    // The property is PLURAL_EXPR; the former PLURALEXPR spelling does not
+    // exist on this object and threw for every translated plural.
+    return translated[Documentation.PLURAL_EXPR(n)];
+  },
+
+  /**
+   * merge a translation catalog (messages, plural_expr, locale)
+   */
+  addTranslations : function(catalog) {
+    for (var key in catalog.messages)
+      this.TRANSLATIONS[key] = catalog.messages[key];
+    this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')');
+    this.LOCALE = catalog.locale;
+  },
+
+  /**
+   * add context elements like header anchor links
+   */
+  addContextElements : function() {
+    // The anchor markup had been stripped down to the bare pilcrow, which
+    // jQuery treats as a selector instead of creating an element.
+    $('div[id] > :header:first').each(function() {
+      $('<a class="headerlink">\u00B6</a>').
+      attr('href', '#' + this.id).
+      attr('title', _('Permalink to this headline')).
+      appendTo(this);
+    });
+    $('dt[id]').each(function() {
+      $('<a class="headerlink">\u00B6</a>').
+      attr('href', '#' + this.id).
+      attr('title', _('Permalink to this definition')).
+      appendTo(this);
+    });
+  },
+
+  /**
+   * workaround a firefox stupidity
+   * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075
+   */
+  fixFirefoxAnchorBug : function() {
+    if (document.location.hash && $.browser.mozilla)
+      window.setTimeout(function() {
+        document.location.href += '';
+      }, 10);
+  },
+
+  /**
+   * highlight the search words provided in the url in the text
+   */
+  highlightSearchWords : function() {
+    var params = $.getQueryParameters();
+    var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : [];
+    if (terms.length) {
+      var body = $('div.body');
+      if (!body.length) {
+        body = $('body');
+      }
+      window.setTimeout(function() {
+        $.each(terms, function() {
+          body.highlightText(this.toLowerCase(), 'highlighted');
+        });
+      }, 10);
+      // Link that lets the reader remove the highlighting again; it is the
+      // '.highlight-link' element that hideSearchWords() fades out.
+      $('<p class="highlight-link"><a href="javascript:Documentation.' +
+        'hideSearchWords()">' + _('Hide Search Matches') + '</a></p>')
+          .appendTo($('#searchbox'));
+    }
+  },
+
+  /**
+   * init the domain index toggle buttons
+   */
+  initIndexTable : function() {
+    var togglers = $('img.toggler').click(function() {
+      var src = $(this).attr('src');
+      var idnum = $(this).attr('id').substr(7);
+      $('tr.cg-' + idnum).toggle();
+      if (src.substr(-9) === 'minus.png')
+        $(this).attr('src', src.substr(0, src.length-9) + 'plus.png');
+      else
+        $(this).attr('src', src.substr(0, src.length-8) + 'minus.png');
+    }).css('display', '');
+    if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) {
+        togglers.click();
+    }
+  },
+
+  /**
+   * helper function to hide the search marks again
+   */
+  hideSearchWords : function() {
+    $('#searchbox .highlight-link').fadeOut(300);
+    $('span.highlighted').removeClass('highlighted');
+  },
+
+  /**
+   * make the url absolute
+   */
+  makeURL : function(relativeURL) {
+    return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL;
+  },
+
+  /**
+   * get the current relative url
+   */
+  getCurrentURL : function() {
+    var path = document.location.pathname;
+    var parts = path.split(/\//);
+    // NOTE(review): inside a non-strict $.each callback `this` is presumably
+    // a boxed String, in which case `this === '..'` never holds and nothing
+    // is popped -- confirm before relying on this function.
+    $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() {
+      if (this === '..')
+        parts.pop();
+    });
+    var url = parts.join('/');
+    return path.substring(url.lastIndexOf('/') + 1, path.length - 1);
+  },
+
+  /**
+   * left/right arrow keys follow the page's rel="prev"/rel="next" links
+   */
+  initOnKeyListeners: function() {
+    $(document).keydown(function(event) {
+      var activeElementType = document.activeElement.tagName;
+      // don't navigate when in search box, textarea, dropdown or button
+      if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT'
+          && activeElementType !== 'BUTTON' && !event.altKey && !event.ctrlKey && !event.metaKey
+          && !event.shiftKey) {
+        switch (event.keyCode) {
+          case 37: // left
+            var prevHref = $('link[rel="prev"]').prop('href');
+            if (prevHref) {
+              window.location.href = prevHref;
+              return false;
+            }
+            // Without a previous page do nothing; falling through here used
+            // to send the left arrow to the *next* page.
+            break;
+          case 39: // right
+            var nextHref = $('link[rel="next"]').prop('href');
+            if (nextHref) {
+              window.location.href = nextHref;
+              return false;
+            }
+            break;
+        }
+      }
+    });
+  }
+};
+
+// quick alias for translations
+_ = Documentation.gettext;
+
+$(document).ready(function() {
+  Documentation.init();
+});
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/documentation_options.js llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/documentation_options.js
--- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/documentation_options.js 1970-01-01 00:00:00.000000000 +0000
+++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/documentation_options.js 2021-09-19 16:16:42.000000000 +0000
@@ -0,0 +1,12 @@
+var DOCUMENTATION_OPTIONS = {
+    URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
+    VERSION: '13',
+    LANGUAGE: 'None',
+    COLLAPSE_INDEX: false,
+    BUILDER: 'html',
+    FILE_SUFFIX: '.html',
+    LINK_SUFFIX: '.html',
+    HAS_SOURCE: true,
+    SOURCELINK_SUFFIX: '.txt',
+    NAVIGATION_WITH_KEYS: false
+};
\ No newline at end of file
Binary files 
/tmp/tmpl4dzhlf2/pn6Btg4EJV/llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/file.png and /tmp/tmpl4dzhlf2/mg0ciunPgC/llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/file.png differ diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/jquery.js llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/jquery.js --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/_static/jquery.js 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/_static/jquery.js 2021-01-12 20:55:26.000000000 +0000 @@ -0,0 +1,10870 @@ +/*! + * jQuery JavaScript Library v3.5.1 + * https://jquery.com/ + * + * Includes Sizzle.js + * https://sizzlejs.com/ + * + * Copyright JS Foundation and other contributors + * Released under the MIT license + * https://jquery.org/license + */ +( function( global, factory ) { + + "use strict"; + + if ( typeof module === "object" && typeof module.exports === "object" ) { + + // For CommonJS and CommonJS-like environments where a proper `window` + // is present, execute the factory and get jQuery. + // For environments that do not have a `window` with a `document` + // (such as Node.js), expose a factory as module.exports. + // This accentuates the need for the creation of a real `window`. + // e.g. var jQuery = require("jquery")(window); + // See ticket #14549 for more info. + module.exports = global.document ? + factory( global, true ) : + function( w ) { + if ( !w.document ) { + throw new Error( "jQuery requires a window with a document" ); + } + return factory( w ); + }; + } else { + factory( global ); + } + +// Pass this if window is not defined yet +} )( typeof window !== "undefined" ? window : this, function( window, noGlobal ) { + +// Edge <= 12 - 13+, Firefox <=18 - 45+, IE 10 - 11, Safari 5.1 - 9+, iOS 6 - 9.1 +// throw exceptions when non-strict code (e.g., ASP.NET 4.5) accesses strict mode +// arguments.callee.caller (trac-13335). 
But as of jQuery 3.0 (2016), strict mode should be common +// enough that all such attempts are guarded in a try block. +"use strict"; + +var arr = []; + +var getProto = Object.getPrototypeOf; + +var slice = arr.slice; + +var flat = arr.flat ? function( array ) { + return arr.flat.call( array ); +} : function( array ) { + return arr.concat.apply( [], array ); +}; + + +var push = arr.push; + +var indexOf = arr.indexOf; + +var class2type = {}; + +var toString = class2type.toString; + +var hasOwn = class2type.hasOwnProperty; + +var fnToString = hasOwn.toString; + +var ObjectFunctionString = fnToString.call( Object ); + +var support = {}; + +var isFunction = function isFunction( obj ) { + + // Support: Chrome <=57, Firefox <=52 + // In some browsers, typeof returns "function" for HTML elements + // (i.e., `typeof document.createElement( "object" ) === "function"`). + // We don't want to classify *any* DOM node as a function. + return typeof obj === "function" && typeof obj.nodeType !== "number"; + }; + + +var isWindow = function isWindow( obj ) { + return obj != null && obj === obj.window; + }; + + +var document = window.document; + + + + var preservedScriptAttributes = { + type: true, + src: true, + nonce: true, + noModule: true + }; + + function DOMEval( code, node, doc ) { + doc = doc || document; + + var i, val, + script = doc.createElement( "script" ); + + script.text = code; + if ( node ) { + for ( i in preservedScriptAttributes ) { + + // Support: Firefox 64+, Edge 18+ + // Some browsers don't support the "nonce" property on scripts. + // On the other hand, just using `getAttribute` is not enough as + // the `nonce` attribute is reset to an empty string whenever it + // becomes browsing-context connected. + // See https://github.com/whatwg/html/issues/2369 + // See https://html.spec.whatwg.org/#nonce-attributes + // The `node.getAttribute` check was added for the sake of + // `jQuery.globalEval` so that it can fake a nonce-containing node + // via an object. 
+ val = node[ i ] || node.getAttribute && node.getAttribute( i ); + if ( val ) { + script.setAttribute( i, val ); + } + } + } + doc.head.appendChild( script ).parentNode.removeChild( script ); + } + + +function toType( obj ) { + if ( obj == null ) { + return obj + ""; + } + + // Support: Android <=2.3 only (functionish RegExp) + return typeof obj === "object" || typeof obj === "function" ? + class2type[ toString.call( obj ) ] || "object" : + typeof obj; +} +/* global Symbol */ +// Defining this global in .eslintrc.json would create a danger of using the global +// unguarded in another place, it seems safer to define global only for this module + + + +var + version = "3.5.1", + + // Define a local copy of jQuery + jQuery = function( selector, context ) { + + // The jQuery object is actually just the init constructor 'enhanced' + // Need init if jQuery is called (just allow error to be thrown if not included) + return new jQuery.fn.init( selector, context ); + }; + +jQuery.fn = jQuery.prototype = { + + // The current version of jQuery being used + jquery: version, + + constructor: jQuery, + + // The default length of a jQuery object is 0 + length: 0, + + toArray: function() { + return slice.call( this ); + }, + + // Get the Nth element in the matched element set OR + // Get the whole matched element set as a clean array + get: function( num ) { + + // Return all the elements in a clean array + if ( num == null ) { + return slice.call( this ); + } + + // Return just the one element from the set + return num < 0 ? 
this[ num + this.length ] : this[ num ]; + }, + + // Take an array of elements and push it onto the stack + // (returning the new matched element set) + pushStack: function( elems ) { + + // Build a new jQuery matched element set + var ret = jQuery.merge( this.constructor(), elems ); + + // Add the old object onto the stack (as a reference) + ret.prevObject = this; + + // Return the newly-formed element set + return ret; + }, + + // Execute a callback for every element in the matched set. + each: function( callback ) { + return jQuery.each( this, callback ); + }, + + map: function( callback ) { + return this.pushStack( jQuery.map( this, function( elem, i ) { + return callback.call( elem, i, elem ); + } ) ); + }, + + slice: function() { + return this.pushStack( slice.apply( this, arguments ) ); + }, + + first: function() { + return this.eq( 0 ); + }, + + last: function() { + return this.eq( -1 ); + }, + + even: function() { + return this.pushStack( jQuery.grep( this, function( _elem, i ) { + return ( i + 1 ) % 2; + } ) ); + }, + + odd: function() { + return this.pushStack( jQuery.grep( this, function( _elem, i ) { + return i % 2; + } ) ); + }, + + eq: function( i ) { + var len = this.length, + j = +i + ( i < 0 ? len : 0 ); + return this.pushStack( j >= 0 && j < len ? [ this[ j ] ] : [] ); + }, + + end: function() { + return this.prevObject || this.constructor(); + }, + + // For internal use only. + // Behaves like an Array's method, not like a jQuery method. 
+ push: push, + sort: arr.sort, + splice: arr.splice +}; + +jQuery.extend = jQuery.fn.extend = function() { + var options, name, src, copy, copyIsArray, clone, + target = arguments[ 0 ] || {}, + i = 1, + length = arguments.length, + deep = false; + + // Handle a deep copy situation + if ( typeof target === "boolean" ) { + deep = target; + + // Skip the boolean and the target + target = arguments[ i ] || {}; + i++; + } + + // Handle case when target is a string or something (possible in deep copy) + if ( typeof target !== "object" && !isFunction( target ) ) { + target = {}; + } + + // Extend jQuery itself if only one argument is passed + if ( i === length ) { + target = this; + i--; + } + + for ( ; i < length; i++ ) { + + // Only deal with non-null/undefined values + if ( ( options = arguments[ i ] ) != null ) { + + // Extend the base object + for ( name in options ) { + copy = options[ name ]; + + // Prevent Object.prototype pollution + // Prevent never-ending loop + if ( name === "__proto__" || target === copy ) { + continue; + } + + // Recurse if we're merging plain objects or arrays + if ( deep && copy && ( jQuery.isPlainObject( copy ) || + ( copyIsArray = Array.isArray( copy ) ) ) ) { + src = target[ name ]; + + // Ensure proper type for the source value + if ( copyIsArray && !Array.isArray( src ) ) { + clone = []; + } else if ( !copyIsArray && !jQuery.isPlainObject( src ) ) { + clone = {}; + } else { + clone = src; + } + copyIsArray = false; + + // Never move original objects, clone them + target[ name ] = jQuery.extend( deep, clone, copy ); + + // Don't bring in undefined values + } else if ( copy !== undefined ) { + target[ name ] = copy; + } + } + } + } + + // Return the modified object + return target; +}; + +jQuery.extend( { + + // Unique for each copy of jQuery on the page + expando: "jQuery" + ( version + Math.random() ).replace( /\D/g, "" ), + + // Assume jQuery is ready without the ready module + isReady: true, + + error: function( msg ) { + throw new 
Error( msg ); + }, + + noop: function() {}, + + isPlainObject: function( obj ) { + var proto, Ctor; + + // Detect obvious negatives + // Use toString instead of jQuery.type to catch host objects + if ( !obj || toString.call( obj ) !== "[object Object]" ) { + return false; + } + + proto = getProto( obj ); + + // Objects with no prototype (e.g., `Object.create( null )`) are plain + if ( !proto ) { + return true; + } + + // Objects with prototype are plain iff they were constructed by a global Object function + Ctor = hasOwn.call( proto, "constructor" ) && proto.constructor; + return typeof Ctor === "function" && fnToString.call( Ctor ) === ObjectFunctionString; + }, + + isEmptyObject: function( obj ) { + var name; + + for ( name in obj ) { + return false; + } + return true; + }, + + // Evaluates a script in a provided context; falls back to the global one + // if not specified. + globalEval: function( code, options, doc ) { + DOMEval( code, { nonce: options && options.nonce }, doc ); + }, + + each: function( obj, callback ) { + var length, i = 0; + + if ( isArrayLike( obj ) ) { + length = obj.length; + for ( ; i < length; i++ ) { + if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { + break; + } + } + } else { + for ( i in obj ) { + if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { + break; + } + } + } + + return obj; + }, + + // results is for internal usage only + makeArray: function( arr, results ) { + var ret = results || []; + + if ( arr != null ) { + if ( isArrayLike( Object( arr ) ) ) { + jQuery.merge( ret, + typeof arr === "string" ? + [ arr ] : arr + ); + } else { + push.call( ret, arr ); + } + } + + return ret; + }, + + inArray: function( elem, arr, i ) { + return arr == null ? 
-1 : indexOf.call( arr, elem, i ); + }, + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + merge: function( first, second ) { + var len = +second.length, + j = 0, + i = first.length; + + for ( ; j < len; j++ ) { + first[ i++ ] = second[ j ]; + } + + first.length = i; + + return first; + }, + + grep: function( elems, callback, invert ) { + var callbackInverse, + matches = [], + i = 0, + length = elems.length, + callbackExpect = !invert; + + // Go through the array, only saving the items + // that pass the validator function + for ( ; i < length; i++ ) { + callbackInverse = !callback( elems[ i ], i ); + if ( callbackInverse !== callbackExpect ) { + matches.push( elems[ i ] ); + } + } + + return matches; + }, + + // arg is for internal usage only + map: function( elems, callback, arg ) { + var length, value, + i = 0, + ret = []; + + // Go through the array, translating each of the items to their new values + if ( isArrayLike( elems ) ) { + length = elems.length; + for ( ; i < length; i++ ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret.push( value ); + } + } + + // Go through every key on the object, + } else { + for ( i in elems ) { + value = callback( elems[ i ], i, arg ); + + if ( value != null ) { + ret.push( value ); + } + } + } + + // Flatten any nested arrays + return flat( ret ); + }, + + // A global GUID counter for objects + guid: 1, + + // jQuery.support is not used in Core but other projects attach their + // properties to it so it needs to exist. 
+ support: support +} ); + +if ( typeof Symbol === "function" ) { + jQuery.fn[ Symbol.iterator ] = arr[ Symbol.iterator ]; +} + +// Populate the class2type map +jQuery.each( "Boolean Number String Function Array Date RegExp Object Error Symbol".split( " " ), +function( _i, name ) { + class2type[ "[object " + name + "]" ] = name.toLowerCase(); +} ); + +function isArrayLike( obj ) { + + // Support: real iOS 8.2 only (not reproducible in simulator) + // `in` check used to prevent JIT error (gh-2145) + // hasOwn isn't used here due to false negatives + // regarding Nodelist length in IE + var length = !!obj && "length" in obj && obj.length, + type = toType( obj ); + + if ( isFunction( obj ) || isWindow( obj ) ) { + return false; + } + + return type === "array" || length === 0 || + typeof length === "number" && length > 0 && ( length - 1 ) in obj; +} +var Sizzle = +/*! + * Sizzle CSS Selector Engine v2.3.5 + * https://sizzlejs.com/ + * + * Copyright JS Foundation and other contributors + * Released under the MIT license + * https://js.foundation/ + * + * Date: 2020-03-14 + */ +( function( window ) { +var i, + support, + Expr, + getText, + isXML, + tokenize, + compile, + select, + outermostContext, + sortInput, + hasDuplicate, + + // Local document vars + setDocument, + document, + docElem, + documentIsHTML, + rbuggyQSA, + rbuggyMatches, + matches, + contains, + + // Instance-specific data + expando = "sizzle" + 1 * new Date(), + preferredDoc = window.document, + dirruns = 0, + done = 0, + classCache = createCache(), + tokenCache = createCache(), + compilerCache = createCache(), + nonnativeSelectorCache = createCache(), + sortOrder = function( a, b ) { + if ( a === b ) { + hasDuplicate = true; + } + return 0; + }, + + // Instance methods + hasOwn = ( {} ).hasOwnProperty, + arr = [], + pop = arr.pop, + pushNative = arr.push, + push = arr.push, + slice = arr.slice, + + // Use a stripped-down indexOf as it's faster than native + // https://jsperf.com/thor-indexof-vs-for/5 + 
indexOf = function( list, elem ) { + var i = 0, + len = list.length; + for ( ; i < len; i++ ) { + if ( list[ i ] === elem ) { + return i; + } + } + return -1; + }, + + booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|" + + "ismap|loop|multiple|open|readonly|required|scoped", + + // Regular expressions + + // http://www.w3.org/TR/css3-selectors/#whitespace + whitespace = "[\\x20\\t\\r\\n\\f]", + + // https://www.w3.org/TR/css-syntax-3/#ident-token-diagram + identifier = "(?:\\\\[\\da-fA-F]{1,6}" + whitespace + + "?|\\\\[^\\r\\n\\f]|[\\w-]|[^\0-\\x7f])+", + + // Attribute selectors: http://www.w3.org/TR/selectors/#attribute-selectors + attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace + + + // Operator (capture 2) + "*([*^$|!~]?=)" + whitespace + + + // "Attribute values must be CSS identifiers [capture 5] + // or strings [capture 3 or capture 4]" + "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" + identifier + "))|)" + + whitespace + "*\\]", + + pseudos = ":(" + identifier + ")(?:\\((" + + + // To reduce the number of selectors needing tokenize in the preFilter, prefer arguments: + // 1. quoted (capture 3; capture 4 or capture 5) + "('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" + + + // 2. simple (capture 6) + "((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" + + + // 3. 
anything else (capture 2) + ".*" + + ")\\)|)", + + // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter + rwhitespace = new RegExp( whitespace + "+", "g" ), + rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + + whitespace + "+$", "g" ), + + rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), + rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + + "*" ), + rdescend = new RegExp( whitespace + "|>" ), + + rpseudo = new RegExp( pseudos ), + ridentifier = new RegExp( "^" + identifier + "$" ), + + matchExpr = { + "ID": new RegExp( "^#(" + identifier + ")" ), + "CLASS": new RegExp( "^\\.(" + identifier + ")" ), + "TAG": new RegExp( "^(" + identifier + "|[*])" ), + "ATTR": new RegExp( "^" + attributes ), + "PSEUDO": new RegExp( "^" + pseudos ), + "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" + + whitespace + "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + + whitespace + "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), + "bool": new RegExp( "^(?:" + booleans + ")$", "i" ), + + // For use in libraries implementing .is() + // We use this for POS matching in `select` + "needsContext": new RegExp( "^" + whitespace + + "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + whitespace + + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" ) + }, + + rhtml = /HTML$/i, + rinputs = /^(?:input|select|textarea|button)$/i, + rheader = /^h\d$/i, + + rnative = /^[^{]+\{\s*\[native \w/, + + // Easily-parseable/retrievable ID or TAG or CLASS selectors + rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/, + + rsibling = /[+~]/, + + // CSS escapes + // http://www.w3.org/TR/CSS21/syndata.html#escaped-characters + runescape = new RegExp( "\\\\[\\da-fA-F]{1,6}" + whitespace + "?|\\\\([^\\r\\n\\f])", "g" ), + funescape = function( escape, nonHex ) { + var high = "0x" + escape.slice( 1 ) - 0x10000; + + return nonHex ? 
+ + // Strip the backslash prefix from a non-hex escape sequence + nonHex : + + // Replace a hexadecimal escape sequence with the encoded Unicode code point + // Support: IE <=11+ + // For values outside the Basic Multilingual Plane (BMP), manually construct a + // surrogate pair + high < 0 ? + String.fromCharCode( high + 0x10000 ) : + String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 ); + }, + + // CSS string/identifier serialization + // https://drafts.csswg.org/cssom/#common-serializing-idioms + rcssescape = /([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g, + fcssescape = function( ch, asCodePoint ) { + if ( asCodePoint ) { + + // U+0000 NULL becomes U+FFFD REPLACEMENT CHARACTER + if ( ch === "\0" ) { + return "\uFFFD"; + } + + // Control characters and (dependent upon position) numbers get escaped as code points + return ch.slice( 0, -1 ) + "\\" + + ch.charCodeAt( ch.length - 1 ).toString( 16 ) + " "; + } + + // Other potentially-special ASCII characters get backslash-escaped + return "\\" + ch; + }, + + // Used for iframes + // See setDocument() + // Removing the function wrapper causes a "Permission Denied" + // error in IE + unloadHandler = function() { + setDocument(); + }, + + inDisabledFieldset = addCombinator( + function( elem ) { + return elem.disabled === true && elem.nodeName.toLowerCase() === "fieldset"; + }, + { dir: "parentNode", next: "legend" } + ); + +// Optimize for push.apply( _, NodeList ) +try { + push.apply( + ( arr = slice.call( preferredDoc.childNodes ) ), + preferredDoc.childNodes + ); + + // Support: Android<4.0 + // Detect silently failing push.apply + // eslint-disable-next-line no-unused-expressions + arr[ preferredDoc.childNodes.length ].nodeType; +} catch ( e ) { + push = { apply: arr.length ? 
+ + // Leverage slice if possible + function( target, els ) { + pushNative.apply( target, slice.call( els ) ); + } : + + // Support: IE<9 + // Otherwise append directly + function( target, els ) { + var j = target.length, + i = 0; + + // Can't trust NodeList.length + while ( ( target[ j++ ] = els[ i++ ] ) ) {} + target.length = j - 1; + } + }; +} + +function Sizzle( selector, context, results, seed ) { + var m, i, elem, nid, match, groups, newSelector, + newContext = context && context.ownerDocument, + + // nodeType defaults to 9, since context defaults to document + nodeType = context ? context.nodeType : 9; + + results = results || []; + + // Return early from calls with invalid selector or context + if ( typeof selector !== "string" || !selector || + nodeType !== 1 && nodeType !== 9 && nodeType !== 11 ) { + + return results; + } + + // Try to shortcut find operations (as opposed to filters) in HTML documents + if ( !seed ) { + setDocument( context ); + context = context || document; + + if ( documentIsHTML ) { + + // If the selector is sufficiently simple, try using a "get*By*" DOM method + // (excepting DocumentFragment context, where the methods don't exist) + if ( nodeType !== 11 && ( match = rquickExpr.exec( selector ) ) ) { + + // ID selector + if ( ( m = match[ 1 ] ) ) { + + // Document context + if ( nodeType === 9 ) { + if ( ( elem = context.getElementById( m ) ) ) { + + // Support: IE, Opera, Webkit + // TODO: identify versions + // getElementById can match elements by name instead of ID + if ( elem.id === m ) { + results.push( elem ); + return results; + } + } else { + return results; + } + + // Element context + } else { + + // Support: IE, Opera, Webkit + // TODO: identify versions + // getElementById can match elements by name instead of ID + if ( newContext && ( elem = newContext.getElementById( m ) ) && + contains( context, elem ) && + elem.id === m ) { + + results.push( elem ); + return results; + } + } + + // Type selector + } else if ( match[ 2 
] ) { + push.apply( results, context.getElementsByTagName( selector ) ); + return results; + + // Class selector + } else if ( ( m = match[ 3 ] ) && support.getElementsByClassName && + context.getElementsByClassName ) { + + push.apply( results, context.getElementsByClassName( m ) ); + return results; + } + } + + // Take advantage of querySelectorAll + if ( support.qsa && + !nonnativeSelectorCache[ selector + " " ] && + ( !rbuggyQSA || !rbuggyQSA.test( selector ) ) && + + // Support: IE 8 only + // Exclude object elements + ( nodeType !== 1 || context.nodeName.toLowerCase() !== "object" ) ) { + + newSelector = selector; + newContext = context; + + // qSA considers elements outside a scoping root when evaluating child or + // descendant combinators, which is not what we want. + // In such cases, we work around the behavior by prefixing every selector in the + // list with an ID selector referencing the scope context. + // The technique has to be used as well when a leading combinator is used + // as such selectors are not recognized by querySelectorAll. + // Thanks to Andrew Dupont for this technique. + if ( nodeType === 1 && + ( rdescend.test( selector ) || rcombinators.test( selector ) ) ) { + + // Expand context for sibling selectors + newContext = rsibling.test( selector ) && testContext( context.parentNode ) || + context; + + // We can use :scope instead of the ID hack if the browser + // supports it & if we're not changing the context. + if ( newContext !== context || !support.scope ) { + + // Capture the context ID, setting it first if necessary + if ( ( nid = context.getAttribute( "id" ) ) ) { + nid = nid.replace( rcssescape, fcssescape ); + } else { + context.setAttribute( "id", ( nid = expando ) ); + } + } + + // Prefix every selector in the list + groups = tokenize( selector ); + i = groups.length; + while ( i-- ) { + groups[ i ] = ( nid ? 
"#" + nid : ":scope" ) + " " + + toSelector( groups[ i ] ); + } + newSelector = groups.join( "," ); + } + + try { + push.apply( results, + newContext.querySelectorAll( newSelector ) + ); + return results; + } catch ( qsaError ) { + nonnativeSelectorCache( selector, true ); + } finally { + if ( nid === expando ) { + context.removeAttribute( "id" ); + } + } + } + } + } + + // All others + return select( selector.replace( rtrim, "$1" ), context, results, seed ); +} + +/** + * Create key-value caches of limited size + * @returns {function(string, object)} Returns the Object data after storing it on itself with + * property name the (space-suffixed) string and (if the cache is larger than Expr.cacheLength) + * deleting the oldest entry + */ +function createCache() { + var keys = []; + + function cache( key, value ) { + + // Use (key + " ") to avoid collision with native prototype properties (see Issue #157) + if ( keys.push( key + " " ) > Expr.cacheLength ) { + + // Only keep the most recent entries + delete cache[ keys.shift() ]; + } + return ( cache[ key + " " ] = value ); + } + return cache; +} + +/** + * Mark a function for special use by Sizzle + * @param {Function} fn The function to mark + */ +function markFunction( fn ) { + fn[ expando ] = true; + return fn; +} + +/** + * Support testing using an element + * @param {Function} fn Passed the created element and returns a boolean result + */ +function assert( fn ) { + var el = document.createElement( "fieldset" ); + + try { + return !!fn( el ); + } catch ( e ) { + return false; + } finally { + + // Remove from its parent by default + if ( el.parentNode ) { + el.parentNode.removeChild( el ); + } + + // release memory in IE + el = null; + } +} + +/** + * Adds the same handler for all of the specified attrs + * @param {String} attrs Pipe-separated list of attributes + * @param {Function} handler The method that will be applied + */ +function addHandle( attrs, handler ) { + var arr = attrs.split( "|" ), + i = 
arr.length; + + while ( i-- ) { + Expr.attrHandle[ arr[ i ] ] = handler; + } +} + +/** + * Checks document order of two siblings + * @param {Element} a + * @param {Element} b + * @returns {Number} Returns less than 0 if a precedes b, greater than 0 if a follows b + */ +function siblingCheck( a, b ) { + var cur = b && a, + diff = cur && a.nodeType === 1 && b.nodeType === 1 && + a.sourceIndex - b.sourceIndex; + + // Use IE sourceIndex if available on both nodes + if ( diff ) { + return diff; + } + + // Check if b follows a + if ( cur ) { + while ( ( cur = cur.nextSibling ) ) { + if ( cur === b ) { + return -1; + } + } + } + + return a ? 1 : -1; +} + +/** + * Returns a function to use in pseudos for input types + * @param {String} type + */ +function createInputPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && elem.type === type; + }; +} + +/** + * Returns a function to use in pseudos for buttons + * @param {String} type + */ +function createButtonPseudo( type ) { + return function( elem ) { + var name = elem.nodeName.toLowerCase(); + return ( name === "input" || name === "button" ) && elem.type === type; + }; +} + +/** + * Returns a function to use in pseudos for :enabled/:disabled + * @param {Boolean} disabled true for :disabled; false for :enabled + */ +function createDisabledPseudo( disabled ) { + + // Known :disabled false positives: fieldset[disabled] > legend:nth-of-type(n+2) :can-disable + return function( elem ) { + + // Only certain elements can match :enabled or :disabled + // https://html.spec.whatwg.org/multipage/scripting.html#selector-enabled + // https://html.spec.whatwg.org/multipage/scripting.html#selector-disabled + if ( "form" in elem ) { + + // Check for inherited disabledness on relevant non-disabled elements: + // * listed form-associated elements in a disabled fieldset + // https://html.spec.whatwg.org/multipage/forms.html#category-listed + // 
https://html.spec.whatwg.org/multipage/forms.html#concept-fe-disabled + // * option elements in a disabled optgroup + // https://html.spec.whatwg.org/multipage/forms.html#concept-option-disabled + // All such elements have a "form" property. + if ( elem.parentNode && elem.disabled === false ) { + + // Option elements defer to a parent optgroup if present + if ( "label" in elem ) { + if ( "label" in elem.parentNode ) { + return elem.parentNode.disabled === disabled; + } else { + return elem.disabled === disabled; + } + } + + // Support: IE 6 - 11 + // Use the isDisabled shortcut property to check for disabled fieldset ancestors + return elem.isDisabled === disabled || + + // Where there is no isDisabled, check manually + /* jshint -W018 */ + elem.isDisabled !== !disabled && + inDisabledFieldset( elem ) === disabled; + } + + return elem.disabled === disabled; + + // Try to winnow out elements that can't be disabled before trusting the disabled property. + // Some victims get caught in our net (label, legend, menu, track), but it shouldn't + // even exist on them, let alone have a boolean value. 
+ } else if ( "label" in elem ) { + return elem.disabled === disabled; + } + + // Remaining elements are neither :enabled nor :disabled + return false; + }; +} + +/** + * Returns a function to use in pseudos for positionals + * @param {Function} fn + */ +function createPositionalPseudo( fn ) { + return markFunction( function( argument ) { + argument = +argument; + return markFunction( function( seed, matches ) { + var j, + matchIndexes = fn( [], seed.length, argument ), + i = matchIndexes.length; + + // Match elements found at the specified indexes + while ( i-- ) { + if ( seed[ ( j = matchIndexes[ i ] ) ] ) { + seed[ j ] = !( matches[ j ] = seed[ j ] ); + } + } + } ); + } ); +} + +/** + * Checks a node for validity as a Sizzle context + * @param {Element|Object=} context + * @returns {Element|Object|Boolean} The input node if acceptable, otherwise a falsy value + */ +function testContext( context ) { + return context && typeof context.getElementsByTagName !== "undefined" && context; +} + +// Expose support vars for convenience +support = Sizzle.support = {}; + +/** + * Detects XML nodes + * @param {Element|Object} elem An element or a document + * @returns {Boolean} True iff elem is a non-HTML XML node + */ +isXML = Sizzle.isXML = function( elem ) { + var namespace = elem.namespaceURI, + docElem = ( elem.ownerDocument || elem ).documentElement; + + // Support: IE <=8 + // Assume HTML when documentElement doesn't yet exist, such as inside loading iframes + // https://bugs.jquery.com/ticket/4833 + return !rhtml.test( namespace || docElem && docElem.nodeName || "HTML" ); +}; + +/** + * Sets document-related variables once based on the current document + * @param {Element|Object} [doc] An element or document object to use to set the document + * @returns {Object} Returns the current document + */ +setDocument = Sizzle.setDocument = function( node ) { + var hasCompare, subWindow, + doc = node ? 
node.ownerDocument || node : preferredDoc; + + // Return early if doc is invalid or already selected + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( doc == document || doc.nodeType !== 9 || !doc.documentElement ) { + return document; + } + + // Update global variables + document = doc; + docElem = document.documentElement; + documentIsHTML = !isXML( document ); + + // Support: IE 9 - 11+, Edge 12 - 18+ + // Accessing iframe documents after unload throws "permission denied" errors (jQuery #13936) + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( preferredDoc != document && + ( subWindow = document.defaultView ) && subWindow.top !== subWindow ) { + + // Support: IE 11, Edge + if ( subWindow.addEventListener ) { + subWindow.addEventListener( "unload", unloadHandler, false ); + + // Support: IE 9 - 10 only + } else if ( subWindow.attachEvent ) { + subWindow.attachEvent( "onunload", unloadHandler ); + } + } + + // Support: IE 8 - 11+, Edge 12 - 18+, Chrome <=16 - 25 only, Firefox <=3.6 - 31 only, + // Safari 4 - 5 only, Opera <=11.6 - 12.x only + // IE/Edge & older browsers don't support the :scope pseudo-class. + // Support: Safari 6.0 only + // Safari 6.0 supports :scope but it's an alias of :root there. 
+ support.scope = assert( function( el ) { + docElem.appendChild( el ).appendChild( document.createElement( "div" ) ); + return typeof el.querySelectorAll !== "undefined" && + !el.querySelectorAll( ":scope fieldset div" ).length; + } ); + + /* Attributes + ---------------------------------------------------------------------- */ + + // Support: IE<8 + // Verify that getAttribute really returns attributes and not properties + // (excepting IE8 booleans) + support.attributes = assert( function( el ) { + el.className = "i"; + return !el.getAttribute( "className" ); + } ); + + /* getElement(s)By* + ---------------------------------------------------------------------- */ + + // Check if getElementsByTagName("*") returns only elements + support.getElementsByTagName = assert( function( el ) { + el.appendChild( document.createComment( "" ) ); + return !el.getElementsByTagName( "*" ).length; + } ); + + // Support: IE<9 + support.getElementsByClassName = rnative.test( document.getElementsByClassName ); + + // Support: IE<10 + // Check if getElementById returns elements by name + // The broken getElementById methods don't pick up programmatically-set names, + // so use a roundabout getElementsByName test + support.getById = assert( function( el ) { + docElem.appendChild( el ).id = expando; + return !document.getElementsByName || !document.getElementsByName( expando ).length; + } ); + + // ID filter and find + if ( support.getById ) { + Expr.filter[ "ID" ] = function( id ) { + var attrId = id.replace( runescape, funescape ); + return function( elem ) { + return elem.getAttribute( "id" ) === attrId; + }; + }; + Expr.find[ "ID" ] = function( id, context ) { + if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { + var elem = context.getElementById( id ); + return elem ? 
[ elem ] : []; + } + }; + } else { + Expr.filter[ "ID" ] = function( id ) { + var attrId = id.replace( runescape, funescape ); + return function( elem ) { + var node = typeof elem.getAttributeNode !== "undefined" && + elem.getAttributeNode( "id" ); + return node && node.value === attrId; + }; + }; + + // Support: IE 6 - 7 only + // getElementById is not reliable as a find shortcut + Expr.find[ "ID" ] = function( id, context ) { + if ( typeof context.getElementById !== "undefined" && documentIsHTML ) { + var node, i, elems, + elem = context.getElementById( id ); + + if ( elem ) { + + // Verify the id attribute + node = elem.getAttributeNode( "id" ); + if ( node && node.value === id ) { + return [ elem ]; + } + + // Fall back on getElementsByName + elems = context.getElementsByName( id ); + i = 0; + while ( ( elem = elems[ i++ ] ) ) { + node = elem.getAttributeNode( "id" ); + if ( node && node.value === id ) { + return [ elem ]; + } + } + } + + return []; + } + }; + } + + // Tag + Expr.find[ "TAG" ] = support.getElementsByTagName ? 
+ function( tag, context ) { + if ( typeof context.getElementsByTagName !== "undefined" ) { + return context.getElementsByTagName( tag ); + + // DocumentFragment nodes don't have gEBTN + } else if ( support.qsa ) { + return context.querySelectorAll( tag ); + } + } : + + function( tag, context ) { + var elem, + tmp = [], + i = 0, + + // By happy coincidence, a (broken) gEBTN appears on DocumentFragment nodes too + results = context.getElementsByTagName( tag ); + + // Filter out possible comments + if ( tag === "*" ) { + while ( ( elem = results[ i++ ] ) ) { + if ( elem.nodeType === 1 ) { + tmp.push( elem ); + } + } + + return tmp; + } + return results; + }; + + // Class + Expr.find[ "CLASS" ] = support.getElementsByClassName && function( className, context ) { + if ( typeof context.getElementsByClassName !== "undefined" && documentIsHTML ) { + return context.getElementsByClassName( className ); + } + }; + + /* QSA/matchesSelector + ---------------------------------------------------------------------- */ + + // QSA and matchesSelector support + + // matchesSelector(:active) reports false when true (IE9/Opera 11.5) + rbuggyMatches = []; + + // qSa(:focus) reports false when true (Chrome 21) + // We allow this because of a bug in IE8/9 that throws an error + // whenever `document.activeElement` is accessed on an iframe + // So, we allow :focus to pass through QSA all the time to avoid the IE error + // See https://bugs.jquery.com/ticket/13378 + rbuggyQSA = []; + + if ( ( support.qsa = rnative.test( document.querySelectorAll ) ) ) { + + // Build QSA regex + // Regex strategy adopted from Diego Perini + assert( function( el ) { + + var input; + + // Select is set to empty string on purpose + // This is to test IE's treatment of not explicitly + // setting a boolean content attribute, + // since its presence should be enough + // https://bugs.jquery.com/ticket/12359 + docElem.appendChild( el ).innerHTML = "" + + ""; + + // Support: IE8, Opera 11-12.16 + // Nothing should 
be selected when empty strings follow ^= or $= or *= + // The test attribute must be unknown in Opera but "safe" for WinRT + // https://msdn.microsoft.com/en-us/library/ie/hh465388.aspx#attribute_section + if ( el.querySelectorAll( "[msallowcapture^='']" ).length ) { + rbuggyQSA.push( "[*^$]=" + whitespace + "*(?:''|\"\")" ); + } + + // Support: IE8 + // Boolean attributes and "value" are not treated correctly + if ( !el.querySelectorAll( "[selected]" ).length ) { + rbuggyQSA.push( "\\[" + whitespace + "*(?:value|" + booleans + ")" ); + } + + // Support: Chrome<29, Android<4.4, Safari<7.0+, iOS<7.0+, PhantomJS<1.9.8+ + if ( !el.querySelectorAll( "[id~=" + expando + "-]" ).length ) { + rbuggyQSA.push( "~=" ); + } + + // Support: IE 11+, Edge 15 - 18+ + // IE 11/Edge don't find elements on a `[name='']` query in some cases. + // Adding a temporary attribute to the document before the selection works + // around the issue. + // Interestingly, IE 10 & older don't seem to have the issue. + input = document.createElement( "input" ); + input.setAttribute( "name", "" ); + el.appendChild( input ); + if ( !el.querySelectorAll( "[name='']" ).length ) { + rbuggyQSA.push( "\\[" + whitespace + "*name" + whitespace + "*=" + + whitespace + "*(?:''|\"\")" ); + } + + // Webkit/Opera - :checked should return selected option elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + // IE8 throws error here and will not see later tests + if ( !el.querySelectorAll( ":checked" ).length ) { + rbuggyQSA.push( ":checked" ); + } + + // Support: Safari 8+, iOS 8+ + // https://bugs.webkit.org/show_bug.cgi?id=136851 + // In-page `selector#id sibling-combinator selector` fails + if ( !el.querySelectorAll( "a#" + expando + "+*" ).length ) { + rbuggyQSA.push( ".#.+[+~]" ); + } + + // Support: Firefox <=3.6 - 5 only + // Old Firefox doesn't throw on a badly-escaped identifier. 
+ el.querySelectorAll( "\\\f" ); + rbuggyQSA.push( "[\\r\\n\\f]" ); + } ); + + assert( function( el ) { + el.innerHTML = "" + + ""; + + // Support: Windows 8 Native Apps + // The type and name attributes are restricted during .innerHTML assignment + var input = document.createElement( "input" ); + input.setAttribute( "type", "hidden" ); + el.appendChild( input ).setAttribute( "name", "D" ); + + // Support: IE8 + // Enforce case-sensitivity of name attribute + if ( el.querySelectorAll( "[name=d]" ).length ) { + rbuggyQSA.push( "name" + whitespace + "*[*^$|!~]?=" ); + } + + // FF 3.5 - :enabled/:disabled and hidden elements (hidden elements are still enabled) + // IE8 throws error here and will not see later tests + if ( el.querySelectorAll( ":enabled" ).length !== 2 ) { + rbuggyQSA.push( ":enabled", ":disabled" ); + } + + // Support: IE9-11+ + // IE's :disabled selector does not pick up the children of disabled fieldsets + docElem.appendChild( el ).disabled = true; + if ( el.querySelectorAll( ":disabled" ).length !== 2 ) { + rbuggyQSA.push( ":enabled", ":disabled" ); + } + + // Support: Opera 10 - 11 only + // Opera 10-11 does not throw on post-comma invalid pseudos + el.querySelectorAll( "*,:x" ); + rbuggyQSA.push( ",.*:" ); + } ); + } + + if ( ( support.matchesSelector = rnative.test( ( matches = docElem.matches || + docElem.webkitMatchesSelector || + docElem.mozMatchesSelector || + docElem.oMatchesSelector || + docElem.msMatchesSelector ) ) ) ) { + + assert( function( el ) { + + // Check to see if it's possible to do matchesSelector + // on a disconnected node (IE 9) + support.disconnectedMatch = matches.call( el, "*" ); + + // This should fail with an exception + // Gecko does not error, returns false instead + matches.call( el, "[s!='']:x" ); + rbuggyMatches.push( "!=", pseudos ); + } ); + } + + rbuggyQSA = rbuggyQSA.length && new RegExp( rbuggyQSA.join( "|" ) ); + rbuggyMatches = rbuggyMatches.length && new RegExp( rbuggyMatches.join( "|" ) ); + + /* Contains 
+ ---------------------------------------------------------------------- */ + hasCompare = rnative.test( docElem.compareDocumentPosition ); + + // Element contains another + // Purposefully self-exclusive + // As in, an element does not contain itself + contains = hasCompare || rnative.test( docElem.contains ) ? + function( a, b ) { + var adown = a.nodeType === 9 ? a.documentElement : a, + bup = b && b.parentNode; + return a === bup || !!( bup && bup.nodeType === 1 && ( + adown.contains ? + adown.contains( bup ) : + a.compareDocumentPosition && a.compareDocumentPosition( bup ) & 16 + ) ); + } : + function( a, b ) { + if ( b ) { + while ( ( b = b.parentNode ) ) { + if ( b === a ) { + return true; + } + } + } + return false; + }; + + /* Sorting + ---------------------------------------------------------------------- */ + + // Document order sorting + sortOrder = hasCompare ? + function( a, b ) { + + // Flag for duplicate removal + if ( a === b ) { + hasDuplicate = true; + return 0; + } + + // Sort on method existence if only one input has compareDocumentPosition + var compare = !a.compareDocumentPosition - !b.compareDocumentPosition; + if ( compare ) { + return compare; + } + + // Calculate position if both inputs belong to the same document + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + compare = ( a.ownerDocument || a ) == ( b.ownerDocument || b ) ? + a.compareDocumentPosition( b ) : + + // Otherwise we know they are disconnected + 1; + + // Disconnected nodes + if ( compare & 1 || + ( !support.sortDetached && b.compareDocumentPosition( a ) === compare ) ) { + + // Choose the first element that is related to our preferred document + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. 
+ // eslint-disable-next-line eqeqeq + if ( a == document || a.ownerDocument == preferredDoc && + contains( preferredDoc, a ) ) { + return -1; + } + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( b == document || b.ownerDocument == preferredDoc && + contains( preferredDoc, b ) ) { + return 1; + } + + // Maintain original order + return sortInput ? + ( indexOf( sortInput, a ) - indexOf( sortInput, b ) ) : + 0; + } + + return compare & 4 ? -1 : 1; + } : + function( a, b ) { + + // Exit early if the nodes are identical + if ( a === b ) { + hasDuplicate = true; + return 0; + } + + var cur, + i = 0, + aup = a.parentNode, + bup = b.parentNode, + ap = [ a ], + bp = [ b ]; + + // Parentless nodes are either documents or disconnected + if ( !aup || !bup ) { + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + /* eslint-disable eqeqeq */ + return a == document ? -1 : + b == document ? 1 : + /* eslint-enable eqeqeq */ + aup ? -1 : + bup ? 1 : + sortInput ? + ( indexOf( sortInput, a ) - indexOf( sortInput, b ) ) : + 0; + + // If the nodes are siblings, we can do a quick check + } else if ( aup === bup ) { + return siblingCheck( a, b ); + } + + // Otherwise we need full lists of their ancestors for comparison + cur = a; + while ( ( cur = cur.parentNode ) ) { + ap.unshift( cur ); + } + cur = b; + while ( ( cur = cur.parentNode ) ) { + bp.unshift( cur ); + } + + // Walk down the tree looking for a discrepancy + while ( ap[ i ] === bp[ i ] ) { + i++; + } + + return i ? 
+ + // Do a sibling check if the nodes have a common ancestor + siblingCheck( ap[ i ], bp[ i ] ) : + + // Otherwise nodes in our document sort first + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + /* eslint-disable eqeqeq */ + ap[ i ] == preferredDoc ? -1 : + bp[ i ] == preferredDoc ? 1 : + /* eslint-enable eqeqeq */ + 0; + }; + + return document; +}; + +Sizzle.matches = function( expr, elements ) { + return Sizzle( expr, null, null, elements ); +}; + +Sizzle.matchesSelector = function( elem, expr ) { + setDocument( elem ); + + if ( support.matchesSelector && documentIsHTML && + !nonnativeSelectorCache[ expr + " " ] && + ( !rbuggyMatches || !rbuggyMatches.test( expr ) ) && + ( !rbuggyQSA || !rbuggyQSA.test( expr ) ) ) { + + try { + var ret = matches.call( elem, expr ); + + // IE 9's matchesSelector returns false on disconnected nodes + if ( ret || support.disconnectedMatch || + + // As well, disconnected nodes are said to be in a document + // fragment in IE 9 + elem.document && elem.document.nodeType !== 11 ) { + return ret; + } + } catch ( e ) { + nonnativeSelectorCache( expr, true ); + } + } + + return Sizzle( expr, document, null, [ elem ] ).length > 0; +}; + +Sizzle.contains = function( context, elem ) { + + // Set document vars if needed + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + if ( ( context.ownerDocument || context ) != document ) { + setDocument( context ); + } + return contains( context, elem ); +}; + +Sizzle.attr = function( elem, name ) { + + // Set document vars if needed + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. 
+ // eslint-disable-next-line eqeqeq + if ( ( elem.ownerDocument || elem ) != document ) { + setDocument( elem ); + } + + var fn = Expr.attrHandle[ name.toLowerCase() ], + + // Don't get fooled by Object.prototype properties (jQuery #13807) + val = fn && hasOwn.call( Expr.attrHandle, name.toLowerCase() ) ? + fn( elem, name, !documentIsHTML ) : + undefined; + + return val !== undefined ? + val : + support.attributes || !documentIsHTML ? + elem.getAttribute( name ) : + ( val = elem.getAttributeNode( name ) ) && val.specified ? + val.value : + null; +}; + +Sizzle.escape = function( sel ) { + return ( sel + "" ).replace( rcssescape, fcssescape ); +}; + +Sizzle.error = function( msg ) { + throw new Error( "Syntax error, unrecognized expression: " + msg ); +}; + +/** + * Document sorting and removing duplicates + * @param {ArrayLike} results + */ +Sizzle.uniqueSort = function( results ) { + var elem, + duplicates = [], + j = 0, + i = 0; + + // Unless we *know* we can detect duplicates, assume their presence + hasDuplicate = !support.detectDuplicates; + sortInput = !support.sortStable && results.slice( 0 ); + results.sort( sortOrder ); + + if ( hasDuplicate ) { + while ( ( elem = results[ i++ ] ) ) { + if ( elem === results[ i ] ) { + j = duplicates.push( i ); + } + } + while ( j-- ) { + results.splice( duplicates[ j ], 1 ); + } + } + + // Clear input after sorting to release objects + // See https://github.com/jquery/sizzle/pull/225 + sortInput = null; + + return results; +}; + +/** + * Utility function for retrieving the text value of an array of DOM nodes + * @param {Array|Element} elem + */ +getText = Sizzle.getText = function( elem ) { + var node, + ret = "", + i = 0, + nodeType = elem.nodeType; + + if ( !nodeType ) { + + // If no nodeType, this is expected to be an array + while ( ( node = elem[ i++ ] ) ) { + + // Do not traverse comment nodes + ret += getText( node ); + } + } else if ( nodeType === 1 || nodeType === 9 || nodeType === 11 ) { + + // Use textContent 
for elements + // innerText usage removed for consistency of new lines (jQuery #11153) + if ( typeof elem.textContent === "string" ) { + return elem.textContent; + } else { + + // Traverse its children + for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { + ret += getText( elem ); + } + } + } else if ( nodeType === 3 || nodeType === 4 ) { + return elem.nodeValue; + } + + // Do not include comment or processing instruction nodes + + return ret; +}; + +Expr = Sizzle.selectors = { + + // Can be adjusted by the user + cacheLength: 50, + + createPseudo: markFunction, + + match: matchExpr, + + attrHandle: {}, + + find: {}, + + relative: { + ">": { dir: "parentNode", first: true }, + " ": { dir: "parentNode" }, + "+": { dir: "previousSibling", first: true }, + "~": { dir: "previousSibling" } + }, + + preFilter: { + "ATTR": function( match ) { + match[ 1 ] = match[ 1 ].replace( runescape, funescape ); + + // Move the given value to match[3] whether quoted or unquoted + match[ 3 ] = ( match[ 3 ] || match[ 4 ] || + match[ 5 ] || "" ).replace( runescape, funescape ); + + if ( match[ 2 ] === "~=" ) { + match[ 3 ] = " " + match[ 3 ] + " "; + } + + return match.slice( 0, 4 ); + }, + + "CHILD": function( match ) { + + /* matches from matchExpr["CHILD"] + 1 type (only|nth|...) + 2 what (child|of-type) + 3 argument (even|odd|\d*|\d*n([+-]\d+)?|...) + 4 xn-component of xn+y argument ([+-]?\d*n|) + 5 sign of xn-component + 6 x of xn-component + 7 sign of y-component + 8 y of y-component + */ + match[ 1 ] = match[ 1 ].toLowerCase(); + + if ( match[ 1 ].slice( 0, 3 ) === "nth" ) { + + // nth-* requires argument + if ( !match[ 3 ] ) { + Sizzle.error( match[ 0 ] ); + } + + // numeric x and y parameters for Expr.filter.CHILD + // remember that false/true cast respectively to 0/1 + match[ 4 ] = +( match[ 4 ] ? 
+ match[ 5 ] + ( match[ 6 ] || 1 ) : + 2 * ( match[ 3 ] === "even" || match[ 3 ] === "odd" ) ); + match[ 5 ] = +( ( match[ 7 ] + match[ 8 ] ) || match[ 3 ] === "odd" ); + + // other types prohibit arguments + } else if ( match[ 3 ] ) { + Sizzle.error( match[ 0 ] ); + } + + return match; + }, + + "PSEUDO": function( match ) { + var excess, + unquoted = !match[ 6 ] && match[ 2 ]; + + if ( matchExpr[ "CHILD" ].test( match[ 0 ] ) ) { + return null; + } + + // Accept quoted arguments as-is + if ( match[ 3 ] ) { + match[ 2 ] = match[ 4 ] || match[ 5 ] || ""; + + // Strip excess characters from unquoted arguments + } else if ( unquoted && rpseudo.test( unquoted ) && + + // Get excess from tokenize (recursively) + ( excess = tokenize( unquoted, true ) ) && + + // advance to the next closing parenthesis + ( excess = unquoted.indexOf( ")", unquoted.length - excess ) - unquoted.length ) ) { + + // excess is a negative index + match[ 0 ] = match[ 0 ].slice( 0, excess ); + match[ 2 ] = unquoted.slice( 0, excess ); + } + + // Return only captures needed by the pseudo filter method (type and argument) + return match.slice( 0, 3 ); + } + }, + + filter: { + + "TAG": function( nodeNameSelector ) { + var nodeName = nodeNameSelector.replace( runescape, funescape ).toLowerCase(); + return nodeNameSelector === "*" ? 
+ function() { + return true; + } : + function( elem ) { + return elem.nodeName && elem.nodeName.toLowerCase() === nodeName; + }; + }, + + "CLASS": function( className ) { + var pattern = classCache[ className + " " ]; + + return pattern || + ( pattern = new RegExp( "(^|" + whitespace + + ")" + className + "(" + whitespace + "|$)" ) ) && classCache( + className, function( elem ) { + return pattern.test( + typeof elem.className === "string" && elem.className || + typeof elem.getAttribute !== "undefined" && + elem.getAttribute( "class" ) || + "" + ); + } ); + }, + + "ATTR": function( name, operator, check ) { + return function( elem ) { + var result = Sizzle.attr( elem, name ); + + if ( result == null ) { + return operator === "!="; + } + if ( !operator ) { + return true; + } + + result += ""; + + /* eslint-disable max-len */ + + return operator === "=" ? result === check : + operator === "!=" ? result !== check : + operator === "^=" ? check && result.indexOf( check ) === 0 : + operator === "*=" ? check && result.indexOf( check ) > -1 : + operator === "$=" ? check && result.slice( -check.length ) === check : + operator === "~=" ? ( " " + result.replace( rwhitespace, " " ) + " " ).indexOf( check ) > -1 : + operator === "|=" ? result === check || result.slice( 0, check.length + 1 ) === check + "-" : + false; + /* eslint-enable max-len */ + + }; + }, + + "CHILD": function( type, what, _argument, first, last ) { + var simple = type.slice( 0, 3 ) !== "nth", + forward = type.slice( -4 ) !== "last", + ofType = what === "of-type"; + + return first === 1 && last === 0 ? + + // Shortcut for :nth-*(n) + function( elem ) { + return !!elem.parentNode; + } : + + function( elem, _context, xml ) { + var cache, uniqueCache, outerCache, node, nodeIndex, start, + dir = simple !== forward ? 
"nextSibling" : "previousSibling", + parent = elem.parentNode, + name = ofType && elem.nodeName.toLowerCase(), + useCache = !xml && !ofType, + diff = false; + + if ( parent ) { + + // :(first|last|only)-(child|of-type) + if ( simple ) { + while ( dir ) { + node = elem; + while ( ( node = node[ dir ] ) ) { + if ( ofType ? + node.nodeName.toLowerCase() === name : + node.nodeType === 1 ) { + + return false; + } + } + + // Reverse direction for :only-* (if we haven't yet done so) + start = dir = type === "only" && !start && "nextSibling"; + } + return true; + } + + start = [ forward ? parent.firstChild : parent.lastChild ]; + + // non-xml :nth-child(...) stores cache data on `parent` + if ( forward && useCache ) { + + // Seek `elem` from a previously-cached index + + // ...in a gzip-friendly way + node = parent; + outerCache = node[ expando ] || ( node[ expando ] = {} ); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ node.uniqueID ] || + ( outerCache[ node.uniqueID ] = {} ); + + cache = uniqueCache[ type ] || []; + nodeIndex = cache[ 0 ] === dirruns && cache[ 1 ]; + diff = nodeIndex && cache[ 2 ]; + node = nodeIndex && parent.childNodes[ nodeIndex ]; + + while ( ( node = ++nodeIndex && node && node[ dir ] || + + // Fallback to seeking `elem` from the start + ( diff = nodeIndex = 0 ) || start.pop() ) ) { + + // When found, cache indexes on `parent` and break + if ( node.nodeType === 1 && ++diff && node === elem ) { + uniqueCache[ type ] = [ dirruns, nodeIndex, diff ]; + break; + } + } + + } else { + + // Use previously-cached element index if available + if ( useCache ) { + + // ...in a gzip-friendly way + node = elem; + outerCache = node[ expando ] || ( node[ expando ] = {} ); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ node.uniqueID ] || + ( outerCache[ node.uniqueID ] = {} ); + + cache = uniqueCache[ type ] || []; + nodeIndex = cache[ 0 
] === dirruns && cache[ 1 ]; + diff = nodeIndex; + } + + // xml :nth-child(...) + // or :nth-last-child(...) or :nth(-last)?-of-type(...) + if ( diff === false ) { + + // Use the same loop as above to seek `elem` from the start + while ( ( node = ++nodeIndex && node && node[ dir ] || + ( diff = nodeIndex = 0 ) || start.pop() ) ) { + + if ( ( ofType ? + node.nodeName.toLowerCase() === name : + node.nodeType === 1 ) && + ++diff ) { + + // Cache the index of each encountered element + if ( useCache ) { + outerCache = node[ expando ] || + ( node[ expando ] = {} ); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ node.uniqueID ] || + ( outerCache[ node.uniqueID ] = {} ); + + uniqueCache[ type ] = [ dirruns, diff ]; + } + + if ( node === elem ) { + break; + } + } + } + } + } + + // Incorporate the offset, then check against cycle size + diff -= last; + return diff === first || ( diff % first === 0 && diff / first >= 0 ); + } + }; + }, + + "PSEUDO": function( pseudo, argument ) { + + // pseudo-class names are case-insensitive + // http://www.w3.org/TR/selectors/#pseudo-classes + // Prioritize by case sensitivity in case custom pseudos are added with uppercase letters + // Remember that setFilters inherits from pseudos + var args, + fn = Expr.pseudos[ pseudo ] || Expr.setFilters[ pseudo.toLowerCase() ] || + Sizzle.error( "unsupported pseudo: " + pseudo ); + + // The user may use createPseudo to indicate that + // arguments are needed to create the filter function + // just as Sizzle does + if ( fn[ expando ] ) { + return fn( argument ); + } + + // But maintain support for old signatures + if ( fn.length > 1 ) { + args = [ pseudo, pseudo, "", argument ]; + return Expr.setFilters.hasOwnProperty( pseudo.toLowerCase() ) ? 
+ markFunction( function( seed, matches ) { + var idx, + matched = fn( seed, argument ), + i = matched.length; + while ( i-- ) { + idx = indexOf( seed, matched[ i ] ); + seed[ idx ] = !( matches[ idx ] = matched[ i ] ); + } + } ) : + function( elem ) { + return fn( elem, 0, args ); + }; + } + + return fn; + } + }, + + pseudos: { + + // Potentially complex pseudos + "not": markFunction( function( selector ) { + + // Trim the selector passed to compile + // to avoid treating leading and trailing + // spaces as combinators + var input = [], + results = [], + matcher = compile( selector.replace( rtrim, "$1" ) ); + + return matcher[ expando ] ? + markFunction( function( seed, matches, _context, xml ) { + var elem, + unmatched = matcher( seed, null, xml, [] ), + i = seed.length; + + // Match elements unmatched by `matcher` + while ( i-- ) { + if ( ( elem = unmatched[ i ] ) ) { + seed[ i ] = !( matches[ i ] = elem ); + } + } + } ) : + function( elem, _context, xml ) { + input[ 0 ] = elem; + matcher( input, null, xml, results ); + + // Don't keep the element (issue #299) + input[ 0 ] = null; + return !results.pop(); + }; + } ), + + "has": markFunction( function( selector ) { + return function( elem ) { + return Sizzle( selector, elem ).length > 0; + }; + } ), + + "contains": markFunction( function( text ) { + text = text.replace( runescape, funescape ); + return function( elem ) { + return ( elem.textContent || getText( elem ) ).indexOf( text ) > -1; + }; + } ), + + // "Whether an element is represented by a :lang() selector + // is based solely on the element's language value + // being equal to the identifier C, + // or beginning with the identifier C immediately followed by "-". + // The matching of C against the element's language value is performed case-insensitively. + // The identifier C does not have to be a valid language name." 
+ // http://www.w3.org/TR/selectors/#lang-pseudo + "lang": markFunction( function( lang ) { + + // lang value must be a valid identifier + if ( !ridentifier.test( lang || "" ) ) { + Sizzle.error( "unsupported lang: " + lang ); + } + lang = lang.replace( runescape, funescape ).toLowerCase(); + return function( elem ) { + var elemLang; + do { + if ( ( elemLang = documentIsHTML ? + elem.lang : + elem.getAttribute( "xml:lang" ) || elem.getAttribute( "lang" ) ) ) { + + elemLang = elemLang.toLowerCase(); + return elemLang === lang || elemLang.indexOf( lang + "-" ) === 0; + } + } while ( ( elem = elem.parentNode ) && elem.nodeType === 1 ); + return false; + }; + } ), + + // Miscellaneous + "target": function( elem ) { + var hash = window.location && window.location.hash; + return hash && hash.slice( 1 ) === elem.id; + }, + + "root": function( elem ) { + return elem === docElem; + }, + + "focus": function( elem ) { + return elem === document.activeElement && + ( !document.hasFocus || document.hasFocus() ) && + !!( elem.type || elem.href || ~elem.tabIndex ); + }, + + // Boolean properties + "enabled": createDisabledPseudo( false ), + "disabled": createDisabledPseudo( true ), + + "checked": function( elem ) { + + // In CSS3, :checked should return both checked and selected elements + // http://www.w3.org/TR/2011/REC-css3-selectors-20110929/#checked + var nodeName = elem.nodeName.toLowerCase(); + return ( nodeName === "input" && !!elem.checked ) || + ( nodeName === "option" && !!elem.selected ); + }, + + "selected": function( elem ) { + + // Accessing this property makes selected-by-default + // options in Safari work properly + if ( elem.parentNode ) { + // eslint-disable-next-line no-unused-expressions + elem.parentNode.selectedIndex; + } + + return elem.selected === true; + }, + + // Contents + "empty": function( elem ) { + + // http://www.w3.org/TR/selectors/#empty-pseudo + // :empty is negated by element (1) or content nodes (text: 3; cdata: 4; entity ref: 5), + // but 
not by others (comment: 8; processing instruction: 7; etc.) + // nodeType < 6 works because attributes (2) do not appear as children + for ( elem = elem.firstChild; elem; elem = elem.nextSibling ) { + if ( elem.nodeType < 6 ) { + return false; + } + } + return true; + }, + + "parent": function( elem ) { + return !Expr.pseudos[ "empty" ]( elem ); + }, + + // Element/input types + "header": function( elem ) { + return rheader.test( elem.nodeName ); + }, + + "input": function( elem ) { + return rinputs.test( elem.nodeName ); + }, + + "button": function( elem ) { + var name = elem.nodeName.toLowerCase(); + return name === "input" && elem.type === "button" || name === "button"; + }, + + "text": function( elem ) { + var attr; + return elem.nodeName.toLowerCase() === "input" && + elem.type === "text" && + + // Support: IE<8 + // New HTML5 attribute values (e.g., "search") appear with elem.type === "text" + ( ( attr = elem.getAttribute( "type" ) ) == null || + attr.toLowerCase() === "text" ); + }, + + // Position-in-collection + "first": createPositionalPseudo( function() { + return [ 0 ]; + } ), + + "last": createPositionalPseudo( function( _matchIndexes, length ) { + return [ length - 1 ]; + } ), + + "eq": createPositionalPseudo( function( _matchIndexes, length, argument ) { + return [ argument < 0 ? argument + length : argument ]; + } ), + + "even": createPositionalPseudo( function( matchIndexes, length ) { + var i = 0; + for ( ; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + } ), + + "odd": createPositionalPseudo( function( matchIndexes, length ) { + var i = 1; + for ( ; i < length; i += 2 ) { + matchIndexes.push( i ); + } + return matchIndexes; + } ), + + "lt": createPositionalPseudo( function( matchIndexes, length, argument ) { + var i = argument < 0 ? + argument + length : + argument > length ? 
+ length : + argument; + for ( ; --i >= 0; ) { + matchIndexes.push( i ); + } + return matchIndexes; + } ), + + "gt": createPositionalPseudo( function( matchIndexes, length, argument ) { + var i = argument < 0 ? argument + length : argument; + for ( ; ++i < length; ) { + matchIndexes.push( i ); + } + return matchIndexes; + } ) + } +}; + +Expr.pseudos[ "nth" ] = Expr.pseudos[ "eq" ]; + +// Add button/input type pseudos +for ( i in { radio: true, checkbox: true, file: true, password: true, image: true } ) { + Expr.pseudos[ i ] = createInputPseudo( i ); +} +for ( i in { submit: true, reset: true } ) { + Expr.pseudos[ i ] = createButtonPseudo( i ); +} + +// Easy API for creating new setFilters +function setFilters() {} +setFilters.prototype = Expr.filters = Expr.pseudos; +Expr.setFilters = new setFilters(); + +tokenize = Sizzle.tokenize = function( selector, parseOnly ) { + var matched, match, tokens, type, + soFar, groups, preFilters, + cached = tokenCache[ selector + " " ]; + + if ( cached ) { + return parseOnly ? 
0 : cached.slice( 0 ); + } + + soFar = selector; + groups = []; + preFilters = Expr.preFilter; + + while ( soFar ) { + + // Comma and first run + if ( !matched || ( match = rcomma.exec( soFar ) ) ) { + if ( match ) { + + // Don't consume trailing commas as valid + soFar = soFar.slice( match[ 0 ].length ) || soFar; + } + groups.push( ( tokens = [] ) ); + } + + matched = false; + + // Combinators + if ( ( match = rcombinators.exec( soFar ) ) ) { + matched = match.shift(); + tokens.push( { + value: matched, + + // Cast descendant combinators to space + type: match[ 0 ].replace( rtrim, " " ) + } ); + soFar = soFar.slice( matched.length ); + } + + // Filters + for ( type in Expr.filter ) { + if ( ( match = matchExpr[ type ].exec( soFar ) ) && ( !preFilters[ type ] || + ( match = preFilters[ type ]( match ) ) ) ) { + matched = match.shift(); + tokens.push( { + value: matched, + type: type, + matches: match + } ); + soFar = soFar.slice( matched.length ); + } + } + + if ( !matched ) { + break; + } + } + + // Return the length of the invalid excess + // if we're just parsing + // Otherwise, throw an error or return tokens + return parseOnly ? + soFar.length : + soFar ? + Sizzle.error( selector ) : + + // Cache the tokens + tokenCache( selector, groups ).slice( 0 ); +}; + +function toSelector( tokens ) { + var i = 0, + len = tokens.length, + selector = ""; + for ( ; i < len; i++ ) { + selector += tokens[ i ].value; + } + return selector; +} + +function addCombinator( matcher, combinator, base ) { + var dir = combinator.dir, + skip = combinator.next, + key = skip || dir, + checkNonElements = base && key === "parentNode", + doneName = done++; + + return combinator.first ? 
+ + // Check against closest ancestor/preceding element + function( elem, context, xml ) { + while ( ( elem = elem[ dir ] ) ) { + if ( elem.nodeType === 1 || checkNonElements ) { + return matcher( elem, context, xml ); + } + } + return false; + } : + + // Check against all ancestor/preceding elements + function( elem, context, xml ) { + var oldCache, uniqueCache, outerCache, + newCache = [ dirruns, doneName ]; + + // We can't set arbitrary data on XML nodes, so they don't benefit from combinator caching + if ( xml ) { + while ( ( elem = elem[ dir ] ) ) { + if ( elem.nodeType === 1 || checkNonElements ) { + if ( matcher( elem, context, xml ) ) { + return true; + } + } + } + } else { + while ( ( elem = elem[ dir ] ) ) { + if ( elem.nodeType === 1 || checkNonElements ) { + outerCache = elem[ expando ] || ( elem[ expando ] = {} ); + + // Support: IE <9 only + // Defend against cloned attroperties (jQuery gh-1709) + uniqueCache = outerCache[ elem.uniqueID ] || + ( outerCache[ elem.uniqueID ] = {} ); + + if ( skip && skip === elem.nodeName.toLowerCase() ) { + elem = elem[ dir ] || elem; + } else if ( ( oldCache = uniqueCache[ key ] ) && + oldCache[ 0 ] === dirruns && oldCache[ 1 ] === doneName ) { + + // Assign to newCache so results back-propagate to previous elements + return ( newCache[ 2 ] = oldCache[ 2 ] ); + } else { + + // Reuse newcache so results back-propagate to previous elements + uniqueCache[ key ] = newCache; + + // A match means we're done; a fail means we have to keep checking + if ( ( newCache[ 2 ] = matcher( elem, context, xml ) ) ) { + return true; + } + } + } + } + } + return false; + }; +} + +function elementMatcher( matchers ) { + return matchers.length > 1 ? 
+ function( elem, context, xml ) { + var i = matchers.length; + while ( i-- ) { + if ( !matchers[ i ]( elem, context, xml ) ) { + return false; + } + } + return true; + } : + matchers[ 0 ]; +} + +function multipleContexts( selector, contexts, results ) { + var i = 0, + len = contexts.length; + for ( ; i < len; i++ ) { + Sizzle( selector, contexts[ i ], results ); + } + return results; +} + +function condense( unmatched, map, filter, context, xml ) { + var elem, + newUnmatched = [], + i = 0, + len = unmatched.length, + mapped = map != null; + + for ( ; i < len; i++ ) { + if ( ( elem = unmatched[ i ] ) ) { + if ( !filter || filter( elem, context, xml ) ) { + newUnmatched.push( elem ); + if ( mapped ) { + map.push( i ); + } + } + } + } + + return newUnmatched; +} + +function setMatcher( preFilter, selector, matcher, postFilter, postFinder, postSelector ) { + if ( postFilter && !postFilter[ expando ] ) { + postFilter = setMatcher( postFilter ); + } + if ( postFinder && !postFinder[ expando ] ) { + postFinder = setMatcher( postFinder, postSelector ); + } + return markFunction( function( seed, results, context, xml ) { + var temp, i, elem, + preMap = [], + postMap = [], + preexisting = results.length, + + // Get initial elements from seed or context + elems = seed || multipleContexts( + selector || "*", + context.nodeType ? [ context ] : context, + [] + ), + + // Prefilter to get matcher input, preserving a map for seed-results synchronization + matcherIn = preFilter && ( seed || !selector ) ? + condense( elems, preMap, preFilter, context, xml ) : + elems, + + matcherOut = matcher ? + + // If we have a postFinder, or filtered seed, or non-seed postFilter or preexisting results, + postFinder || ( seed ? preFilter : preexisting || postFilter ) ? 
+ + // ...intermediate processing is necessary + [] : + + // ...otherwise use results directly + results : + matcherIn; + + // Find primary matches + if ( matcher ) { + matcher( matcherIn, matcherOut, context, xml ); + } + + // Apply postFilter + if ( postFilter ) { + temp = condense( matcherOut, postMap ); + postFilter( temp, [], context, xml ); + + // Un-match failing elements by moving them back to matcherIn + i = temp.length; + while ( i-- ) { + if ( ( elem = temp[ i ] ) ) { + matcherOut[ postMap[ i ] ] = !( matcherIn[ postMap[ i ] ] = elem ); + } + } + } + + if ( seed ) { + if ( postFinder || preFilter ) { + if ( postFinder ) { + + // Get the final matcherOut by condensing this intermediate into postFinder contexts + temp = []; + i = matcherOut.length; + while ( i-- ) { + if ( ( elem = matcherOut[ i ] ) ) { + + // Restore matcherIn since elem is not yet a final match + temp.push( ( matcherIn[ i ] = elem ) ); + } + } + postFinder( null, ( matcherOut = [] ), temp, xml ); + } + + // Move matched elements from seed to results to keep them synchronized + i = matcherOut.length; + while ( i-- ) { + if ( ( elem = matcherOut[ i ] ) && + ( temp = postFinder ? indexOf( seed, elem ) : preMap[ i ] ) > -1 ) { + + seed[ temp ] = !( results[ temp ] = elem ); + } + } + } + + // Add elements to results, through postFinder if defined + } else { + matcherOut = condense( + matcherOut === results ? + matcherOut.splice( preexisting, matcherOut.length ) : + matcherOut + ); + if ( postFinder ) { + postFinder( null, results, matcherOut, xml ); + } else { + push.apply( results, matcherOut ); + } + } + } ); +} + +function matcherFromTokens( tokens ) { + var checkContext, matcher, j, + len = tokens.length, + leadingRelative = Expr.relative[ tokens[ 0 ].type ], + implicitRelative = leadingRelative || Expr.relative[ " " ], + i = leadingRelative ? 
1 : 0, + + // The foundational matcher ensures that elements are reachable from top-level context(s) + matchContext = addCombinator( function( elem ) { + return elem === checkContext; + }, implicitRelative, true ), + matchAnyContext = addCombinator( function( elem ) { + return indexOf( checkContext, elem ) > -1; + }, implicitRelative, true ), + matchers = [ function( elem, context, xml ) { + var ret = ( !leadingRelative && ( xml || context !== outermostContext ) ) || ( + ( checkContext = context ).nodeType ? + matchContext( elem, context, xml ) : + matchAnyContext( elem, context, xml ) ); + + // Avoid hanging onto element (issue #299) + checkContext = null; + return ret; + } ]; + + for ( ; i < len; i++ ) { + if ( ( matcher = Expr.relative[ tokens[ i ].type ] ) ) { + matchers = [ addCombinator( elementMatcher( matchers ), matcher ) ]; + } else { + matcher = Expr.filter[ tokens[ i ].type ].apply( null, tokens[ i ].matches ); + + // Return special upon seeing a positional matcher + if ( matcher[ expando ] ) { + + // Find the next relative operator (if any) for proper handling + j = ++i; + for ( ; j < len; j++ ) { + if ( Expr.relative[ tokens[ j ].type ] ) { + break; + } + } + return setMatcher( + i > 1 && elementMatcher( matchers ), + i > 1 && toSelector( + + // If the preceding token was a descendant combinator, insert an implicit any-element `*` + tokens + .slice( 0, i - 1 ) + .concat( { value: tokens[ i - 2 ].type === " " ? 
"*" : "" } ) + ).replace( rtrim, "$1" ), + matcher, + i < j && matcherFromTokens( tokens.slice( i, j ) ), + j < len && matcherFromTokens( ( tokens = tokens.slice( j ) ) ), + j < len && toSelector( tokens ) + ); + } + matchers.push( matcher ); + } + } + + return elementMatcher( matchers ); +} + +function matcherFromGroupMatchers( elementMatchers, setMatchers ) { + var bySet = setMatchers.length > 0, + byElement = elementMatchers.length > 0, + superMatcher = function( seed, context, xml, results, outermost ) { + var elem, j, matcher, + matchedCount = 0, + i = "0", + unmatched = seed && [], + setMatched = [], + contextBackup = outermostContext, + + // We must always have either seed elements or outermost context + elems = seed || byElement && Expr.find[ "TAG" ]( "*", outermost ), + + // Use integer dirruns iff this is the outermost matcher + dirrunsUnique = ( dirruns += contextBackup == null ? 1 : Math.random() || 0.1 ), + len = elems.length; + + if ( outermost ) { + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. + // eslint-disable-next-line eqeqeq + outermostContext = context == document || context || outermost; + } + + // Add elements passing elementMatchers directly to results + // Support: IE<9, Safari + // Tolerate NodeList properties (IE: "length"; Safari: ) matching elements by id + for ( ; i !== len && ( elem = elems[ i ] ) != null; i++ ) { + if ( byElement && elem ) { + j = 0; + + // Support: IE 11+, Edge 17 - 18+ + // IE/Edge sometimes throw a "Permission denied" error when strict-comparing + // two documents; shallow comparisons work. 
+ // eslint-disable-next-line eqeqeq + if ( !context && elem.ownerDocument != document ) { + setDocument( elem ); + xml = !documentIsHTML; + } + while ( ( matcher = elementMatchers[ j++ ] ) ) { + if ( matcher( elem, context || document, xml ) ) { + results.push( elem ); + break; + } + } + if ( outermost ) { + dirruns = dirrunsUnique; + } + } + + // Track unmatched elements for set filters + if ( bySet ) { + + // They will have gone through all possible matchers + if ( ( elem = !matcher && elem ) ) { + matchedCount--; + } + + // Lengthen the array for every element, matched or not + if ( seed ) { + unmatched.push( elem ); + } + } + } + + // `i` is now the count of elements visited above, and adding it to `matchedCount` + // makes the latter nonnegative. + matchedCount += i; + + // Apply set filters to unmatched elements + // NOTE: This can be skipped if there are no unmatched elements (i.e., `matchedCount` + // equals `i`), unless we didn't visit _any_ elements in the above loop because we have + // no element matchers and no seed. + // Incrementing an initially-string "0" `i` allows `i` to remain a string only in that + // case, which will result in a "00" `matchedCount` that differs from `i` but is also + // numerically zero. 
+ if ( bySet && i !== matchedCount ) { + j = 0; + while ( ( matcher = setMatchers[ j++ ] ) ) { + matcher( unmatched, setMatched, context, xml ); + } + + if ( seed ) { + + // Reintegrate element matches to eliminate the need for sorting + if ( matchedCount > 0 ) { + while ( i-- ) { + if ( !( unmatched[ i ] || setMatched[ i ] ) ) { + setMatched[ i ] = pop.call( results ); + } + } + } + + // Discard index placeholder values to get only actual matches + setMatched = condense( setMatched ); + } + + // Add matches to results + push.apply( results, setMatched ); + + // Seedless set matches succeeding multiple successful matchers stipulate sorting + if ( outermost && !seed && setMatched.length > 0 && + ( matchedCount + setMatchers.length ) > 1 ) { + + Sizzle.uniqueSort( results ); + } + } + + // Override manipulation of globals by nested matchers + if ( outermost ) { + dirruns = dirrunsUnique; + outermostContext = contextBackup; + } + + return unmatched; + }; + + return bySet ? + markFunction( superMatcher ) : + superMatcher; +} + +compile = Sizzle.compile = function( selector, match /* Internal Use Only */ ) { + var i, + setMatchers = [], + elementMatchers = [], + cached = compilerCache[ selector + " " ]; + + if ( !cached ) { + + // Generate a function of recursive functions that can be used to check each element + if ( !match ) { + match = tokenize( selector ); + } + i = match.length; + while ( i-- ) { + cached = matcherFromTokens( match[ i ] ); + if ( cached[ expando ] ) { + setMatchers.push( cached ); + } else { + elementMatchers.push( cached ); + } + } + + // Cache the compiled function + cached = compilerCache( + selector, + matcherFromGroupMatchers( elementMatchers, setMatchers ) + ); + + // Save selector and tokenization + cached.selector = selector; + } + return cached; +}; + +/** + * A low-level selection function that works with Sizzle's compiled + * selector functions + * @param {String|Function} selector A selector or a pre-compiled + * selector function built 
with Sizzle.compile + * @param {Element} context + * @param {Array} [results] + * @param {Array} [seed] A set of elements to match against + */ +select = Sizzle.select = function( selector, context, results, seed ) { + var i, tokens, token, type, find, + compiled = typeof selector === "function" && selector, + match = !seed && tokenize( ( selector = compiled.selector || selector ) ); + + results = results || []; + + // Try to minimize operations if there is only one selector in the list and no seed + // (the latter of which guarantees us context) + if ( match.length === 1 ) { + + // Reduce context if the leading compound selector is an ID + tokens = match[ 0 ] = match[ 0 ].slice( 0 ); + if ( tokens.length > 2 && ( token = tokens[ 0 ] ).type === "ID" && + context.nodeType === 9 && documentIsHTML && Expr.relative[ tokens[ 1 ].type ] ) { + + context = ( Expr.find[ "ID" ]( token.matches[ 0 ] + .replace( runescape, funescape ), context ) || [] )[ 0 ]; + if ( !context ) { + return results; + + // Precompiled matchers will still verify ancestry, so step up a level + } else if ( compiled ) { + context = context.parentNode; + } + + selector = selector.slice( tokens.shift().value.length ); + } + + // Fetch a seed set for right-to-left matching + i = matchExpr[ "needsContext" ].test( selector ) ? 
0 : tokens.length; + while ( i-- ) { + token = tokens[ i ]; + + // Abort if we hit a combinator + if ( Expr.relative[ ( type = token.type ) ] ) { + break; + } + if ( ( find = Expr.find[ type ] ) ) { + + // Search, expanding context for leading sibling combinators + if ( ( seed = find( + token.matches[ 0 ].replace( runescape, funescape ), + rsibling.test( tokens[ 0 ].type ) && testContext( context.parentNode ) || + context + ) ) ) { + + // If seed is empty or no tokens remain, we can return early + tokens.splice( i, 1 ); + selector = seed.length && toSelector( tokens ); + if ( !selector ) { + push.apply( results, seed ); + return results; + } + + break; + } + } + } + } + + // Compile and execute a filtering function if one is not provided + // Provide `match` to avoid retokenization if we modified the selector above + ( compiled || compile( selector, match ) )( + seed, + context, + !documentIsHTML, + results, + !context || rsibling.test( selector ) && testContext( context.parentNode ) || context + ); + return results; +}; + +// One-time assignments + +// Sort stability +support.sortStable = expando.split( "" ).sort( sortOrder ).join( "" ) === expando; + +// Support: Chrome 14-35+ +// Always assume duplicates if they aren't passed to the comparison function +support.detectDuplicates = !!hasDuplicate; + +// Initialize against the default document +setDocument(); + +// Support: Webkit<537.32 - Safari 6.0.3/Chrome 25 (fixed in Chrome 27) +// Detached nodes confoundingly follow *each other* +support.sortDetached = assert( function( el ) { + + // Should return 1, but returns 4 (following) + return el.compareDocumentPosition( document.createElement( "fieldset" ) ) & 1; +} ); + +// Support: IE<8 +// Prevent attribute/property "interpolation" +// https://msdn.microsoft.com/en-us/library/ms536429%28VS.85%29.aspx +if ( !assert( function( el ) { + el.innerHTML = ""; + return el.firstChild.getAttribute( "href" ) === "#"; +} ) ) { + addHandle( "type|href|height|width", function( 
elem, name, isXML ) { + if ( !isXML ) { + return elem.getAttribute( name, name.toLowerCase() === "type" ? 1 : 2 ); + } + } ); +} + +// Support: IE<9 +// Use defaultValue in place of getAttribute("value") +if ( !support.attributes || !assert( function( el ) { + el.innerHTML = ""; + el.firstChild.setAttribute( "value", "" ); + return el.firstChild.getAttribute( "value" ) === ""; +} ) ) { + addHandle( "value", function( elem, _name, isXML ) { + if ( !isXML && elem.nodeName.toLowerCase() === "input" ) { + return elem.defaultValue; + } + } ); +} + +// Support: IE<9 +// Use getAttributeNode to fetch booleans when getAttribute lies +if ( !assert( function( el ) { + return el.getAttribute( "disabled" ) == null; +} ) ) { + addHandle( booleans, function( elem, name, isXML ) { + var val; + if ( !isXML ) { + return elem[ name ] === true ? name.toLowerCase() : + ( val = elem.getAttributeNode( name ) ) && val.specified ? + val.value : + null; + } + } ); +} + +return Sizzle; + +} )( window ); + + + +jQuery.find = Sizzle; +jQuery.expr = Sizzle.selectors; + +// Deprecated +jQuery.expr[ ":" ] = jQuery.expr.pseudos; +jQuery.uniqueSort = jQuery.unique = Sizzle.uniqueSort; +jQuery.text = Sizzle.getText; +jQuery.isXMLDoc = Sizzle.isXML; +jQuery.contains = Sizzle.contains; +jQuery.escapeSelector = Sizzle.escape; + + + + +var dir = function( elem, dir, until ) { + var matched = [], + truncate = until !== undefined; + + while ( ( elem = elem[ dir ] ) && elem.nodeType !== 9 ) { + if ( elem.nodeType === 1 ) { + if ( truncate && jQuery( elem ).is( until ) ) { + break; + } + matched.push( elem ); + } + } + return matched; +}; + + +var siblings = function( n, elem ) { + var matched = []; + + for ( ; n; n = n.nextSibling ) { + if ( n.nodeType === 1 && n !== elem ) { + matched.push( n ); + } + } + + return matched; +}; + + +var rneedsContext = jQuery.expr.match.needsContext; + + + +function nodeName( elem, name ) { + + return elem.nodeName && elem.nodeName.toLowerCase() === name.toLowerCase(); + 
+}; +var rsingleTag = ( /^<([a-z][^\/\0>:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i ); + + + +// Implement the identical functionality for filter and not +function winnow( elements, qualifier, not ) { + if ( isFunction( qualifier ) ) { + return jQuery.grep( elements, function( elem, i ) { + return !!qualifier.call( elem, i, elem ) !== not; + } ); + } + + // Single element + if ( qualifier.nodeType ) { + return jQuery.grep( elements, function( elem ) { + return ( elem === qualifier ) !== not; + } ); + } + + // Arraylike of elements (jQuery, arguments, Array) + if ( typeof qualifier !== "string" ) { + return jQuery.grep( elements, function( elem ) { + return ( indexOf.call( qualifier, elem ) > -1 ) !== not; + } ); + } + + // Filtered directly for both simple and complex selectors + return jQuery.filter( qualifier, elements, not ); +} + +jQuery.filter = function( expr, elems, not ) { + var elem = elems[ 0 ]; + + if ( not ) { + expr = ":not(" + expr + ")"; + } + + if ( elems.length === 1 && elem.nodeType === 1 ) { + return jQuery.find.matchesSelector( elem, expr ) ? [ elem ] : []; + } + + return jQuery.find.matches( expr, jQuery.grep( elems, function( elem ) { + return elem.nodeType === 1; + } ) ); +}; + +jQuery.fn.extend( { + find: function( selector ) { + var i, ret, + len = this.length, + self = this; + + if ( typeof selector !== "string" ) { + return this.pushStack( jQuery( selector ).filter( function() { + for ( i = 0; i < len; i++ ) { + if ( jQuery.contains( self[ i ], this ) ) { + return true; + } + } + } ) ); + } + + ret = this.pushStack( [] ); + + for ( i = 0; i < len; i++ ) { + jQuery.find( selector, self[ i ], ret ); + } + + return len > 1 ? 
jQuery.uniqueSort( ret ) : ret; + }, + filter: function( selector ) { + return this.pushStack( winnow( this, selector || [], false ) ); + }, + not: function( selector ) { + return this.pushStack( winnow( this, selector || [], true ) ); + }, + is: function( selector ) { + return !!winnow( + this, + + // If this is a positional/relative selector, check membership in the returned set + // so $("p:first").is("p:last") won't return true for a doc with two "p". + typeof selector === "string" && rneedsContext.test( selector ) ? + jQuery( selector ) : + selector || [], + false + ).length; + } +} ); + + +// Initialize a jQuery object + + +// A central reference to the root jQuery(document) +var rootjQuery, + + // A simple way to check for HTML strings + // Prioritize #id over to avoid XSS via location.hash (#9521) + // Strict HTML recognition (#11290: must start with <) + // Shortcut simple #id case for speed + rquickExpr = /^(?:\s*(<[\w\W]+>)[^>]*|#([\w-]+))$/, + + init = jQuery.fn.init = function( selector, context, root ) { + var match, elem; + + // HANDLE: $(""), $(null), $(undefined), $(false) + if ( !selector ) { + return this; + } + + // Method init() accepts an alternate rootjQuery + // so migrate can support jQuery.sub (gh-2101) + root = root || rootjQuery; + + // Handle HTML strings + if ( typeof selector === "string" ) { + if ( selector[ 0 ] === "<" && + selector[ selector.length - 1 ] === ">" && + selector.length >= 3 ) { + + // Assume that strings that start and end with <> are HTML and skip the regex check + match = [ null, selector, null ]; + + } else { + match = rquickExpr.exec( selector ); + } + + // Match html or make sure no context is specified for #id + if ( match && ( match[ 1 ] || !context ) ) { + + // HANDLE: $(html) -> $(array) + if ( match[ 1 ] ) { + context = context instanceof jQuery ? 
context[ 0 ] : context; + + // Option to run scripts is true for back-compat + // Intentionally let the error be thrown if parseHTML is not present + jQuery.merge( this, jQuery.parseHTML( + match[ 1 ], + context && context.nodeType ? context.ownerDocument || context : document, + true + ) ); + + // HANDLE: $(html, props) + if ( rsingleTag.test( match[ 1 ] ) && jQuery.isPlainObject( context ) ) { + for ( match in context ) { + + // Properties of context are called as methods if possible + if ( isFunction( this[ match ] ) ) { + this[ match ]( context[ match ] ); + + // ...and otherwise set as attributes + } else { + this.attr( match, context[ match ] ); + } + } + } + + return this; + + // HANDLE: $(#id) + } else { + elem = document.getElementById( match[ 2 ] ); + + if ( elem ) { + + // Inject the element directly into the jQuery object + this[ 0 ] = elem; + this.length = 1; + } + return this; + } + + // HANDLE: $(expr, $(...)) + } else if ( !context || context.jquery ) { + return ( context || root ).find( selector ); + + // HANDLE: $(expr, context) + // (which is just equivalent to: $(context).find(expr) + } else { + return this.constructor( context ).find( selector ); + } + + // HANDLE: $(DOMElement) + } else if ( selector.nodeType ) { + this[ 0 ] = selector; + this.length = 1; + return this; + + // HANDLE: $(function) + // Shortcut for document ready + } else if ( isFunction( selector ) ) { + return root.ready !== undefined ? 
+ root.ready( selector ) : + + // Execute immediately if ready is not present + selector( jQuery ); + } + + return jQuery.makeArray( selector, this ); + }; + +// Give the init function the jQuery prototype for later instantiation +init.prototype = jQuery.fn; + +// Initialize central reference +rootjQuery = jQuery( document ); + + +var rparentsprev = /^(?:parents|prev(?:Until|All))/, + + // Methods guaranteed to produce a unique set when starting from a unique set + guaranteedUnique = { + children: true, + contents: true, + next: true, + prev: true + }; + +jQuery.fn.extend( { + has: function( target ) { + var targets = jQuery( target, this ), + l = targets.length; + + return this.filter( function() { + var i = 0; + for ( ; i < l; i++ ) { + if ( jQuery.contains( this, targets[ i ] ) ) { + return true; + } + } + } ); + }, + + closest: function( selectors, context ) { + var cur, + i = 0, + l = this.length, + matched = [], + targets = typeof selectors !== "string" && jQuery( selectors ); + + // Positional selectors never match, since there's no _selection_ context + if ( !rneedsContext.test( selectors ) ) { + for ( ; i < l; i++ ) { + for ( cur = this[ i ]; cur && cur !== context; cur = cur.parentNode ) { + + // Always skip document fragments + if ( cur.nodeType < 11 && ( targets ? + targets.index( cur ) > -1 : + + // Don't pass non-elements to Sizzle + cur.nodeType === 1 && + jQuery.find.matchesSelector( cur, selectors ) ) ) { + + matched.push( cur ); + break; + } + } + } + } + + return this.pushStack( matched.length > 1 ? jQuery.uniqueSort( matched ) : matched ); + }, + + // Determine the position of an element within the set + index: function( elem ) { + + // No argument, return index in parent + if ( !elem ) { + return ( this[ 0 ] && this[ 0 ].parentNode ) ? 
this.first().prevAll().length : -1; + } + + // Index in selector + if ( typeof elem === "string" ) { + return indexOf.call( jQuery( elem ), this[ 0 ] ); + } + + // Locate the position of the desired element + return indexOf.call( this, + + // If it receives a jQuery object, the first element is used + elem.jquery ? elem[ 0 ] : elem + ); + }, + + add: function( selector, context ) { + return this.pushStack( + jQuery.uniqueSort( + jQuery.merge( this.get(), jQuery( selector, context ) ) + ) + ); + }, + + addBack: function( selector ) { + return this.add( selector == null ? + this.prevObject : this.prevObject.filter( selector ) + ); + } +} ); + +function sibling( cur, dir ) { + while ( ( cur = cur[ dir ] ) && cur.nodeType !== 1 ) {} + return cur; +} + +jQuery.each( { + parent: function( elem ) { + var parent = elem.parentNode; + return parent && parent.nodeType !== 11 ? parent : null; + }, + parents: function( elem ) { + return dir( elem, "parentNode" ); + }, + parentsUntil: function( elem, _i, until ) { + return dir( elem, "parentNode", until ); + }, + next: function( elem ) { + return sibling( elem, "nextSibling" ); + }, + prev: function( elem ) { + return sibling( elem, "previousSibling" ); + }, + nextAll: function( elem ) { + return dir( elem, "nextSibling" ); + }, + prevAll: function( elem ) { + return dir( elem, "previousSibling" ); + }, + nextUntil: function( elem, _i, until ) { + return dir( elem, "nextSibling", until ); + }, + prevUntil: function( elem, _i, until ) { + return dir( elem, "previousSibling", until ); + }, + siblings: function( elem ) { + return siblings( ( elem.parentNode || {} ).firstChild, elem ); + }, + children: function( elem ) { + return siblings( elem.firstChild ); + }, + contents: function( elem ) { + if ( elem.contentDocument != null && + + // Support: IE 11+ + // elements with no `data` attribute has an object + // `contentDocument` with a `null` prototype. 
+ getProto( elem.contentDocument ) ) { + + return elem.contentDocument; + } + + // Support: IE 9 - 11 only, iOS 7 only, Android Browser <=4.3 only + // Treat the template element as a regular one in browsers that + // don't support it. + if ( nodeName( elem, "template" ) ) { + elem = elem.content || elem; + } + + return jQuery.merge( [], elem.childNodes ); + } +}, function( name, fn ) { + jQuery.fn[ name ] = function( until, selector ) { + var matched = jQuery.map( this, fn, until ); + + if ( name.slice( -5 ) !== "Until" ) { + selector = until; + } + + if ( selector && typeof selector === "string" ) { + matched = jQuery.filter( selector, matched ); + } + + if ( this.length > 1 ) { + + // Remove duplicates + if ( !guaranteedUnique[ name ] ) { + jQuery.uniqueSort( matched ); + } + + // Reverse order for parents* and prev-derivatives + if ( rparentsprev.test( name ) ) { + matched.reverse(); + } + } + + return this.pushStack( matched ); + }; +} ); +var rnothtmlwhite = ( /[^\x20\t\r\n\f]+/g ); + + + +// Convert String-formatted options into Object-formatted ones +function createOptions( options ) { + var object = {}; + jQuery.each( options.match( rnothtmlwhite ) || [], function( _, flag ) { + object[ flag ] = true; + } ); + return object; +} + +/* + * Create a callback list using the following parameters: + * + * options: an optional list of space-separated options that will change how + * the callback list behaves or a more traditional option object + * + * By default a callback list will act like an event callback list and can be + * "fired" multiple times. 
+ * + * Possible options: + * + * once: will ensure the callback list can only be fired once (like a Deferred) + * + * memory: will keep track of previous values and will call any callback added + * after the list has been fired right away with the latest "memorized" + * values (like a Deferred) + * + * unique: will ensure a callback can only be added once (no duplicate in the list) + * + * stopOnFalse: interrupt callings when a callback returns false + * + */ +jQuery.Callbacks = function( options ) { + + // Convert options from String-formatted to Object-formatted if needed + // (we check in cache first) + options = typeof options === "string" ? + createOptions( options ) : + jQuery.extend( {}, options ); + + var // Flag to know if list is currently firing + firing, + + // Last fire value for non-forgettable lists + memory, + + // Flag to know if list was already fired + fired, + + // Flag to prevent firing + locked, + + // Actual callback list + list = [], + + // Queue of execution data for repeatable lists + queue = [], + + // Index of currently firing callback (modified by add/remove as needed) + firingIndex = -1, + + // Fire callbacks + fire = function() { + + // Enforce single-firing + locked = locked || options.once; + + // Execute callbacks for all pending executions, + // respecting firingIndex overrides and runtime changes + fired = firing = true; + for ( ; queue.length; firingIndex = -1 ) { + memory = queue.shift(); + while ( ++firingIndex < list.length ) { + + // Run callback and check for early termination + if ( list[ firingIndex ].apply( memory[ 0 ], memory[ 1 ] ) === false && + options.stopOnFalse ) { + + // Jump to end and forget the data so .add doesn't re-fire + firingIndex = list.length; + memory = false; + } + } + } + + // Forget the data if we're done with it + if ( !options.memory ) { + memory = false; + } + + firing = false; + + // Clean up if we're done firing for good + if ( locked ) { + + // Keep an empty list if we have data for future 
add calls + if ( memory ) { + list = []; + + // Otherwise, this object is spent + } else { + list = ""; + } + } + }, + + // Actual Callbacks object + self = { + + // Add a callback or a collection of callbacks to the list + add: function() { + if ( list ) { + + // If we have memory from a past run, we should fire after adding + if ( memory && !firing ) { + firingIndex = list.length - 1; + queue.push( memory ); + } + + ( function add( args ) { + jQuery.each( args, function( _, arg ) { + if ( isFunction( arg ) ) { + if ( !options.unique || !self.has( arg ) ) { + list.push( arg ); + } + } else if ( arg && arg.length && toType( arg ) !== "string" ) { + + // Inspect recursively + add( arg ); + } + } ); + } )( arguments ); + + if ( memory && !firing ) { + fire(); + } + } + return this; + }, + + // Remove a callback from the list + remove: function() { + jQuery.each( arguments, function( _, arg ) { + var index; + while ( ( index = jQuery.inArray( arg, list, index ) ) > -1 ) { + list.splice( index, 1 ); + + // Handle firing indexes + if ( index <= firingIndex ) { + firingIndex--; + } + } + } ); + return this; + }, + + // Check if a given callback is in the list. + // If no argument is given, return whether or not list has callbacks attached. + has: function( fn ) { + return fn ? 
+ jQuery.inArray( fn, list ) > -1 : + list.length > 0; + }, + + // Remove all callbacks from the list + empty: function() { + if ( list ) { + list = []; + } + return this; + }, + + // Disable .fire and .add + // Abort any current/pending executions + // Clear all callbacks and values + disable: function() { + locked = queue = []; + list = memory = ""; + return this; + }, + disabled: function() { + return !list; + }, + + // Disable .fire + // Also disable .add unless we have memory (since it would have no effect) + // Abort any pending executions + lock: function() { + locked = queue = []; + if ( !memory && !firing ) { + list = memory = ""; + } + return this; + }, + locked: function() { + return !!locked; + }, + + // Call all callbacks with the given context and arguments + fireWith: function( context, args ) { + if ( !locked ) { + args = args || []; + args = [ context, args.slice ? args.slice() : args ]; + queue.push( args ); + if ( !firing ) { + fire(); + } + } + return this; + }, + + // Call all the callbacks with the given arguments + fire: function() { + self.fireWith( this, arguments ); + return this; + }, + + // To know if the callbacks have already been called at least once + fired: function() { + return !!fired; + } + }; + + return self; +}; + + +function Identity( v ) { + return v; +} +function Thrower( ex ) { + throw ex; +} + +function adoptValue( value, resolve, reject, noValue ) { + var method; + + try { + + // Check for promise aspect first to privilege synchronous behavior + if ( value && isFunction( ( method = value.promise ) ) ) { + method.call( value ).done( resolve ).fail( reject ); + + // Other thenables + } else if ( value && isFunction( ( method = value.then ) ) ) { + method.call( value, resolve, reject ); + + // Other non-thenables + } else { + + // Control `resolve` arguments by letting Array#slice cast boolean `noValue` to integer: + // * false: [ value ].slice( 0 ) => resolve( value ) + // * true: [ value ].slice( 1 ) => resolve() + 
resolve.apply( undefined, [ value ].slice( noValue ) ); + } + + // For Promises/A+, convert exceptions into rejections + // Since jQuery.when doesn't unwrap thenables, we can skip the extra checks appearing in + // Deferred#then to conditionally suppress rejection. + } catch ( value ) { + + // Support: Android 4.0 only + // Strict mode functions invoked without .call/.apply get global-object context + reject.apply( undefined, [ value ] ); + } +} + +jQuery.extend( { + + Deferred: function( func ) { + var tuples = [ + + // action, add listener, callbacks, + // ... .then handlers, argument index, [final state] + [ "notify", "progress", jQuery.Callbacks( "memory" ), + jQuery.Callbacks( "memory" ), 2 ], + [ "resolve", "done", jQuery.Callbacks( "once memory" ), + jQuery.Callbacks( "once memory" ), 0, "resolved" ], + [ "reject", "fail", jQuery.Callbacks( "once memory" ), + jQuery.Callbacks( "once memory" ), 1, "rejected" ] + ], + state = "pending", + promise = { + state: function() { + return state; + }, + always: function() { + deferred.done( arguments ).fail( arguments ); + return this; + }, + "catch": function( fn ) { + return promise.then( null, fn ); + }, + + // Keep pipe for back-compat + pipe: function( /* fnDone, fnFail, fnProgress */ ) { + var fns = arguments; + + return jQuery.Deferred( function( newDefer ) { + jQuery.each( tuples, function( _i, tuple ) { + + // Map tuples (progress, done, fail) to arguments (done, fail, progress) + var fn = isFunction( fns[ tuple[ 4 ] ] ) && fns[ tuple[ 4 ] ]; + + // deferred.progress(function() { bind to newDefer or newDefer.notify }) + // deferred.done(function() { bind to newDefer or newDefer.resolve }) + // deferred.fail(function() { bind to newDefer or newDefer.reject }) + deferred[ tuple[ 1 ] ]( function() { + var returned = fn && fn.apply( this, arguments ); + if ( returned && isFunction( returned.promise ) ) { + returned.promise() + .progress( newDefer.notify ) + .done( newDefer.resolve ) + .fail( newDefer.reject ); + } 
else { + newDefer[ tuple[ 0 ] + "With" ]( + this, + fn ? [ returned ] : arguments + ); + } + } ); + } ); + fns = null; + } ).promise(); + }, + then: function( onFulfilled, onRejected, onProgress ) { + var maxDepth = 0; + function resolve( depth, deferred, handler, special ) { + return function() { + var that = this, + args = arguments, + mightThrow = function() { + var returned, then; + + // Support: Promises/A+ section 2.3.3.3.3 + // https://promisesaplus.com/#point-59 + // Ignore double-resolution attempts + if ( depth < maxDepth ) { + return; + } + + returned = handler.apply( that, args ); + + // Support: Promises/A+ section 2.3.1 + // https://promisesaplus.com/#point-48 + if ( returned === deferred.promise() ) { + throw new TypeError( "Thenable self-resolution" ); + } + + // Support: Promises/A+ sections 2.3.3.1, 3.5 + // https://promisesaplus.com/#point-54 + // https://promisesaplus.com/#point-75 + // Retrieve `then` only once + then = returned && + + // Support: Promises/A+ section 2.3.4 + // https://promisesaplus.com/#point-64 + // Only check objects and functions for thenability + ( typeof returned === "object" || + typeof returned === "function" ) && + returned.then; + + // Handle a returned thenable + if ( isFunction( then ) ) { + + // Special processors (notify) just wait for resolution + if ( special ) { + then.call( + returned, + resolve( maxDepth, deferred, Identity, special ), + resolve( maxDepth, deferred, Thrower, special ) + ); + + // Normal processors (resolve) also hook into progress + } else { + + // ...and disregard older resolution values + maxDepth++; + + then.call( + returned, + resolve( maxDepth, deferred, Identity, special ), + resolve( maxDepth, deferred, Thrower, special ), + resolve( maxDepth, deferred, Identity, + deferred.notifyWith ) + ); + } + + // Handle all other returned values + } else { + + // Only substitute handlers pass on context + // and multiple values (non-spec behavior) + if ( handler !== Identity ) { + that = 
undefined; + args = [ returned ]; + } + + // Process the value(s) + // Default process is resolve + ( special || deferred.resolveWith )( that, args ); + } + }, + + // Only normal processors (resolve) catch and reject exceptions + process = special ? + mightThrow : + function() { + try { + mightThrow(); + } catch ( e ) { + + if ( jQuery.Deferred.exceptionHook ) { + jQuery.Deferred.exceptionHook( e, + process.stackTrace ); + } + + // Support: Promises/A+ section 2.3.3.3.4.1 + // https://promisesaplus.com/#point-61 + // Ignore post-resolution exceptions + if ( depth + 1 >= maxDepth ) { + + // Only substitute handlers pass on context + // and multiple values (non-spec behavior) + if ( handler !== Thrower ) { + that = undefined; + args = [ e ]; + } + + deferred.rejectWith( that, args ); + } + } + }; + + // Support: Promises/A+ section 2.3.3.3.1 + // https://promisesaplus.com/#point-57 + // Re-resolve promises immediately to dodge false rejection from + // subsequent errors + if ( depth ) { + process(); + } else { + + // Call an optional hook to record the stack, in case of exception + // since it's otherwise lost when execution goes async + if ( jQuery.Deferred.getStackHook ) { + process.stackTrace = jQuery.Deferred.getStackHook(); + } + window.setTimeout( process ); + } + }; + } + + return jQuery.Deferred( function( newDefer ) { + + // progress_handlers.add( ... ) + tuples[ 0 ][ 3 ].add( + resolve( + 0, + newDefer, + isFunction( onProgress ) ? + onProgress : + Identity, + newDefer.notifyWith + ) + ); + + // fulfilled_handlers.add( ... ) + tuples[ 1 ][ 3 ].add( + resolve( + 0, + newDefer, + isFunction( onFulfilled ) ? + onFulfilled : + Identity + ) + ); + + // rejected_handlers.add( ... ) + tuples[ 2 ][ 3 ].add( + resolve( + 0, + newDefer, + isFunction( onRejected ) ? 
+ onRejected : + Thrower + ) + ); + } ).promise(); + }, + + // Get a promise for this deferred + // If obj is provided, the promise aspect is added to the object + promise: function( obj ) { + return obj != null ? jQuery.extend( obj, promise ) : promise; + } + }, + deferred = {}; + + // Add list-specific methods + jQuery.each( tuples, function( i, tuple ) { + var list = tuple[ 2 ], + stateString = tuple[ 5 ]; + + // promise.progress = list.add + // promise.done = list.add + // promise.fail = list.add + promise[ tuple[ 1 ] ] = list.add; + + // Handle state + if ( stateString ) { + list.add( + function() { + + // state = "resolved" (i.e., fulfilled) + // state = "rejected" + state = stateString; + }, + + // rejected_callbacks.disable + // fulfilled_callbacks.disable + tuples[ 3 - i ][ 2 ].disable, + + // rejected_handlers.disable + // fulfilled_handlers.disable + tuples[ 3 - i ][ 3 ].disable, + + // progress_callbacks.lock + tuples[ 0 ][ 2 ].lock, + + // progress_handlers.lock + tuples[ 0 ][ 3 ].lock + ); + } + + // progress_handlers.fire + // fulfilled_handlers.fire + // rejected_handlers.fire + list.add( tuple[ 3 ].fire ); + + // deferred.notify = function() { deferred.notifyWith(...) } + // deferred.resolve = function() { deferred.resolveWith(...) } + // deferred.reject = function() { deferred.rejectWith(...) } + deferred[ tuple[ 0 ] ] = function() { + deferred[ tuple[ 0 ] + "With" ]( this === deferred ? undefined : this, arguments ); + return this; + }; + + // deferred.notifyWith = list.fireWith + // deferred.resolveWith = list.fireWith + // deferred.rejectWith = list.fireWith + deferred[ tuple[ 0 ] + "With" ] = list.fireWith; + } ); + + // Make the deferred a promise + promise.promise( deferred ); + + // Call given func if any + if ( func ) { + func.call( deferred, deferred ); + } + + // All done! 
+ return deferred; + }, + + // Deferred helper + when: function( singleValue ) { + var + + // count of uncompleted subordinates + remaining = arguments.length, + + // count of unprocessed arguments + i = remaining, + + // subordinate fulfillment data + resolveContexts = Array( i ), + resolveValues = slice.call( arguments ), + + // the master Deferred + master = jQuery.Deferred(), + + // subordinate callback factory + updateFunc = function( i ) { + return function( value ) { + resolveContexts[ i ] = this; + resolveValues[ i ] = arguments.length > 1 ? slice.call( arguments ) : value; + if ( !( --remaining ) ) { + master.resolveWith( resolveContexts, resolveValues ); + } + }; + }; + + // Single- and empty arguments are adopted like Promise.resolve + if ( remaining <= 1 ) { + adoptValue( singleValue, master.done( updateFunc( i ) ).resolve, master.reject, + !remaining ); + + // Use .then() to unwrap secondary thenables (cf. gh-3000) + if ( master.state() === "pending" || + isFunction( resolveValues[ i ] && resolveValues[ i ].then ) ) { + + return master.then(); + } + } + + // Multiple arguments are aggregated like Promise.all array elements + while ( i-- ) { + adoptValue( resolveValues[ i ], updateFunc( i ), master.reject ); + } + + return master.promise(); + } +} ); + + +// These usually indicate a programmer mistake during development, +// warn about them ASAP rather than swallowing them by default. 
+var rerrorNames = /^(Eval|Internal|Range|Reference|Syntax|Type|URI)Error$/; + +jQuery.Deferred.exceptionHook = function( error, stack ) { + + // Support: IE 8 - 9 only + // Console exists when dev tools are open, which can happen at any time + if ( window.console && window.console.warn && error && rerrorNames.test( error.name ) ) { + window.console.warn( "jQuery.Deferred exception: " + error.message, error.stack, stack ); + } +}; + + + + +jQuery.readyException = function( error ) { + window.setTimeout( function() { + throw error; + } ); +}; + + + + +// The deferred used on DOM ready +var readyList = jQuery.Deferred(); + +jQuery.fn.ready = function( fn ) { + + readyList + .then( fn ) + + // Wrap jQuery.readyException in a function so that the lookup + // happens at the time of error handling instead of callback + // registration. + .catch( function( error ) { + jQuery.readyException( error ); + } ); + + return this; +}; + +jQuery.extend( { + + // Is the DOM ready to be used? Set to true once it occurs. + isReady: false, + + // A counter to track how many items to wait for before + // the ready event fires. See #6781 + readyWait: 1, + + // Handle when the DOM is ready + ready: function( wait ) { + + // Abort if there are pending holds or we're already ready + if ( wait === true ? 
--jQuery.readyWait : jQuery.isReady ) { + return; + } + + // Remember that the DOM is ready + jQuery.isReady = true; + + // If a normal DOM Ready event fired, decrement, and wait if need be + if ( wait !== true && --jQuery.readyWait > 0 ) { + return; + } + + // If there are functions bound, to execute + readyList.resolveWith( document, [ jQuery ] ); + } +} ); + +jQuery.ready.then = readyList.then; + +// The ready event handler and self cleanup method +function completed() { + document.removeEventListener( "DOMContentLoaded", completed ); + window.removeEventListener( "load", completed ); + jQuery.ready(); +} + +// Catch cases where $(document).ready() is called +// after the browser event has already occurred. +// Support: IE <=9 - 10 only +// Older IE sometimes signals "interactive" too soon +if ( document.readyState === "complete" || + ( document.readyState !== "loading" && !document.documentElement.doScroll ) ) { + + // Handle it asynchronously to allow scripts the opportunity to delay ready + window.setTimeout( jQuery.ready ); + +} else { + + // Use the handy event callback + document.addEventListener( "DOMContentLoaded", completed ); + + // A fallback to window.onload, that will always work + window.addEventListener( "load", completed ); +} + + + + +// Multifunctional method to get and set values of a collection +// The value/s can optionally be executed if it's a function +var access = function( elems, fn, key, value, chainable, emptyGet, raw ) { + var i = 0, + len = elems.length, + bulk = key == null; + + // Sets many values + if ( toType( key ) === "object" ) { + chainable = true; + for ( i in key ) { + access( elems, fn, i, key[ i ], true, emptyGet, raw ); + } + + // Sets one value + } else if ( value !== undefined ) { + chainable = true; + + if ( !isFunction( value ) ) { + raw = true; + } + + if ( bulk ) { + + // Bulk operations run against the entire set + if ( raw ) { + fn.call( elems, value ); + fn = null; + + // ...except when executing function 
values + } else { + bulk = fn; + fn = function( elem, _key, value ) { + return bulk.call( jQuery( elem ), value ); + }; + } + } + + if ( fn ) { + for ( ; i < len; i++ ) { + fn( + elems[ i ], key, raw ? + value : + value.call( elems[ i ], i, fn( elems[ i ], key ) ) + ); + } + } + } + + if ( chainable ) { + return elems; + } + + // Gets + if ( bulk ) { + return fn.call( elems ); + } + + return len ? fn( elems[ 0 ], key ) : emptyGet; +}; + + +// Matches dashed string for camelizing +var rmsPrefix = /^-ms-/, + rdashAlpha = /-([a-z])/g; + +// Used by camelCase as callback to replace() +function fcamelCase( _all, letter ) { + return letter.toUpperCase(); +} + +// Convert dashed to camelCase; used by the css and data modules +// Support: IE <=9 - 11, Edge 12 - 15 +// Microsoft forgot to hump their vendor prefix (#9572) +function camelCase( string ) { + return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); +} +var acceptData = function( owner ) { + + // Accepts only: + // - Node + // - Node.ELEMENT_NODE + // - Node.DOCUMENT_NODE + // - Object + // - Any + return owner.nodeType === 1 || owner.nodeType === 9 || !( +owner.nodeType ); +}; + + + + +function Data() { + this.expando = jQuery.expando + Data.uid++; +} + +Data.uid = 1; + +Data.prototype = { + + cache: function( owner ) { + + // Check if the owner object already has a cache + var value = owner[ this.expando ]; + + // If not, create one + if ( !value ) { + value = {}; + + // We can accept data for non-element nodes in modern browsers, + // but we should not, see #8335. + // Always return an empty object. 
+ if ( acceptData( owner ) ) { + + // If it is a node unlikely to be stringify-ed or looped over + // use plain assignment + if ( owner.nodeType ) { + owner[ this.expando ] = value; + + // Otherwise secure it in a non-enumerable property + // configurable must be true to allow the property to be + // deleted when data is removed + } else { + Object.defineProperty( owner, this.expando, { + value: value, + configurable: true + } ); + } + } + } + + return value; + }, + set: function( owner, data, value ) { + var prop, + cache = this.cache( owner ); + + // Handle: [ owner, key, value ] args + // Always use camelCase key (gh-2257) + if ( typeof data === "string" ) { + cache[ camelCase( data ) ] = value; + + // Handle: [ owner, { properties } ] args + } else { + + // Copy the properties one-by-one to the cache object + for ( prop in data ) { + cache[ camelCase( prop ) ] = data[ prop ]; + } + } + return cache; + }, + get: function( owner, key ) { + return key === undefined ? + this.cache( owner ) : + + // Always use camelCase key (gh-2257) + owner[ this.expando ] && owner[ this.expando ][ camelCase( key ) ]; + }, + access: function( owner, key, value ) { + + // In cases where either: + // + // 1. No key was specified + // 2. A string key was specified, but no value provided + // + // Take the "read" path and allow the get method to determine + // which value to return, respectively either: + // + // 1. The entire cache object + // 2. The data stored at the key + // + if ( key === undefined || + ( ( key && typeof key === "string" ) && value === undefined ) ) { + + return this.get( owner, key ); + } + + // When the key is not a string, or both a key and value + // are specified, set or extend (existing objects) with either: + // + // 1. An object of properties + // 2. 
A key and value + // + this.set( owner, key, value ); + + // Since the "set" path can have two possible entry points + // return the expected data based on which path was taken[*] + return value !== undefined ? value : key; + }, + remove: function( owner, key ) { + var i, + cache = owner[ this.expando ]; + + if ( cache === undefined ) { + return; + } + + if ( key !== undefined ) { + + // Support array or space separated string of keys + if ( Array.isArray( key ) ) { + + // If key is an array of keys... + // We always set camelCase keys, so remove that. + key = key.map( camelCase ); + } else { + key = camelCase( key ); + + // If a key with the spaces exists, use it. + // Otherwise, create an array by matching non-whitespace + key = key in cache ? + [ key ] : + ( key.match( rnothtmlwhite ) || [] ); + } + + i = key.length; + + while ( i-- ) { + delete cache[ key[ i ] ]; + } + } + + // Remove the expando if there's no more data + if ( key === undefined || jQuery.isEmptyObject( cache ) ) { + + // Support: Chrome <=35 - 45 + // Webkit & Blink performance suffers when deleting properties + // from DOM nodes, so set to undefined instead + // https://bugs.chromium.org/p/chromium/issues/detail?id=378607 (bug restricted) + if ( owner.nodeType ) { + owner[ this.expando ] = undefined; + } else { + delete owner[ this.expando ]; + } + } + }, + hasData: function( owner ) { + var cache = owner[ this.expando ]; + return cache !== undefined && !jQuery.isEmptyObject( cache ); + } +}; +var dataPriv = new Data(); + +var dataUser = new Data(); + + + +// Implementation Summary +// +// 1. Enforce API surface and semantic compatibility with 1.9.x branch +// 2. Improve the module's maintainability by reducing the storage +// paths to a single mechanism. +// 3. Use the same single mechanism to support "private" and "user" data. +// 4. _Never_ expose "private" data to user code (TODO: Drop _data, _removeData) +// 5. Avoid exposing implementation details on user objects (eg. 
expando properties) +// 6. Provide a clear path for implementation upgrade to WeakMap in 2014 + +var rbrace = /^(?:\{[\w\W]*\}|\[[\w\W]*\])$/, + rmultiDash = /[A-Z]/g; + +function getData( data ) { + if ( data === "true" ) { + return true; + } + + if ( data === "false" ) { + return false; + } + + if ( data === "null" ) { + return null; + } + + // Only convert to a number if it doesn't change the string + if ( data === +data + "" ) { + return +data; + } + + if ( rbrace.test( data ) ) { + return JSON.parse( data ); + } + + return data; +} + +function dataAttr( elem, key, data ) { + var name; + + // If nothing was found internally, try to fetch any + // data from the HTML5 data-* attribute + if ( data === undefined && elem.nodeType === 1 ) { + name = "data-" + key.replace( rmultiDash, "-$&" ).toLowerCase(); + data = elem.getAttribute( name ); + + if ( typeof data === "string" ) { + try { + data = getData( data ); + } catch ( e ) {} + + // Make sure we set the data so it isn't changed later + dataUser.set( elem, key, data ); + } else { + data = undefined; + } + } + return data; +} + +jQuery.extend( { + hasData: function( elem ) { + return dataUser.hasData( elem ) || dataPriv.hasData( elem ); + }, + + data: function( elem, name, data ) { + return dataUser.access( elem, name, data ); + }, + + removeData: function( elem, name ) { + dataUser.remove( elem, name ); + }, + + // TODO: Now that all calls to _data and _removeData have been replaced + // with direct calls to dataPriv methods, these can be deprecated. 
+ _data: function( elem, name, data ) { + return dataPriv.access( elem, name, data ); + }, + + _removeData: function( elem, name ) { + dataPriv.remove( elem, name ); + } +} ); + +jQuery.fn.extend( { + data: function( key, value ) { + var i, name, data, + elem = this[ 0 ], + attrs = elem && elem.attributes; + + // Gets all values + if ( key === undefined ) { + if ( this.length ) { + data = dataUser.get( elem ); + + if ( elem.nodeType === 1 && !dataPriv.get( elem, "hasDataAttrs" ) ) { + i = attrs.length; + while ( i-- ) { + + // Support: IE 11 only + // The attrs elements can be null (#14894) + if ( attrs[ i ] ) { + name = attrs[ i ].name; + if ( name.indexOf( "data-" ) === 0 ) { + name = camelCase( name.slice( 5 ) ); + dataAttr( elem, name, data[ name ] ); + } + } + } + dataPriv.set( elem, "hasDataAttrs", true ); + } + } + + return data; + } + + // Sets multiple values + if ( typeof key === "object" ) { + return this.each( function() { + dataUser.set( this, key ); + } ); + } + + return access( this, function( value ) { + var data; + + // The calling jQuery object (element matches) is not empty + // (and therefore has an element appears at this[ 0 ]) and the + // `value` parameter was not undefined. An empty jQuery object + // will result in `undefined` for elem = this[ 0 ] which will + // throw an exception if an attempt to read a data cache is made. + if ( elem && value === undefined ) { + + // Attempt to get data from the cache + // The key will always be camelCased in Data + data = dataUser.get( elem, key ); + if ( data !== undefined ) { + return data; + } + + // Attempt to "discover" the data in + // HTML5 custom data-* attrs + data = dataAttr( elem, key ); + if ( data !== undefined ) { + return data; + } + + // We tried really hard, but the data doesn't exist. + return; + } + + // Set the data... 
+ this.each( function() { + + // We always store the camelCased key + dataUser.set( this, key, value ); + } ); + }, null, value, arguments.length > 1, null, true ); + }, + + removeData: function( key ) { + return this.each( function() { + dataUser.remove( this, key ); + } ); + } +} ); + + +jQuery.extend( { + queue: function( elem, type, data ) { + var queue; + + if ( elem ) { + type = ( type || "fx" ) + "queue"; + queue = dataPriv.get( elem, type ); + + // Speed up dequeue by getting out quickly if this is just a lookup + if ( data ) { + if ( !queue || Array.isArray( data ) ) { + queue = dataPriv.access( elem, type, jQuery.makeArray( data ) ); + } else { + queue.push( data ); + } + } + return queue || []; + } + }, + + dequeue: function( elem, type ) { + type = type || "fx"; + + var queue = jQuery.queue( elem, type ), + startLength = queue.length, + fn = queue.shift(), + hooks = jQuery._queueHooks( elem, type ), + next = function() { + jQuery.dequeue( elem, type ); + }; + + // If the fx queue is dequeued, always remove the progress sentinel + if ( fn === "inprogress" ) { + fn = queue.shift(); + startLength--; + } + + if ( fn ) { + + // Add a progress sentinel to prevent the fx queue from being + // automatically dequeued + if ( type === "fx" ) { + queue.unshift( "inprogress" ); + } + + // Clear up the last queue stop function + delete hooks.stop; + fn.call( elem, next, hooks ); + } + + if ( !startLength && hooks ) { + hooks.empty.fire(); + } + }, + + // Not public - generate a queueHooks object, or return the current one + _queueHooks: function( elem, type ) { + var key = type + "queueHooks"; + return dataPriv.get( elem, key ) || dataPriv.access( elem, key, { + empty: jQuery.Callbacks( "once memory" ).add( function() { + dataPriv.remove( elem, [ type + "queue", key ] ); + } ) + } ); + } +} ); + +jQuery.fn.extend( { + queue: function( type, data ) { + var setter = 2; + + if ( typeof type !== "string" ) { + data = type; + type = "fx"; + setter--; + } + + if ( 
arguments.length < setter ) { + return jQuery.queue( this[ 0 ], type ); + } + + return data === undefined ? + this : + this.each( function() { + var queue = jQuery.queue( this, type, data ); + + // Ensure a hooks for this queue + jQuery._queueHooks( this, type ); + + if ( type === "fx" && queue[ 0 ] !== "inprogress" ) { + jQuery.dequeue( this, type ); + } + } ); + }, + dequeue: function( type ) { + return this.each( function() { + jQuery.dequeue( this, type ); + } ); + }, + clearQueue: function( type ) { + return this.queue( type || "fx", [] ); + }, + + // Get a promise resolved when queues of a certain type + // are emptied (fx is the type by default) + promise: function( type, obj ) { + var tmp, + count = 1, + defer = jQuery.Deferred(), + elements = this, + i = this.length, + resolve = function() { + if ( !( --count ) ) { + defer.resolveWith( elements, [ elements ] ); + } + }; + + if ( typeof type !== "string" ) { + obj = type; + type = undefined; + } + type = type || "fx"; + + while ( i-- ) { + tmp = dataPriv.get( elements[ i ], type + "queueHooks" ); + if ( tmp && tmp.empty ) { + count++; + tmp.empty.add( resolve ); + } + } + resolve(); + return defer.promise( obj ); + } +} ); +var pnum = ( /[+-]?(?:\d*\.|)\d+(?:[eE][+-]?\d+|)/ ).source; + +var rcssNum = new RegExp( "^(?:([+-])=|)(" + pnum + ")([a-z%]*)$", "i" ); + + +var cssExpand = [ "Top", "Right", "Bottom", "Left" ]; + +var documentElement = document.documentElement; + + + + var isAttached = function( elem ) { + return jQuery.contains( elem.ownerDocument, elem ); + }, + composed = { composed: true }; + + // Support: IE 9 - 11+, Edge 12 - 18+, iOS 10.0 - 10.2 only + // Check attachment across shadow DOM boundaries when possible (gh-3504) + // Support: iOS 10.0-10.2 only + // Early iOS 10 versions support `attachShadow` but not `getRootNode`, + // leading to errors. We need to check for `getRootNode`. 
+ if ( documentElement.getRootNode ) { + isAttached = function( elem ) { + return jQuery.contains( elem.ownerDocument, elem ) || + elem.getRootNode( composed ) === elem.ownerDocument; + }; + } +var isHiddenWithinTree = function( elem, el ) { + + // isHiddenWithinTree might be called from jQuery#filter function; + // in that case, element will be second argument + elem = el || elem; + + // Inline style trumps all + return elem.style.display === "none" || + elem.style.display === "" && + + // Otherwise, check computed style + // Support: Firefox <=43 - 45 + // Disconnected elements can have computed display: none, so first confirm that elem is + // in the document. + isAttached( elem ) && + + jQuery.css( elem, "display" ) === "none"; + }; + + + +function adjustCSS( elem, prop, valueParts, tween ) { + var adjusted, scale, + maxIterations = 20, + currentValue = tween ? + function() { + return tween.cur(); + } : + function() { + return jQuery.css( elem, prop, "" ); + }, + initial = currentValue(), + unit = valueParts && valueParts[ 3 ] || ( jQuery.cssNumber[ prop ] ? "" : "px" ), + + // Starting value computation is required for potential unit mismatches + initialInUnit = elem.nodeType && + ( jQuery.cssNumber[ prop ] || unit !== "px" && +initial ) && + rcssNum.exec( jQuery.css( elem, prop ) ); + + if ( initialInUnit && initialInUnit[ 3 ] !== unit ) { + + // Support: Firefox <=54 + // Halve the iteration target value to prevent interference from CSS upper bounds (gh-2144) + initial = initial / 2; + + // Trust units reported by jQuery.css + unit = unit || initialInUnit[ 3 ]; + + // Iteratively approximate from a nonzero starting point + initialInUnit = +initial || 1; + + while ( maxIterations-- ) { + + // Evaluate and update our best guess (doubling guesses that zero out). + // Finish if the scale equals or crosses 1 (making the old*new product non-positive). 
+ jQuery.style( elem, prop, initialInUnit + unit ); + if ( ( 1 - scale ) * ( 1 - ( scale = currentValue() / initial || 0.5 ) ) <= 0 ) { + maxIterations = 0; + } + initialInUnit = initialInUnit / scale; + + } + + initialInUnit = initialInUnit * 2; + jQuery.style( elem, prop, initialInUnit + unit ); + + // Make sure we update the tween properties later on + valueParts = valueParts || []; + } + + if ( valueParts ) { + initialInUnit = +initialInUnit || +initial || 0; + + // Apply relative offset (+=/-=) if specified + adjusted = valueParts[ 1 ] ? + initialInUnit + ( valueParts[ 1 ] + 1 ) * valueParts[ 2 ] : + +valueParts[ 2 ]; + if ( tween ) { + tween.unit = unit; + tween.start = initialInUnit; + tween.end = adjusted; + } + } + return adjusted; +} + + +var defaultDisplayMap = {}; + +function getDefaultDisplay( elem ) { + var temp, + doc = elem.ownerDocument, + nodeName = elem.nodeName, + display = defaultDisplayMap[ nodeName ]; + + if ( display ) { + return display; + } + + temp = doc.body.appendChild( doc.createElement( nodeName ) ); + display = jQuery.css( temp, "display" ); + + temp.parentNode.removeChild( temp ); + + if ( display === "none" ) { + display = "block"; + } + defaultDisplayMap[ nodeName ] = display; + + return display; +} + +function showHide( elements, show ) { + var display, elem, + values = [], + index = 0, + length = elements.length; + + // Determine new display value for elements that need to change + for ( ; index < length; index++ ) { + elem = elements[ index ]; + if ( !elem.style ) { + continue; + } + + display = elem.style.display; + if ( show ) { + + // Since we force visibility upon cascade-hidden elements, an immediate (and slow) + // check is required in this first loop unless we have a nonempty display value (either + // inline or about-to-be-restored) + if ( display === "none" ) { + values[ index ] = dataPriv.get( elem, "display" ) || null; + if ( !values[ index ] ) { + elem.style.display = ""; + } + } + if ( elem.style.display === "" && 
isHiddenWithinTree( elem ) ) { + values[ index ] = getDefaultDisplay( elem ); + } + } else { + if ( display !== "none" ) { + values[ index ] = "none"; + + // Remember what we're overwriting + dataPriv.set( elem, "display", display ); + } + } + } + + // Set the display of the elements in a second loop to avoid constant reflow + for ( index = 0; index < length; index++ ) { + if ( values[ index ] != null ) { + elements[ index ].style.display = values[ index ]; + } + } + + return elements; +} + +jQuery.fn.extend( { + show: function() { + return showHide( this, true ); + }, + hide: function() { + return showHide( this ); + }, + toggle: function( state ) { + if ( typeof state === "boolean" ) { + return state ? this.show() : this.hide(); + } + + return this.each( function() { + if ( isHiddenWithinTree( this ) ) { + jQuery( this ).show(); + } else { + jQuery( this ).hide(); + } + } ); + } +} ); +var rcheckableType = ( /^(?:checkbox|radio)$/i ); + +var rtagName = ( /<([a-z][^\/\0>\x20\t\r\n\f]*)/i ); + +var rscriptType = ( /^$|^module$|\/(?:java|ecma)script/i ); + + + +( function() { + var fragment = document.createDocumentFragment(), + div = fragment.appendChild( document.createElement( "div" ) ), + input = document.createElement( "input" ); + + // Support: Android 4.0 - 4.3 only + // Check state lost if the name is set (#11217) + // Support: Windows Web Apps (WWA) + // `name` and `type` must use .setAttribute for WWA (#14901) + input.setAttribute( "type", "radio" ); + input.setAttribute( "checked", "checked" ); + input.setAttribute( "name", "t" ); + + div.appendChild( input ); + + // Support: Android <=4.1 only + // Older WebKit doesn't clone checked state correctly in fragments + support.checkClone = div.cloneNode( true ).cloneNode( true ).lastChild.checked; + + // Support: IE <=11 only + // Make sure textarea (and checkbox) defaultValue is properly cloned + div.innerHTML = ""; + support.noCloneChecked = !!div.cloneNode( true ).lastChild.defaultValue; + + // Support: IE 
<=9 only + // IE <=9 replaces "; + support.option = !!div.lastChild; +} )(); + + +// We have to close these tags to support XHTML (#13200) +var wrapMap = { + + // XHTML parsers do not magically insert elements in the + // same way that tag soup parsers do. So we cannot shorten + // this by omitting or other required elements. + thead: [ 1, "", "
" ], + col: [ 2, "", "
" ], + tr: [ 2, "", "
" ], + td: [ 3, "", "
" ], + + _default: [ 0, "", "" ] +}; + +wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead; +wrapMap.th = wrapMap.td; + +// Support: IE <=9 only +if ( !support.option ) { + wrapMap.optgroup = wrapMap.option = [ 1, "" ]; +} + + +function getAll( context, tag ) { + + // Support: IE <=9 - 11 only + // Use typeof to avoid zero-argument method invocation on host objects (#15151) + var ret; + + if ( typeof context.getElementsByTagName !== "undefined" ) { + ret = context.getElementsByTagName( tag || "*" ); + + } else if ( typeof context.querySelectorAll !== "undefined" ) { + ret = context.querySelectorAll( tag || "*" ); + + } else { + ret = []; + } + + if ( tag === undefined || tag && nodeName( context, tag ) ) { + return jQuery.merge( [ context ], ret ); + } + + return ret; +} + + +// Mark scripts as having already been evaluated +function setGlobalEval( elems, refElements ) { + var i = 0, + l = elems.length; + + for ( ; i < l; i++ ) { + dataPriv.set( + elems[ i ], + "globalEval", + !refElements || dataPriv.get( refElements[ i ], "globalEval" ) + ); + } +} + + +var rhtml = /<|&#?\w+;/; + +function buildFragment( elems, context, scripts, selection, ignored ) { + var elem, tmp, tag, wrap, attached, j, + fragment = context.createDocumentFragment(), + nodes = [], + i = 0, + l = elems.length; + + for ( ; i < l; i++ ) { + elem = elems[ i ]; + + if ( elem || elem === 0 ) { + + // Add nodes directly + if ( toType( elem ) === "object" ) { + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + jQuery.merge( nodes, elem.nodeType ? 
[ elem ] : elem ); + + // Convert non-html into a text node + } else if ( !rhtml.test( elem ) ) { + nodes.push( context.createTextNode( elem ) ); + + // Convert html into DOM nodes + } else { + tmp = tmp || fragment.appendChild( context.createElement( "div" ) ); + + // Deserialize a standard representation + tag = ( rtagName.exec( elem ) || [ "", "" ] )[ 1 ].toLowerCase(); + wrap = wrapMap[ tag ] || wrapMap._default; + tmp.innerHTML = wrap[ 1 ] + jQuery.htmlPrefilter( elem ) + wrap[ 2 ]; + + // Descend through wrappers to the right content + j = wrap[ 0 ]; + while ( j-- ) { + tmp = tmp.lastChild; + } + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + jQuery.merge( nodes, tmp.childNodes ); + + // Remember the top-level container + tmp = fragment.firstChild; + + // Ensure the created nodes are orphaned (#12392) + tmp.textContent = ""; + } + } + } + + // Remove wrapper from fragment + fragment.textContent = ""; + + i = 0; + while ( ( elem = nodes[ i++ ] ) ) { + + // Skip elements already in the context collection (trac-4087) + if ( selection && jQuery.inArray( elem, selection ) > -1 ) { + if ( ignored ) { + ignored.push( elem ); + } + continue; + } + + attached = isAttached( elem ); + + // Append to fragment + tmp = getAll( fragment.appendChild( elem ), "script" ); + + // Preserve script evaluation history + if ( attached ) { + setGlobalEval( tmp ); + } + + // Capture executables + if ( scripts ) { + j = 0; + while ( ( elem = tmp[ j++ ] ) ) { + if ( rscriptType.test( elem.type || "" ) ) { + scripts.push( elem ); + } + } + } + } + + return fragment; +} + + +var + rkeyEvent = /^key/, + rmouseEvent = /^(?:mouse|pointer|contextmenu|drag|drop)|click/, + rtypenamespace = /^([^.]*)(?:\.(.+)|)/; + +function returnTrue() { + return true; +} + +function returnFalse() { + return false; +} + +// Support: IE <=9 - 11+ +// focus() and blur() are asynchronous, except when they are no-op. 
+// So expect focus to be synchronous when the element is already active, +// and blur to be synchronous when the element is not already active. +// (focus and blur are always synchronous in other supported browsers, +// this just defines when we can count on it). +function expectSync( elem, type ) { + return ( elem === safeActiveElement() ) === ( type === "focus" ); +} + +// Support: IE <=9 only +// Accessing document.activeElement can throw unexpectedly +// https://bugs.jquery.com/ticket/13393 +function safeActiveElement() { + try { + return document.activeElement; + } catch ( err ) { } +} + +function on( elem, types, selector, data, fn, one ) { + var origFn, type; + + // Types can be a map of types/handlers + if ( typeof types === "object" ) { + + // ( types-Object, selector, data ) + if ( typeof selector !== "string" ) { + + // ( types-Object, data ) + data = data || selector; + selector = undefined; + } + for ( type in types ) { + on( elem, type, selector, data, types[ type ], one ); + } + return elem; + } + + if ( data == null && fn == null ) { + + // ( types, fn ) + fn = selector; + data = selector = undefined; + } else if ( fn == null ) { + if ( typeof selector === "string" ) { + + // ( types, selector, fn ) + fn = data; + data = undefined; + } else { + + // ( types, data, fn ) + fn = data; + data = selector; + selector = undefined; + } + } + if ( fn === false ) { + fn = returnFalse; + } else if ( !fn ) { + return elem; + } + + if ( one === 1 ) { + origFn = fn; + fn = function( event ) { + + // Can use an empty set, since event contains the info + jQuery().off( event ); + return origFn.apply( this, arguments ); + }; + + // Use same guid so caller can remove using origFn + fn.guid = origFn.guid || ( origFn.guid = jQuery.guid++ ); + } + return elem.each( function() { + jQuery.event.add( this, types, fn, data, selector ); + } ); +} + +/* + * Helper functions for managing events -- not part of the public interface. 
+ * Props to Dean Edwards' addEvent library for many of the ideas. + */ +jQuery.event = { + + global: {}, + + add: function( elem, types, handler, data, selector ) { + + var handleObjIn, eventHandle, tmp, + events, t, handleObj, + special, handlers, type, namespaces, origType, + elemData = dataPriv.get( elem ); + + // Only attach events to objects that accept data + if ( !acceptData( elem ) ) { + return; + } + + // Caller can pass in an object of custom data in lieu of the handler + if ( handler.handler ) { + handleObjIn = handler; + handler = handleObjIn.handler; + selector = handleObjIn.selector; + } + + // Ensure that invalid selectors throw exceptions at attach time + // Evaluate against documentElement in case elem is a non-element node (e.g., document) + if ( selector ) { + jQuery.find.matchesSelector( documentElement, selector ); + } + + // Make sure that the handler has a unique ID, used to find/remove it later + if ( !handler.guid ) { + handler.guid = jQuery.guid++; + } + + // Init the element's event structure and main handler, if this is the first + if ( !( events = elemData.events ) ) { + events = elemData.events = Object.create( null ); + } + if ( !( eventHandle = elemData.handle ) ) { + eventHandle = elemData.handle = function( e ) { + + // Discard the second event of a jQuery.event.trigger() and + // when an event is called after a page has unloaded + return typeof jQuery !== "undefined" && jQuery.event.triggered !== e.type ? + jQuery.event.dispatch.apply( elem, arguments ) : undefined; + }; + } + + // Handle multiple events separated by a space + types = ( types || "" ).match( rnothtmlwhite ) || [ "" ]; + t = types.length; + while ( t-- ) { + tmp = rtypenamespace.exec( types[ t ] ) || []; + type = origType = tmp[ 1 ]; + namespaces = ( tmp[ 2 ] || "" ).split( "." 
).sort(); + + // There *must* be a type, no attaching namespace-only handlers + if ( !type ) { + continue; + } + + // If event changes its type, use the special event handlers for the changed type + special = jQuery.event.special[ type ] || {}; + + // If selector defined, determine special event api type, otherwise given type + type = ( selector ? special.delegateType : special.bindType ) || type; + + // Update special based on newly reset type + special = jQuery.event.special[ type ] || {}; + + // handleObj is passed to all event handlers + handleObj = jQuery.extend( { + type: type, + origType: origType, + data: data, + handler: handler, + guid: handler.guid, + selector: selector, + needsContext: selector && jQuery.expr.match.needsContext.test( selector ), + namespace: namespaces.join( "." ) + }, handleObjIn ); + + // Init the event handler queue if we're the first + if ( !( handlers = events[ type ] ) ) { + handlers = events[ type ] = []; + handlers.delegateCount = 0; + + // Only use addEventListener if the special events handler returns false + if ( !special.setup || + special.setup.call( elem, data, namespaces, eventHandle ) === false ) { + + if ( elem.addEventListener ) { + elem.addEventListener( type, eventHandle ); + } + } + } + + if ( special.add ) { + special.add.call( elem, handleObj ); + + if ( !handleObj.handler.guid ) { + handleObj.handler.guid = handler.guid; + } + } + + // Add to the element's handler list, delegates in front + if ( selector ) { + handlers.splice( handlers.delegateCount++, 0, handleObj ); + } else { + handlers.push( handleObj ); + } + + // Keep track of which events have ever been used, for event optimization + jQuery.event.global[ type ] = true; + } + + }, + + // Detach an event or set of events from an element + remove: function( elem, types, handler, selector, mappedTypes ) { + + var j, origCount, tmp, + events, t, handleObj, + special, handlers, type, namespaces, origType, + elemData = dataPriv.hasData( elem ) && dataPriv.get( 
elem ); + + if ( !elemData || !( events = elemData.events ) ) { + return; + } + + // Once for each type.namespace in types; type may be omitted + types = ( types || "" ).match( rnothtmlwhite ) || [ "" ]; + t = types.length; + while ( t-- ) { + tmp = rtypenamespace.exec( types[ t ] ) || []; + type = origType = tmp[ 1 ]; + namespaces = ( tmp[ 2 ] || "" ).split( "." ).sort(); + + // Unbind all events (on this namespace, if provided) for the element + if ( !type ) { + for ( type in events ) { + jQuery.event.remove( elem, type + types[ t ], handler, selector, true ); + } + continue; + } + + special = jQuery.event.special[ type ] || {}; + type = ( selector ? special.delegateType : special.bindType ) || type; + handlers = events[ type ] || []; + tmp = tmp[ 2 ] && + new RegExp( "(^|\\.)" + namespaces.join( "\\.(?:.*\\.|)" ) + "(\\.|$)" ); + + // Remove matching events + origCount = j = handlers.length; + while ( j-- ) { + handleObj = handlers[ j ]; + + if ( ( mappedTypes || origType === handleObj.origType ) && + ( !handler || handler.guid === handleObj.guid ) && + ( !tmp || tmp.test( handleObj.namespace ) ) && + ( !selector || selector === handleObj.selector || + selector === "**" && handleObj.selector ) ) { + handlers.splice( j, 1 ); + + if ( handleObj.selector ) { + handlers.delegateCount--; + } + if ( special.remove ) { + special.remove.call( elem, handleObj ); + } + } + } + + // Remove generic event handler if we removed something and no more handlers exist + // (avoids potential for endless recursion during removal of special event handlers) + if ( origCount && !handlers.length ) { + if ( !special.teardown || + special.teardown.call( elem, namespaces, elemData.handle ) === false ) { + + jQuery.removeEvent( elem, type, elemData.handle ); + } + + delete events[ type ]; + } + } + + // Remove data and the expando if it's no longer used + if ( jQuery.isEmptyObject( events ) ) { + dataPriv.remove( elem, "handle events" ); + } + }, + + dispatch: function( nativeEvent ) { + + 
var i, j, ret, matched, handleObj, handlerQueue, + args = new Array( arguments.length ), + + // Make a writable jQuery.Event from the native event object + event = jQuery.event.fix( nativeEvent ), + + handlers = ( + dataPriv.get( this, "events" ) || Object.create( null ) + )[ event.type ] || [], + special = jQuery.event.special[ event.type ] || {}; + + // Use the fix-ed jQuery.Event rather than the (read-only) native event + args[ 0 ] = event; + + for ( i = 1; i < arguments.length; i++ ) { + args[ i ] = arguments[ i ]; + } + + event.delegateTarget = this; + + // Call the preDispatch hook for the mapped type, and let it bail if desired + if ( special.preDispatch && special.preDispatch.call( this, event ) === false ) { + return; + } + + // Determine handlers + handlerQueue = jQuery.event.handlers.call( this, event, handlers ); + + // Run delegates first; they may want to stop propagation beneath us + i = 0; + while ( ( matched = handlerQueue[ i++ ] ) && !event.isPropagationStopped() ) { + event.currentTarget = matched.elem; + + j = 0; + while ( ( handleObj = matched.handlers[ j++ ] ) && + !event.isImmediatePropagationStopped() ) { + + // If the event is namespaced, then each handler is only invoked if it is + // specially universal or its namespaces are a superset of the event's. 
+ if ( !event.rnamespace || handleObj.namespace === false || + event.rnamespace.test( handleObj.namespace ) ) { + + event.handleObj = handleObj; + event.data = handleObj.data; + + ret = ( ( jQuery.event.special[ handleObj.origType ] || {} ).handle || + handleObj.handler ).apply( matched.elem, args ); + + if ( ret !== undefined ) { + if ( ( event.result = ret ) === false ) { + event.preventDefault(); + event.stopPropagation(); + } + } + } + } + } + + // Call the postDispatch hook for the mapped type + if ( special.postDispatch ) { + special.postDispatch.call( this, event ); + } + + return event.result; + }, + + handlers: function( event, handlers ) { + var i, handleObj, sel, matchedHandlers, matchedSelectors, + handlerQueue = [], + delegateCount = handlers.delegateCount, + cur = event.target; + + // Find delegate handlers + if ( delegateCount && + + // Support: IE <=9 + // Black-hole SVG instance trees (trac-13180) + cur.nodeType && + + // Support: Firefox <=42 + // Suppress spec-violating clicks indicating a non-primary pointer button (trac-3861) + // https://www.w3.org/TR/DOM-Level-3-Events/#event-type-click + // Support: IE 11 only + // ...but not arrow key "clicks" of radio inputs, which can have `button` -1 (gh-2343) + !( event.type === "click" && event.button >= 1 ) ) { + + for ( ; cur !== this; cur = cur.parentNode || this ) { + + // Don't check non-elements (#13208) + // Don't process clicks on disabled elements (#6911, #8165, #11382, #11764) + if ( cur.nodeType === 1 && !( event.type === "click" && cur.disabled === true ) ) { + matchedHandlers = []; + matchedSelectors = {}; + for ( i = 0; i < delegateCount; i++ ) { + handleObj = handlers[ i ]; + + // Don't conflict with Object.prototype properties (#13203) + sel = handleObj.selector + " "; + + if ( matchedSelectors[ sel ] === undefined ) { + matchedSelectors[ sel ] = handleObj.needsContext ? 
+ jQuery( sel, this ).index( cur ) > -1 : + jQuery.find( sel, this, null, [ cur ] ).length; + } + if ( matchedSelectors[ sel ] ) { + matchedHandlers.push( handleObj ); + } + } + if ( matchedHandlers.length ) { + handlerQueue.push( { elem: cur, handlers: matchedHandlers } ); + } + } + } + } + + // Add the remaining (directly-bound) handlers + cur = this; + if ( delegateCount < handlers.length ) { + handlerQueue.push( { elem: cur, handlers: handlers.slice( delegateCount ) } ); + } + + return handlerQueue; + }, + + addProp: function( name, hook ) { + Object.defineProperty( jQuery.Event.prototype, name, { + enumerable: true, + configurable: true, + + get: isFunction( hook ) ? + function() { + if ( this.originalEvent ) { + return hook( this.originalEvent ); + } + } : + function() { + if ( this.originalEvent ) { + return this.originalEvent[ name ]; + } + }, + + set: function( value ) { + Object.defineProperty( this, name, { + enumerable: true, + configurable: true, + writable: true, + value: value + } ); + } + } ); + }, + + fix: function( originalEvent ) { + return originalEvent[ jQuery.expando ] ? + originalEvent : + new jQuery.Event( originalEvent ); + }, + + special: { + load: { + + // Prevent triggered image.load events from bubbling to window.load + noBubble: true + }, + click: { + + // Utilize native event to ensure correct state for checkable inputs + setup: function( data ) { + + // For mutual compressibility with _default, replace `this` access with a local var. + // `|| data` is dead code meant only to preserve the variable through minification. + var el = this || data; + + // Claim the first handler + if ( rcheckableType.test( el.type ) && + el.click && nodeName( el, "input" ) ) { + + // dataPriv.set( el, "click", ... 
) + leverageNative( el, "click", returnTrue ); + } + + // Return false to allow normal processing in the caller + return false; + }, + trigger: function( data ) { + + // For mutual compressibility with _default, replace `this` access with a local var. + // `|| data` is dead code meant only to preserve the variable through minification. + var el = this || data; + + // Force setup before triggering a click + if ( rcheckableType.test( el.type ) && + el.click && nodeName( el, "input" ) ) { + + leverageNative( el, "click" ); + } + + // Return non-false to allow normal event-path propagation + return true; + }, + + // For cross-browser consistency, suppress native .click() on links + // Also prevent it if we're currently inside a leveraged native-event stack + _default: function( event ) { + var target = event.target; + return rcheckableType.test( target.type ) && + target.click && nodeName( target, "input" ) && + dataPriv.get( target, "click" ) || + nodeName( target, "a" ); + } + }, + + beforeunload: { + postDispatch: function( event ) { + + // Support: Firefox 20+ + // Firefox doesn't alert if the returnValue field is not set. + if ( event.result !== undefined && event.originalEvent ) { + event.originalEvent.returnValue = event.result; + } + } + } + } +}; + +// Ensure the presence of an event listener that handles manually-triggered +// synthetic events by interrupting progress until reinvoked in response to +// *native* events that it fires directly, ensuring that state changes have +// already occurred before other listeners are invoked. 
+function leverageNative( el, type, expectSync ) { + + // Missing expectSync indicates a trigger call, which must force setup through jQuery.event.add + if ( !expectSync ) { + if ( dataPriv.get( el, type ) === undefined ) { + jQuery.event.add( el, type, returnTrue ); + } + return; + } + + // Register the controller as a special universal handler for all event namespaces + dataPriv.set( el, type, false ); + jQuery.event.add( el, type, { + namespace: false, + handler: function( event ) { + var notAsync, result, + saved = dataPriv.get( this, type ); + + if ( ( event.isTrigger & 1 ) && this[ type ] ) { + + // Interrupt processing of the outer synthetic .trigger()ed event + // Saved data should be false in such cases, but might be a leftover capture object + // from an async native handler (gh-4350) + if ( !saved.length ) { + + // Store arguments for use when handling the inner native event + // There will always be at least one argument (an event object), so this array + // will not be confused with a leftover capture object. + saved = slice.call( arguments ); + dataPriv.set( this, type, saved ); + + // Trigger the native event and capture its result + // Support: IE <=9 - 11+ + // focus() and blur() are asynchronous + notAsync = expectSync( this, type ); + this[ type ](); + result = dataPriv.get( this, type ); + if ( saved !== result || notAsync ) { + dataPriv.set( this, type, false ); + } else { + result = {}; + } + if ( saved !== result ) { + + // Cancel the outer synthetic event + event.stopImmediatePropagation(); + event.preventDefault(); + return result.value; + } + + // If this is an inner synthetic event for an event with a bubbling surrogate + // (focus or blur), assume that the surrogate already propagated from triggering the + // native event and prevent that from happening again here. + // This technically gets the ordering wrong w.r.t. 
to `.trigger()` (in which the + // bubbling surrogate propagates *after* the non-bubbling base), but that seems + // less bad than duplication. + } else if ( ( jQuery.event.special[ type ] || {} ).delegateType ) { + event.stopPropagation(); + } + + // If this is a native event triggered above, everything is now in order + // Fire an inner synthetic event with the original arguments + } else if ( saved.length ) { + + // ...and capture the result + dataPriv.set( this, type, { + value: jQuery.event.trigger( + + // Support: IE <=9 - 11+ + // Extend with the prototype to reset the above stopImmediatePropagation() + jQuery.extend( saved[ 0 ], jQuery.Event.prototype ), + saved.slice( 1 ), + this + ) + } ); + + // Abort handling of the native event + event.stopImmediatePropagation(); + } + } + } ); +} + +jQuery.removeEvent = function( elem, type, handle ) { + + // This "if" is needed for plain objects + if ( elem.removeEventListener ) { + elem.removeEventListener( type, handle ); + } +}; + +jQuery.Event = function( src, props ) { + + // Allow instantiation without the 'new' keyword + if ( !( this instanceof jQuery.Event ) ) { + return new jQuery.Event( src, props ); + } + + // Event object + if ( src && src.type ) { + this.originalEvent = src; + this.type = src.type; + + // Events bubbling up the document may have been marked as prevented + // by a handler lower down the tree; reflect the correct value. + this.isDefaultPrevented = src.defaultPrevented || + src.defaultPrevented === undefined && + + // Support: Android <=2.3 only + src.returnValue === false ? + returnTrue : + returnFalse; + + // Create target properties + // Support: Safari <=6 - 7 only + // Target should not be a text node (#504, #13143) + this.target = ( src.target && src.target.nodeType === 3 ) ? 
+ src.target.parentNode : + src.target; + + this.currentTarget = src.currentTarget; + this.relatedTarget = src.relatedTarget; + + // Event type + } else { + this.type = src; + } + + // Put explicitly provided properties onto the event object + if ( props ) { + jQuery.extend( this, props ); + } + + // Create a timestamp if incoming event doesn't have one + this.timeStamp = src && src.timeStamp || Date.now(); + + // Mark it as fixed + this[ jQuery.expando ] = true; +}; + +// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding +// https://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html +jQuery.Event.prototype = { + constructor: jQuery.Event, + isDefaultPrevented: returnFalse, + isPropagationStopped: returnFalse, + isImmediatePropagationStopped: returnFalse, + isSimulated: false, + + preventDefault: function() { + var e = this.originalEvent; + + this.isDefaultPrevented = returnTrue; + + if ( e && !this.isSimulated ) { + e.preventDefault(); + } + }, + stopPropagation: function() { + var e = this.originalEvent; + + this.isPropagationStopped = returnTrue; + + if ( e && !this.isSimulated ) { + e.stopPropagation(); + } + }, + stopImmediatePropagation: function() { + var e = this.originalEvent; + + this.isImmediatePropagationStopped = returnTrue; + + if ( e && !this.isSimulated ) { + e.stopImmediatePropagation(); + } + + this.stopPropagation(); + } +}; + +// Includes all common event props including KeyEvent and MouseEvent specific props +jQuery.each( { + altKey: true, + bubbles: true, + cancelable: true, + changedTouches: true, + ctrlKey: true, + detail: true, + eventPhase: true, + metaKey: true, + pageX: true, + pageY: true, + shiftKey: true, + view: true, + "char": true, + code: true, + charCode: true, + key: true, + keyCode: true, + button: true, + buttons: true, + clientX: true, + clientY: true, + offsetX: true, + offsetY: true, + pointerId: true, + pointerType: true, + screenX: true, + screenY: true, + 
targetTouches: true, + toElement: true, + touches: true, + + which: function( event ) { + var button = event.button; + + // Add which for key events + if ( event.which == null && rkeyEvent.test( event.type ) ) { + return event.charCode != null ? event.charCode : event.keyCode; + } + + // Add which for click: 1 === left; 2 === middle; 3 === right + if ( !event.which && button !== undefined && rmouseEvent.test( event.type ) ) { + if ( button & 1 ) { + return 1; + } + + if ( button & 2 ) { + return 3; + } + + if ( button & 4 ) { + return 2; + } + + return 0; + } + + return event.which; + } +}, jQuery.event.addProp ); + +jQuery.each( { focus: "focusin", blur: "focusout" }, function( type, delegateType ) { + jQuery.event.special[ type ] = { + + // Utilize native event if possible so blur/focus sequence is correct + setup: function() { + + // Claim the first handler + // dataPriv.set( this, "focus", ... ) + // dataPriv.set( this, "blur", ... ) + leverageNative( this, type, expectSync ); + + // Return false to allow normal processing in the caller + return false; + }, + trigger: function() { + + // Force setup before trigger + leverageNative( this, type ); + + // Return non-false to allow normal event-path propagation + return true; + }, + + delegateType: delegateType + }; +} ); + +// Create mouseenter/leave events using mouseover/out and event-time checks +// so that event delegation works in jQuery. +// Do the same for pointerenter/pointerleave and pointerover/pointerout +// +// Support: Safari 7 only +// Safari sends mouseenter too often; see: +// https://bugs.chromium.org/p/chromium/issues/detail?id=470258 +// for the description of the bug (it existed in older Chrome versions as well). 
+jQuery.each( { + mouseenter: "mouseover", + mouseleave: "mouseout", + pointerenter: "pointerover", + pointerleave: "pointerout" +}, function( orig, fix ) { + jQuery.event.special[ orig ] = { + delegateType: fix, + bindType: fix, + + handle: function( event ) { + var ret, + target = this, + related = event.relatedTarget, + handleObj = event.handleObj; + + // For mouseenter/leave call the handler if related is outside the target. + // NB: No relatedTarget if the mouse left/entered the browser window + if ( !related || ( related !== target && !jQuery.contains( target, related ) ) ) { + event.type = handleObj.origType; + ret = handleObj.handler.apply( this, arguments ); + event.type = fix; + } + return ret; + } + }; +} ); + +jQuery.fn.extend( { + + on: function( types, selector, data, fn ) { + return on( this, types, selector, data, fn ); + }, + one: function( types, selector, data, fn ) { + return on( this, types, selector, data, fn, 1 ); + }, + off: function( types, selector, fn ) { + var handleObj, type; + if ( types && types.preventDefault && types.handleObj ) { + + // ( event ) dispatched jQuery.Event + handleObj = types.handleObj; + jQuery( types.delegateTarget ).off( + handleObj.namespace ? + handleObj.origType + "." + handleObj.namespace : + handleObj.origType, + handleObj.selector, + handleObj.handler + ); + return this; + } + if ( typeof types === "object" ) { + + // ( types-object [, selector] ) + for ( type in types ) { + this.off( type, selector, types[ type ] ); + } + return this; + } + if ( selector === false || typeof selector === "function" ) { + + // ( types [, fn] ) + fn = selector; + selector = undefined; + } + if ( fn === false ) { + fn = returnFalse; + } + return this.each( function() { + jQuery.event.remove( this, types, fn, selector ); + } ); + } +} ); + + +var + + // Support: IE <=10 - 11, Edge 12 - 13 only + // In IE/Edge using regex groups here causes severe slowdowns. 
+ // See https://connect.microsoft.com/IE/feedback/details/1736512/ + rnoInnerhtml = /\s*$/g; + +// Prefer a tbody over its parent table for containing new rows +function manipulationTarget( elem, content ) { + if ( nodeName( elem, "table" ) && + nodeName( content.nodeType !== 11 ? content : content.firstChild, "tr" ) ) { + + return jQuery( elem ).children( "tbody" )[ 0 ] || elem; + } + + return elem; +} + +// Replace/restore the type attribute of script elements for safe DOM manipulation +function disableScript( elem ) { + elem.type = ( elem.getAttribute( "type" ) !== null ) + "/" + elem.type; + return elem; +} +function restoreScript( elem ) { + if ( ( elem.type || "" ).slice( 0, 5 ) === "true/" ) { + elem.type = elem.type.slice( 5 ); + } else { + elem.removeAttribute( "type" ); + } + + return elem; +} + +function cloneCopyEvent( src, dest ) { + var i, l, type, pdataOld, udataOld, udataCur, events; + + if ( dest.nodeType !== 1 ) { + return; + } + + // 1. Copy private data: events, handlers, etc. + if ( dataPriv.hasData( src ) ) { + pdataOld = dataPriv.get( src ); + events = pdataOld.events; + + if ( events ) { + dataPriv.remove( dest, "handle events" ); + + for ( type in events ) { + for ( i = 0, l = events[ type ].length; i < l; i++ ) { + jQuery.event.add( dest, type, events[ type ][ i ] ); + } + } + } + } + + // 2. Copy user data + if ( dataUser.hasData( src ) ) { + udataOld = dataUser.access( src ); + udataCur = jQuery.extend( {}, udataOld ); + + dataUser.set( dest, udataCur ); + } +} + +// Fix IE bugs, see support tests +function fixInput( src, dest ) { + var nodeName = dest.nodeName.toLowerCase(); + + // Fails to persist the checked state of a cloned checkbox or radio button. 
+ if ( nodeName === "input" && rcheckableType.test( src.type ) ) { + dest.checked = src.checked; + + // Fails to return the selected option to the default selected state when cloning options + } else if ( nodeName === "input" || nodeName === "textarea" ) { + dest.defaultValue = src.defaultValue; + } +} + +function domManip( collection, args, callback, ignored ) { + + // Flatten any nested arrays + args = flat( args ); + + var fragment, first, scripts, hasScripts, node, doc, + i = 0, + l = collection.length, + iNoClone = l - 1, + value = args[ 0 ], + valueIsFunction = isFunction( value ); + + // We can't cloneNode fragments that contain checked, in WebKit + if ( valueIsFunction || + ( l > 1 && typeof value === "string" && + !support.checkClone && rchecked.test( value ) ) ) { + return collection.each( function( index ) { + var self = collection.eq( index ); + if ( valueIsFunction ) { + args[ 0 ] = value.call( this, index, self.html() ); + } + domManip( self, args, callback, ignored ); + } ); + } + + if ( l ) { + fragment = buildFragment( args, collection[ 0 ].ownerDocument, false, collection, ignored ); + first = fragment.firstChild; + + if ( fragment.childNodes.length === 1 ) { + fragment = first; + } + + // Require either new content or an interest in ignored elements to invoke the callback + if ( first || ignored ) { + scripts = jQuery.map( getAll( fragment, "script" ), disableScript ); + hasScripts = scripts.length; + + // Use the original fragment for the last item + // instead of the first because it can end up + // being emptied incorrectly in certain situations (#8070). 
+ for ( ; i < l; i++ ) { + node = fragment; + + if ( i !== iNoClone ) { + node = jQuery.clone( node, true, true ); + + // Keep references to cloned scripts for later restoration + if ( hasScripts ) { + + // Support: Android <=4.0 only, PhantomJS 1 only + // push.apply(_, arraylike) throws on ancient WebKit + jQuery.merge( scripts, getAll( node, "script" ) ); + } + } + + callback.call( collection[ i ], node, i ); + } + + if ( hasScripts ) { + doc = scripts[ scripts.length - 1 ].ownerDocument; + + // Reenable scripts + jQuery.map( scripts, restoreScript ); + + // Evaluate executable scripts on first document insertion + for ( i = 0; i < hasScripts; i++ ) { + node = scripts[ i ]; + if ( rscriptType.test( node.type || "" ) && + !dataPriv.access( node, "globalEval" ) && + jQuery.contains( doc, node ) ) { + + if ( node.src && ( node.type || "" ).toLowerCase() !== "module" ) { + + // Optional AJAX dependency, but won't run scripts if not present + if ( jQuery._evalUrl && !node.noModule ) { + jQuery._evalUrl( node.src, { + nonce: node.nonce || node.getAttribute( "nonce" ) + }, doc ); + } + } else { + DOMEval( node.textContent.replace( rcleanScript, "" ), node, doc ); + } + } + } + } + } + } + + return collection; +} + +function remove( elem, selector, keepData ) { + var node, + nodes = selector ? 
jQuery.filter( selector, elem ) : elem, + i = 0; + + for ( ; ( node = nodes[ i ] ) != null; i++ ) { + if ( !keepData && node.nodeType === 1 ) { + jQuery.cleanData( getAll( node ) ); + } + + if ( node.parentNode ) { + if ( keepData && isAttached( node ) ) { + setGlobalEval( getAll( node, "script" ) ); + } + node.parentNode.removeChild( node ); + } + } + + return elem; +} + +jQuery.extend( { + htmlPrefilter: function( html ) { + return html; + }, + + clone: function( elem, dataAndEvents, deepDataAndEvents ) { + var i, l, srcElements, destElements, + clone = elem.cloneNode( true ), + inPage = isAttached( elem ); + + // Fix IE cloning issues + if ( !support.noCloneChecked && ( elem.nodeType === 1 || elem.nodeType === 11 ) && + !jQuery.isXMLDoc( elem ) ) { + + // We eschew Sizzle here for performance reasons: https://jsperf.com/getall-vs-sizzle/2 + destElements = getAll( clone ); + srcElements = getAll( elem ); + + for ( i = 0, l = srcElements.length; i < l; i++ ) { + fixInput( srcElements[ i ], destElements[ i ] ); + } + } + + // Copy the events from the original to the clone + if ( dataAndEvents ) { + if ( deepDataAndEvents ) { + srcElements = srcElements || getAll( elem ); + destElements = destElements || getAll( clone ); + + for ( i = 0, l = srcElements.length; i < l; i++ ) { + cloneCopyEvent( srcElements[ i ], destElements[ i ] ); + } + } else { + cloneCopyEvent( elem, clone ); + } + } + + // Preserve script evaluation history + destElements = getAll( clone, "script" ); + if ( destElements.length > 0 ) { + setGlobalEval( destElements, !inPage && getAll( elem, "script" ) ); + } + + // Return the cloned set + return clone; + }, + + cleanData: function( elems ) { + var data, elem, type, + special = jQuery.event.special, + i = 0; + + for ( ; ( elem = elems[ i ] ) !== undefined; i++ ) { + if ( acceptData( elem ) ) { + if ( ( data = elem[ dataPriv.expando ] ) ) { + if ( data.events ) { + for ( type in data.events ) { + if ( special[ type ] ) { + jQuery.event.remove( 
elem, type ); + + // This is a shortcut to avoid jQuery.event.remove's overhead + } else { + jQuery.removeEvent( elem, type, data.handle ); + } + } + } + + // Support: Chrome <=35 - 45+ + // Assign undefined instead of using delete, see Data#remove + elem[ dataPriv.expando ] = undefined; + } + if ( elem[ dataUser.expando ] ) { + + // Support: Chrome <=35 - 45+ + // Assign undefined instead of using delete, see Data#remove + elem[ dataUser.expando ] = undefined; + } + } + } + } +} ); + +jQuery.fn.extend( { + detach: function( selector ) { + return remove( this, selector, true ); + }, + + remove: function( selector ) { + return remove( this, selector ); + }, + + text: function( value ) { + return access( this, function( value ) { + return value === undefined ? + jQuery.text( this ) : + this.empty().each( function() { + if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { + this.textContent = value; + } + } ); + }, null, value, arguments.length ); + }, + + append: function() { + return domManip( this, arguments, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { + var target = manipulationTarget( this, elem ); + target.appendChild( elem ); + } + } ); + }, + + prepend: function() { + return domManip( this, arguments, function( elem ) { + if ( this.nodeType === 1 || this.nodeType === 11 || this.nodeType === 9 ) { + var target = manipulationTarget( this, elem ); + target.insertBefore( elem, target.firstChild ); + } + } ); + }, + + before: function() { + return domManip( this, arguments, function( elem ) { + if ( this.parentNode ) { + this.parentNode.insertBefore( elem, this ); + } + } ); + }, + + after: function() { + return domManip( this, arguments, function( elem ) { + if ( this.parentNode ) { + this.parentNode.insertBefore( elem, this.nextSibling ); + } + } ); + }, + + empty: function() { + var elem, + i = 0; + + for ( ; ( elem = this[ i ] ) != null; i++ ) { + if ( elem.nodeType === 1 ) { + + // 
Prevent memory leaks + jQuery.cleanData( getAll( elem, false ) ); + + // Remove any remaining nodes + elem.textContent = ""; + } + } + + return this; + }, + + clone: function( dataAndEvents, deepDataAndEvents ) { + dataAndEvents = dataAndEvents == null ? false : dataAndEvents; + deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents; + + return this.map( function() { + return jQuery.clone( this, dataAndEvents, deepDataAndEvents ); + } ); + }, + + html: function( value ) { + return access( this, function( value ) { + var elem = this[ 0 ] || {}, + i = 0, + l = this.length; + + if ( value === undefined && elem.nodeType === 1 ) { + return elem.innerHTML; + } + + // See if we can take a shortcut and just use innerHTML + if ( typeof value === "string" && !rnoInnerhtml.test( value ) && + !wrapMap[ ( rtagName.exec( value ) || [ "", "" ] )[ 1 ].toLowerCase() ] ) { + + value = jQuery.htmlPrefilter( value ); + + try { + for ( ; i < l; i++ ) { + elem = this[ i ] || {}; + + // Remove element nodes and prevent memory leaks + if ( elem.nodeType === 1 ) { + jQuery.cleanData( getAll( elem, false ) ); + elem.innerHTML = value; + } + } + + elem = 0; + + // If using innerHTML throws an exception, use the fallback method + } catch ( e ) {} + } + + if ( elem ) { + this.empty().append( value ); + } + }, null, value, arguments.length ); + }, + + replaceWith: function() { + var ignored = []; + + // Make the changes, replacing each non-ignored context element with the new content + return domManip( this, arguments, function( elem ) { + var parent = this.parentNode; + + if ( jQuery.inArray( this, ignored ) < 0 ) { + jQuery.cleanData( getAll( this ) ); + if ( parent ) { + parent.replaceChild( elem, this ); + } + } + + // Force callback invocation + }, ignored ); + } +} ); + +jQuery.each( { + appendTo: "append", + prependTo: "prepend", + insertBefore: "before", + insertAfter: "after", + replaceAll: "replaceWith" +}, function( name, original ) { + jQuery.fn[ name ] = 
function( selector ) { + var elems, + ret = [], + insert = jQuery( selector ), + last = insert.length - 1, + i = 0; + + for ( ; i <= last; i++ ) { + elems = i === last ? this : this.clone( true ); + jQuery( insert[ i ] )[ original ]( elems ); + + // Support: Android <=4.0 only, PhantomJS 1 only + // .get() because push.apply(_, arraylike) throws on ancient WebKit + push.apply( ret, elems.get() ); + } + + return this.pushStack( ret ); + }; +} ); +var rnumnonpx = new RegExp( "^(" + pnum + ")(?!px)[a-z%]+$", "i" ); + +var getStyles = function( elem ) { + + // Support: IE <=11 only, Firefox <=30 (#15098, #14150) + // IE throws on elements created in popups + // FF meanwhile throws on frame elements through "defaultView.getComputedStyle" + var view = elem.ownerDocument.defaultView; + + if ( !view || !view.opener ) { + view = window; + } + + return view.getComputedStyle( elem ); + }; + +var swap = function( elem, options, callback ) { + var ret, name, + old = {}; + + // Remember the old values, and insert the new ones + for ( name in options ) { + old[ name ] = elem.style[ name ]; + elem.style[ name ] = options[ name ]; + } + + ret = callback.call( elem ); + + // Revert the old values + for ( name in options ) { + elem.style[ name ] = old[ name ]; + } + + return ret; +}; + + +var rboxStyle = new RegExp( cssExpand.join( "|" ), "i" ); + + + +( function() { + + // Executing both pixelPosition & boxSizingReliable tests require only one layout + // so they're executed at the same time to save the second computation. 
+ function computeStyleTests() { + + // This is a singleton, we need to execute it only once + if ( !div ) { + return; + } + + container.style.cssText = "position:absolute;left:-11111px;width:60px;" + + "margin-top:1px;padding:0;border:0"; + div.style.cssText = + "position:relative;display:block;box-sizing:border-box;overflow:scroll;" + + "margin:auto;border:1px;padding:1px;" + + "width:60%;top:1%"; + documentElement.appendChild( container ).appendChild( div ); + + var divStyle = window.getComputedStyle( div ); + pixelPositionVal = divStyle.top !== "1%"; + + // Support: Android 4.0 - 4.3 only, Firefox <=3 - 44 + reliableMarginLeftVal = roundPixelMeasures( divStyle.marginLeft ) === 12; + + // Support: Android 4.0 - 4.3 only, Safari <=9.1 - 10.1, iOS <=7.0 - 9.3 + // Some styles come back with percentage values, even though they shouldn't + div.style.right = "60%"; + pixelBoxStylesVal = roundPixelMeasures( divStyle.right ) === 36; + + // Support: IE 9 - 11 only + // Detect misreporting of content dimensions for box-sizing:border-box elements + boxSizingReliableVal = roundPixelMeasures( divStyle.width ) === 36; + + // Support: IE 9 only + // Detect overflow:scroll screwiness (gh-3699) + // Support: Chrome <=64 + // Don't get tricked when zoom affects offsetWidth (gh-4029) + div.style.position = "absolute"; + scrollboxSizeVal = roundPixelMeasures( div.offsetWidth / 3 ) === 12; + + documentElement.removeChild( container ); + + // Nullify the div so it wouldn't be stored in the memory and + // it will also be a sign that checks already performed + div = null; + } + + function roundPixelMeasures( measure ) { + return Math.round( parseFloat( measure ) ); + } + + var pixelPositionVal, boxSizingReliableVal, scrollboxSizeVal, pixelBoxStylesVal, + reliableTrDimensionsVal, reliableMarginLeftVal, + container = document.createElement( "div" ), + div = document.createElement( "div" ); + + // Finish early in limited (non-browser) environments + if ( !div.style ) { + return; + } + + 
// Support: IE <=9 - 11 only + // Style of cloned element affects source element cloned (#8908) + div.style.backgroundClip = "content-box"; + div.cloneNode( true ).style.backgroundClip = ""; + support.clearCloneStyle = div.style.backgroundClip === "content-box"; + + jQuery.extend( support, { + boxSizingReliable: function() { + computeStyleTests(); + return boxSizingReliableVal; + }, + pixelBoxStyles: function() { + computeStyleTests(); + return pixelBoxStylesVal; + }, + pixelPosition: function() { + computeStyleTests(); + return pixelPositionVal; + }, + reliableMarginLeft: function() { + computeStyleTests(); + return reliableMarginLeftVal; + }, + scrollboxSize: function() { + computeStyleTests(); + return scrollboxSizeVal; + }, + + // Support: IE 9 - 11+, Edge 15 - 18+ + // IE/Edge misreport `getComputedStyle` of table rows with width/height + // set in CSS while `offset*` properties report correct values. + // Behavior in IE 9 is more subtle than in newer versions & it passes + // some versions of this test; make sure not to make it pass there! 
+ reliableTrDimensions: function() { + var table, tr, trChild, trStyle; + if ( reliableTrDimensionsVal == null ) { + table = document.createElement( "table" ); + tr = document.createElement( "tr" ); + trChild = document.createElement( "div" ); + + table.style.cssText = "position:absolute;left:-11111px"; + tr.style.height = "1px"; + trChild.style.height = "9px"; + + documentElement + .appendChild( table ) + .appendChild( tr ) + .appendChild( trChild ); + + trStyle = window.getComputedStyle( tr ); + reliableTrDimensionsVal = parseInt( trStyle.height ) > 3; + + documentElement.removeChild( table ); + } + return reliableTrDimensionsVal; + } + } ); +} )(); + + +function curCSS( elem, name, computed ) { + var width, minWidth, maxWidth, ret, + + // Support: Firefox 51+ + // Retrieving style before computed somehow + // fixes an issue with getting wrong values + // on detached elements + style = elem.style; + + computed = computed || getStyles( elem ); + + // getPropertyValue is needed for: + // .css('filter') (IE 9 only, #12537) + // .css('--customProperty) (#3144) + if ( computed ) { + ret = computed.getPropertyValue( name ) || computed[ name ]; + + if ( ret === "" && !isAttached( elem ) ) { + ret = jQuery.style( elem, name ); + } + + // A tribute to the "awesome hack by Dean Edwards" + // Android Browser returns percentage for some values, + // but width seems to be reliably pixels. + // This is against the CSSOM draft spec: + // https://drafts.csswg.org/cssom/#resolved-values + if ( !support.pixelBoxStyles() && rnumnonpx.test( ret ) && rboxStyle.test( name ) ) { + + // Remember the original values + width = style.width; + minWidth = style.minWidth; + maxWidth = style.maxWidth; + + // Put in the new values to get a computed value out + style.minWidth = style.maxWidth = style.width = ret; + ret = computed.width; + + // Revert the changed values + style.width = width; + style.minWidth = minWidth; + style.maxWidth = maxWidth; + } + } + + return ret !== undefined ? 
+ + // Support: IE <=9 - 11 only + // IE returns zIndex value as an integer. + ret + "" : + ret; +} + + +function addGetHookIf( conditionFn, hookFn ) { + + // Define the hook, we'll check on the first run if it's really needed. + return { + get: function() { + if ( conditionFn() ) { + + // Hook not needed (or it's not possible to use it due + // to missing dependency), remove it. + delete this.get; + return; + } + + // Hook needed; redefine it so that the support test is not executed again. + return ( this.get = hookFn ).apply( this, arguments ); + } + }; +} + + +var cssPrefixes = [ "Webkit", "Moz", "ms" ], + emptyStyle = document.createElement( "div" ).style, + vendorProps = {}; + +// Return a vendor-prefixed property or undefined +function vendorPropName( name ) { + + // Check for vendor prefixed names + var capName = name[ 0 ].toUpperCase() + name.slice( 1 ), + i = cssPrefixes.length; + + while ( i-- ) { + name = cssPrefixes[ i ] + capName; + if ( name in emptyStyle ) { + return name; + } + } +} + +// Return a potentially-mapped jQuery.cssProps or vendor prefixed property +function finalPropName( name ) { + var final = jQuery.cssProps[ name ] || vendorProps[ name ]; + + if ( final ) { + return final; + } + if ( name in emptyStyle ) { + return name; + } + return vendorProps[ name ] = vendorPropName( name ) || name; +} + + +var + + // Swappable if display is none or starts with table + // except "table", "table-cell", or "table-caption" + // See here for display values: https://developer.mozilla.org/en-US/docs/CSS/display + rdisplayswap = /^(none|table(?!-c[ea]).+)/, + rcustomProp = /^--/, + cssShow = { position: "absolute", visibility: "hidden", display: "block" }, + cssNormalTransform = { + letterSpacing: "0", + fontWeight: "400" + }; + +function setPositiveNumber( _elem, value, subtract ) { + + // Any relative (+/-) values have already been + // normalized at this point + var matches = rcssNum.exec( value ); + return matches ? 
+ + // Guard against undefined "subtract", e.g., when used as in cssHooks + Math.max( 0, matches[ 2 ] - ( subtract || 0 ) ) + ( matches[ 3 ] || "px" ) : + value; +} + +function boxModelAdjustment( elem, dimension, box, isBorderBox, styles, computedVal ) { + var i = dimension === "width" ? 1 : 0, + extra = 0, + delta = 0; + + // Adjustment may not be necessary + if ( box === ( isBorderBox ? "border" : "content" ) ) { + return 0; + } + + for ( ; i < 4; i += 2 ) { + + // Both box models exclude margin + if ( box === "margin" ) { + delta += jQuery.css( elem, box + cssExpand[ i ], true, styles ); + } + + // If we get here with a content-box, we're seeking "padding" or "border" or "margin" + if ( !isBorderBox ) { + + // Add padding + delta += jQuery.css( elem, "padding" + cssExpand[ i ], true, styles ); + + // For "border" or "margin", add border + if ( box !== "padding" ) { + delta += jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); + + // But still keep track of it otherwise + } else { + extra += jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); + } + + // If we get here with a border-box (content + padding + border), we're seeking "content" or + // "padding" or "margin" + } else { + + // For "content", subtract padding + if ( box === "content" ) { + delta -= jQuery.css( elem, "padding" + cssExpand[ i ], true, styles ); + } + + // For "content" or "padding", subtract border + if ( box !== "margin" ) { + delta -= jQuery.css( elem, "border" + cssExpand[ i ] + "Width", true, styles ); + } + } + } + + // Account for positive content-box scroll gutter when requested by providing computedVal + if ( !isBorderBox && computedVal >= 0 ) { + + // offsetWidth/offsetHeight is a rounded sum of content, padding, scroll gutter, and border + // Assuming integer scroll gutter, subtract the rest and round down + delta += Math.max( 0, Math.ceil( + elem[ "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ) ] - + computedVal - + delta - + 
extra - + 0.5 + + // If offsetWidth/offsetHeight is unknown, then we can't determine content-box scroll gutter + // Use an explicit zero to avoid NaN (gh-3964) + ) ) || 0; + } + + return delta; +} + +function getWidthOrHeight( elem, dimension, extra ) { + + // Start with computed style + var styles = getStyles( elem ), + + // To avoid forcing a reflow, only fetch boxSizing if we need it (gh-4322). + // Fake content-box until we know it's needed to know the true value. + boxSizingNeeded = !support.boxSizingReliable() || extra, + isBorderBox = boxSizingNeeded && + jQuery.css( elem, "boxSizing", false, styles ) === "border-box", + valueIsBorderBox = isBorderBox, + + val = curCSS( elem, dimension, styles ), + offsetProp = "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ); + + // Support: Firefox <=54 + // Return a confounding non-pixel value or feign ignorance, as appropriate. + if ( rnumnonpx.test( val ) ) { + if ( !extra ) { + return val; + } + val = "auto"; + } + + + // Support: IE 9 - 11 only + // Use offsetWidth/offsetHeight for when box sizing is unreliable. + // In those cases, the computed value can be trusted to be border-box. + if ( ( !support.boxSizingReliable() && isBorderBox || + + // Support: IE 10 - 11+, Edge 15 - 18+ + // IE/Edge misreport `getComputedStyle` of table rows with width/height + // set in CSS while `offset*` properties report correct values. + // Interestingly, in some cases IE 9 doesn't suffer from this issue. 
+ !support.reliableTrDimensions() && nodeName( elem, "tr" ) || + + // Fall back to offsetWidth/offsetHeight when value is "auto" + // This happens for inline elements with no explicit setting (gh-3571) + val === "auto" || + + // Support: Android <=4.1 - 4.3 only + // Also use offsetWidth/offsetHeight for misreported inline dimensions (gh-3602) + !parseFloat( val ) && jQuery.css( elem, "display", false, styles ) === "inline" ) && + + // Make sure the element is visible & connected + elem.getClientRects().length ) { + + isBorderBox = jQuery.css( elem, "boxSizing", false, styles ) === "border-box"; + + // Where available, offsetWidth/offsetHeight approximate border box dimensions. + // Where not available (e.g., SVG), assume unreliable box-sizing and interpret the + // retrieved value as a content box dimension. + valueIsBorderBox = offsetProp in elem; + if ( valueIsBorderBox ) { + val = elem[ offsetProp ]; + } + } + + // Normalize "" and auto + val = parseFloat( val ) || 0; + + // Adjust for the element's box model + return ( val + + boxModelAdjustment( + elem, + dimension, + extra || ( isBorderBox ? "border" : "content" ), + valueIsBorderBox, + styles, + + // Provide the current computed size to request scroll gutter calculation (gh-3589) + val + ) + ) + "px"; +} + +jQuery.extend( { + + // Add in style property hooks for overriding the default + // behavior of getting and setting a style property + cssHooks: { + opacity: { + get: function( elem, computed ) { + if ( computed ) { + + // We should always get a number back from opacity + var ret = curCSS( elem, "opacity" ); + return ret === "" ? 
"1" : ret; + } + } + } + }, + + // Don't automatically add "px" to these possibly-unitless properties + cssNumber: { + "animationIterationCount": true, + "columnCount": true, + "fillOpacity": true, + "flexGrow": true, + "flexShrink": true, + "fontWeight": true, + "gridArea": true, + "gridColumn": true, + "gridColumnEnd": true, + "gridColumnStart": true, + "gridRow": true, + "gridRowEnd": true, + "gridRowStart": true, + "lineHeight": true, + "opacity": true, + "order": true, + "orphans": true, + "widows": true, + "zIndex": true, + "zoom": true + }, + + // Add in properties whose names you wish to fix before + // setting or getting the value + cssProps: {}, + + // Get and set the style property on a DOM Node + style: function( elem, name, value, extra ) { + + // Don't set styles on text and comment nodes + if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) { + return; + } + + // Make sure that we're working with the right name + var ret, type, hooks, + origName = camelCase( name ), + isCustomProp = rcustomProp.test( name ), + style = elem.style; + + // Make sure that we're working with the right name. We don't + // want to query the value if it is a CSS custom property + // since they are user-defined. 
+ if ( !isCustomProp ) { + name = finalPropName( origName ); + } + + // Gets hook for the prefixed version, then unprefixed version + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // Check if we're setting a value + if ( value !== undefined ) { + type = typeof value; + + // Convert "+=" or "-=" to relative numbers (#7345) + if ( type === "string" && ( ret = rcssNum.exec( value ) ) && ret[ 1 ] ) { + value = adjustCSS( elem, name, ret ); + + // Fixes bug #9237 + type = "number"; + } + + // Make sure that null and NaN values aren't set (#7116) + if ( value == null || value !== value ) { + return; + } + + // If a number was passed in, add the unit (except for certain CSS properties) + // The isCustomProp check can be removed in jQuery 4.0 when we only auto-append + // "px" to a few hardcoded values. + if ( type === "number" && !isCustomProp ) { + value += ret && ret[ 3 ] || ( jQuery.cssNumber[ origName ] ? "" : "px" ); + } + + // background-* props affect original clone's values + if ( !support.clearCloneStyle && value === "" && name.indexOf( "background" ) === 0 ) { + style[ name ] = "inherit"; + } + + // If a hook was provided, use that value, otherwise just set the specified value + if ( !hooks || !( "set" in hooks ) || + ( value = hooks.set( elem, value, extra ) ) !== undefined ) { + + if ( isCustomProp ) { + style.setProperty( name, value ); + } else { + style[ name ] = value; + } + } + + } else { + + // If a hook was provided get the non-computed value from there + if ( hooks && "get" in hooks && + ( ret = hooks.get( elem, false, extra ) ) !== undefined ) { + + return ret; + } + + // Otherwise just get the value from the style object + return style[ name ]; + } + }, + + css: function( elem, name, extra, styles ) { + var val, num, hooks, + origName = camelCase( name ), + isCustomProp = rcustomProp.test( name ); + + // Make sure that we're working with the right name. 
We don't + // want to modify the value if it is a CSS custom property + // since they are user-defined. + if ( !isCustomProp ) { + name = finalPropName( origName ); + } + + // Try prefixed name followed by the unprefixed name + hooks = jQuery.cssHooks[ name ] || jQuery.cssHooks[ origName ]; + + // If a hook was provided get the computed value from there + if ( hooks && "get" in hooks ) { + val = hooks.get( elem, true, extra ); + } + + // Otherwise, if a way to get the computed value exists, use that + if ( val === undefined ) { + val = curCSS( elem, name, styles ); + } + + // Convert "normal" to computed value + if ( val === "normal" && name in cssNormalTransform ) { + val = cssNormalTransform[ name ]; + } + + // Make numeric if forced or a qualifier was provided and val looks numeric + if ( extra === "" || extra ) { + num = parseFloat( val ); + return extra === true || isFinite( num ) ? num || 0 : val; + } + + return val; + } +} ); + +jQuery.each( [ "height", "width" ], function( _i, dimension ) { + jQuery.cssHooks[ dimension ] = { + get: function( elem, computed, extra ) { + if ( computed ) { + + // Certain elements can have dimension info if we invisibly show them + // but it must have a current display style that would benefit + return rdisplayswap.test( jQuery.css( elem, "display" ) ) && + + // Support: Safari 8+ + // Table columns in Safari have non-zero offsetWidth & zero + // getBoundingClientRect().width unless display is changed. + // Support: IE <=11 only + // Running getBoundingClientRect on a disconnected node + // in IE throws an error. + ( !elem.getClientRects().length || !elem.getBoundingClientRect().width ) ? + swap( elem, cssShow, function() { + return getWidthOrHeight( elem, dimension, extra ); + } ) : + getWidthOrHeight( elem, dimension, extra ); + } + }, + + set: function( elem, value, extra ) { + var matches, + styles = getStyles( elem ), + + // Only read styles.position if the test has a chance to fail + // to avoid forcing a reflow. 
+ scrollboxSizeBuggy = !support.scrollboxSize() && + styles.position === "absolute", + + // To avoid forcing a reflow, only fetch boxSizing if we need it (gh-3991) + boxSizingNeeded = scrollboxSizeBuggy || extra, + isBorderBox = boxSizingNeeded && + jQuery.css( elem, "boxSizing", false, styles ) === "border-box", + subtract = extra ? + boxModelAdjustment( + elem, + dimension, + extra, + isBorderBox, + styles + ) : + 0; + + // Account for unreliable border-box dimensions by comparing offset* to computed and + // faking a content-box to get border and padding (gh-3699) + if ( isBorderBox && scrollboxSizeBuggy ) { + subtract -= Math.ceil( + elem[ "offset" + dimension[ 0 ].toUpperCase() + dimension.slice( 1 ) ] - + parseFloat( styles[ dimension ] ) - + boxModelAdjustment( elem, dimension, "border", false, styles ) - + 0.5 + ); + } + + // Convert to pixels if value adjustment is needed + if ( subtract && ( matches = rcssNum.exec( value ) ) && + ( matches[ 3 ] || "px" ) !== "px" ) { + + elem.style[ dimension ] = value; + value = jQuery.css( elem, dimension ); + } + + return setPositiveNumber( elem, value, subtract ); + } + }; +} ); + +jQuery.cssHooks.marginLeft = addGetHookIf( support.reliableMarginLeft, + function( elem, computed ) { + if ( computed ) { + return ( parseFloat( curCSS( elem, "marginLeft" ) ) || + elem.getBoundingClientRect().left - + swap( elem, { marginLeft: 0 }, function() { + return elem.getBoundingClientRect().left; + } ) + ) + "px"; + } + } +); + +// These hooks are used by animate to expand properties +jQuery.each( { + margin: "", + padding: "", + border: "Width" +}, function( prefix, suffix ) { + jQuery.cssHooks[ prefix + suffix ] = { + expand: function( value ) { + var i = 0, + expanded = {}, + + // Assumes a single number if not a string + parts = typeof value === "string" ? 
value.split( " " ) : [ value ]; + + for ( ; i < 4; i++ ) { + expanded[ prefix + cssExpand[ i ] + suffix ] = + parts[ i ] || parts[ i - 2 ] || parts[ 0 ]; + } + + return expanded; + } + }; + + if ( prefix !== "margin" ) { + jQuery.cssHooks[ prefix + suffix ].set = setPositiveNumber; + } +} ); + +jQuery.fn.extend( { + css: function( name, value ) { + return access( this, function( elem, name, value ) { + var styles, len, + map = {}, + i = 0; + + if ( Array.isArray( name ) ) { + styles = getStyles( elem ); + len = name.length; + + for ( ; i < len; i++ ) { + map[ name[ i ] ] = jQuery.css( elem, name[ i ], false, styles ); + } + + return map; + } + + return value !== undefined ? + jQuery.style( elem, name, value ) : + jQuery.css( elem, name ); + }, name, value, arguments.length > 1 ); + } +} ); + + +function Tween( elem, options, prop, end, easing ) { + return new Tween.prototype.init( elem, options, prop, end, easing ); +} +jQuery.Tween = Tween; + +Tween.prototype = { + constructor: Tween, + init: function( elem, options, prop, end, easing, unit ) { + this.elem = elem; + this.prop = prop; + this.easing = easing || jQuery.easing._default; + this.options = options; + this.start = this.now = this.cur(); + this.end = end; + this.unit = unit || ( jQuery.cssNumber[ prop ] ? "" : "px" ); + }, + cur: function() { + var hooks = Tween.propHooks[ this.prop ]; + + return hooks && hooks.get ? 
+ hooks.get( this ) : + Tween.propHooks._default.get( this ); + }, + run: function( percent ) { + var eased, + hooks = Tween.propHooks[ this.prop ]; + + if ( this.options.duration ) { + this.pos = eased = jQuery.easing[ this.easing ]( + percent, this.options.duration * percent, 0, 1, this.options.duration + ); + } else { + this.pos = eased = percent; + } + this.now = ( this.end - this.start ) * eased + this.start; + + if ( this.options.step ) { + this.options.step.call( this.elem, this.now, this ); + } + + if ( hooks && hooks.set ) { + hooks.set( this ); + } else { + Tween.propHooks._default.set( this ); + } + return this; + } +}; + +Tween.prototype.init.prototype = Tween.prototype; + +Tween.propHooks = { + _default: { + get: function( tween ) { + var result; + + // Use a property on the element directly when it is not a DOM element, + // or when there is no matching style property that exists. + if ( tween.elem.nodeType !== 1 || + tween.elem[ tween.prop ] != null && tween.elem.style[ tween.prop ] == null ) { + return tween.elem[ tween.prop ]; + } + + // Passing an empty string as a 3rd parameter to .css will automatically + // attempt a parseFloat and fallback to a string if the parse fails. + // Simple values such as "10px" are parsed to Float; + // complex values such as "rotate(1rad)" are returned as-is. + result = jQuery.css( tween.elem, tween.prop, "" ); + + // Empty strings, null, undefined and "auto" are converted to 0. + return !result || result === "auto" ? 0 : result; + }, + set: function( tween ) { + + // Use step hook for back compat. + // Use cssHook if its there. + // Use .style if available and use plain properties where available. 
+ if ( jQuery.fx.step[ tween.prop ] ) { + jQuery.fx.step[ tween.prop ]( tween ); + } else if ( tween.elem.nodeType === 1 && ( + jQuery.cssHooks[ tween.prop ] || + tween.elem.style[ finalPropName( tween.prop ) ] != null ) ) { + jQuery.style( tween.elem, tween.prop, tween.now + tween.unit ); + } else { + tween.elem[ tween.prop ] = tween.now; + } + } + } +}; + +// Support: IE <=9 only +// Panic based approach to setting things on disconnected nodes +Tween.propHooks.scrollTop = Tween.propHooks.scrollLeft = { + set: function( tween ) { + if ( tween.elem.nodeType && tween.elem.parentNode ) { + tween.elem[ tween.prop ] = tween.now; + } + } +}; + +jQuery.easing = { + linear: function( p ) { + return p; + }, + swing: function( p ) { + return 0.5 - Math.cos( p * Math.PI ) / 2; + }, + _default: "swing" +}; + +jQuery.fx = Tween.prototype.init; + +// Back compat <1.8 extension point +jQuery.fx.step = {}; + + + + +var + fxNow, inProgress, + rfxtypes = /^(?:toggle|show|hide)$/, + rrun = /queueHooks$/; + +function schedule() { + if ( inProgress ) { + if ( document.hidden === false && window.requestAnimationFrame ) { + window.requestAnimationFrame( schedule ); + } else { + window.setTimeout( schedule, jQuery.fx.interval ); + } + + jQuery.fx.tick(); + } +} + +// Animations created synchronously will run synchronously +function createFxNow() { + window.setTimeout( function() { + fxNow = undefined; + } ); + return ( fxNow = Date.now() ); +} + +// Generate parameters to create a standard animation +function genFx( type, includeWidth ) { + var which, + i = 0, + attrs = { height: type }; + + // If we include width, step value is 1 to do all cssExpand values, + // otherwise step value is 2 to skip over Left and Right + includeWidth = includeWidth ? 
1 : 0; + for ( ; i < 4; i += 2 - includeWidth ) { + which = cssExpand[ i ]; + attrs[ "margin" + which ] = attrs[ "padding" + which ] = type; + } + + if ( includeWidth ) { + attrs.opacity = attrs.width = type; + } + + return attrs; +} + +function createTween( value, prop, animation ) { + var tween, + collection = ( Animation.tweeners[ prop ] || [] ).concat( Animation.tweeners[ "*" ] ), + index = 0, + length = collection.length; + for ( ; index < length; index++ ) { + if ( ( tween = collection[ index ].call( animation, prop, value ) ) ) { + + // We're done with this property + return tween; + } + } +} + +function defaultPrefilter( elem, props, opts ) { + var prop, value, toggle, hooks, oldfire, propTween, restoreDisplay, display, + isBox = "width" in props || "height" in props, + anim = this, + orig = {}, + style = elem.style, + hidden = elem.nodeType && isHiddenWithinTree( elem ), + dataShow = dataPriv.get( elem, "fxshow" ); + + // Queue-skipping animations hijack the fx hooks + if ( !opts.queue ) { + hooks = jQuery._queueHooks( elem, "fx" ); + if ( hooks.unqueued == null ) { + hooks.unqueued = 0; + oldfire = hooks.empty.fire; + hooks.empty.fire = function() { + if ( !hooks.unqueued ) { + oldfire(); + } + }; + } + hooks.unqueued++; + + anim.always( function() { + + // Ensure the complete handler is called before this completes + anim.always( function() { + hooks.unqueued--; + if ( !jQuery.queue( elem, "fx" ).length ) { + hooks.empty.fire(); + } + } ); + } ); + } + + // Detect show/hide animations + for ( prop in props ) { + value = props[ prop ]; + if ( rfxtypes.test( value ) ) { + delete props[ prop ]; + toggle = toggle || value === "toggle"; + if ( value === ( hidden ? 
"hide" : "show" ) ) { + + // Pretend to be hidden if this is a "show" and + // there is still data from a stopped show/hide + if ( value === "show" && dataShow && dataShow[ prop ] !== undefined ) { + hidden = true; + + // Ignore all other no-op show/hide data + } else { + continue; + } + } + orig[ prop ] = dataShow && dataShow[ prop ] || jQuery.style( elem, prop ); + } + } + + // Bail out if this is a no-op like .hide().hide() + propTween = !jQuery.isEmptyObject( props ); + if ( !propTween && jQuery.isEmptyObject( orig ) ) { + return; + } + + // Restrict "overflow" and "display" styles during box animations + if ( isBox && elem.nodeType === 1 ) { + + // Support: IE <=9 - 11, Edge 12 - 15 + // Record all 3 overflow attributes because IE does not infer the shorthand + // from identically-valued overflowX and overflowY and Edge just mirrors + // the overflowX value there. + opts.overflow = [ style.overflow, style.overflowX, style.overflowY ]; + + // Identify a display type, preferring old show/hide data over the CSS cascade + restoreDisplay = dataShow && dataShow.display; + if ( restoreDisplay == null ) { + restoreDisplay = dataPriv.get( elem, "display" ); + } + display = jQuery.css( elem, "display" ); + if ( display === "none" ) { + if ( restoreDisplay ) { + display = restoreDisplay; + } else { + + // Get nonempty value(s) by temporarily forcing visibility + showHide( [ elem ], true ); + restoreDisplay = elem.style.display || restoreDisplay; + display = jQuery.css( elem, "display" ); + showHide( [ elem ] ); + } + } + + // Animate inline elements as inline-block + if ( display === "inline" || display === "inline-block" && restoreDisplay != null ) { + if ( jQuery.css( elem, "float" ) === "none" ) { + + // Restore the original display value at the end of pure show/hide animations + if ( !propTween ) { + anim.done( function() { + style.display = restoreDisplay; + } ); + if ( restoreDisplay == null ) { + display = style.display; + restoreDisplay = display === "none" ? 
"" : display; + } + } + style.display = "inline-block"; + } + } + } + + if ( opts.overflow ) { + style.overflow = "hidden"; + anim.always( function() { + style.overflow = opts.overflow[ 0 ]; + style.overflowX = opts.overflow[ 1 ]; + style.overflowY = opts.overflow[ 2 ]; + } ); + } + + // Implement show/hide animations + propTween = false; + for ( prop in orig ) { + + // General show/hide setup for this element animation + if ( !propTween ) { + if ( dataShow ) { + if ( "hidden" in dataShow ) { + hidden = dataShow.hidden; + } + } else { + dataShow = dataPriv.access( elem, "fxshow", { display: restoreDisplay } ); + } + + // Store hidden/visible for toggle so `.stop().toggle()` "reverses" + if ( toggle ) { + dataShow.hidden = !hidden; + } + + // Show elements before animating them + if ( hidden ) { + showHide( [ elem ], true ); + } + + /* eslint-disable no-loop-func */ + + anim.done( function() { + + /* eslint-enable no-loop-func */ + + // The final step of a "hide" animation is actually hiding the element + if ( !hidden ) { + showHide( [ elem ] ); + } + dataPriv.remove( elem, "fxshow" ); + for ( prop in orig ) { + jQuery.style( elem, prop, orig[ prop ] ); + } + } ); + } + + // Per-property setup + propTween = createTween( hidden ? 
dataShow[ prop ] : 0, prop, anim ); + if ( !( prop in dataShow ) ) { + dataShow[ prop ] = propTween.start; + if ( hidden ) { + propTween.end = propTween.start; + propTween.start = 0; + } + } + } +} + +function propFilter( props, specialEasing ) { + var index, name, easing, value, hooks; + + // camelCase, specialEasing and expand cssHook pass + for ( index in props ) { + name = camelCase( index ); + easing = specialEasing[ name ]; + value = props[ index ]; + if ( Array.isArray( value ) ) { + easing = value[ 1 ]; + value = props[ index ] = value[ 0 ]; + } + + if ( index !== name ) { + props[ name ] = value; + delete props[ index ]; + } + + hooks = jQuery.cssHooks[ name ]; + if ( hooks && "expand" in hooks ) { + value = hooks.expand( value ); + delete props[ name ]; + + // Not quite $.extend, this won't overwrite existing keys. + // Reusing 'index' because we have the correct "name" + for ( index in value ) { + if ( !( index in props ) ) { + props[ index ] = value[ index ]; + specialEasing[ index ] = easing; + } + } + } else { + specialEasing[ name ] = easing; + } + } +} + +function Animation( elem, properties, options ) { + var result, + stopped, + index = 0, + length = Animation.prefilters.length, + deferred = jQuery.Deferred().always( function() { + + // Don't match elem in the :animated selector + delete tick.elem; + } ), + tick = function() { + if ( stopped ) { + return false; + } + var currentTime = fxNow || createFxNow(), + remaining = Math.max( 0, animation.startTime + animation.duration - currentTime ), + + // Support: Android 2.3 only + // Archaic crash bug won't allow us to use `1 - ( 0.5 || 0 )` (#12497) + temp = remaining / animation.duration || 0, + percent = 1 - temp, + index = 0, + length = animation.tweens.length; + + for ( ; index < length; index++ ) { + animation.tweens[ index ].run( percent ); + } + + deferred.notifyWith( elem, [ animation, percent, remaining ] ); + + // If there's more to do, yield + if ( percent < 1 && length ) { + return 
remaining; + } + + // If this was an empty animation, synthesize a final progress notification + if ( !length ) { + deferred.notifyWith( elem, [ animation, 1, 0 ] ); + } + + // Resolve the animation and report its conclusion + deferred.resolveWith( elem, [ animation ] ); + return false; + }, + animation = deferred.promise( { + elem: elem, + props: jQuery.extend( {}, properties ), + opts: jQuery.extend( true, { + specialEasing: {}, + easing: jQuery.easing._default + }, options ), + originalProperties: properties, + originalOptions: options, + startTime: fxNow || createFxNow(), + duration: options.duration, + tweens: [], + createTween: function( prop, end ) { + var tween = jQuery.Tween( elem, animation.opts, prop, end, + animation.opts.specialEasing[ prop ] || animation.opts.easing ); + animation.tweens.push( tween ); + return tween; + }, + stop: function( gotoEnd ) { + var index = 0, + + // If we are going to the end, we want to run all the tweens + // otherwise we skip this part + length = gotoEnd ? 
animation.tweens.length : 0; + if ( stopped ) { + return this; + } + stopped = true; + for ( ; index < length; index++ ) { + animation.tweens[ index ].run( 1 ); + } + + // Resolve when we played the last frame; otherwise, reject + if ( gotoEnd ) { + deferred.notifyWith( elem, [ animation, 1, 0 ] ); + deferred.resolveWith( elem, [ animation, gotoEnd ] ); + } else { + deferred.rejectWith( elem, [ animation, gotoEnd ] ); + } + return this; + } + } ), + props = animation.props; + + propFilter( props, animation.opts.specialEasing ); + + for ( ; index < length; index++ ) { + result = Animation.prefilters[ index ].call( animation, elem, props, animation.opts ); + if ( result ) { + if ( isFunction( result.stop ) ) { + jQuery._queueHooks( animation.elem, animation.opts.queue ).stop = + result.stop.bind( result ); + } + return result; + } + } + + jQuery.map( props, createTween, animation ); + + if ( isFunction( animation.opts.start ) ) { + animation.opts.start.call( elem, animation ); + } + + // Attach callbacks from options + animation + .progress( animation.opts.progress ) + .done( animation.opts.done, animation.opts.complete ) + .fail( animation.opts.fail ) + .always( animation.opts.always ); + + jQuery.fx.timer( + jQuery.extend( tick, { + elem: elem, + anim: animation, + queue: animation.opts.queue + } ) + ); + + return animation; +} + +jQuery.Animation = jQuery.extend( Animation, { + + tweeners: { + "*": [ function( prop, value ) { + var tween = this.createTween( prop, value ); + adjustCSS( tween.elem, prop, rcssNum.exec( value ), tween ); + return tween; + } ] + }, + + tweener: function( props, callback ) { + if ( isFunction( props ) ) { + callback = props; + props = [ "*" ]; + } else { + props = props.match( rnothtmlwhite ); + } + + var prop, + index = 0, + length = props.length; + + for ( ; index < length; index++ ) { + prop = props[ index ]; + Animation.tweeners[ prop ] = Animation.tweeners[ prop ] || []; + Animation.tweeners[ prop ].unshift( callback ); + } + }, + 
+ prefilters: [ defaultPrefilter ], + + prefilter: function( callback, prepend ) { + if ( prepend ) { + Animation.prefilters.unshift( callback ); + } else { + Animation.prefilters.push( callback ); + } + } +} ); + +jQuery.speed = function( speed, easing, fn ) { + var opt = speed && typeof speed === "object" ? jQuery.extend( {}, speed ) : { + complete: fn || !fn && easing || + isFunction( speed ) && speed, + duration: speed, + easing: fn && easing || easing && !isFunction( easing ) && easing + }; + + // Go to the end state if fx are off + if ( jQuery.fx.off ) { + opt.duration = 0; + + } else { + if ( typeof opt.duration !== "number" ) { + if ( opt.duration in jQuery.fx.speeds ) { + opt.duration = jQuery.fx.speeds[ opt.duration ]; + + } else { + opt.duration = jQuery.fx.speeds._default; + } + } + } + + // Normalize opt.queue - true/undefined/null -> "fx" + if ( opt.queue == null || opt.queue === true ) { + opt.queue = "fx"; + } + + // Queueing + opt.old = opt.complete; + + opt.complete = function() { + if ( isFunction( opt.old ) ) { + opt.old.call( this ); + } + + if ( opt.queue ) { + jQuery.dequeue( this, opt.queue ); + } + }; + + return opt; +}; + +jQuery.fn.extend( { + fadeTo: function( speed, to, easing, callback ) { + + // Show any hidden elements after setting opacity to 0 + return this.filter( isHiddenWithinTree ).css( "opacity", 0 ).show() + + // Animate to the value specified + .end().animate( { opacity: to }, speed, easing, callback ); + }, + animate: function( prop, speed, easing, callback ) { + var empty = jQuery.isEmptyObject( prop ), + optall = jQuery.speed( speed, easing, callback ), + doAnimation = function() { + + // Operate on a copy of prop so per-property easing won't be lost + var anim = Animation( this, jQuery.extend( {}, prop ), optall ); + + // Empty animations, or finishing resolves immediately + if ( empty || dataPriv.get( this, "finish" ) ) { + anim.stop( true ); + } + }; + doAnimation.finish = doAnimation; + + return empty || optall.queue 
=== false ? + this.each( doAnimation ) : + this.queue( optall.queue, doAnimation ); + }, + stop: function( type, clearQueue, gotoEnd ) { + var stopQueue = function( hooks ) { + var stop = hooks.stop; + delete hooks.stop; + stop( gotoEnd ); + }; + + if ( typeof type !== "string" ) { + gotoEnd = clearQueue; + clearQueue = type; + type = undefined; + } + if ( clearQueue ) { + this.queue( type || "fx", [] ); + } + + return this.each( function() { + var dequeue = true, + index = type != null && type + "queueHooks", + timers = jQuery.timers, + data = dataPriv.get( this ); + + if ( index ) { + if ( data[ index ] && data[ index ].stop ) { + stopQueue( data[ index ] ); + } + } else { + for ( index in data ) { + if ( data[ index ] && data[ index ].stop && rrun.test( index ) ) { + stopQueue( data[ index ] ); + } + } + } + + for ( index = timers.length; index--; ) { + if ( timers[ index ].elem === this && + ( type == null || timers[ index ].queue === type ) ) { + + timers[ index ].anim.stop( gotoEnd ); + dequeue = false; + timers.splice( index, 1 ); + } + } + + // Start the next in the queue if the last step wasn't forced. + // Timers currently will call their complete callbacks, which + // will dequeue but only if they were gotoEnd. + if ( dequeue || !gotoEnd ) { + jQuery.dequeue( this, type ); + } + } ); + }, + finish: function( type ) { + if ( type !== false ) { + type = type || "fx"; + } + return this.each( function() { + var index, + data = dataPriv.get( this ), + queue = data[ type + "queue" ], + hooks = data[ type + "queueHooks" ], + timers = jQuery.timers, + length = queue ? 
queue.length : 0; + + // Enable finishing flag on private data + data.finish = true; + + // Empty the queue first + jQuery.queue( this, type, [] ); + + if ( hooks && hooks.stop ) { + hooks.stop.call( this, true ); + } + + // Look for any active animations, and finish them + for ( index = timers.length; index--; ) { + if ( timers[ index ].elem === this && timers[ index ].queue === type ) { + timers[ index ].anim.stop( true ); + timers.splice( index, 1 ); + } + } + + // Look for any animations in the old queue and finish them + for ( index = 0; index < length; index++ ) { + if ( queue[ index ] && queue[ index ].finish ) { + queue[ index ].finish.call( this ); + } + } + + // Turn off finishing flag + delete data.finish; + } ); + } +} ); + +jQuery.each( [ "toggle", "show", "hide" ], function( _i, name ) { + var cssFn = jQuery.fn[ name ]; + jQuery.fn[ name ] = function( speed, easing, callback ) { + return speed == null || typeof speed === "boolean" ? + cssFn.apply( this, arguments ) : + this.animate( genFx( name, true ), speed, easing, callback ); + }; +} ); + +// Generate shortcuts for custom animations +jQuery.each( { + slideDown: genFx( "show" ), + slideUp: genFx( "hide" ), + slideToggle: genFx( "toggle" ), + fadeIn: { opacity: "show" }, + fadeOut: { opacity: "hide" }, + fadeToggle: { opacity: "toggle" } +}, function( name, props ) { + jQuery.fn[ name ] = function( speed, easing, callback ) { + return this.animate( props, speed, easing, callback ); + }; +} ); + +jQuery.timers = []; +jQuery.fx.tick = function() { + var timer, + i = 0, + timers = jQuery.timers; + + fxNow = Date.now(); + + for ( ; i < timers.length; i++ ) { + timer = timers[ i ]; + + // Run the timer and safely remove it when done (allowing for external removal) + if ( !timer() && timers[ i ] === timer ) { + timers.splice( i--, 1 ); + } + } + + if ( !timers.length ) { + jQuery.fx.stop(); + } + fxNow = undefined; +}; + +jQuery.fx.timer = function( timer ) { + jQuery.timers.push( timer ); + 
jQuery.fx.start(); +}; + +jQuery.fx.interval = 13; +jQuery.fx.start = function() { + if ( inProgress ) { + return; + } + + inProgress = true; + schedule(); +}; + +jQuery.fx.stop = function() { + inProgress = null; +}; + +jQuery.fx.speeds = { + slow: 600, + fast: 200, + + // Default speed + _default: 400 +}; + + +// Based off of the plugin by Clint Helfers, with permission. +// https://web.archive.org/web/20100324014747/http://blindsignals.com/index.php/2009/07/jquery-delay/ +jQuery.fn.delay = function( time, type ) { + time = jQuery.fx ? jQuery.fx.speeds[ time ] || time : time; + type = type || "fx"; + + return this.queue( type, function( next, hooks ) { + var timeout = window.setTimeout( next, time ); + hooks.stop = function() { + window.clearTimeout( timeout ); + }; + } ); +}; + + +( function() { + var input = document.createElement( "input" ), + select = document.createElement( "select" ), + opt = select.appendChild( document.createElement( "option" ) ); + + input.type = "checkbox"; + + // Support: Android <=4.3 only + // Default value for a checkbox should be "on" + support.checkOn = input.value !== ""; + + // Support: IE <=11 only + // Must access selectedIndex to make default options select + support.optSelected = opt.selected; + + // Support: IE <=11 only + // An input loses its value after becoming a radio + input = document.createElement( "input" ); + input.value = "t"; + input.type = "radio"; + support.radioValue = input.value === "t"; +} )(); + + +var boolHook, + attrHandle = jQuery.expr.attrHandle; + +jQuery.fn.extend( { + attr: function( name, value ) { + return access( this, jQuery.attr, name, value, arguments.length > 1 ); + }, + + removeAttr: function( name ) { + return this.each( function() { + jQuery.removeAttr( this, name ); + } ); + } +} ); + +jQuery.extend( { + attr: function( elem, name, value ) { + var ret, hooks, + nType = elem.nodeType; + + // Don't get/set attributes on text, comment and attribute nodes + if ( nType === 3 || nType === 8 || 
nType === 2 ) { + return; + } + + // Fallback to prop when attributes are not supported + if ( typeof elem.getAttribute === "undefined" ) { + return jQuery.prop( elem, name, value ); + } + + // Attribute hooks are determined by the lowercase version + // Grab necessary hook if one is defined + if ( nType !== 1 || !jQuery.isXMLDoc( elem ) ) { + hooks = jQuery.attrHooks[ name.toLowerCase() ] || + ( jQuery.expr.match.bool.test( name ) ? boolHook : undefined ); + } + + if ( value !== undefined ) { + if ( value === null ) { + jQuery.removeAttr( elem, name ); + return; + } + + if ( hooks && "set" in hooks && + ( ret = hooks.set( elem, value, name ) ) !== undefined ) { + return ret; + } + + elem.setAttribute( name, value + "" ); + return value; + } + + if ( hooks && "get" in hooks && ( ret = hooks.get( elem, name ) ) !== null ) { + return ret; + } + + ret = jQuery.find.attr( elem, name ); + + // Non-existent attributes return null, we normalize to undefined + return ret == null ? undefined : ret; + }, + + attrHooks: { + type: { + set: function( elem, value ) { + if ( !support.radioValue && value === "radio" && + nodeName( elem, "input" ) ) { + var val = elem.value; + elem.setAttribute( "type", value ); + if ( val ) { + elem.value = val; + } + return value; + } + } + } + }, + + removeAttr: function( elem, value ) { + var name, + i = 0, + + // Attribute names can contain non-HTML whitespace characters + // https://html.spec.whatwg.org/multipage/syntax.html#attributes-2 + attrNames = value && value.match( rnothtmlwhite ); + + if ( attrNames && elem.nodeType === 1 ) { + while ( ( name = attrNames[ i++ ] ) ) { + elem.removeAttribute( name ); + } + } + } +} ); + +// Hooks for boolean attributes +boolHook = { + set: function( elem, value, name ) { + if ( value === false ) { + + // Remove boolean attributes when set to false + jQuery.removeAttr( elem, name ); + } else { + elem.setAttribute( name, name ); + } + return name; + } +}; + +jQuery.each( 
jQuery.expr.match.bool.source.match( /\w+/g ), function( _i, name ) { + var getter = attrHandle[ name ] || jQuery.find.attr; + + attrHandle[ name ] = function( elem, name, isXML ) { + var ret, handle, + lowercaseName = name.toLowerCase(); + + if ( !isXML ) { + + // Avoid an infinite loop by temporarily removing this function from the getter + handle = attrHandle[ lowercaseName ]; + attrHandle[ lowercaseName ] = ret; + ret = getter( elem, name, isXML ) != null ? + lowercaseName : + null; + attrHandle[ lowercaseName ] = handle; + } + return ret; + }; +} ); + + + + +var rfocusable = /^(?:input|select|textarea|button)$/i, + rclickable = /^(?:a|area)$/i; + +jQuery.fn.extend( { + prop: function( name, value ) { + return access( this, jQuery.prop, name, value, arguments.length > 1 ); + }, + + removeProp: function( name ) { + return this.each( function() { + delete this[ jQuery.propFix[ name ] || name ]; + } ); + } +} ); + +jQuery.extend( { + prop: function( elem, name, value ) { + var ret, hooks, + nType = elem.nodeType; + + // Don't get/set properties on text, comment and attribute nodes + if ( nType === 3 || nType === 8 || nType === 2 ) { + return; + } + + if ( nType !== 1 || !jQuery.isXMLDoc( elem ) ) { + + // Fix name and attach hooks + name = jQuery.propFix[ name ] || name; + hooks = jQuery.propHooks[ name ]; + } + + if ( value !== undefined ) { + if ( hooks && "set" in hooks && + ( ret = hooks.set( elem, value, name ) ) !== undefined ) { + return ret; + } + + return ( elem[ name ] = value ); + } + + if ( hooks && "get" in hooks && ( ret = hooks.get( elem, name ) ) !== null ) { + return ret; + } + + return elem[ name ]; + }, + + propHooks: { + tabIndex: { + get: function( elem ) { + + // Support: IE <=9 - 11 only + // elem.tabIndex doesn't always return the + // correct value when it hasn't been explicitly set + // https://web.archive.org/web/20141116233347/http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/ + // Use 
proper attribute retrieval(#12072) + var tabindex = jQuery.find.attr( elem, "tabindex" ); + + if ( tabindex ) { + return parseInt( tabindex, 10 ); + } + + if ( + rfocusable.test( elem.nodeName ) || + rclickable.test( elem.nodeName ) && + elem.href + ) { + return 0; + } + + return -1; + } + } + }, + + propFix: { + "for": "htmlFor", + "class": "className" + } +} ); + +// Support: IE <=11 only +// Accessing the selectedIndex property +// forces the browser to respect setting selected +// on the option +// The getter ensures a default option is selected +// when in an optgroup +// eslint rule "no-unused-expressions" is disabled for this code +// since it considers such accessions noop +if ( !support.optSelected ) { + jQuery.propHooks.selected = { + get: function( elem ) { + + /* eslint no-unused-expressions: "off" */ + + var parent = elem.parentNode; + if ( parent && parent.parentNode ) { + parent.parentNode.selectedIndex; + } + return null; + }, + set: function( elem ) { + + /* eslint no-unused-expressions: "off" */ + + var parent = elem.parentNode; + if ( parent ) { + parent.selectedIndex; + + if ( parent.parentNode ) { + parent.parentNode.selectedIndex; + } + } + } + }; +} + +jQuery.each( [ + "tabIndex", + "readOnly", + "maxLength", + "cellSpacing", + "cellPadding", + "rowSpan", + "colSpan", + "useMap", + "frameBorder", + "contentEditable" +], function() { + jQuery.propFix[ this.toLowerCase() ] = this; +} ); + + + + + // Strip and collapse whitespace according to HTML spec + // https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace + function stripAndCollapse( value ) { + var tokens = value.match( rnothtmlwhite ) || []; + return tokens.join( " " ); + } + + +function getClass( elem ) { + return elem.getAttribute && elem.getAttribute( "class" ) || ""; +} + +function classesToArray( value ) { + if ( Array.isArray( value ) ) { + return value; + } + if ( typeof value === "string" ) { + return value.match( rnothtmlwhite ) || []; + } + return []; +} + 
+jQuery.fn.extend( { + addClass: function( value ) { + var classes, elem, cur, curValue, clazz, j, finalValue, + i = 0; + + if ( isFunction( value ) ) { + return this.each( function( j ) { + jQuery( this ).addClass( value.call( this, j, getClass( this ) ) ); + } ); + } + + classes = classesToArray( value ); + + if ( classes.length ) { + while ( ( elem = this[ i++ ] ) ) { + curValue = getClass( elem ); + cur = elem.nodeType === 1 && ( " " + stripAndCollapse( curValue ) + " " ); + + if ( cur ) { + j = 0; + while ( ( clazz = classes[ j++ ] ) ) { + if ( cur.indexOf( " " + clazz + " " ) < 0 ) { + cur += clazz + " "; + } + } + + // Only assign if different to avoid unneeded rendering. + finalValue = stripAndCollapse( cur ); + if ( curValue !== finalValue ) { + elem.setAttribute( "class", finalValue ); + } + } + } + } + + return this; + }, + + removeClass: function( value ) { + var classes, elem, cur, curValue, clazz, j, finalValue, + i = 0; + + if ( isFunction( value ) ) { + return this.each( function( j ) { + jQuery( this ).removeClass( value.call( this, j, getClass( this ) ) ); + } ); + } + + if ( !arguments.length ) { + return this.attr( "class", "" ); + } + + classes = classesToArray( value ); + + if ( classes.length ) { + while ( ( elem = this[ i++ ] ) ) { + curValue = getClass( elem ); + + // This expression is here for better compressibility (see addClass) + cur = elem.nodeType === 1 && ( " " + stripAndCollapse( curValue ) + " " ); + + if ( cur ) { + j = 0; + while ( ( clazz = classes[ j++ ] ) ) { + + // Remove *all* instances + while ( cur.indexOf( " " + clazz + " " ) > -1 ) { + cur = cur.replace( " " + clazz + " ", " " ); + } + } + + // Only assign if different to avoid unneeded rendering. 
+ finalValue = stripAndCollapse( cur ); + if ( curValue !== finalValue ) { + elem.setAttribute( "class", finalValue ); + } + } + } + } + + return this; + }, + + toggleClass: function( value, stateVal ) { + var type = typeof value, + isValidValue = type === "string" || Array.isArray( value ); + + if ( typeof stateVal === "boolean" && isValidValue ) { + return stateVal ? this.addClass( value ) : this.removeClass( value ); + } + + if ( isFunction( value ) ) { + return this.each( function( i ) { + jQuery( this ).toggleClass( + value.call( this, i, getClass( this ), stateVal ), + stateVal + ); + } ); + } + + return this.each( function() { + var className, i, self, classNames; + + if ( isValidValue ) { + + // Toggle individual class names + i = 0; + self = jQuery( this ); + classNames = classesToArray( value ); + + while ( ( className = classNames[ i++ ] ) ) { + + // Check each className given, space separated list + if ( self.hasClass( className ) ) { + self.removeClass( className ); + } else { + self.addClass( className ); + } + } + + // Toggle whole class name + } else if ( value === undefined || type === "boolean" ) { + className = getClass( this ); + if ( className ) { + + // Store className if set + dataPriv.set( this, "__className__", className ); + } + + // If the element has a class name or if we're passed `false`, + // then remove the whole classname (if there was one, the above saved it). + // Otherwise bring back whatever was previously saved (if anything), + // falling back to the empty string if nothing was stored. + if ( this.setAttribute ) { + this.setAttribute( "class", + className || value === false ? 
+ "" : + dataPriv.get( this, "__className__" ) || "" + ); + } + } + } ); + }, + + hasClass: function( selector ) { + var className, elem, + i = 0; + + className = " " + selector + " "; + while ( ( elem = this[ i++ ] ) ) { + if ( elem.nodeType === 1 && + ( " " + stripAndCollapse( getClass( elem ) ) + " " ).indexOf( className ) > -1 ) { + return true; + } + } + + return false; + } +} ); + + + + +var rreturn = /\r/g; + +jQuery.fn.extend( { + val: function( value ) { + var hooks, ret, valueIsFunction, + elem = this[ 0 ]; + + if ( !arguments.length ) { + if ( elem ) { + hooks = jQuery.valHooks[ elem.type ] || + jQuery.valHooks[ elem.nodeName.toLowerCase() ]; + + if ( hooks && + "get" in hooks && + ( ret = hooks.get( elem, "value" ) ) !== undefined + ) { + return ret; + } + + ret = elem.value; + + // Handle most common string cases + if ( typeof ret === "string" ) { + return ret.replace( rreturn, "" ); + } + + // Handle cases where value is null/undef or number + return ret == null ? "" : ret; + } + + return; + } + + valueIsFunction = isFunction( value ); + + return this.each( function( i ) { + var val; + + if ( this.nodeType !== 1 ) { + return; + } + + if ( valueIsFunction ) { + val = value.call( this, i, jQuery( this ).val() ); + } else { + val = value; + } + + // Treat null/undefined as ""; convert numbers to string + if ( val == null ) { + val = ""; + + } else if ( typeof val === "number" ) { + val += ""; + + } else if ( Array.isArray( val ) ) { + val = jQuery.map( val, function( value ) { + return value == null ? "" : value + ""; + } ); + } + + hooks = jQuery.valHooks[ this.type ] || jQuery.valHooks[ this.nodeName.toLowerCase() ]; + + // If set returns undefined, fall back to normal setting + if ( !hooks || !( "set" in hooks ) || hooks.set( this, val, "value" ) === undefined ) { + this.value = val; + } + } ); + } +} ); + +jQuery.extend( { + valHooks: { + option: { + get: function( elem ) { + + var val = jQuery.find.attr( elem, "value" ); + return val != null ? 
+ val : + + // Support: IE <=10 - 11 only + // option.text throws exceptions (#14686, #14858) + // Strip and collapse whitespace + // https://html.spec.whatwg.org/#strip-and-collapse-whitespace + stripAndCollapse( jQuery.text( elem ) ); + } + }, + select: { + get: function( elem ) { + var value, option, i, + options = elem.options, + index = elem.selectedIndex, + one = elem.type === "select-one", + values = one ? null : [], + max = one ? index + 1 : options.length; + + if ( index < 0 ) { + i = max; + + } else { + i = one ? index : 0; + } + + // Loop through all the selected options + for ( ; i < max; i++ ) { + option = options[ i ]; + + // Support: IE <=9 only + // IE8-9 doesn't update selected after form reset (#2551) + if ( ( option.selected || i === index ) && + + // Don't return options that are disabled or in a disabled optgroup + !option.disabled && + ( !option.parentNode.disabled || + !nodeName( option.parentNode, "optgroup" ) ) ) { + + // Get the specific value for the option + value = jQuery( option ).val(); + + // We don't need an array for one selects + if ( one ) { + return value; + } + + // Multi-Selects return an array + values.push( value ); + } + } + + return values; + }, + + set: function( elem, value ) { + var optionSet, option, + options = elem.options, + values = jQuery.makeArray( value ), + i = options.length; + + while ( i-- ) { + option = options[ i ]; + + /* eslint-disable no-cond-assign */ + + if ( option.selected = + jQuery.inArray( jQuery.valHooks.option.get( option ), values ) > -1 + ) { + optionSet = true; + } + + /* eslint-enable no-cond-assign */ + } + + // Force browsers to behave consistently when non-matching value is set + if ( !optionSet ) { + elem.selectedIndex = -1; + } + return values; + } + } + } +} ); + +// Radios and checkboxes getter/setter +jQuery.each( [ "radio", "checkbox" ], function() { + jQuery.valHooks[ this ] = { + set: function( elem, value ) { + if ( Array.isArray( value ) ) { + return ( elem.checked = 
jQuery.inArray( jQuery( elem ).val(), value ) > -1 ); + } + } + }; + if ( !support.checkOn ) { + jQuery.valHooks[ this ].get = function( elem ) { + return elem.getAttribute( "value" ) === null ? "on" : elem.value; + }; + } +} ); + + + + +// Return jQuery for attributes-only inclusion + + +support.focusin = "onfocusin" in window; + + +var rfocusMorph = /^(?:focusinfocus|focusoutblur)$/, + stopPropagationCallback = function( e ) { + e.stopPropagation(); + }; + +jQuery.extend( jQuery.event, { + + trigger: function( event, data, elem, onlyHandlers ) { + + var i, cur, tmp, bubbleType, ontype, handle, special, lastElement, + eventPath = [ elem || document ], + type = hasOwn.call( event, "type" ) ? event.type : event, + namespaces = hasOwn.call( event, "namespace" ) ? event.namespace.split( "." ) : []; + + cur = lastElement = tmp = elem = elem || document; + + // Don't do events on text and comment nodes + if ( elem.nodeType === 3 || elem.nodeType === 8 ) { + return; + } + + // focus/blur morphs to focusin/out; ensure we're not firing them right now + if ( rfocusMorph.test( type + jQuery.event.triggered ) ) { + return; + } + + if ( type.indexOf( "." ) > -1 ) { + + // Namespaced trigger; create a regexp to match event type in handle() + namespaces = type.split( "." ); + type = namespaces.shift(); + namespaces.sort(); + } + ontype = type.indexOf( ":" ) < 0 && "on" + type; + + // Caller can pass in a jQuery.Event object, Object, or just an event type string + event = event[ jQuery.expando ] ? + event : + new jQuery.Event( type, typeof event === "object" && event ); + + // Trigger bitmask: & 1 for native handlers; & 2 for jQuery (always true) + event.isTrigger = onlyHandlers ? 2 : 3; + event.namespace = namespaces.join( "." ); + event.rnamespace = event.namespace ? 
+ new RegExp( "(^|\\.)" + namespaces.join( "\\.(?:.*\\.|)" ) + "(\\.|$)" ) : + null; + + // Clean up the event in case it is being reused + event.result = undefined; + if ( !event.target ) { + event.target = elem; + } + + // Clone any incoming data and prepend the event, creating the handler arg list + data = data == null ? + [ event ] : + jQuery.makeArray( data, [ event ] ); + + // Allow special events to draw outside the lines + special = jQuery.event.special[ type ] || {}; + if ( !onlyHandlers && special.trigger && special.trigger.apply( elem, data ) === false ) { + return; + } + + // Determine event propagation path in advance, per W3C events spec (#9951) + // Bubble up to document, then to window; watch for a global ownerDocument var (#9724) + if ( !onlyHandlers && !special.noBubble && !isWindow( elem ) ) { + + bubbleType = special.delegateType || type; + if ( !rfocusMorph.test( bubbleType + type ) ) { + cur = cur.parentNode; + } + for ( ; cur; cur = cur.parentNode ) { + eventPath.push( cur ); + tmp = cur; + } + + // Only add window if we got to document (e.g., not plain obj or detached DOM) + if ( tmp === ( elem.ownerDocument || document ) ) { + eventPath.push( tmp.defaultView || tmp.parentWindow || window ); + } + } + + // Fire handlers on the event path + i = 0; + while ( ( cur = eventPath[ i++ ] ) && !event.isPropagationStopped() ) { + lastElement = cur; + event.type = i > 1 ? 
+ bubbleType : + special.bindType || type; + + // jQuery handler + handle = ( + dataPriv.get( cur, "events" ) || Object.create( null ) + )[ event.type ] && + dataPriv.get( cur, "handle" ); + if ( handle ) { + handle.apply( cur, data ); + } + + // Native handler + handle = ontype && cur[ ontype ]; + if ( handle && handle.apply && acceptData( cur ) ) { + event.result = handle.apply( cur, data ); + if ( event.result === false ) { + event.preventDefault(); + } + } + } + event.type = type; + + // If nobody prevented the default action, do it now + if ( !onlyHandlers && !event.isDefaultPrevented() ) { + + if ( ( !special._default || + special._default.apply( eventPath.pop(), data ) === false ) && + acceptData( elem ) ) { + + // Call a native DOM method on the target with the same name as the event. + // Don't do default actions on window, that's where global variables be (#6170) + if ( ontype && isFunction( elem[ type ] ) && !isWindow( elem ) ) { + + // Don't re-trigger an onFOO event when we call its FOO() method + tmp = elem[ ontype ]; + + if ( tmp ) { + elem[ ontype ] = null; + } + + // Prevent re-triggering of the same event, since we already bubbled it above + jQuery.event.triggered = type; + + if ( event.isPropagationStopped() ) { + lastElement.addEventListener( type, stopPropagationCallback ); + } + + elem[ type ](); + + if ( event.isPropagationStopped() ) { + lastElement.removeEventListener( type, stopPropagationCallback ); + } + + jQuery.event.triggered = undefined; + + if ( tmp ) { + elem[ ontype ] = tmp; + } + } + } + } + + return event.result; + }, + + // Piggyback on a donor event to simulate a different one + // Used only for `focus(in | out)` events + simulate: function( type, elem, event ) { + var e = jQuery.extend( + new jQuery.Event(), + event, + { + type: type, + isSimulated: true + } + ); + + jQuery.event.trigger( e, null, elem ); + } + +} ); + +jQuery.fn.extend( { + + trigger: function( type, data ) { + return this.each( function() { + 
jQuery.event.trigger( type, data, this ); + } ); + }, + triggerHandler: function( type, data ) { + var elem = this[ 0 ]; + if ( elem ) { + return jQuery.event.trigger( type, data, elem, true ); + } + } +} ); + + +// Support: Firefox <=44 +// Firefox doesn't have focus(in | out) events +// Related ticket - https://bugzilla.mozilla.org/show_bug.cgi?id=687787 +// +// Support: Chrome <=48 - 49, Safari <=9.0 - 9.1 +// focus(in | out) events fire after focus & blur events, +// which is spec violation - http://www.w3.org/TR/DOM-Level-3-Events/#events-focusevent-event-order +// Related ticket - https://bugs.chromium.org/p/chromium/issues/detail?id=449857 +if ( !support.focusin ) { + jQuery.each( { focus: "focusin", blur: "focusout" }, function( orig, fix ) { + + // Attach a single capturing handler on the document while someone wants focusin/focusout + var handler = function( event ) { + jQuery.event.simulate( fix, event.target, jQuery.event.fix( event ) ); + }; + + jQuery.event.special[ fix ] = { + setup: function() { + + // Handle: regular nodes (via `this.ownerDocument`), window + // (via `this.document`) & document (via `this`). 
+ var doc = this.ownerDocument || this.document || this, + attaches = dataPriv.access( doc, fix ); + + if ( !attaches ) { + doc.addEventListener( orig, handler, true ); + } + dataPriv.access( doc, fix, ( attaches || 0 ) + 1 ); + }, + teardown: function() { + var doc = this.ownerDocument || this.document || this, + attaches = dataPriv.access( doc, fix ) - 1; + + if ( !attaches ) { + doc.removeEventListener( orig, handler, true ); + dataPriv.remove( doc, fix ); + + } else { + dataPriv.access( doc, fix, attaches ); + } + } + }; + } ); +} +var location = window.location; + +var nonce = { guid: Date.now() }; + +var rquery = ( /\?/ ); + + + +// Cross-browser xml parsing +jQuery.parseXML = function( data ) { + var xml; + if ( !data || typeof data !== "string" ) { + return null; + } + + // Support: IE 9 - 11 only + // IE throws on parseFromString with invalid input. + try { + xml = ( new window.DOMParser() ).parseFromString( data, "text/xml" ); + } catch ( e ) { + xml = undefined; + } + + if ( !xml || xml.getElementsByTagName( "parsererror" ).length ) { + jQuery.error( "Invalid XML: " + data ); + } + return xml; +}; + + +var + rbracket = /\[\]$/, + rCRLF = /\r?\n/g, + rsubmitterTypes = /^(?:submit|button|image|reset|file)$/i, + rsubmittable = /^(?:input|select|textarea|keygen)/i; + +function buildParams( prefix, obj, traditional, add ) { + var name; + + if ( Array.isArray( obj ) ) { + + // Serialize array item. + jQuery.each( obj, function( i, v ) { + if ( traditional || rbracket.test( prefix ) ) { + + // Treat each array item as a scalar. + add( prefix, v ); + + } else { + + // Item is non-scalar (array or object), encode its numeric index. + buildParams( + prefix + "[" + ( typeof v === "object" && v != null ? i : "" ) + "]", + v, + traditional, + add + ); + } + } ); + + } else if ( !traditional && toType( obj ) === "object" ) { + + // Serialize object item. 
+ for ( name in obj ) { + buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add ); + } + + } else { + + // Serialize scalar item. + add( prefix, obj ); + } +} + +// Serialize an array of form elements or a set of +// key/values into a query string +jQuery.param = function( a, traditional ) { + var prefix, + s = [], + add = function( key, valueOrFunction ) { + + // If value is a function, invoke it and use its return value + var value = isFunction( valueOrFunction ) ? + valueOrFunction() : + valueOrFunction; + + s[ s.length ] = encodeURIComponent( key ) + "=" + + encodeURIComponent( value == null ? "" : value ); + }; + + if ( a == null ) { + return ""; + } + + // If an array was passed in, assume that it is an array of form elements. + if ( Array.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) { + + // Serialize the form elements + jQuery.each( a, function() { + add( this.name, this.value ); + } ); + + } else { + + // If traditional, encode the "old" way (the way 1.3.2 or older + // did it), otherwise encode params recursively. + for ( prefix in a ) { + buildParams( prefix, a[ prefix ], traditional, add ); + } + } + + // Return the resulting serialization + return s.join( "&" ); +}; + +jQuery.fn.extend( { + serialize: function() { + return jQuery.param( this.serializeArray() ); + }, + serializeArray: function() { + return this.map( function() { + + // Can add propHook for "elements" to filter or add form elements + var elements = jQuery.prop( this, "elements" ); + return elements ? 
jQuery.makeArray( elements ) : this; + } ) + .filter( function() { + var type = this.type; + + // Use .is( ":disabled" ) so that fieldset[disabled] works + return this.name && !jQuery( this ).is( ":disabled" ) && + rsubmittable.test( this.nodeName ) && !rsubmitterTypes.test( type ) && + ( this.checked || !rcheckableType.test( type ) ); + } ) + .map( function( _i, elem ) { + var val = jQuery( this ).val(); + + if ( val == null ) { + return null; + } + + if ( Array.isArray( val ) ) { + return jQuery.map( val, function( val ) { + return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + } ); + } + + return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) }; + } ).get(); + } +} ); + + +var + r20 = /%20/g, + rhash = /#.*$/, + rantiCache = /([?&])_=[^&]*/, + rheaders = /^(.*?):[ \t]*([^\r\n]*)$/mg, + + // #7653, #8125, #8152: local protocol detection + rlocalProtocol = /^(?:about|app|app-storage|.+-extension|file|res|widget):$/, + rnoContent = /^(?:GET|HEAD)$/, + rprotocol = /^\/\//, + + /* Prefilters + * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example) + * 2) These are called: + * - BEFORE asking for a transport + * - AFTER param serialization (s.data is a string if s.processData is true) + * 3) key is the dataType + * 4) the catchall symbol "*" can be used + * 5) execution will start with transport dataType and THEN continue down to "*" if needed + */ + prefilters = {}, + + /* Transports bindings + * 1) key is the dataType + * 2) the catchall symbol "*" can be used + * 3) selection will start with transport dataType and THEN go to "*" if needed + */ + transports = {}, + + // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression + allTypes = "*/".concat( "*" ), + + // Anchor tag for parsing the document origin + originAnchor = document.createElement( "a" ); + originAnchor.href = location.href; + +// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport +function 
addToPrefiltersOrTransports( structure ) { + + // dataTypeExpression is optional and defaults to "*" + return function( dataTypeExpression, func ) { + + if ( typeof dataTypeExpression !== "string" ) { + func = dataTypeExpression; + dataTypeExpression = "*"; + } + + var dataType, + i = 0, + dataTypes = dataTypeExpression.toLowerCase().match( rnothtmlwhite ) || []; + + if ( isFunction( func ) ) { + + // For each dataType in the dataTypeExpression + while ( ( dataType = dataTypes[ i++ ] ) ) { + + // Prepend if requested + if ( dataType[ 0 ] === "+" ) { + dataType = dataType.slice( 1 ) || "*"; + ( structure[ dataType ] = structure[ dataType ] || [] ).unshift( func ); + + // Otherwise append + } else { + ( structure[ dataType ] = structure[ dataType ] || [] ).push( func ); + } + } + } + }; +} + +// Base inspection function for prefilters and transports +function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR ) { + + var inspected = {}, + seekingTransport = ( structure === transports ); + + function inspect( dataType ) { + var selected; + inspected[ dataType ] = true; + jQuery.each( structure[ dataType ] || [], function( _, prefilterOrFactory ) { + var dataTypeOrTransport = prefilterOrFactory( options, originalOptions, jqXHR ); + if ( typeof dataTypeOrTransport === "string" && + !seekingTransport && !inspected[ dataTypeOrTransport ] ) { + + options.dataTypes.unshift( dataTypeOrTransport ); + inspect( dataTypeOrTransport ); + return false; + } else if ( seekingTransport ) { + return !( selected = dataTypeOrTransport ); + } + } ); + return selected; + } + + return inspect( options.dataTypes[ 0 ] ) || !inspected[ "*" ] && inspect( "*" ); +} + +// A special extend for ajax options +// that takes "flat" options (not to be deep extended) +// Fixes #9887 +function ajaxExtend( target, src ) { + var key, deep, + flatOptions = jQuery.ajaxSettings.flatOptions || {}; + + for ( key in src ) { + if ( src[ key ] !== undefined ) { + ( flatOptions[ key ] ? 
target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ]; + } + } + if ( deep ) { + jQuery.extend( true, target, deep ); + } + + return target; +} + +/* Handles responses to an ajax request: + * - finds the right dataType (mediates between content-type and expected dataType) + * - returns the corresponding response + */ +function ajaxHandleResponses( s, jqXHR, responses ) { + + var ct, type, finalDataType, firstDataType, + contents = s.contents, + dataTypes = s.dataTypes; + + // Remove auto dataType and get content-type in the process + while ( dataTypes[ 0 ] === "*" ) { + dataTypes.shift(); + if ( ct === undefined ) { + ct = s.mimeType || jqXHR.getResponseHeader( "Content-Type" ); + } + } + + // Check if we're dealing with a known content-type + if ( ct ) { + for ( type in contents ) { + if ( contents[ type ] && contents[ type ].test( ct ) ) { + dataTypes.unshift( type ); + break; + } + } + } + + // Check to see if we have a response for the expected dataType + if ( dataTypes[ 0 ] in responses ) { + finalDataType = dataTypes[ 0 ]; + } else { + + // Try convertible dataTypes + for ( type in responses ) { + if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[ 0 ] ] ) { + finalDataType = type; + break; + } + if ( !firstDataType ) { + firstDataType = type; + } + } + + // Or just use first one + finalDataType = finalDataType || firstDataType; + } + + // If we found a dataType + // We add the dataType to the list if needed + // and return the corresponding response + if ( finalDataType ) { + if ( finalDataType !== dataTypes[ 0 ] ) { + dataTypes.unshift( finalDataType ); + } + return responses[ finalDataType ]; + } +} + +/* Chain conversions given the request and the original response + * Also sets the responseXXX fields on the jqXHR instance + */ +function ajaxConvert( s, response, jqXHR, isSuccess ) { + var conv2, current, conv, tmp, prev, + converters = {}, + + // Work with a copy of dataTypes in case we need to modify it for conversion + dataTypes = 
s.dataTypes.slice(); + + // Create converters map with lowercased keys + if ( dataTypes[ 1 ] ) { + for ( conv in s.converters ) { + converters[ conv.toLowerCase() ] = s.converters[ conv ]; + } + } + + current = dataTypes.shift(); + + // Convert to each sequential dataType + while ( current ) { + + if ( s.responseFields[ current ] ) { + jqXHR[ s.responseFields[ current ] ] = response; + } + + // Apply the dataFilter if provided + if ( !prev && isSuccess && s.dataFilter ) { + response = s.dataFilter( response, s.dataType ); + } + + prev = current; + current = dataTypes.shift(); + + if ( current ) { + + // There's only work to do if current dataType is non-auto + if ( current === "*" ) { + + current = prev; + + // Convert response if prev dataType is non-auto and differs from current + } else if ( prev !== "*" && prev !== current ) { + + // Seek a direct converter + conv = converters[ prev + " " + current ] || converters[ "* " + current ]; + + // If none found, seek a pair + if ( !conv ) { + for ( conv2 in converters ) { + + // If conv2 outputs current + tmp = conv2.split( " " ); + if ( tmp[ 1 ] === current ) { + + // If prev can be converted to accepted input + conv = converters[ prev + " " + tmp[ 0 ] ] || + converters[ "* " + tmp[ 0 ] ]; + if ( conv ) { + + // Condense equivalence converters + if ( conv === true ) { + conv = converters[ conv2 ]; + + // Otherwise, insert the intermediate dataType + } else if ( converters[ conv2 ] !== true ) { + current = tmp[ 0 ]; + dataTypes.unshift( tmp[ 1 ] ); + } + break; + } + } + } + } + + // Apply converter (if not an equivalence) + if ( conv !== true ) { + + // Unless errors are allowed to bubble, catch and return them + if ( conv && s.throws ) { + response = conv( response ); + } else { + try { + response = conv( response ); + } catch ( e ) { + return { + state: "parsererror", + error: conv ? 
e : "No conversion from " + prev + " to " + current + }; + } + } + } + } + } + } + + return { state: "success", data: response }; +} + +jQuery.extend( { + + // Counter for holding the number of active queries + active: 0, + + // Last-Modified header cache for next request + lastModified: {}, + etag: {}, + + ajaxSettings: { + url: location.href, + type: "GET", + isLocal: rlocalProtocol.test( location.protocol ), + global: true, + processData: true, + async: true, + contentType: "application/x-www-form-urlencoded; charset=UTF-8", + + /* + timeout: 0, + data: null, + dataType: null, + username: null, + password: null, + cache: null, + throws: false, + traditional: false, + headers: {}, + */ + + accepts: { + "*": allTypes, + text: "text/plain", + html: "text/html", + xml: "application/xml, text/xml", + json: "application/json, text/javascript" + }, + + contents: { + xml: /\bxml\b/, + html: /\bhtml/, + json: /\bjson\b/ + }, + + responseFields: { + xml: "responseXML", + text: "responseText", + json: "responseJSON" + }, + + // Data converters + // Keys separate source (or catchall "*") and destination types with a single space + converters: { + + // Convert anything to text + "* text": String, + + // Text to html (true = no transformation) + "text html": true, + + // Evaluate text as a json expression + "text json": JSON.parse, + + // Parse text as xml + "text xml": jQuery.parseXML + }, + + // For options that shouldn't be deep extended: + // you can add your own custom options here if + // and when you create one that shouldn't be + // deep extended (see ajaxExtend) + flatOptions: { + url: true, + context: true + } + }, + + // Creates a full fledged settings object into target + // with both ajaxSettings and settings fields. + // If target is omitted, writes into ajaxSettings. + ajaxSetup: function( target, settings ) { + return settings ? 
+ + // Building a settings object + ajaxExtend( ajaxExtend( target, jQuery.ajaxSettings ), settings ) : + + // Extending ajaxSettings + ajaxExtend( jQuery.ajaxSettings, target ); + }, + + ajaxPrefilter: addToPrefiltersOrTransports( prefilters ), + ajaxTransport: addToPrefiltersOrTransports( transports ), + + // Main method + ajax: function( url, options ) { + + // If url is an object, simulate pre-1.5 signature + if ( typeof url === "object" ) { + options = url; + url = undefined; + } + + // Force options to be an object + options = options || {}; + + var transport, + + // URL without anti-cache param + cacheURL, + + // Response headers + responseHeadersString, + responseHeaders, + + // timeout handle + timeoutTimer, + + // Url cleanup var + urlAnchor, + + // Request state (becomes false upon send and true upon completion) + completed, + + // To know if global events are to be dispatched + fireGlobals, + + // Loop variable + i, + + // uncached part of the url + uncached, + + // Create the final options object + s = jQuery.ajaxSetup( {}, options ), + + // Callbacks context + callbackContext = s.context || s, + + // Context for global events is callbackContext if it is a DOM node or jQuery collection + globalEventContext = s.context && + ( callbackContext.nodeType || callbackContext.jquery ) ? 
+ jQuery( callbackContext ) : + jQuery.event, + + // Deferreds + deferred = jQuery.Deferred(), + completeDeferred = jQuery.Callbacks( "once memory" ), + + // Status-dependent callbacks + statusCode = s.statusCode || {}, + + // Headers (they are sent all at once) + requestHeaders = {}, + requestHeadersNames = {}, + + // Default abort message + strAbort = "canceled", + + // Fake xhr + jqXHR = { + readyState: 0, + + // Builds headers hashtable if needed + getResponseHeader: function( key ) { + var match; + if ( completed ) { + if ( !responseHeaders ) { + responseHeaders = {}; + while ( ( match = rheaders.exec( responseHeadersString ) ) ) { + responseHeaders[ match[ 1 ].toLowerCase() + " " ] = + ( responseHeaders[ match[ 1 ].toLowerCase() + " " ] || [] ) + .concat( match[ 2 ] ); + } + } + match = responseHeaders[ key.toLowerCase() + " " ]; + } + return match == null ? null : match.join( ", " ); + }, + + // Raw string + getAllResponseHeaders: function() { + return completed ? responseHeadersString : null; + }, + + // Caches the header + setRequestHeader: function( name, value ) { + if ( completed == null ) { + name = requestHeadersNames[ name.toLowerCase() ] = + requestHeadersNames[ name.toLowerCase() ] || name; + requestHeaders[ name ] = value; + } + return this; + }, + + // Overrides response content-type header + overrideMimeType: function( type ) { + if ( completed == null ) { + s.mimeType = type; + } + return this; + }, + + // Status-dependent callbacks + statusCode: function( map ) { + var code; + if ( map ) { + if ( completed ) { + + // Execute the appropriate callbacks + jqXHR.always( map[ jqXHR.status ] ); + } else { + + // Lazy-add the new callbacks in a way that preserves old ones + for ( code in map ) { + statusCode[ code ] = [ statusCode[ code ], map[ code ] ]; + } + } + } + return this; + }, + + // Cancel the request + abort: function( statusText ) { + var finalText = statusText || strAbort; + if ( transport ) { + transport.abort( finalText ); + } + done( 
0, finalText ); + return this; + } + }; + + // Attach deferreds + deferred.promise( jqXHR ); + + // Add protocol if not provided (prefilters might expect it) + // Handle falsy url in the settings object (#10093: consistency with old signature) + // We also use the url parameter if available + s.url = ( ( url || s.url || location.href ) + "" ) + .replace( rprotocol, location.protocol + "//" ); + + // Alias method option to type as per ticket #12004 + s.type = options.method || options.type || s.method || s.type; + + // Extract dataTypes list + s.dataTypes = ( s.dataType || "*" ).toLowerCase().match( rnothtmlwhite ) || [ "" ]; + + // A cross-domain request is in order when the origin doesn't match the current origin. + if ( s.crossDomain == null ) { + urlAnchor = document.createElement( "a" ); + + // Support: IE <=8 - 11, Edge 12 - 15 + // IE throws exception on accessing the href property if url is malformed, + // e.g. http://example.com:80x/ + try { + urlAnchor.href = s.url; + + // Support: IE <=8 - 11 only + // Anchor's host property isn't correctly set when s.url is relative + urlAnchor.href = urlAnchor.href; + s.crossDomain = originAnchor.protocol + "//" + originAnchor.host !== + urlAnchor.protocol + "//" + urlAnchor.host; + } catch ( e ) { + + // If there is an error parsing the URL, assume it is crossDomain, + // it can be rejected by the transport if it is invalid + s.crossDomain = true; + } + } + + // Convert data if not already a string + if ( s.data && s.processData && typeof s.data !== "string" ) { + s.data = jQuery.param( s.data, s.traditional ); + } + + // Apply prefilters + inspectPrefiltersOrTransports( prefilters, s, options, jqXHR ); + + // If request was aborted inside a prefilter, stop there + if ( completed ) { + return jqXHR; + } + + // We can fire global events as of now if asked to + // Don't fire events if jQuery.event is undefined in an AMD-usage scenario (#15118) + fireGlobals = jQuery.event && s.global; + + // Watch for a new set of 
requests + if ( fireGlobals && jQuery.active++ === 0 ) { + jQuery.event.trigger( "ajaxStart" ); + } + + // Uppercase the type + s.type = s.type.toUpperCase(); + + // Determine if request has content + s.hasContent = !rnoContent.test( s.type ); + + // Save the URL in case we're toying with the If-Modified-Since + // and/or If-None-Match header later on + // Remove hash to simplify url manipulation + cacheURL = s.url.replace( rhash, "" ); + + // More options handling for requests with no content + if ( !s.hasContent ) { + + // Remember the hash so we can put it back + uncached = s.url.slice( cacheURL.length ); + + // If data is available and should be processed, append data to url + if ( s.data && ( s.processData || typeof s.data === "string" ) ) { + cacheURL += ( rquery.test( cacheURL ) ? "&" : "?" ) + s.data; + + // #9682: remove data so that it's not used in an eventual retry + delete s.data; + } + + // Add or update anti-cache param if needed + if ( s.cache === false ) { + cacheURL = cacheURL.replace( rantiCache, "$1" ); + uncached = ( rquery.test( cacheURL ) ? "&" : "?" ) + "_=" + ( nonce.guid++ ) + + uncached; + } + + // Put hash and anti-cache on the URL that will be requested (gh-1732) + s.url = cacheURL + uncached; + + // Change '%20' to '+' if this is encoded form body content (gh-2658) + } else if ( s.data && s.processData && + ( s.contentType || "" ).indexOf( "application/x-www-form-urlencoded" ) === 0 ) { + s.data = s.data.replace( r20, "+" ); + } + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. 
+ if ( s.ifModified ) { + if ( jQuery.lastModified[ cacheURL ] ) { + jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ cacheURL ] ); + } + if ( jQuery.etag[ cacheURL ] ) { + jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ cacheURL ] ); + } + } + + // Set the correct header, if data is being sent + if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) { + jqXHR.setRequestHeader( "Content-Type", s.contentType ); + } + + // Set the Accepts header for the server, depending on the dataType + jqXHR.setRequestHeader( + "Accept", + s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[ 0 ] ] ? + s.accepts[ s.dataTypes[ 0 ] ] + + ( s.dataTypes[ 0 ] !== "*" ? ", " + allTypes + "; q=0.01" : "" ) : + s.accepts[ "*" ] + ); + + // Check for headers option + for ( i in s.headers ) { + jqXHR.setRequestHeader( i, s.headers[ i ] ); + } + + // Allow custom headers/mimetypes and early abort + if ( s.beforeSend && + ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || completed ) ) { + + // Abort if not done already and return + return jqXHR.abort(); + } + + // Aborting is no longer a cancellation + strAbort = "abort"; + + // Install callbacks on deferreds + completeDeferred.add( s.complete ); + jqXHR.done( s.success ); + jqXHR.fail( s.error ); + + // Get transport + transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR ); + + // If no transport, we auto-abort + if ( !transport ) { + done( -1, "No Transport" ); + } else { + jqXHR.readyState = 1; + + // Send global event + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] ); + } + + // If request was aborted inside ajaxSend, stop there + if ( completed ) { + return jqXHR; + } + + // Timeout + if ( s.async && s.timeout > 0 ) { + timeoutTimer = window.setTimeout( function() { + jqXHR.abort( "timeout" ); + }, s.timeout ); + } + + try { + completed = false; + transport.send( requestHeaders, done ); + } catch ( e ) { + + // Rethrow post-completion 
exceptions + if ( completed ) { + throw e; + } + + // Propagate others as results + done( -1, e ); + } + } + + // Callback for when everything is done + function done( status, nativeStatusText, responses, headers ) { + var isSuccess, success, error, response, modified, + statusText = nativeStatusText; + + // Ignore repeat invocations + if ( completed ) { + return; + } + + completed = true; + + // Clear timeout if it exists + if ( timeoutTimer ) { + window.clearTimeout( timeoutTimer ); + } + + // Dereference transport for early garbage collection + // (no matter how long the jqXHR object will be used) + transport = undefined; + + // Cache response headers + responseHeadersString = headers || ""; + + // Set readyState + jqXHR.readyState = status > 0 ? 4 : 0; + + // Determine if successful + isSuccess = status >= 200 && status < 300 || status === 304; + + // Get response data + if ( responses ) { + response = ajaxHandleResponses( s, jqXHR, responses ); + } + + // Use a noop converter for missing script + if ( !isSuccess && jQuery.inArray( "script", s.dataTypes ) > -1 ) { + s.converters[ "text script" ] = function() {}; + } + + // Convert no matter what (that way responseXXX fields are always set) + response = ajaxConvert( s, response, jqXHR, isSuccess ); + + // If successful, handle type chaining + if ( isSuccess ) { + + // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode. 
+ if ( s.ifModified ) { + modified = jqXHR.getResponseHeader( "Last-Modified" ); + if ( modified ) { + jQuery.lastModified[ cacheURL ] = modified; + } + modified = jqXHR.getResponseHeader( "etag" ); + if ( modified ) { + jQuery.etag[ cacheURL ] = modified; + } + } + + // if no content + if ( status === 204 || s.type === "HEAD" ) { + statusText = "nocontent"; + + // if not modified + } else if ( status === 304 ) { + statusText = "notmodified"; + + // If we have data, let's convert it + } else { + statusText = response.state; + success = response.data; + error = response.error; + isSuccess = !error; + } + } else { + + // Extract error from statusText and normalize for non-aborts + error = statusText; + if ( status || !statusText ) { + statusText = "error"; + if ( status < 0 ) { + status = 0; + } + } + } + + // Set data for the fake xhr object + jqXHR.status = status; + jqXHR.statusText = ( nativeStatusText || statusText ) + ""; + + // Success/Error + if ( isSuccess ) { + deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] ); + } else { + deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] ); + } + + // Status-dependent callbacks + jqXHR.statusCode( statusCode ); + statusCode = undefined; + + if ( fireGlobals ) { + globalEventContext.trigger( isSuccess ? "ajaxSuccess" : "ajaxError", + [ jqXHR, s, isSuccess ? 
success : error ] ); + } + + // Complete + completeDeferred.fireWith( callbackContext, [ jqXHR, statusText ] ); + + if ( fireGlobals ) { + globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] ); + + // Handle the global AJAX counter + if ( !( --jQuery.active ) ) { + jQuery.event.trigger( "ajaxStop" ); + } + } + } + + return jqXHR; + }, + + getJSON: function( url, data, callback ) { + return jQuery.get( url, data, callback, "json" ); + }, + + getScript: function( url, callback ) { + return jQuery.get( url, undefined, callback, "script" ); + } +} ); + +jQuery.each( [ "get", "post" ], function( _i, method ) { + jQuery[ method ] = function( url, data, callback, type ) { + + // Shift arguments if data argument was omitted + if ( isFunction( data ) ) { + type = type || callback; + callback = data; + data = undefined; + } + + // The url can be an options object (which then must have .url) + return jQuery.ajax( jQuery.extend( { + url: url, + type: method, + dataType: type, + data: data, + success: callback + }, jQuery.isPlainObject( url ) && url ) ); + }; +} ); + +jQuery.ajaxPrefilter( function( s ) { + var i; + for ( i in s.headers ) { + if ( i.toLowerCase() === "content-type" ) { + s.contentType = s.headers[ i ] || ""; + } + } +} ); + + +jQuery._evalUrl = function( url, options, doc ) { + return jQuery.ajax( { + url: url, + + // Make this explicit, since user can override this through ajaxSetup (#11264) + type: "GET", + dataType: "script", + cache: true, + async: false, + global: false, + + // Only evaluate the response if it is successful (gh-4126) + // dataFilter is not invoked for failure responses, so using it instead + // of the default converter is kludgy but it works. 
+ converters: { + "text script": function() {} + }, + dataFilter: function( response ) { + jQuery.globalEval( response, options, doc ); + } + } ); +}; + + +jQuery.fn.extend( { + wrapAll: function( html ) { + var wrap; + + if ( this[ 0 ] ) { + if ( isFunction( html ) ) { + html = html.call( this[ 0 ] ); + } + + // The elements to wrap the target around + wrap = jQuery( html, this[ 0 ].ownerDocument ).eq( 0 ).clone( true ); + + if ( this[ 0 ].parentNode ) { + wrap.insertBefore( this[ 0 ] ); + } + + wrap.map( function() { + var elem = this; + + while ( elem.firstElementChild ) { + elem = elem.firstElementChild; + } + + return elem; + } ).append( this ); + } + + return this; + }, + + wrapInner: function( html ) { + if ( isFunction( html ) ) { + return this.each( function( i ) { + jQuery( this ).wrapInner( html.call( this, i ) ); + } ); + } + + return this.each( function() { + var self = jQuery( this ), + contents = self.contents(); + + if ( contents.length ) { + contents.wrapAll( html ); + + } else { + self.append( html ); + } + } ); + }, + + wrap: function( html ) { + var htmlIsFunction = isFunction( html ); + + return this.each( function( i ) { + jQuery( this ).wrapAll( htmlIsFunction ? 
html.call( this, i ) : html ); + } ); + }, + + unwrap: function( selector ) { + this.parent( selector ).not( "body" ).each( function() { + jQuery( this ).replaceWith( this.childNodes ); + } ); + return this; + } +} ); + + +jQuery.expr.pseudos.hidden = function( elem ) { + return !jQuery.expr.pseudos.visible( elem ); +}; +jQuery.expr.pseudos.visible = function( elem ) { + return !!( elem.offsetWidth || elem.offsetHeight || elem.getClientRects().length ); +}; + + + + +jQuery.ajaxSettings.xhr = function() { + try { + return new window.XMLHttpRequest(); + } catch ( e ) {} +}; + +var xhrSuccessStatus = { + + // File protocol always yields status code 0, assume 200 + 0: 200, + + // Support: IE <=9 only + // #1450: sometimes IE returns 1223 when it should be 204 + 1223: 204 + }, + xhrSupported = jQuery.ajaxSettings.xhr(); + +support.cors = !!xhrSupported && ( "withCredentials" in xhrSupported ); +support.ajax = xhrSupported = !!xhrSupported; + +jQuery.ajaxTransport( function( options ) { + var callback, errorCallback; + + // Cross domain only allowed if supported through XMLHttpRequest + if ( support.cors || xhrSupported && !options.crossDomain ) { + return { + send: function( headers, complete ) { + var i, + xhr = options.xhr(); + + xhr.open( + options.type, + options.url, + options.async, + options.username, + options.password + ); + + // Apply custom fields if provided + if ( options.xhrFields ) { + for ( i in options.xhrFields ) { + xhr[ i ] = options.xhrFields[ i ]; + } + } + + // Override mime type if needed + if ( options.mimeType && xhr.overrideMimeType ) { + xhr.overrideMimeType( options.mimeType ); + } + + // X-Requested-With header + // For cross-domain requests, seeing as conditions for a preflight are + // akin to a jigsaw puzzle, we simply never set it to be sure. + // (it can always be set on a per-request basis or even using ajaxSetup) + // For same-domain requests, won't change header if already provided. 
+ if ( !options.crossDomain && !headers[ "X-Requested-With" ] ) { + headers[ "X-Requested-With" ] = "XMLHttpRequest"; + } + + // Set headers + for ( i in headers ) { + xhr.setRequestHeader( i, headers[ i ] ); + } + + // Callback + callback = function( type ) { + return function() { + if ( callback ) { + callback = errorCallback = xhr.onload = + xhr.onerror = xhr.onabort = xhr.ontimeout = + xhr.onreadystatechange = null; + + if ( type === "abort" ) { + xhr.abort(); + } else if ( type === "error" ) { + + // Support: IE <=9 only + // On a manual native abort, IE9 throws + // errors on any property access that is not readyState + if ( typeof xhr.status !== "number" ) { + complete( 0, "error" ); + } else { + complete( + + // File: protocol always yields status 0; see #8605, #14207 + xhr.status, + xhr.statusText + ); + } + } else { + complete( + xhrSuccessStatus[ xhr.status ] || xhr.status, + xhr.statusText, + + // Support: IE <=9 only + // IE9 has no XHR2 but throws on binary (trac-11426) + // For XHR2 non-text, let the caller handle it (gh-2498) + ( xhr.responseType || "text" ) !== "text" || + typeof xhr.responseText !== "string" ? 
+ { binary: xhr.response } : + { text: xhr.responseText }, + xhr.getAllResponseHeaders() + ); + } + } + }; + }; + + // Listen to events + xhr.onload = callback(); + errorCallback = xhr.onerror = xhr.ontimeout = callback( "error" ); + + // Support: IE 9 only + // Use onreadystatechange to replace onabort + // to handle uncaught aborts + if ( xhr.onabort !== undefined ) { + xhr.onabort = errorCallback; + } else { + xhr.onreadystatechange = function() { + + // Check readyState before timeout as it changes + if ( xhr.readyState === 4 ) { + + // Allow onerror to be called first, + // but that will not handle a native abort + // Also, save errorCallback to a variable + // as xhr.onerror cannot be accessed + window.setTimeout( function() { + if ( callback ) { + errorCallback(); + } + } ); + } + }; + } + + // Create the abort callback + callback = callback( "abort" ); + + try { + + // Do send the request (this may raise an exception) + xhr.send( options.hasContent && options.data || null ); + } catch ( e ) { + + // #14683: Only rethrow if this hasn't been notified as an error yet + if ( callback ) { + throw e; + } + } + }, + + abort: function() { + if ( callback ) { + callback(); + } + } + }; + } +} ); + + + + +// Prevent auto-execution of scripts when no explicit dataType was provided (See gh-2432) +jQuery.ajaxPrefilter( function( s ) { + if ( s.crossDomain ) { + s.contents.script = false; + } +} ); + +// Install script dataType +jQuery.ajaxSetup( { + accepts: { + script: "text/javascript, application/javascript, " + + "application/ecmascript, application/x-ecmascript" + }, + contents: { + script: /\b(?:java|ecma)script\b/ + }, + converters: { + "text script": function( text ) { + jQuery.globalEval( text ); + return text; + } + } +} ); + +// Handle cache's special case and crossDomain +jQuery.ajaxPrefilter( "script", function( s ) { + if ( s.cache === undefined ) { + s.cache = false; + } + if ( s.crossDomain ) { + s.type = "GET"; + } +} ); + +// Bind script tag hack 
transport +jQuery.ajaxTransport( "script", function( s ) { + + // This transport only deals with cross domain or forced-by-attrs requests + if ( s.crossDomain || s.scriptAttrs ) { + var script, callback; + return { + send: function( _, complete ) { + script = jQuery( " + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Support Library

+
+

Abstract

+

This document provides some details on LLVM’s Support Library, located in the +source at lib/Support and include/llvm/Support. The library’s purpose +is to shield LLVM from the differences between operating systems for the few +services LLVM needs from the operating system. Much of LLVM is written using +portability features of standard C++. However, in a few areas, system dependent +facilities are needed and the Support Library is the wrapper around those +system calls.

+

By centralizing LLVM’s use of operating system interfaces, we make it possible +for the LLVM tool chain and runtime libraries to be more easily ported to new +platforms since (theoretically) only lib/Support needs to be ported. This +library also unclutters the rest of LLVM from #ifdef use and special cases for +specific operating systems. Such uses are replaced with simple calls to the +interfaces provided in include/llvm/Support.

+

Note that the Support Library is not intended to be a complete operating system +wrapper (such as the Adaptive Communications Environment (ACE) or Apache +Portable Runtime (APR)), but only provides the functionality necessary to +support LLVM.

+

The Support Library was originally referred to as the System Library, written +by Reid Spencer who formulated the design based on similar work originating +from the eXtensible Programming System (XPS). Several people helped with the +effort; especially, Jeff Cohen and Henrik Bach on the Win32 port.

+
+
+

Keeping LLVM Portable

+

In order to keep LLVM portable, LLVM developers should adhere to a set of +portability rules associated with the Support Library. Adherence to these rules +should help the Support Library achieve its goal of shielding LLVM from the +variations in operating system interfaces and doing so efficiently. The +following sections define the rules needed to fulfill this objective.

+
+

Don’t Include System Headers

+

Except in lib/Support, no LLVM source code should directly #include a +system header. Care has been taken to remove all such #includes from LLVM +while lib/Support was being developed. Specifically this means that header +files like “unistd.h”, “windows.h”, “stdio.h”, and “string.h” +are forbidden to be included by LLVM source code outside the implementation of +lib/Support.

+

To obtain system-dependent functionality, existing interfaces to the system +found in include/llvm/Support should be used. If an appropriate interface is +not available, it should be added to include/llvm/Support and implemented in +lib/Support for all supported platforms.

+
+
+

Don’t Expose System Headers

+

The Support Library must shield LLVM from all system headers. To obtain +system level functionality, LLVM source must +#include "llvm/Support/Thing.h" and nothing else. This means that +Thing.h cannot expose any system header files. This protects LLVM from +accidentally using system specific functionality and only allows it via +the lib/Support interface.

+
+
+

Use Standard C Headers

+

The standard C headers (the ones beginning with “c”) are allowed to be +exposed through the lib/Support interface. These headers and the things they +declare are considered to be platform agnostic. LLVM source files may include +them directly or obtain their inclusion through lib/Support interfaces.

+
+
+

Use Standard C++ Headers

+

The standard C++ headers from the standard C++ library and standard +template library may be exposed through the lib/Support interface. These +headers and the things they declare are considered to be platform agnostic. +LLVM source files may include them or obtain their inclusion through +lib/Support interfaces.

+
+
+

High Level Interface

+

The entry points specified in the interface of lib/Support must be aimed at +completing some reasonably high level task needed by LLVM. We do not want to +simply wrap each operating system call. It would be preferable to wrap several +operating system calls that are always used in conjunction with one another by +LLVM.

+

For example, consider what is needed to execute a program, wait for it to +complete, and return its result code. On Unix, this involves the following +operating system calls: getenv, fork, execve, and wait. The +correct thing for lib/Support to provide is a function, say +ExecuteProgramAndWait, that implements the functionality completely. What +we don’t want is wrappers for the operating system calls involved.

+

There must not be a one-to-one relationship between operating system +calls and the Support library’s interface. Any such interface function will be +suspicious.

+
+
+

No Unused Functionality

+

There must be no functionality specified in the interface of lib/Support +that isn’t actually used by LLVM. We’re not writing a general purpose operating +system wrapper here, just enough to satisfy LLVM’s needs. And, LLVM doesn’t +need much. This design goal aims to keep the lib/Support interface small and +understandable which should foster its actual use and adoption.

+
+
+

No Duplicate Implementations

+

The implementation of a function for a given platform must be written exactly +once. This implies that it must be possible to apply a function’s +implementation to multiple operating systems if those operating systems can +share the same implementation. This rule applies to the set of operating +systems supported for a given class of operating system (e.g. Unix, Win32).

+
+
+

No Virtual Methods

+

The Support Library interfaces can be called quite frequently by LLVM. In order +to make those calls as efficient as possible, we discourage the use of virtual +methods. There is no need to use inheritance for implementation differences; it +just adds complexity. The #include mechanism works just fine.

+
+
+

No Exposed Functions

+

Any functions defined by system libraries (i.e. not defined by lib/Support) +must not be exposed through the lib/Support interface, even if the header +file for that function is not exposed. This prevents inadvertent use of system +specific functionality.

+

For example, the stat system call is notorious for having variations in the +data it provides. lib/Support must not declare stat nor allow it to be +declared. Instead it should provide its own interface to discovering +information about files and directories. Those interfaces may be implemented in +terms of stat but that is strictly an implementation detail. The interface +provided by the Support Library must be implemented on all platforms (even +those without stat).

+
+
+

No Exposed Data

+

Any data defined by system libraries (i.e. not defined by lib/Support) must +not be exposed through the lib/Support interface, even if the header file +for that data is not exposed. As with functions, this prevents inadvertent +use of data that might not exist on all platforms.

+
+
+

Minimize Soft Errors

+

Operating system interfaces will generally provide error results for every +little thing that could go wrong. In almost all cases, you can divide these +error results into two groups: normal/good/soft and abnormal/bad/hard. That is, +some of the errors are simply information like “file not found”, “insufficient +privileges”, etc. while other errors are much harder like “out of space”, “bad +disk sector”, or “system call interrupted”. We’ll call the first group “soft” +errors and the second group “hard” errors.

+

lib/Support must always attempt to minimize soft errors. This is a design +requirement because the minimization of soft errors can affect the granularity +and the nature of the interface. In general, if you find that you’re wanting to +throw soft errors, you must review the granularity of the interface because it +is likely you’re trying to implement something that is too low level. The rule +of thumb is to provide interface functions that can’t fail, except when +faced with hard errors.

+

For a trivial example, suppose we wanted to add an “OpenFileForWriting” +function. For many operating systems, if the file doesn’t exist, attempting to +open the file will produce an error. However, lib/Support should not simply +throw that error if it occurs because it’s a soft error. The problem is that the +interface function, OpenFileForWriting, is too low level. It should be +OpenOrCreateFileForWriting. In the case of the soft “doesn’t exist” error, +this function would just create it and then open it for writing.

+

This design principle needs to be maintained in lib/Support because it +avoids the propagation of soft error handling throughout the rest of LLVM. +Hard errors will generally just cause a termination for an LLVM tool so don’t +be bashful about throwing them.

+

Rules of thumb:

+
    +
  1. Don’t throw soft errors, only hard errors.

  2. +
  3. If you’re tempted to throw a soft error, re-think the interface.

  4. +
  5. Handle internally the most common normal/good/soft error conditions +so the rest of LLVM doesn’t have to.

  6. +
+
+
+

No throw Specifications

+

None of the lib/Support interface functions may be declared with C++ +throw() specifications on them. This requirement makes sure that the +compiler does not insert additional exception handling code into the interface +functions. This is a performance consideration: lib/Support functions are +at the bottom of many call chains and as such can be frequently called. We +need them to be as efficient as possible. However, no routines in the system +library should actually throw exceptions.

+
+
+

Code Organization

+

Implementations of the Support Library interface are separated by their general +class of operating system. Currently only Unix and Win32 classes are defined +but more could be added for other operating system classifications. To +distinguish which implementation to compile, the code in lib/Support uses +the LLVM_ON_UNIX and _WIN32 #defines. Each source file in +lib/Support, after implementing the generic (operating system independent) +functionality needs to include the correct implementation using a set of +#if defined(LLVM_ON_XYZ) directives. For example, if we had +lib/Support/Path.cpp, we’d expect to see in that file:

+
#if defined(LLVM_ON_UNIX)
+#include "Unix/Path.inc"
+#endif
+#if defined(_WIN32)
+#include "Windows/Path.inc"
+#endif
+
+
+

The implementation in lib/Support/Unix/Path.inc should handle all Unix +variants. The implementation in lib/Support/Windows/Path.inc should handle +all Windows variants. What this does is quickly differentiate the basic class +of operating system that will provide the implementation. The specific details +for a given platform must still be determined through the use of #ifdef.

+
+
+

Consistent Semantics

+

The implementation of a lib/Support interface can vary drastically between +platforms. That’s okay as long as the end result of the interface function is +the same. For example, a function to create a directory is pretty +straightforward on all operating systems. System V IPC on the other hand isn’t even +supported on all platforms. Instead of “supporting” System V IPC, +lib/Support should provide an interface to the basic concept of +inter-process communications. The implementations might use System V IPC if +that was available or named pipes, or whatever gets the job done effectively +for a given operating system. In all cases, the interface and the +implementation must be semantically consistent.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SupportPolicy.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SupportPolicy.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SupportPolicy.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SupportPolicy.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,380 @@ + + + + + + + + + LLVM Community Support Policy — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Community Support Policy

+

As a compilation infrastructure, LLVM has multiple types of users, both +downstream and upstream, of many combinations of its projects, tools and +libraries.

+

There is a core part of it that encompasses the implementation of the compiler +(front/middle/back ends), run-time libraries (RT, C++, OpenMP, etc) and +associated tools (debugger, linker, object file manipulation, etc). These +components are present in the public release on our supported architectures +and operating systems and the whole community must maintain and care about them.

+

There are, however, other components within the main repository that either +cater to a specific sub-community of LLVM (upstream or downstream) or +help parts of the community to integrate LLVM into their own development tools +or external projects. Those parts of the main repository don’t always have +rigorous testing like the core parts, nor are they validated and shipped with +our public upstream releases.

+

Even though those components are not a core part of the project, we have enough sub-communities +needing those changes with enough overlap that having them in the main +repository is beneficial to minimise the repetition of those changes in all +the external repositories that need them.

+

But the maintenance cost of such a diverse ecosystem is non-trivial, so we divide +the level of support into two tiers: core and peripheral, with two +different levels of impact and responsibilities. Those tiers refer only to the +main repository (llvm-project) and not the other repositories in our git +project, unless explicitly stated.

+

Regardless of the tier, all code must follow the existing policies on quality, +reviews, style, etc.

+
+

Core Tier

+

The core tier encompasses all of the code in the main repository that is +in production, is actively tested and released in a regular schedule, including +core LLVM APIs and infrastructure, front/middle/back-ends, run-time libraries, +tools, etc.

+

It is the responsibility of every LLVM developer to care for the core tier +regardless of where their work is applied to.

+
+

What is covered

+
+
The core tier is composed of:
    +
  • Core code (llvm-project) present in official releases and buildbots: +compiler, debugger, linker, libraries, etc, including infrastructure code +(table-gen, lit, file-check, unit-tests, etc).

  • +
  • Build infrastructure that creates releases and buildbots (CMake, scripts).

  • +
  • Phabricator and +buildbot infrastructure.

  • +
  • The test-suite.

  • +
+
+
+
+
+

Requirements

+
+
Code in this tier must:
    +
  • Keep official buildbots green, with warnings on breakages being emailed to +all affected developers. Those must be fixed as soon as possible or patches +must be reverted, as per review policy.

  • +
  • Bit-rot of a component in the core tier will result in that component being +downgraded to the peripheral tier or being removed. Sub-communities can +avoid this by fixing all raised issues in a timely manner.

  • +
+
+
+
+
+
+

Peripheral Tier

+

The peripheral tier encompasses the parts of LLVM that cater to a specific +sub-community and which don’t usually affect the core components directly.

+

This includes experimental back-ends, disabled-by-default options and +alternative paths (work-in-progress replacements) in the same repository, as +well as separate efforts to integrate LLVM development with local practices.

+

It is the responsibility of each sub-community to care about their own parts +and the intersection of that with the core tier and other peripheral parts.

+
+
There are three main groups of code that fit in this category:
    +
  • Code that is making its way into LLVM, via the experimental +roadmap or similar efforts.

  • +
  • Code that is making its way out of LLVM, via deprecation, replacement or +bit-rot, and will be removed if the sub-community that cares about it +cannot maintain it.

  • +
  • Code that isn’t meant to be in LLVM core and can coexist with the code in +the core tier (and others in the peripheral tier) long term, without causing +breakages or disturbances.

  • +
+
+
+
+

What is covered

+
+
The peripheral tier is composed of:
    +
  • Experimental targets and options that haven’t been enabled by default yet.

  • +
  • Main repository projects that don’t get released or regularly tested.

  • +
  • Legacy tools and scripts that aren’t used in upstream validation.

  • +
  • Alternative build systems (ex. GN, Bazel) and related infrastructure.

  • +
  • Tools support (ex. gdb scripts, editor configuration, helper scripts).

  • +
+
+
+
+
+

Requirements

+
+
Code in this tier must:
    +
  • Have a clear benefit for residing in the main repository, catering to an +active sub-community (upstream or downstream).

  • +
  • Be actively maintained by such sub-community and have its problems addressed +in a timely manner.

  • +
+
+
Code in this tier must not:
    +
  • Break or invalidate core tier code or infrastructure. If that happens +accidentally, reverting functionality and working on the issues offline +is the only acceptable course of action.

  • +
  • Negatively affect development of core tier code, with the sub-community +involved responsible for making changes to address specific concerns.

  • +
  • Negatively affect other peripheral tier code, with the sub-communities +involved tasked to resolve the issues, still making sure the solution doesn’t +break or invalidate the core tier.

  • +
  • Impose sub-optimal implementation strategies on core tier components as a +result of idiosyncrasies in the peripheral component.

  • +
  • Have build infrastructure that spams all developers about their breakages.

  • +
  • Fall into disrepair. This is a reflection of lack of an active sub-community +and will result in removal.

  • +
+
+
Code in this tier should:
    +
  • Have infrastructure to test, whenever meaningful, with either no warnings or +notification contained within the sub-community.

  • +
  • Have support and testing that scales with the complexity and resilience of +the component, with the bar for simple and gracefully-degrading components +(such as editor bindings) much lower than for complex components that must +remain fresh with HEAD (such as experimental back-ends or alternative build +systems).

  • +
  • Have a document making clear the status of implementation, level of support +available, who the sub-community is and, if applicable, roadmap for inclusion +into the core tier.

  • +
  • Be restricted to a specific directory or have a consistent pattern (ex. +unique file suffix), making it easy to remove when necessary.

  • +
+
+
+
+
+
+

Inclusion Policy

+

To add a new peripheral component, send an RFC to the appropriate dev list +proposing its addition and explaining how it will meet the support requirements +listed above. Different types of components could require different levels of +detail. When in doubt, ask the community what’s the best approach.

+

Inclusion must reach consensus in the RFC by the community and the approval of +the corresponding review (by multiple members of the community) is the official +note of acceptance.

+

After merge, there often is a period of transition, where teething issues on +existing buildbots are discovered and fixed. If those cannot be fixed straight +away, the sub-community is responsible for tracking and reverting all the +pertinent patches and retrying the inclusion review.

+

Once the component is stable in tree, it must follow this policy and the +deprecation rules below apply.

+

Due to the uncertain nature of inclusion, it’s advisable that new components +are not added too close to a release branch. The time will depend on the size +and complexity of the component, so adding release and testing managers on the +RFC and review is strongly advisable.

+
+
+

Deprecation Policy

+

The LLVM code base has a number of files that aren’t being actively maintained. +But not all of those files are obstructing the development of the project and +so they remain in the repository with the assumption that they could still be +useful for downstream users.

+

For code to remain in the repository, its presence must not impose an undue +burden on maintaining other components (core or peripheral).

+
+

Warnings

+

There are multiple types of issues that might trigger a request for deprecation, +including (but not limited to):

+
+
    +
  • Changes in a component consistently break other areas of the project.

  • +
  • Components go broken for long periods of time (weeks or more).

  • +
  • Clearly superior alternatives are in use and maintenance is painful.

  • +
  • Builds and tests are harder / take longer, increasing the cost of +maintenance, overtaking the perceived benefits.

  • +
+
+

If the maintenance cost is higher than is acceptable to the majority of +developers, it means that either the sub-community is too small (and the extra +cost should be paid locally), or not active enough (and the problems won’t be +fixed any time soon). In either case, removal of such a problematic component is +justified.

+
+
+

Steps for removal

+

However clear the needs for removal are, we should take an incremental approach +to deprecating code, especially when there’s still a sub-community that cares +about it. In that sense, code will never be removed outright without a series +of steps being taken.

+
+
A minimum set of steps should be:
    +
  1. A proposal for removal / deactivation should be made to the developers’ +mailing lists (llvm-dev, cfe-dev, lldb-dev, etc), with a clear +statement of the maintenance costs imposed and the alternatives, if +applicable.

  2. +
  3. There must be enough consensus on the list that removal is warranted, and no +pending proposals to fix the situation from a sub-community.

  4. +
  5. An announcement for removal must be made on the same lists, with ample time +for downstream users to take action on their local infrastructure. The time +will depend on what is being removed.

    +
      +
    1. If a script or documents are to be removed, they can always be pulled +from previous revision, and can be removed within days.

    2. +
    3. If a whole target is removed, we need to first announce it publicly, and +potentially mark it as deprecated in one release, only to remove it in the +next release.

    4. +
    5. Everything else will fall in between those two extremes.

    6. +
    +
  6. +
  7. The removal is made by either the proposer or the sub-community that used to +maintain it, with replacements and arrangements made atomically on the same +commit.

  8. +
+
+
+

If a proposal for removal is delayed by the promise that a sub-community will take +care of the code affected, the sub-community will have a period of time to fix all the +issues (depending on each case, as above), and if those are not fixed in time, a +subsequent request for removal should be made and the community may elect to +eject the component without further attempts to fix.

+
+
+

Reinstatement

+

If a component is removed from LLVM, it may, at a later date, request inclusion +of a modified version, with evidence that all of the issues were fixed and that +there is a clear sub-community that will maintain it.

+

By consequence, the pressure on such sub-community will be higher to keep +overall maintenance costs to a minimum and will need to show steps to mitigate +all of the issues that were listed as reasons for its original removal.

+

Failing on those again will lead the component to become a candidate for removal yet again.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SystemLibrary.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SystemLibrary.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/SystemLibrary.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/SystemLibrary.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,147 @@ + + + + + + + + + System Library — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

System Library

+
+

Moved

+

The System Library has been renamed to Support Library with documentation +available at Support Library. Please, change your links to that page.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/BackEnds.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/BackEnds.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/BackEnds.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/BackEnds.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,1038 @@ + + + + + + + + + TableGen BackEnds — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

TableGen BackEnds

+ +
+

Introduction

+

TableGen backends are at the core of TableGen’s functionality. The source +files provide the classes and records that are parsed and end up as a +collection of record instances, but it’s up to the backend to interpret and +print the records in a way that is meaningful to the user (normally a C++ +include file or a textual list of warnings, options, and error messages).

+

TableGen is used by LLVM, Clang, and MLIR with very different goals. +LLVM uses it as a way to automate the generation of massive amounts of +information regarding instructions, schedules, cores, and architecture +features. Some backends generate output that is consumed by more than one +source file, so they need to be created in a way that makes it easy for +preprocessor tricks to be used. Some backends can also print C++ code +structures, so that they can be directly included as-is.

+

Clang, on the other hand, uses it mainly for diagnostic messages (errors, +warnings, tips) and attributes, so more on the textual end of the scale.

+

MLIR uses TableGen to define operations, operation dialects, and operation +traits.

+

See the TableGen Programmer’s Reference for an in-depth +description of TableGen, and the TableGen Backend Developer’s Guide for a guide to writing a new backend.

+
+
+

LLVM BackEnds

+
+

Warning

+

This portion is incomplete. Each section below needs three subsections: +description of its purpose with a list of users, output generated from +generic input, and finally why it needed a new backend (in case there’s +something similar).

+
+

Overall, each backend will take the same TableGen file type and transform into +similar output for different targets/uses. There is an implicit contract between +the TableGen files, the back-ends and their users.

+

For instance, a global contract is that each back-end produces macro-guarded +sections. Based on whether the file is included by a header or a source file, +or even in which context of each file the include is being used, you have +to define a macro just before including it, to get the right output:

+
#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
+
+

And just part of the generated file would be included. This is useful if +you need the same information in multiple formats (instantiation, initialization, +getter/setter functions, etc) from the same source TableGen file without having +to re-compile the TableGen file multiple times.

+

Sometimes, multiple macros might be defined before the same include file to +output multiple blocks:

+
#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#include "ARMGenAsmMatcher.inc"
+
+
+

The macros will be undef’d automatically as they’re used, in the include file.

+

On all LLVM back-ends, the llvm-tblgen binary will be executed on the root +TableGen file <Target>.td, which should include all others. This guarantees +that all information needed is accessible, and that no duplication is needed +in the TableGen files.

+
+

CodeEmitter

+

Purpose: CodeEmitterGen uses the descriptions of instructions and their fields to +construct an automated code emitter: a function that, given a MachineInstr, +returns the (currently, 32-bit unsigned) value of the instruction.

+

Output: C++ code, implementing the target’s CodeEmitter +class by overriding the virtual functions as <Target>CodeEmitter::function().

+

Usage: Used to include directly at the end of <Target>MCCodeEmitter.cpp.

+
+
+

RegisterInfo

+

Purpose: This tablegen backend is responsible for emitting a description of a target +register file for a code generator. It uses instances of the Register, +RegisterAliases, and RegisterClass classes to gather this information.

+

Output: C++ code with enums and structures representing the register mappings, +properties, masks, etc.

+

Usage: Both on <Target>BaseRegisterInfo and <Target>MCTargetDesc (headers +and source files) with macros defining in which they are for declaration vs. +initialization issues.

+
+
+

InstrInfo

+

Purpose: This tablegen backend is responsible for emitting a description of the target +instruction set for the code generator. (what are the differences from CodeEmitter?)

+

Output: C++ code with enums and structures representing the instruction mappings, +properties, masks, etc.

+

Usage: Both on <Target>BaseInstrInfo and <Target>MCTargetDesc (headers +and source files) with macros defining in which they are for declaration vs. +initialization issues.

+
+
+

AsmWriter

+

Purpose: Emits an assembly printer for the current target.

+

Output: Implementation of <Target>InstPrinter::printInstruction(), among +other things.

+

Usage: Included directly into InstPrinter/<Target>InstPrinter.cpp.

+
+
+

AsmMatcher

+

Purpose: Emits a target specifier matcher for +converting parsed assembly operands in the MCInst structures. It also +emits a matcher for custom operand parsing. Extensive documentation is +written on the AsmMatcherEmitter.cpp file.

+

Output: Assembler parsers’ matcher functions, declarations, etc.

+

Usage: Used in back-ends’ AsmParser/<Target>AsmParser.cpp for +building the AsmParser class.

+
+
+

Disassembler

+

Purpose: Contains disassembler table emitters for various +architectures. Extensive documentation is written on the +DisassemblerEmitter.cpp file.

+

Output: Decoding tables, static decoding functions, etc.

+

Usage: Directly included in Disassembler/<Target>Disassembler.cpp +to cater for all default decodings, after all hand-made ones.

+
+
+

PseudoLowering

+

Purpose: Generate pseudo instruction lowering.

+

Output: Implements <Target>AsmPrinter::emitPseudoExpansionLowering().

+

Usage: Included directly into <Target>AsmPrinter.cpp.

+
+
+

CallingConv

+

Purpose: Responsible for emitting descriptions of the calling +conventions supported by this target.

+

Output: Implement static functions to deal with calling conventions +chained by matching styles, returning false on no match.

+

Usage: Used in ISelLowering and FastIsel as function pointers to +implementation returned by a CC selection function.

+
+
+

DAGISel

+

Purpose: Generate a DAG instruction selector.

+

Output: Creates huge functions for automating DAG selection.

+

Usage: Included in <Target>ISelDAGToDAG.cpp inside the target’s +implementation of SelectionDAGISel.

+
+
+

DFAPacketizer

+

Purpose: This class parses the Schedule.td file and produces an API that +can be used to reason about whether an instruction can be added to a packet +on a VLIW architecture. The class internally generates a deterministic finite +automaton (DFA) that models all possible mappings of machine instructions +to functional units as instructions are added to a packet.

+

Output: Scheduling tables for GPU back-ends (Hexagon, AMD).

+

Usage: Included directly on <Target>InstrInfo.cpp.

+
+
+

FastISel

+

Purpose: This tablegen backend emits code for use by the “fast” +instruction selection algorithm. See the comments at the top of +lib/CodeGen/SelectionDAG/FastISel.cpp for background. This file +scans through the target’s tablegen instruction-info files +and extracts instructions with obvious-looking patterns, and it emits +code to look up these instructions by type and operator.

+

Output: Generates Predicate and FastEmit methods.

+

Usage: Implements private methods of the targets’ implementation +of FastISel class.

+
+
+

Subtarget

+

Purpose: Generate subtarget enumerations.

+

Output: Enums, globals, local tables for sub-target information.

+

Usage: Populates <Target>Subtarget and +MCTargetDesc/<Target>MCTargetDesc files (both headers and source).

+
+
+

Intrinsic

+

Purpose: Generate (target) intrinsic information.

+
+
+

OptParserDefs

+

Purpose: Print enum values for a class.

+
+
+

SearchableTables

+

Purpose: Generate custom searchable tables.

+

Output: Enums, global tables, and lookup helper functions.

+

Usage: This backend allows generating free-form, target-specific tables +from TableGen records. The ARM and AArch64 targets use this backend to generate +tables of system registers; the AMDGPU target uses it to generate meta-data +about complex image and memory buffer instructions.

+

See SearchableTables Reference for a detailed description.

+
+
+

CTags

+

Purpose: This tablegen backend emits an index of definitions in ctags(1) +format. A helper script, utils/TableGen/tdtags, provides an easier-to-use +interface; run ‘tdtags -H’ for documentation.

+
+
+

X86EVEX2VEX

+

Purpose: This X86 specific tablegen backend emits tables that map EVEX +encoded instructions to their VEX encoded identical instruction.

+
+
+
+

Clang BackEnds

+
+

ClangAttrClasses

+

Purpose: Creates Attrs.inc, which contains semantic attribute class +declarations for any attribute in Attr.td that has not set ASTNode = 0. +This file is included as part of Attr.h.

+
+
+

ClangAttrParserStringSwitches

+

Purpose: Creates AttrParserStringSwitches.inc, which contains +StringSwitch::Case statements for parser-related string switches. Each switch +is given its own macro (such as CLANG_ATTR_ARG_CONTEXT_LIST, or +CLANG_ATTR_IDENTIFIER_ARG_LIST), which is expected to be defined before +including AttrParserStringSwitches.inc, and undefined after.

+
+
+

ClangAttrImpl

+

Purpose: Creates AttrImpl.inc, which contains semantic attribute class +definitions for any attribute in Attr.td that has not set ASTNode = 0. +This file is included as part of AttrImpl.cpp.

+
+
+

ClangAttrList

+

Purpose: Creates AttrList.inc, which is used when a list of semantic +attribute identifiers is required. For instance, AttrKinds.h includes this +file to generate the list of attr::Kind enumeration values. This list is +separated out into multiple categories: attributes, inheritable attributes, and +inheritable parameter attributes. This categorization happens automatically +based on information in Attr.td and is used to implement the classof +functionality required for dyn_cast and similar APIs.

+
+
+

ClangAttrPCHRead

+

Purpose: Creates AttrPCHRead.inc, which is used to deserialize attributes +in the ASTReader::ReadAttributes function.

+
+
+

ClangAttrPCHWrite

+

Purpose: Creates AttrPCHWrite.inc, which is used to serialize attributes in +the ASTWriter::WriteAttributes function.

+
+
+

ClangAttrSpellings

+

Purpose: Creates AttrSpellings.inc, which is used to implement the +__has_attribute feature test macro.

+
+
+

ClangAttrSpellingListIndex

+

Purpose: Creates AttrSpellingListIndex.inc, which is used to map parsed +attribute spellings (including which syntax or scope was used) to an attribute +spelling list index. These spelling list index values are internal +implementation details exposed via +AttributeList::getAttributeSpellingListIndex.

+
+
+

ClangAttrVisitor

+

Purpose: Creates AttrVisitor.inc, which is used when implementing +recursive AST visitors.

+
+
+

ClangAttrTemplateInstantiate

+

Purpose: Creates AttrTemplateInstantiate.inc, which implements the +instantiateTemplateAttribute function, used when instantiating a template +that requires an attribute to be cloned.

+
+
+

ClangAttrParsedAttrList

+

Purpose: Creates AttrParsedAttrList.inc, which is used to generate the +AttributeList::Kind parsed attribute enumeration.

+
+
+

ClangAttrParsedAttrImpl

+

Purpose: Creates AttrParsedAttrImpl.inc, which is used by +AttributeList.cpp to implement several functions on the AttributeList +class. This functionality is implemented via the AttrInfoMap ParsedAttrInfo +array, which contains one element per parsed attribute object.

+
+
+

ClangAttrParsedAttrKinds

+

Purpose: Creates AttrParsedAttrKinds.inc, which is used to implement the +AttributeList::getKind function, mapping a string (and syntax) to a parsed +attribute AttributeList::Kind enumeration.

+
+
+

ClangAttrDump

+

Purpose: Creates AttrDump.inc, which dumps information about an attribute. +It is used to implement ASTDumper::dumpAttr.

+
+
+

ClangDiagsDefs

+

Generate Clang diagnostics definitions.

+
+
+

ClangDiagGroups

+

Generate Clang diagnostic groups.

+
+
+

ClangDiagsIndexName

+

Generate Clang diagnostic name index.

+
+
+

ClangCommentNodes

+

Generate Clang AST comment nodes.

+
+
+

ClangDeclNodes

+

Generate Clang AST declaration nodes.

+
+
+

ClangStmtNodes

+

Generate Clang AST statement nodes.

+
+
+

ClangSACheckers

+

Generate Clang Static Analyzer checkers.

+
+
+

ClangCommentHTMLTags

+

Generate efficient matchers for HTML tag names that are used in documentation comments.

+
+
+

ClangCommentHTMLTagsProperties

+

Generate efficient matchers for HTML tag properties.

+
+
+

ClangCommentHTMLNamedCharacterReferences

+

Generate function to translate named character references to UTF-8 sequences.

+
+
+

ClangCommentCommandInfo

+

Generate command properties for commands that are used in documentation comments.

+
+
+

ClangCommentCommandList

+

Generate list of commands that are used in documentation comments.

+
+
+

ArmNeon

+

Generate arm_neon.h for clang.

+
+
+

ArmNeonSema

+

Generate ARM NEON sema support for clang.

+
+
+

ArmNeonTest

+

Generate ARM NEON tests for clang.

+
+
+

AttrDocs

+

Purpose: Creates AttributeReference.rst from AttrDocs.td, and is +used for documenting user-facing attributes.

+
+
+
+

General BackEnds

+ + +
+

JSON Reference

+

Purpose: Output all the values in every def, as a JSON data +structure that can be easily parsed by a variety of languages. Useful +for writing custom backends without having to modify TableGen itself, +or for performing auxiliary analysis on the same TableGen data passed +to a built-in backend.

+

Output:

+

The root of the output file is a JSON object (i.e. dictionary), +containing the following fixed keys:

+
    +
  • !tablegen_json_version: a numeric version field that will +increase if an incompatible change is ever made to the structure of +this data. The format described here corresponds to version 1.

  • +
  • !instanceof: a dictionary whose keys are the class names defined +in the TableGen input. For each key, the corresponding value is an +array of strings giving the names of def records that derive +from that class. So root["!instanceof"]["Instruction"], for +example, would list the names of all the records deriving from the +class Instruction.

  • +
+

For each def record, the root object also has a key for the record +name. The corresponding value is a subsidiary object containing the +following fixed keys:

+
    +
  • !superclasses: an array of strings giving the names of all the +classes that this record derives from.

  • +
  • !fields: an array of strings giving the names of all the variables +in this record that were defined with the field keyword.

  • +
  • !name: a string giving the name of the record. This is always +identical to the key in the JSON root object corresponding to this +record’s dictionary. (If the record is anonymous, the name is +arbitrary.)

  • +
  • !anonymous: a boolean indicating whether the record’s name was +specified by the TableGen input (if it is false), or invented by +TableGen itself (if true).

  • +
+

For each variable defined in a record, the def object for that +record also has a key for the variable name. The corresponding value +is a translation into JSON of the variable’s value, using the +conventions described below.

+

Some TableGen data types are translated directly into the +corresponding JSON type:

+
    +
  • A completely undefined value (e.g. for a variable declared without +initializer in some superclass of this record, and never initialized +by the record itself or any other superclass) is emitted as the JSON +null value.

  • +
  • int and bit values are emitted as numbers. Note that +TableGen int values are capable of holding integers too large to +be exactly representable in IEEE double precision. The integer +literal in the JSON output will show the full exact integer value. +So if you need to retrieve large integers with full precision, you +should use a JSON reader capable of translating such literals back +into 64-bit integers without losing precision, such as Python’s +standard json module.

  • +
  • string and code values are emitted as JSON strings.

  • +
  • list<T> values, for any element type T, are emitted as JSON +arrays. Each element of the array is represented in turn using these +same conventions.

  • +
  • bits values are also emitted as arrays. A bits array is +ordered from least-significant bit to most-significant. So the +element with index i corresponds to the bit described as +x{i} in TableGen source. However, note that this means that +scripting languages are likely to display the array in the +opposite order from the way it appears in the TableGen source or in +the diagnostic -print-records output.

  • +
+

All other TableGen value types are emitted as a JSON object, +containing two standard fields: kind is a discriminator describing +which kind of value the object represents, and printable is a +string giving the same representation of the value that would appear +in -print-records.

+
    +
  • A reference to a def object has kind=="def", and has an +extra field def giving the name of the object referred to.

  • +
  • A reference to another variable in the same record has +kind=="var", and has an extra field var giving the name of +the variable referred to.

  • +
  • A reference to a specific bit of a bits-typed variable in the +same record has kind=="varbit", and has two extra fields: +var gives the name of the variable referred to, and index +gives the index of the bit.

  • +
  • A value of type dag has kind=="dag", and has two extra +fields. operator gives the initial value after the opening +parenthesis of the dag initializer; args is an array giving the +following arguments. The elements of args are arrays of length +2, giving the value of each argument followed by its colon-suffixed +name (if any). For example, in the JSON representation of the dag +value (Op 22, "hello":$foo) (assuming that Op is the name of +a record defined elsewhere with a def statement):

    +
      +
    • operator will be an object in which kind=="def" and +def=="Op"

    • +
    • args will be the array [[22, null], ["hello", "foo"]].

    • +
    +
  • +
  • If any other kind of value or complicated expression appears in the +output, it will have kind=="complex", and no additional fields. +These values are not expected to be needed by backends. The standard +printable field can be used to extract a representation of them +in TableGen source syntax if necessary.

  • +
+
+
+

SearchableTables Reference

+

A TableGen include file, SearchableTable.td, provides classes for +generating C++ searchable tables. These tables are described in the +following sections. To generate the C++ code, run llvm-tblgen with the +--gen-searchable-tables option, which invokes the backend that generates +the tables from the records you provide.

+

Each of the data structures generated for searchable tables is guarded by an +#ifdef. This allows you to include the generated .inc file and select only +certain data structures for inclusion. The examples below show the macro +names used in these guards.

+
+

Generic Enumerated Types

+

The GenericEnum class makes it easy to define a C++ enumerated type and +the enumerated elements of that type. To define the type, define a record +whose parent class is GenericEnum and whose name is the desired enum +type. This class provides three fields, which you can set in the record +using the let statement.

+
    +
  • string FilterClass. The enum type will have one element for each record +that derives from this class. These records are collected to assemble the +complete set of elements.

  • +
  • string NameField. The name of a field in the collected records that specifies +the name of the element. If a record has no such field, the record’s +name will be used.

  • +
  • string ValueField. The name of a field in the collected records that +specifies the numerical value of the element. If a record has no such +field, it will be assigned an integer value. Values are assigned in +alphabetical order starting with 0.

  • +
+

Here is an example where the values of the elements are specified +explicitly, as a template argument to the BEntry class. The resulting +C++ code is shown.

+
def BValues : GenericEnum {
+  let FilterClass = "BEntry";
+  let NameField = "Name";
+  let ValueField = "Encoding";
+}
+
+class BEntry<bits<16> enc> {
+  string Name = NAME;
+  bits<16> Encoding = enc;
+}
+
+def BFoo   : BEntry<0xac>;
+def BBar   : BEntry<0x14>;
+def BZoo   : BEntry<0x80>;
+def BSnork : BEntry<0x4c>;
+
+
+
#ifdef GET_BValues_DECL
+enum BValues {
+  BBar = 20,
+  BFoo = 172,
+  BSnork = 76,
+  BZoo = 128,
+};
+#endif
+
+
+

In the following example, the values of the elements are assigned +automatically. Note that values are assigned from 0, in alphabetical order +by element name.

+
def CEnum : GenericEnum {
+  let FilterClass = "CEnum";
+}
+
+class CEnum;
+
+def CFoo : CEnum;
+def CBar : CEnum;
+def CBaz : CEnum;
+
+
+
#ifdef GET_CEnum_DECL
+enum CEnum {
+  CBar = 0,
+  CBaz = 1,
+  CFoo = 2,
+};
+#endif
+
+
+
+
+

Generic Tables

+

The GenericTable class is used to define a searchable generic table. +TableGen produces C++ code to define the table entries and also produces +the declaration and definition of a function to search the table based on a +primary key. To define the table, define a record whose parent class is +GenericTable and whose name is the name of the global table of entries. +This class provides six fields.

+
    +
  • string FilterClass. The table will have one entry for each record +that derives from this class.

  • +
  • string CppTypeName. The name of the C++ struct/class type of the +table that holds the entries. If unspecified, the FilterClass name is +used.

  • +
  • list<string> Fields. A list of the names of the fields in the +collected records that contain the data for the table entries. The order of +this list determines the order of the values in the C++ initializers. See +below for information about the types of these fields.

  • +
  • list<string> PrimaryKey. The list of fields that make up the +primary key.

  • +
  • string PrimaryKeyName. The name of the generated C++ function +that performs a lookup on the primary key.

  • +
  • bit PrimaryKeyEarlyOut. See the third example below.

  • +
+

TableGen attempts to deduce the type of each of the table fields so that it +can format the C++ initializers in the emitted table. It can deduce bit, +bits<n>, string, Intrinsic, and Instruction. These can be +used in the primary key. Any other field types must be specified +explicitly; this is done as shown in the second example below. Such fields +cannot be used in the primary key.

+

One special case of the field type has to do with code. Arbitrary code is +represented by a string, but has to be emitted as a C++ initializer without +quotes. If the code field was defined using a code literal ([{...}]), +then TableGen will know to emit it without quotes. However, if it was +defined using a string literal or complex string expression, then TableGen +will not know. In this case, you can force TableGen to treat the field as +code by including the following line in the GenericTable record, where +xxx is the code field name.

+
string TypeOf_xxx = "code";
+
+
+

Here is an example where TableGen can deduce the field types. Note that the +table entry records are anonymous; the names of entry records are +irrelevant.

+
def ATable : GenericTable {
+  let FilterClass = "AEntry";
+  let Fields = ["Str", "Val1", "Val2"];
+  let PrimaryKey = ["Val1", "Val2"];
+  let PrimaryKeyName = "lookupATableByValues";
+}
+
+class AEntry<string str, int val1, int val2> {
+  string Str = str;
+  bits<8> Val1 = val1;
+  bits<10> Val2 = val2;
+}
+
+def : AEntry<"Bob",   5, 3>;
+def : AEntry<"Carol", 2, 6>;
+def : AEntry<"Ted",   4, 4>;
+def : AEntry<"Alice", 4, 5>;
+def : AEntry<"Costa", 2, 1>;
+
+
+

Here is the generated C++ code. The declaration of lookupATableByValues +is guarded by GET_ATable_DECL, while the definitions are guarded by +GET_ATable_IMPL.

+
#ifdef GET_ATable_DECL
+const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2);
+#endif
+
+#ifdef GET_ATable_IMPL
+constexpr AEntry ATable[] = {
+  { "Costa", 0x2, 0x1 }, // 0
+  { "Carol", 0x2, 0x6 }, // 1
+  { "Ted", 0x4, 0x4 }, // 2
+  { "Alice", 0x4, 0x5 }, // 3
+  { "Bob", 0x5, 0x3 }, // 4
+};
+
+const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) {
+  struct KeyType {
+    uint8_t Val1;
+    uint16_t Val2;
+  };
+  KeyType Key = { Val1, Val2 };
+  auto Table = makeArrayRef(ATable);
+  auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+    [](const AEntry &LHS, const KeyType &RHS) {
+      if (LHS.Val1 < RHS.Val1)
+        return true;
+      if (LHS.Val1 > RHS.Val1)
+        return false;
+      if (LHS.Val2 < RHS.Val2)
+        return true;
+      if (LHS.Val2 > RHS.Val2)
+        return false;
+      return false;
+    });
+
+  if (Idx == Table.end() ||
+      Key.Val1 != Idx->Val1 ||
+      Key.Val2 != Idx->Val2)
+    return nullptr;
+  return &*Idx;
+}
+#endif
+
+
+

The table entries in ATable are sorted in order by Val1, and within +each of those values, by Val2. This allows a binary search of the table, +which is performed in the lookup function by std::lower_bound. The +lookup function returns a pointer to the found table entry, or the null +pointer if no entry is found.

+

This example includes a field whose type TableGen cannot deduce. The Kind +field uses the enumerated type CEnum defined above. To inform TableGen +of the type, the record derived from GenericTable must include a string field +named TypeOf_field, where field is the name of the field whose type +is required.

+
def CTable : GenericTable {
+  let FilterClass = "CEntry";
+  let Fields = ["Name", "Kind", "Encoding"];
+  string TypeOf_Kind = "CEnum";
+  let PrimaryKey = ["Encoding"];
+  let PrimaryKeyName = "lookupCEntryByEncoding";
+}
+
+class CEntry<string name, CEnum kind, int enc> {
+  string Name = name;
+  CEnum Kind = kind;
+  bits<16> Encoding = enc;
+}
+
+def : CEntry<"Apple", CFoo, 10>;
+def : CEntry<"Pear",  CBaz, 15>;
+def : CEntry<"Apple", CBar, 13>;
+
+
+

Here is the generated C++ code.

+
#ifdef GET_CTable_DECL
+const CEntry *lookupCEntryByEncoding(uint16_t Encoding);
+#endif
+
+#ifdef GET_CTable_IMPL
+constexpr CEntry CTable[] = {
+  { "Apple", CFoo, 0xA }, // 0
+  { "Apple", CBar, 0xD }, // 1
+  { "Pear", CBaz, 0xF }, // 2
+};
+
+const CEntry *lookupCEntryByEncoding(uint16_t Encoding) {
+  struct KeyType {
+    uint16_t Encoding;
+  };
+  KeyType Key = { Encoding };
+  auto Table = makeArrayRef(CTable);
+  auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+    [](const CEntry &LHS, const KeyType &RHS) {
+      if (LHS.Encoding < RHS.Encoding)
+        return true;
+      if (LHS.Encoding > RHS.Encoding)
+        return false;
+      return false;
+    });
+
+  if (Idx == Table.end() ||
+      Key.Encoding != Idx->Encoding)
+    return nullptr;
+  return &*Idx;
+}
+
+
+

The PrimaryKeyEarlyOut field, when set to 1, modifies the lookup +function so that it tests the first field of the primary key to determine +whether it is within the range of the collected records’ primary keys. If +not, the function returns the null pointer without performing the binary +search. This is useful for tables that provide data for only some of the +elements of a larger enum-based space. The first field of the primary key +must be an integral type; it cannot be a string.

+

Adding let PrimaryKeyEarlyOut = 1 to the ATable above:

+
def ATable : GenericTable {
+  let FilterClass = "AEntry";
+  let Fields = ["Str", "Val1", "Val2"];
+  let PrimaryKey = ["Val1", "Val2"];
+  let PrimaryKeyName = "lookupATableByValues";
+  let PrimaryKeyEarlyOut = 1;
+}
+
+
+

causes the lookup function to change as follows:

+
const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) {
+  if ((Val1 < 0x2) ||
+      (Val1 > 0x5))
+    return nullptr;
+
+  struct KeyType {
+  ...
+
+
+
+
+

Search Indexes

+

The SearchIndex class is used to define additional lookup functions for +generic tables. To define an additional function, define a record whose parent +class is SearchIndex and whose name is the name of the desired lookup +function. This class provides three fields.

+
    +
  • GenericTable Table. The name of the table that is to receive another +lookup function.

  • +
  • list<string> Key. The list of fields that make up the secondary key.

  • +
  • bit EarlyOut. See the third example in Generic Tables.

  • +
+

Here is an example of a secondary key added to the CTable above. The +generated function looks up entries based on the Name and Kind fields.

+
def lookupCEntry : SearchIndex {
+  let Table = CTable;
+  let Key = ["Name", "Kind"];
+}
+
+
+

This use of SearchIndex generates the following additional C++ code.

+
const CEntry *lookupCEntry(StringRef Name, unsigned Kind);
+
+...
+
+const CEntry *lookupCEntry(StringRef Name, unsigned Kind) {
+  struct IndexType {
+    const char * Name;
+    unsigned Kind;
+    unsigned _index;
+  };
+  static const struct IndexType Index[] = {
+    { "APPLE", CBar, 1 },
+    { "APPLE", CFoo, 0 },
+    { "PEAR", CBaz, 2 },
+  };
+
+  struct KeyType {
+    std::string Name;
+    unsigned Kind;
+  };
+  KeyType Key = { Name.upper(), Kind };
+  auto Table = makeArrayRef(Index);
+  auto Idx = std::lower_bound(Table.begin(), Table.end(), Key,
+    [](const IndexType &LHS, const KeyType &RHS) {
+      int CmpName = StringRef(LHS.Name).compare(RHS.Name);
+      if (CmpName < 0) return true;
+      if (CmpName > 0) return false;
+      if ((unsigned)LHS.Kind < (unsigned)RHS.Kind)
+        return true;
+      if ((unsigned)LHS.Kind > (unsigned)RHS.Kind)
+        return false;
+      return false;
+    });
+
+  if (Idx == Table.end() ||
+      Key.Name != Idx->Name ||
+      Key.Kind != Idx->Kind)
+    return nullptr;
+  return &CTable[Idx->_index];
+}
+
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/BackGuide.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/BackGuide.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/BackGuide.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/BackGuide.html 2021-09-19 16:16:40.000000000 +0000 @@ -0,0 +1,897 @@ + + + + + + + + + 1 TableGen Backend Developer’s Guide — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

1 TableGen Backend Developer’s Guide

+ +
+

1.1 Introduction

+

The purpose of TableGen is to generate complex output files based on +information from source files that are significantly easier to code than the +output files would be, and also easier to maintain and modify over time. The +information is coded in a declarative style involving classes and records, +which are then processed by TableGen. The internalized records are passed on +to various backends, which extract information from a subset of the records +and generate an output file. These output files are typically .inc files +for C++, but may be any type of file that the backend developer needs.

+

This document is a guide to writing a backend for TableGen. It is not a +complete reference manual, but rather a guide to using the facilities +provided by TableGen for the backends. For a complete reference to the +various data structures and functions involved, see the primary TableGen +header file (Record.h) and/or the Doxygen documentation.

+

This document assumes that you have read the TableGen Programmer’s +Reference, which provides a detailed reference for coding +TableGen source files. For a description of the existing backends, see +TableGen BackEnds.

+
+
+

1.2 Data Structures

+

The following sections describe the data structures that contain the classes +and records that are collected from the TableGen source files by the +TableGen parser. Note that the term class refers to an abstract record +class, while the term record refers to a concrete record.

+

Unless otherwise noted, functions associated with classes are instance +functions.

+
+

1.2.1 RecordKeeper

+

An instance of the RecordKeeper class acts as the container for all the +classes and records parsed and collected by TableGen. The RecordKeeper +instance is passed to the backend when it is invoked by TableGen. This class +is usually abbreviated RK.

+

There are two maps in the RecordKeeper, one for classes and one for records +(the latter often referred to as defs). Each map maps the class or record +name to an instance of the Record class (see Record), which contains +all the information about that class or record.

+

In addition to the two maps, the RecordKeeper instance contains:

+
    +
  • A map that maps the names of global variables to their values. +Global variables are defined in TableGen files with outer +defvar statements.

  • +
  • A counter for naming anonymous records.

  • +
+

The RecordKeeper class provides a few useful functions.

+
    +
  • Functions to get the complete class and record maps.

  • +
  • Functions to get a subset of the records based on their parent classes.

  • +
  • Functions to get individual classes, records, and globals, by name.

  • +
+

A RecordKeeper instance can be printed to an output stream with the << +operator.

+
+
+

1.2.2 Record

+

Each class or record built by TableGen is represented by an instance of +the Record class. The RecordKeeper instance contains one map for the +classes and one for the records. The primary data members of a record are +the record name, the vector of field names and their values, and the vector of +superclasses of the record.

+

The record name is stored as a pointer to an Init (see Init), which +is a class whose instances hold TableGen values (sometimes referred to as +initializers). The field names and values are stored in a vector of +RecordVal instances (see RecordVal), each of which contains both the +field name and its value. The superclass vector contains a sequence of +pairs, with each pair including the superclass record and its source +file location.

+

In addition to those members, a Record instance contains:

+
    +
  • A vector of source file locations that includes the record definition +itself, plus the locations of any multiclasses involved in its definition.

  • +
  • For a class record, a vector of the class’s template arguments.

  • +
  • An instance of DefInit (see DefInit) corresponding to this record.

  • +
  • A unique record ID.

  • +
  • A boolean that specifies whether this is a class definition.

  • +
  • A boolean that specifies whether this is an anonymous record.

  • +
+

The Record class provides many useful functions.

+
    +
  • Functions to get the record name, fields, source file locations, +template arguments, and unique ID.

  • +
  • Functions to get all the record’s superclasses or just its direct +superclasses.

  • +
  • Functions to get a particular field value by specifying its name in various +forms, and returning its value in various forms +(see Getting Record Names and Fields).

  • +
  • Boolean functions to check the various attributes of the record.

  • +
+

A Record instance can be printed to an output stream with the << +operator.

+
+
+

1.2.3 RecordVal

+

Each field of a record is stored in an instance of the RecordVal class. +The Record instance includes a vector of these value instances. A +RecordVal instance contains the name of the field, stored in an Init +instance. It also contains the value of the field, likewise stored in an +Init. (A better name for this class might be RecordField.)

+

In addition to those primary members, the RecordVal has other data members.

+
    +
  • The source file location of the field definition.

  • +
  • The type of the field, stored as an instance +of the RecTy class (see RecTy).

  • +
+

The RecordVal class provides some useful functions.

+
    +
  • Functions to get the name of the field in various forms.

  • +
  • A function to get the type of the field.

  • +
  • A function to get the value of the field.

  • +
  • A function to get the source file location.

  • +
+

Note that field values are more easily obtained directly from the Record +instance (see Record).

+

A RecordVal instance can be printed to an output stream with the << +operator.

+
+
+

1.2.4 RecTy

+

The RecTy class is used to represent the types of field values. It is +the base class for a series of subclasses, one for each of the +available field types. The RecTy class has one data member that is an +enumerated type specifying the specific type of field value. (A better +name for this class might be FieldTy.)

+

The RecTy class provides a few useful functions.

+
    +
  • A virtual function to get the type name as a string.

  • +
  • A virtual function to check whether all the values of this type can +be converted to another given type.

  • +
  • A virtual function to check whether this type is a subtype of +another given type.

  • +
  • A function to get the corresponding list +type for lists with elements of this type. For example, the function +returns the list<int> type when called with the int type.

  • +
+

The subclasses that inherit from RecTy are +BitRecTy, +BitsRecTy, +CodeRecTy, +DagRecTy, +IntRecTy, +ListRecTy, +RecordRecTy, and +StringRecTy. +Some of these classes have additional members that +are described in the following subsections.

+

All of the classes derived from RecTy provide the get() function. +It returns an instance of RecTy corresponding to the derived class. +Some of the get() functions require an argument to +specify which particular variant of the type is desired. These arguments are +described in the following subsections.

+

A RecTy instance can be printed to an output stream with the << +operator.

+
+

Warning

+

It is not specified whether there is a single RecTy instance of a +particular type or multiple instances.

+
+
+

1.2.4.1 BitsRecTy

+

This class includes a data member with the size of the bits value and a +function to get that size.

+

The get() function takes the length of the sequence, n, and returns the +BitsRecTy type corresponding to bits<n>.

+
+
+

1.2.4.2 ListRecTy

+

This class includes a data member that specifies the type of the list’s +elements and a function to get that type.

+

The get() function takes the RecTy type of the list members and +returns the ListRecTy type corresponding to list<type>.

+
+
+

1.2.4.3 RecordRecTy

+

This class includes data members that contain the list of parent classes of +this record. It also provides a function to obtain the array of classes and +two functions to get the iterator begin() and end() values. The +class defines a type for the return values of the latter two functions.

+
using const_record_iterator = Record * const *;
+
+
+

The get() function takes an ArrayRef of pointers to the Record +instances of the direct superclasses of the record and returns the RecordRecTy +corresponding to the record inheriting from those superclasses.

+
+
+
+

1.2.5 Init

+

The Init class is used to represent TableGen values. The name derives +from initialization value. This class should not be confused with the +RecordVal class, which represents record fields, both their names and +values. The Init class is the base class for a series of subclasses, one +for each of the available value types. The primary data member of Init +is an enumerated type that represents the specific type of the value.

+

The Init class provides a few useful functions.

+
    +
  • A function to get the type enumerator.

  • +
  • A boolean virtual function to determine whether a value is completely +specified; that is, has no uninitialized subvalues.

  • +
  • Virtual functions to get the value as a string.

  • +
  • Virtual functions to cast the value to other types, implement the bit +range feature of TableGen, and implement the list slice feature.

  • +
  • A virtual function to get a particular bit of the value.

  • +
+

The subclasses that inherit directly from Init are +UnsetInit and TypedInit.

+

An Init instance can be printed to an output stream with the << +operator.

+
+

Warning

+

It is not specified whether two separate initialization values with +the same underlying type and value (e.g., two strings with the value +“Hello”) are represented by two Inits or share the same Init.

+
+
+

1.2.5.1 UnsetInit

+

This class, a subclass of Init, represents the unset (uninitialized) +value. The static function get() can be used to obtain the singleton +Init of this type.

+
+
+

1.2.5.2 TypedInit

+

This class, a subclass of Init, acts as the parent class of the classes +that represent specific value types (except for the unset value). These +classes include BitInit, BitsInit, DagInit, DefInit, +IntInit, ListInit, and StringInit. (There are additional derived +types used by the TableGen parser.)

+

This class includes a data member that specifies the RecTy type of the +value. It provides a function to get that RecTy type.

+
+
+

1.2.5.3 BitInit

+

The BitInit class is a subclass of TypedInit. Its instances +represent the possible values of a bit: 0 or 1. It includes a data member +that contains the bit.

+

All of the classes derived from TypedInit provide the following functions.

+
    +
  • A static function named get() that returns an Init representing +the specified value(s). In the case of BitInit, get(true) returns +an instance of BitInit representing true, while get(false) returns +an instance +representing false. As noted above, it is not specified whether there +is exactly one or more than one BitInit representing true (or false).

  • +
  • A function named getValue() that returns the value of the instance +in a more direct form, in this case as a bool.

  • +
+
+
+

1.2.5.4 BitsInit

+

The BitsInit class is a subclass of TypedInit. Its instances +represent sequences of bits, from high-order to low-order. It includes a +data member with the length of the sequence and a vector of pointers to +Init instances, one per bit.

+

The class provides the usual get() function. It does not provide the +getValue() function.

+

The class provides the following additional functions.

+
    +
  • A function to get the number of bits in the sequence.

  • +
  • A function that gets a bit specified by an integer index.

  • +
+
+
+

1.2.5.5 DagInit

+

The DagInit class is a subclass of TypedInit. Its instances +represent the possible directed acyclic graphs (dag).

+

The class includes a pointer to an Init for the DAG operator and a +pointer to a StringInit for the operator name. It includes the count of +DAG operands and the count of operand names. Finally, it includes a vector of +pointers to Init instances for the operands and another to +StringInit instances for the operand names. +(The DAG operands are also referred to as arguments.)

+

The class provides two forms of the usual get() function. It does not +provide the usual getValue() function.

+

The class provides many additional functions:

+
    +
  • Functions to get the operator in various forms and to get the +operator name in various forms.

  • +
  • Functions to determine whether there are any operands and to get the +number of operands.

  • +
  • Functions to get the operands, both individually and together.

  • +
  • Functions to determine whether there are any names and to +get the number of names.

  • +
  • Functions to get the names, both individually and together.

  • +
  • Functions to get the operand iterator begin() and end() values.

  • +
  • Functions to get the name iterator begin() and end() values.

  • +
+

The class defines two types for the return values of the operand and name +iterators.

+
using const_arg_iterator = SmallVectorImpl<Init*>::const_iterator;
+using const_name_iterator = SmallVectorImpl<StringInit*>::const_iterator;
+
+
+
+
+

1.2.5.6 DefInit

+

The DefInit class is a subclass of TypedInit. Its instances +represent the records that were collected by TableGen. It includes a data +member that is a pointer to the record’s Record instance.

+

The class provides the usual get() function. It does not provide +getValue(). Instead, it provides getDef(), which returns the +Record instance.

+
+
+

1.2.5.7 IntInit

+

The IntInit class is a subclass of TypedInit. Its instances +represent the possible values of a 64-bit integer. It includes a data member +that contains the integer.

+

The class provides the usual get() and getValue() functions. The +latter function returns the integer as an int64_t.

+

The class also provides a function, getBit(), to obtain a specified bit +of the integer value.

+
+
+

1.2.5.8 ListInit

+

The ListInit class is a subclass of TypedInit. Its instances +represent lists of elements of some type. It includes a data member with the +length of the list and a vector of pointers to Init instances, one per +element.

+

The class provides the usual get() and getValues() functions. The +latter function returns an ArrayRef of the vector of pointers to Init +instances.

+

The class provides these additional functions.

+
    +
  • A function to get the element type.

  • +
  • Functions to get the length of the vector and to determine whether +it is empty.

  • +
  • Functions to get an element specified by an integer index and return +it in various forms.

  • +
  • Functions to get the iterator begin() and end() values. The +class defines a type for the return type of these two functions.

  • +
+
using const_iterator = Init *const *;
+
+
+
+
+

1.2.5.9 StringInit

+

The StringInit class is a subclass of TypedInit. Its instances +represent arbitrary-length strings. It includes a data member +that contains a StringRef of the value.

+

The class provides the usual get() and getValue() functions. The +latter function returns the StringRef.

+
+
+
+
+

1.3 Creating a New Backend

+

The following steps are required to create a new backend for TableGen.

+
    +
  1. Invent a name for your backend C++ file, say GenAddressModes.

  2. +
  3. Write the new backend, using the file TableGenBackendSkeleton.cpp +as a starting point.

  4. +
  5. Determine which instance of TableGen requires the new backend. There is +one instance for Clang and another for LLVM. Or you may be building +your own instance.

  6. +
  7. Modify the selected tablegen.cpp to include your new backend.

  8. +
+
+
    +
  1. Add the name to the enumerated type ActionType.

  2. +
  3. Add a keyword to the ActionType command option using the +clEnumValN() function.

  4. +
  5. Add a case to the switch statement in the xxxTableGenMain() +function. It should invoke the “main function” of your backend, which +in this case, according to convention, is named EmitAddressModes.

  6. +
+
+
    +
  1. Add a declaration of your “main function” to the corresponding +TableGenBackends.h header file.

  2. +
  3. Add your backend C++ file to the appropriate CMakeLists.txt file so +that it will be built.

  4. +
  5. Add your C++ file to the system.

  6. +
+
+
+

1.4 The Backend Skeleton

+

The file TableGenBackendSkeleton.cpp provides a skeleton C++ translation +unit for writing a new TableGen backend. Here are a few notes on the file.

+
    +
  • The list of includes is the minimal list required by most backends.

  • +
  • As with all LLVM C++ files, it has a using namespace llvm; statement. +It also has an anonymous namespace that contains all the file-specific +data structure definitions, along with the class embodying the emitter +data members and functions. Continuing with the GenAddressModes example, +this class is named AddressModesEmitter.

  • +
  • The constructor for the emitter class accepts a RecordKeeper reference, +typically named RK. The RecordKeeper reference is saved in a data +member so that records can be obtained from it. This data member is usually +named Records.

  • +
  • One function is named run. It is invoked by the backend’s “main +function” to collect records and emit the output file. It accepts an instance +of the raw_ostream class, typically named OS. The output file is +emitted by writing to this stream.

  • +
  • The run function should use the emitSourceFileHeader helper function +to include a standard header in the emitted file.

  • +
  • The only function in the llvm namespace is the backend “main function.” +In this example, it is named EmitAddressModes. It creates an instance +of the AddressModesEmitter class, passing the RecordKeeper +instance, then invokes the run function, passing the raw_ostream +instance.

  • +
+

All the examples in the remainder of this document will assume the naming +conventions used in the skeleton file.

+
+
+

1.5 Getting Classes

+

The RecordKeeper class provides two functions for getting the +Record instances for classes defined in the TableGen files.

+
    +
  • getClasses() returns a RecordMap reference for all the classes.

  • +
  • getClass(name) returns a Record reference for the named +class.

  • +
+

If you need to iterate over all the class records:

+
for (auto ClassPair : Records.getClasses()) {
+  Record *ClassRec = ClassPair.second.get();
+  ...
+}
+
+
+

ClassPair.second gets the class’s unique_ptr, then .get() gets the +class Record itself.

+
+
+

1.6 Getting Records

+

The RecordKeeper class provides four functions for getting the +Record instances for concrete records defined in the TableGen files.

+
    +
  • getDefs() returns a RecordMap reference for all the concrete +records.

  • +
  • getDef(name) returns a Record reference for the named +concrete record.

  • +
  • getAllDerivedDefinitions(classname) returns a vector of +Record references for the concrete records that derive from the +given class.

  • +
  • getAllDerivedDefinitions(classnames) returns +a vector of Record references for the concrete records that derive from +all of the given classes.

  • +
+

This statement obtains all the records that derive from the Attribute +class and iterates over them.

+
auto AttrRecords = Records.getAllDerivedDefinitions("Attribute");
+for (Record *AttrRec : AttrRecords) {
+  ...
+}
+
+
+
+
+

1.7 Getting Record Names and Fields

+

As described above (see Record), there are multiple functions that +return the name of a record. One particularly useful one is +getNameInitAsString(), which returns the name as a std::string.

+

There are also multiple functions that return the fields of a record. To +obtain and iterate over all the fields:

+
for (const RecordVal &Field : SomeRec->getValues()) {
+  ...
+}
+
+
+

You will recall that RecordVal is the class whose instances contain +information about the fields in records.

+

The getValue() function returns the RecordVal instance for a field +specified by name. There are multiple overloaded functions, some taking a +StringRef and others taking a const Init *. Some functions return a +RecordVal * and others return a const RecordVal *. If the field does +not exist, a fatal error message is printed.

+

More often than not, you are interested in the value of the field, not all +the information in the RecordVal. There is a large set of functions that +take a field name in some form and return its value. One function, +getValueInit, returns the value as an Init *. Another function, +isValueUnset, returns a boolean specifying whether the value is unset +(uninitialized).

+

Most of the functions return the value in some more useful form. For +example:

+
std::vector<int64_t> RegCosts =
+    SomeRec->getValueAsListOfInts("RegCosts");
+
+
+

The field RegCosts is assumed to be a list of integers. That list is +returned as a std::vector of 64-bit integers. If the field is not a list +of integers, a fatal error message is printed.

+

Here is a function that returns a field value as a Record, but returns +null if the field does not exist.

+
if (Record *BaseRec = SomeRec->getValueAsOptionalDef(BaseFieldName)) {
+  ...
+}
+
+
+

The field is assumed to have another record as its value. That record is returned +as a pointer to a Record. If the field does not exist or is unset, the +function returns null.

+
+
+

1.8 Getting Record Superclasses

+

The Record class provides a function to obtain the superclasses of a +record. It is named getSuperClasses and returns an ArrayRef of an +array of std::pair pairs. The superclasses are in post-order: the order +in which the superclasses were visited while copying their fields into the +record. Each pair consists of a pointer to the Record instance for a +superclass record and an instance of the SMRange class. The range +indicates the source file locations of the beginning and end of the class +definition.

+

This example obtains the superclasses of the Prototype record and then +iterates over the pairs in the returned array.

+
ArrayRef<std::pair<Record *, SMRange>>
+    Superclasses = Prototype->getSuperClasses();
+for (const auto &SuperPair : Superclasses) {
+  ...
+}
+
+
+

The Record class also provides a function, getDirectSuperClasses, to +append the direct superclasses of a record to a given vector of type +SmallVectorImpl<Record *>.

+
+
+

1.9 Emitting Text to the Output Stream

+

The run function is passed a raw_ostream to which it prints the +output file. By convention, this stream is saved in the emitter class member +named OS, although some run functions are simple and just use the +stream without saving it. The output can be produced by writing values +directly to the output stream, or by using the llvm::format() or +llvm::formatv() functions.

+
OS << "#ifndef " << NodeName << "\n";
+
+OS << format("0x%0*x, ", Digits, Value);
+
+
+

Instances of the following classes can be printed using the << operator: +RecordKeeper, +Record, +RecTy, +RecordVal, and +Init.

+

The helper function emitSourceFileHeader() prints the header comment +that should be included at the top of every output file. A call to it is +included in the skeleton backend file TableGenBackendSkeleton.cpp.

+
+
+

1.10 Printing Error Messages

+

TableGen records are often derived from multiple classes and also often +defined through a sequence of multiclasses. Because of this, it can be +difficult for backends to report clear error messages with accurate source +file locations. To make error reporting easier, five error reporting +functions are provided, each with four overloads.

+
    +
  • PrintWarning prints a message tagged as a warning.

  • +
  • PrintError prints a message tagged as an error.

  • +
  • PrintFatalError prints a message tagged as an error and then terminates.

  • +
  • PrintNote prints a note. It is often used after one of the previous +functions to provide more information.

  • +
  • PrintFatalNote prints a note and then terminates.

  • +
+

Each of these five functions is overloaded four times.

+
    +
  • PrintError(const Twine &Msg): +Prints the message with no source file location.

  • +
  • PrintError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg): +Prints the message followed by the specified source line, +along with a pointer to the item in error. The array of +source file locations is typically taken from a Record instance.

  • +
  • PrintError(const Record *Rec, const Twine &Msg): +Prints the message followed by the source line associated with the +specified record (see Record).

  • +
  • PrintError(const RecordVal *RecVal, const Twine &Msg): +Prints the message followed by the source line associated with the +specified record field (see RecordVal).

  • +
+

Using these functions, the goal is to produce the most specific error report +possible.

+
+
+

1.11 Debugging Tools

+

TableGen provides some tools to aid in debugging backends.

+
+

1.11.1 The PrintRecords Backend

+

The TableGen command option --print-records invokes a simple backend +that prints all the classes and records defined in the source files. This is +the default backend option. The format of the output is guaranteed to be +constant over time, so that the output can be compared in tests. The output +looks like this:

+
------------- Classes -----------------
+...
+class XEntry<string XEntry:str = ?, int XEntry:val1 = ?> { // XBase
+  string Str = XEntry:str;
+  bits<8> Val1 = { !cast<bits<8>>(XEntry:val1){7}, ... };
+  bit Val3 = 1;
+}
+...
+------------- Defs -----------------
+def ATable {  // GenericTable
+  string FilterClass = "AEntry";
+  string CppTypeName = "AEntry";
+  list<string> Fields = ["Str", "Val1", "Val2"];
+  list<string> PrimaryKey = ["Val1", "Val2"];
+  string PrimaryKeyName = "lookupATableByValues";
+  bit PrimaryKeyEarlyOut = 0;
+}
+...
+def anonymous_0 {     // AEntry
+  string Str = "Bob";
+  bits<8> Val1 = { 0, 0, 0, 0, 0, 1, 0, 1 };
+  bits<10> Val2 = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1 };
+}
+
+
+

Classes are shown with their template arguments, parent classes (following +//), and fields. Records are shown with their parent classes and +fields. Note that anonymous records are named anonymous_0, +anonymous_1, etc.

+
+
+

1.11.2 The PrintDetailedRecords Backend

+

The TableGen command option --print-detailed-records invokes a backend +that prints all the global variables, classes, and records defined in the +source files. The format of the output is not guaranteed to be constant +over time. The output looks like this.

+
DETAILED RECORDS for file llvm-project\llvm\lib\target\arc\arc.td
+
+-------------------- Global Variables (5) --------------------
+
+AMDGPUBufferIntrinsics = [int_amdgcn_buffer_load_format, ...
+AMDGPUImageDimAtomicIntrinsics = [int_amdgcn_image_atomic_swap_1d, ...
+...
+-------------------- Classes (758) --------------------
+
+AMDGPUBufferLoad  |IntrinsicsAMDGPU.td:879|
+  Template args:
+    LLVMType AMDGPUBufferLoad:data_ty = llvm_any_ty  |IntrinsicsAMDGPU.td:879|
+  Superclasses: (SDPatternOperator) Intrinsic AMDGPURsrcIntrinsic
+  Fields:
+    list<SDNodeProperty> Properties = [SDNPMemOperand]  |Intrinsics.td:348|
+    string LLVMName = ""  |Intrinsics.td:343|
+...
+-------------------- Records (12303) --------------------
+
+AMDGPUSample_lz_o  |IntrinsicsAMDGPU.td:560|
+  Defm sequence: |IntrinsicsAMDGPU.td:584| |IntrinsicsAMDGPU.td:566|
+  Superclasses: AMDGPUSampleVariant
+  Fields:
+    string UpperCaseMod = "_LZ_O"  |IntrinsicsAMDGPU.td:542|
+    string LowerCaseMod = "_lz_o"  |IntrinsicsAMDGPU.td:543|
+...
+
+
+
    +
  • Global variables defined with outer defvar statements are shown with +their values.

  • +
  • The classes are shown with their source location, template arguments, +superclasses, and fields.

  • +
  • The records are shown with their source location, defm sequence, +superclasses, and fields.

  • +
+

Superclasses are shown in the order processed, with indirect superclasses in +parentheses. Each field is shown with its value and the source location at +which it was set. +The defm sequence gives the locations of the defm statements that +were involved in generating the record, in the order they were invoked.

+
+
+

1.11.3 Timing TableGen Phases

+

TableGen provides a phase timing feature that produces a report of the time +used by the various phases of parsing the source files and running the +selected backend. This feature is enabled with the --time-phases option +of the TableGen command.

+

If the backend is not instrumented for timing, then a report such as the +following is produced. This is the timing for the +--print-detailed-records backend run on the AMDGPU target.

+
===-------------------------------------------------------------------------===
+                             TableGen Phase Timing
+===-------------------------------------------------------------------------===
+  Total Execution Time: 101.0106 seconds (102.4819 wall clock)
+
+   ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Name ---
+  85.5197 ( 84.9%)   0.1560 ( 50.0%)  85.6757 ( 84.8%)  85.7009 ( 83.6%)  Backend overall
+  15.1789 ( 15.1%)   0.0000 (  0.0%)  15.1789 ( 15.0%)  15.1829 ( 14.8%)  Parse, build records
+   0.0000 (  0.0%)   0.1560 ( 50.0%)   0.1560 (  0.2%)   1.5981 (  1.6%)  Write output
+  100.6986 (100.0%)   0.3120 (100.0%)  101.0106 (100.0%)  102.4819 (100.0%)  Total
+
+
+

Note that all the time for the backend is lumped under “Backend overall”.

+

If the backend is instrumented for timing, then its processing is +divided into phases and each one timed separately. This is the timing for +the --emit-dag-isel backend run on the AMDGPU target.

+
===-------------------------------------------------------------------------===
+                             TableGen Phase Timing
+===-------------------------------------------------------------------------===
+  Total Execution Time: 746.3868 seconds (747.1447 wall clock)
+
+   ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Name ---
+  657.7938 ( 88.1%)   0.1404 ( 90.0%)  657.9342 ( 88.1%)  658.6497 ( 88.2%)  Emit matcher table
+  70.2317 (  9.4%)   0.0000 (  0.0%)  70.2317 (  9.4%)  70.2700 (  9.4%)  Convert to matchers
+  14.8825 (  2.0%)   0.0156 ( 10.0%)  14.8981 (  2.0%)  14.9009 (  2.0%)  Parse, build records
+   2.1840 (  0.3%)   0.0000 (  0.0%)   2.1840 (  0.3%)   2.1791 (  0.3%)  Sort patterns
+   1.1388 (  0.2%)   0.0000 (  0.0%)   1.1388 (  0.2%)   1.1401 (  0.2%)  Optimize matchers
+   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0050 (  0.0%)  Write output
+  746.2308 (100.0%)   0.1560 (100.0%)  746.3868 (100.0%)  747.1447 (100.0%)  Total
+
+
+

The backend has been divided into four phases and timed separately.

+

If you want to instrument a backend, refer to the backend DAGISelEmitter.cpp +and search for Records.startTimer.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/index.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,408 @@ + + + + + + + + + TableGen Overview — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

TableGen Overview

+ +
+
+
+

Introduction

+

TableGen’s purpose is to help a human develop and maintain records of +domain-specific information. Because there may be a large number of these +records, it is specifically designed to allow writing flexible descriptions and +for common features of these records to be factored out. This reduces the +amount of duplication in the description, reduces the chance of error, and makes +it easier to structure domain-specific information.

+

The TableGen front end parses a file, instantiates the declarations, and +hands the result off to a domain-specific backend for processing. See +the TableGen Programmer’s Reference for an in-depth +description of TableGen. See tblgen - Description to C++ Code for details on the *-tblgen commands +that run the various flavors of TableGen.

+

The current major users of TableGen are The LLVM Target-Independent +Code Generator and the Clang diagnostics and attributes.

+

Note that if you work with TableGen frequently and use emacs or vim, +you can find an emacs “TableGen mode” and a vim language file in the +llvm/utils/emacs and llvm/utils/vim directories of your LLVM +distribution, respectively.

+
+
+

The TableGen program

+

TableGen files are interpreted by the TableGen program, llvm-tblgen, available +in your build directory under bin. It is not installed in the system (or where +your sysroot is set to), since it has no use beyond LLVM’s build process.

+
+

Running TableGen

+

TableGen runs just like any other LLVM tool. The first (optional) argument +specifies the file to read. If a filename is not specified, llvm-tblgen +reads from standard input.

+

To be useful, one of the backends must be used. These backends are +selectable on the command line (type ‘llvm-tblgen -help’ for a list). For +example, to get a list of all of the definitions that subclass a particular type +(which can be useful for building up an enum list of these records), use the +-print-enums option:

+
$ llvm-tblgen X86.td -print-enums -class=Register
+AH, AL, AX, BH, BL, BP, BPL, BX, CH, CL, CX, DH, DI, DIL, DL, DX, EAX, EBP, EBX,
+ECX, EDI, EDX, EFLAGS, EIP, ESI, ESP, FP0, FP1, FP2, FP3, FP4, FP5, FP6, IP,
+MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, R10, R10B, R10D, R10W, R11, R11B, R11D,
+R11W, R12, R12B, R12D, R12W, R13, R13B, R13D, R13W, R14, R14B, R14D, R14W, R15,
+R15B, R15D, R15W, R8, R8B, R8D, R8W, R9, R9B, R9D, R9W, RAX, RBP, RBX, RCX, RDI,
+RDX, RIP, RSI, RSP, SI, SIL, SP, SPL, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
+XMM0, XMM1, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5,
+XMM6, XMM7, XMM8, XMM9,
+
+$ llvm-tblgen X86.td -print-enums -class=Instruction
+ABS_F, ABS_Fp32, ABS_Fp64, ABS_Fp80, ADC32mi, ADC32mi8, ADC32mr, ADC32ri,
+ADC32ri8, ADC32rm, ADC32rr, ADC64mi32, ADC64mi8, ADC64mr, ADC64ri32, ADC64ri8,
+ADC64rm, ADC64rr, ADD16mi, ADD16mi8, ADD16mr, ADD16ri, ADD16ri8, ADD16rm,
+ADD16rr, ADD32mi, ADD32mi8, ADD32mr, ADD32ri, ADD32ri8, ADD32rm, ADD32rr,
+ADD64mi32, ADD64mi8, ADD64mr, ADD64ri32, ...
+
+
+

The default backend prints out all of the records. There is also a general +backend which outputs all the records as a JSON data structure, enabled using +the -dump-json option.

+

If you plan to use TableGen, you will most likely have to write a backend +that extracts the information specific to what you need and formats it in the +appropriate way. You can do this by extending TableGen itself in C++, or by +writing a script in any language that can consume the JSON output.

+
+
+

Example

+

With no other arguments, llvm-tblgen parses the specified file and prints out all +of the classes, then all of the definitions. This is a good way to see what the +various definitions expand to fully. Running this on the X86.td file prints +this (at the time of this writing):

+
...
+def ADD32rr {   // Instruction X86Inst I
+  string Namespace = "X86";
+  dag OutOperandList = (outs GR32:$dst);
+  dag InOperandList = (ins GR32:$src1, GR32:$src2);
+  string AsmString = "add{l}\t{$src2, $dst|$dst, $src2}";
+  list<dag> Pattern = [(set GR32:$dst, (add GR32:$src1, GR32:$src2))];
+  list<Register> Uses = [];
+  list<Register> Defs = [EFLAGS];
+  list<Predicate> Predicates = [];
+  int CodeSize = 3;
+  int AddedComplexity = 0;
+  bit isReturn = 0;
+  bit isBranch = 0;
+  bit isIndirectBranch = 0;
+  bit isBarrier = 0;
+  bit isCall = 0;
+  bit canFoldAsLoad = 0;
+  bit mayLoad = 0;
+  bit mayStore = 0;
+  bit isImplicitDef = 0;
+  bit isConvertibleToThreeAddress = 1;
+  bit isCommutable = 1;
+  bit isTerminator = 0;
+  bit isReMaterializable = 0;
+  bit isPredicable = 0;
+  bit hasDelaySlot = 0;
+  bit usesCustomInserter = 0;
+  bit hasCtrlDep = 0;
+  bit isNotDuplicable = 0;
+  bit hasSideEffects = 0;
+  InstrItinClass Itinerary = NoItinerary;
+  string Constraints = "";
+  string DisableEncoding = "";
+  bits<8> Opcode = { 0, 0, 0, 0, 0, 0, 0, 1 };
+  Format Form = MRMDestReg;
+  bits<6> FormBits = { 0, 0, 0, 0, 1, 1 };
+  ImmType ImmT = NoImm;
+  bits<3> ImmTypeBits = { 0, 0, 0 };
+  bit hasOpSizePrefix = 0;
+  bit hasAdSizePrefix = 0;
+  bits<4> Prefix = { 0, 0, 0, 0 };
+  bit hasREX_WPrefix = 0;
+  FPFormat FPForm = ?;
+  bits<3> FPFormBits = { 0, 0, 0 };
+}
+...
+
+
+

This definition corresponds to the 32-bit register-register add instruction +of the x86 architecture. def ADD32rr defines a record named +ADD32rr, and the comment at the end of the line indicates the superclasses +of the definition. The body of the record contains all of the data that +TableGen assembled for the record, indicating that the instruction is part of +the “X86” namespace, the pattern indicating how the instruction is selected by +the code generator, that it is a two-address instruction, has a particular +encoding, etc. The contents and semantics of the information in the record are +specific to the needs of the X86 backend, and are only shown as an example.

+

As you can see, a lot of information is needed for every instruction supported +by the code generator, and specifying it all manually would be unmaintainable, +prone to bugs, and tiring to do in the first place. Because we are using +TableGen, all of the information was derived from the following definition:

+
let Defs = [EFLAGS],
+    isCommutable = 1,                  // X = ADD Y,Z --> X = ADD Z,Y
+    isConvertibleToThreeAddress = 1 in // Can transform into LEA.
+def ADD32rr  : I<0x01, MRMDestReg, (outs GR32:$dst),
+                                   (ins GR32:$src1, GR32:$src2),
+                 "add{l}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>;
+
+
+

This definition makes use of the custom class I (extended from the custom +class X86Inst), which is defined in the X86-specific TableGen file, to +factor out the common features that instructions of its class share. A key +feature of TableGen is that it allows the end-user to define the abstractions +they prefer to use when describing their information.

+
+
+
+

Syntax

+

TableGen has a syntax that is loosely based on C++ templates, with built-in +types and specification. In addition, TableGen’s syntax introduces some +automation concepts like multiclass, foreach, let, etc.

+
+

Basic concepts

+

TableGen files consist of two key parts: ‘classes’ and ‘definitions’, both of +which are considered ‘records’.

+

TableGen records have a unique name, a list of values, and a list of +superclasses. The list of values is the main data that TableGen builds for each +record; it is this that holds the domain specific information for the +application. The interpretation of this data is left to a specific backend, +but the structure and format rules are taken care of and are fixed by +TableGen.

+

TableGen definitions are the concrete form of ‘records’. These generally do +not have any undefined values, and are marked with the ‘def’ keyword.

+
def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
+                                      "Enable ARMv8 FP">;
+
+
+

In this example, FeatureFPARMv8 is a SubtargetFeature record initialised +with some values. The names of the classes are defined via the +keyword class, either in the same file or in some other included file. Most target +TableGen files include the generic ones in include/llvm/Target.

+

TableGen classes are abstract records that are used to build and describe +other records. These classes allow the end-user to build abstractions for +either the domain they are targeting (such as “Register”, “RegisterClass”, and +“Instruction” in the LLVM code generator) or for the implementor to help factor +out common properties of records (such as “FPInst”, which is used to represent +floating point instructions in the X86 backend). TableGen keeps track of all of +the classes that are used to build up a definition, so the backend can find all +definitions of a particular class, such as “Instruction”.

+
class ProcNoItin<string Name, list<SubtargetFeature> Features>
+      : Processor<Name, NoItineraries, Features>;
+
+
+

Here, the class ProcNoItin, receiving parameters Name of type string and +a list of target features, specializes the class Processor by passing the +arguments down as well as hard-coding NoItineraries.

+

TableGen multiclasses are groups of abstract records that are instantiated +all at once. Each instantiation can result in multiple TableGen definitions. +If a multiclass inherits from another multiclass, the definitions in the +sub-multiclass become part of the current multiclass, as if they were declared +in the current multiclass.

+
multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
+                        dag address, ValueType sty> {
+def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
+          (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
+            Base, Offset, Extend)>;
+
+def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
+          (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
+            Base, Offset, Extend)>;
+}
+
+defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
+                      !foreach(decls.pattern, address,
+                               !subst(SHIFT, imm_eq0, decls.pattern)),
+                      i8>;
+
+
+

See the TableGen Programmer’s Reference for an in-depth +description of TableGen.

+
+
+
+

TableGen backends

+

TableGen files have no real meaning without a backend. The default operation +when running *-tblgen is to print the information in a textual format, but +that’s only useful for debugging the TableGen files themselves. The power +in TableGen is, however, to interpret the source files into an internal +representation that can be generated into anything you want.

+

Current usage of TableGen is to create huge include files with tables that you +can either include directly (if the output is in the language you’re coding in), +or use in pre-processing via macros surrounding the include of the file.

+

Direct output can be used if the backend already prints a table in C format +or if the output is just a list of strings (for error and warning messages). +Pre-processed output should be used if the same information needs to be used +in different contexts (like Instruction names), so your backend should print +a meta-information list that can be shaped into different compile-time formats.

+

See TableGen BackEnds for a list of available +backends, and see the TableGen Backend Developer’s Guide +for information on how to write and debug a new backend.

+
+
+

TableGen Deficiencies

+

Despite being very generic, TableGen has some deficiencies that have been +pointed out numerous times. The common theme is that, while TableGen allows +you to build domain-specific languages, the final languages that you create +lack the power of other DSLs, which in turn considerably increases the size +and complexity of TableGen files.

+

At the same time, TableGen allows you to create virtually any meaning of +the basic concepts via custom-made backends, which can pervert the original +design and make it very hard for newcomers to understand the evil TableGen +file.

+

There are some in favor of extending the semantics even more, but making sure +backends adhere to strict rules. Others are suggesting we should move to fewer, +more powerful DSLs designed with specific purposes, or even reusing existing +DSLs.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/ProgRef.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/ProgRef.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGen/ProgRef.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGen/ProgRef.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,1959 @@ + + + + + + + + + 1 TableGen Programmer’s Reference — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

1 TableGen Programmer’s Reference

+ +
+

1.1 Introduction

+

The purpose of TableGen is to generate complex output files based on +information from source files that are significantly easier to code than the +output files would be, and also easier to maintain and modify over time. The +information is coded in a declarative style involving classes and records, +which are then processed by TableGen. The internalized records are passed on +to various backends, which extract information from a subset of the records +and generate one or more output files. These output files are typically +.inc files for C++, but may be any type of file that the backend +developer needs.

+

This document describes the LLVM TableGen facility in detail. It is intended +for the programmer who is using TableGen to produce code for a project. If +you are looking for a simple overview, check out the TableGen Overview. The various *-tblgen commands used to invoke TableGen are +described in tblgen Family - Description to C++ +Code.

+

An example of a backend is RegisterInfo, which generates the register +file information for a particular target machine, for use by the LLVM +target-independent code generator. See TableGen Backends +for a description of the LLVM TableGen backends, and TableGen +Backend Developer’s Guide for a guide to writing a new +backend.

+

Here are a few of the things backends can do.

+
    +
  • Generate the register file information for a particular target machine.

  • +
  • Generate the instruction definitions for a target.

  • +
  • Generate the patterns that the code generator uses to match instructions +to intermediate representation (IR) nodes.

  • +
  • Generate semantic attribute identifiers for Clang.

  • +
  • Generate abstract syntax tree (AST) declaration node definitions for Clang.

  • +
  • Generate AST statement node definitions for Clang.

  • +
+
+

1.1.1 Concepts

+

TableGen source files contain two primary items: abstract records and +concrete records. In this and other TableGen documents, abstract records +are called classes. (These classes are different from C++ classes and do +not map onto them.) In addition, concrete records are usually just called +records, although sometimes the term record refers to both classes and +concrete records. The distinction should be clear in context.

+

Classes and concrete records have a unique name, either chosen by +the programmer or generated by TableGen. Associated with that name +is a list of fields with values and an optional list of parent classes +(sometimes called base or super classes). The fields are the primary data that +backends will process. Note that TableGen assigns no meanings to fields; the +meanings are entirely up to the backends and the programs that incorporate +the output of those backends.

+
+

Note

+

The term “parent class” can refer to a class that is a parent of another +class, and also to a class from which a concrete record inherits. This +nonstandard use of the term arises because TableGen treats classes and +concrete records similarly.

+
+

A backend processes some subset of the concrete records built by the +TableGen parser and emits the output files. These files are usually C++ +.inc files that are included by the programs that require the data in +those records. However, a backend can produce any type of output files. For +example, it could produce a data file containing messages tagged with +identifiers and substitution parameters. In a complex use case such as the +LLVM code generator, there can be many concrete records and some of them can +have an unexpectedly large number of fields, resulting in large output files.

+

In order to reduce the complexity of TableGen files, classes are used to +abstract out groups of record fields. For example, a few classes may +abstract the concept of a machine register file, while other classes may +abstract the instruction formats, and still others may abstract the +individual instructions. TableGen allows an arbitrary hierarchy of classes, +so that the abstract classes for two concepts can share a third superclass that +abstracts common “sub-concepts” from the two original concepts.

+

In order to make classes more useful, a concrete record (or another class) +can request a class as a parent class and pass template arguments to it. +These template arguments can be used in the fields of the parent class to +initialize them in a custom manner. That is, record or class A can +request parent class S with one set of template arguments, while record or class +B can request S with a different set of arguments. Without template +arguments, many more classes would be required, one for each combination of +the template arguments.

+

Both classes and concrete records can include fields that are uninitialized. +The uninitialized “value” is represented by a question mark (?). Classes +often have uninitialized fields that are expected to be filled in when those +classes are inherited by concrete records. Even so, some fields of concrete +records may remain uninitialized.

+

TableGen provides multiclasses to collect a group of record definitions in +one place. A multiclass is a sort of macro that can be “invoked” to define +multiple concrete records all at once. A multiclass can inherit from other +multiclasses, which means that the multiclass inherits all the definitions +from its parent multiclasses.

+

Appendix C: Sample Record illustrates a complex record in the Intel X86 +target and the simple way in which it is defined.

+
+
+
+

1.2 Source Files

+

TableGen source files are plain ASCII text files. The files can contain +statements, comments, and blank lines (see Lexical Analysis). The standard file +extension for TableGen files is .td.

+

TableGen files can grow quite large, so there is an include mechanism that +allows one file to include the content of another file (see Include +Files). This allows large files to be broken up into smaller ones, and +also provides a simple library mechanism where multiple source files can +include the same library file.

+

TableGen supports a simple preprocessor that can be used to conditionalize +portions of .td files. See Preprocessing Facilities for more +information.

+
+
+

1.3 Lexical Analysis

+

The lexical and syntax notation used here is intended to imitate +Python’s notation. In particular, for lexical definitions, the productions +operate at the character level and there is no implied whitespace between +elements. The syntax definitions operate at the token level, so there is +implied whitespace between tokens.

+

TableGen supports BCPL-style comments (// ...) and nestable C-style +comments (/* ... */). +TableGen also provides simple Preprocessing Facilities.

+

Formfeed characters may be used freely in files to produce page breaks when +the file is printed for review.

+

The following are the basic punctuation tokens:

+
- + [ ] { } ( ) < > : ; . ... = ? #
+
+
+
+

1.3.1 Literals

+

Numeric literals take one of the following forms:

+
+TokInteger     ::=  DecimalInteger | HexInteger | BinInteger
+DecimalInteger ::=  ["+" | "-"] ("0"..."9")+
+HexInteger     ::=  "0x" ("0"..."9" | "a"..."f" | "A"..."F")+
+BinInteger     ::=  "0b" ("0" | "1")+
+
+

Observe that the DecimalInteger token includes the optional + +or - sign, unlike most languages where the sign would be treated as a +unary operator.

+

TableGen has two kinds of string literals:

+
+TokString ::=  '"' (non-'"' characters and escapes) '"'
+TokCode   ::=  "[{" (shortest text not containing "}]") "}]"
+
+

A TokCode is nothing more than a multi-line string literal +delimited by [{ and }]. It can break across lines and the +line breaks are retained in the string.

+

The current implementation accepts the following escape sequences:

+
\\ \' \" \t \n
+
+
+
+
+

1.3.2 Identifiers

+

TableGen has name- and identifier-like tokens, which are case-sensitive.

+
+ualpha        ::=  "a"..."z" | "A"..."Z" | "_"
+TokIdentifier ::=  ("0"..."9")* ualpha (ualpha | "0"..."9")*
+TokVarName    ::=  "$" ualpha (ualpha |  "0"..."9")*
+
+

Note that, unlike most languages, TableGen allows TokIdentifier to +begin with an integer. In case of ambiguity, a token is interpreted as a +numeric literal rather than an identifier.

+

TableGen has the following reserved keywords, which cannot be used as +identifiers:

+
assert     bit           bits          class         code
+dag        def           else          false         foreach
+defm       defset        defvar        field         if
+in         include       int           let           list
+multiclass string        then          true
+
+
+
+

Warning

+

The field reserved word is deprecated.

+
+
+
+

1.3.3 Bang operators

+

TableGen provides “bang operators” that have a wide variety of uses:

+
+BangOperator ::=  one of
+                  !add        !and         !cast        !con         !dag
+                  !empty      !eq          !filter      !find        !foldl
+                  !foreach    !ge          !getdagop    !gt          !head
+                  !if         !interleave  !isa         !le          !listconcat
+                  !listsplat  !lt          !mul         !ne          !not
+                  !or         !setdagop    !shl         !size        !sra
+                  !srl        !strconcat   !sub         !subst       !substr
+                  !tail       !xor
+
+

The !cond operator has a slightly different +syntax compared to other bang operators, so it is defined separately:

+
+CondOperator ::=  !cond
+
+

See Appendix A: Bang Operators for a description of each bang operator.

+
+
+

1.3.4 Include files

+

TableGen has an include mechanism. The content of the included file +lexically replaces the include directive and is then parsed as if it was +originally in the main file.

+
+IncludeDirective ::=  "include" TokString
+
+

Portions of the main file and included files can be conditionalized using +preprocessor directives.

+
+PreprocessorDirective ::=  "#define" | "#ifdef" | "#ifndef"
+
+
+
+
+

1.4 Types

+

The TableGen language is statically typed, using a simple but complete type +system. Types are used to check for errors, to perform implicit conversions, +and to help interface designers constrain the allowed input. Every value is +required to have an associated type.

+

TableGen supports a mixture of low-level types (e.g., bit) and +high-level types (e.g., dag). This flexibility allows you to describe a +wide range of records conveniently and compactly.

+
+Type    ::=  "bit" | "int" | "string" | "dag"
+            | "bits" "<" TokInteger ">"
+            | "list" "<" Type ">"
+            | ClassID
+ClassID ::=  TokIdentifier
+
+
+
bit

A bit is a boolean value that can be 0 or 1.

+
+
int

The int type represents a simple 64-bit integer value, such as 5 or +-42.

+
+
string

The string type represents an ordered sequence of characters of arbitrary +length.

+
+
bits<n>

The bits type is a fixed-sized integer of arbitrary length n that +is treated as separate bits. These bits can be accessed individually. +A field of this type is useful for representing an instruction operation +code, register number, or address mode/register/displacement. The bits of +the field can be set individually or as subfields. For example, in an +instruction address, the addressing mode, base register number, and +displacement can be set separately.

+
+
list<type>

This type represents a list whose elements are of the type specified in +angle brackets. The element type is arbitrary; it can even be another +list type. List elements are indexed from 0.

+
+
dag

This type represents a nestable directed acyclic graph (DAG) of nodes. +Each node has an operator and zero or more arguments (or operands). +An argument can be +another dag object, allowing an arbitrary tree of nodes and edges. +As an example, DAGs are used to represent code patterns for use by +the code generator instruction selection algorithms. See Directed +acyclic graphs (DAGs) for more details.

+
+
ClassID

Specifying a class name in a type context indicates +that the type of the defined value must +be a subclass of the specified class. This is useful in conjunction with +the list type; for example, to constrain the elements of the list to a +common base class (e.g., a list<Register> can only contain definitions +derived from the Register class). +The ClassID must name a class that has been previously +declared or defined.

+
+
+
+
+

1.5 Values and Expressions

+

There are many contexts in TableGen statements where a value is required. A +common example is in the definition of a record, where each field is +specified by a name and an optional value. TableGen allows for a reasonable +number of different forms when building up value expressions. These forms +allow the TableGen file to be written in a syntax that is natural for the +application.

+

Note that all of the values have rules for converting them from one type to +another. For example, these rules allow you to assign a value like 7 +to an entity of type bits<4>.

+
+Value       ::=  SimpleValue ValueSuffix*
+                | Value "#" Value
+ValueSuffix ::=  "{" RangeList "}"
+                | "[" RangeList "]"
+                | "." TokIdentifier
+RangeList   ::=  RangePiece ("," RangePiece)*
+RangePiece  ::=  TokInteger
+                | TokInteger "..." TokInteger
+                | TokInteger "-" TokInteger
+                | TokInteger TokInteger
+
+
+

Warning

+

The peculiar last form of RangePiece is due to the fact that the +“-” is included in the TokInteger, hence 1-5 gets lexed as +two consecutive tokens, with values 1 and -5, instead of “1”, “-”, +and “5”. The use of hyphen as the range punctuation is deprecated.

+
+
+

1.5.1 Simple values

+

The SimpleValue has a number of forms.

+
+SimpleValue ::=  TokInteger | TokString+ | TokCode
+
+

A value can be an integer literal, a string literal, or a code literal. +Multiple adjacent string literals are concatenated as in C/C++; the simple +value is the concatenation of the strings. Code literals become strings and +are then indistinguishable from them.

+
+SimpleValue2 ::=  "true" | "false"
+
+

The true and false literals are essentially syntactic sugar for the +integer values 1 and 0. They improve the readability of TableGen files when +boolean values are used in field initializations, bit sequences, if +statements, etc. When parsed, these literals are converted to integers.

+
+

Note

+

Although true and false are literal names for 1 and 0, we +recommend as a stylistic rule that you use them for boolean +values only.

+
+
+SimpleValue3 ::=  "?"
+
+

A question mark represents an uninitialized value.

+
+SimpleValue4 ::=  "{" [ValueList] "}"
+ValueList    ::=  ValueListNE
+ValueListNE  ::=  Value ("," Value)*
+
+

This value represents a sequence of bits, which can be used to initialize a +bits<n> field (note the braces). When doing so, the values +must represent a total of n bits.

+
+SimpleValue5 ::=  "[" ValueList "]" ["<" Type ">"]
+
+

This value is a list initializer (note the brackets). The values in brackets +are the elements of the list. The optional Type can be used to +indicate a specific element type; otherwise the element type is inferred +from the given values. TableGen can usually infer the type, although +sometimes not when the value is the empty list ([]).

+
+SimpleValue6 ::=  "(" DagArg [DagArgList] ")"
+DagArgList   ::=  DagArg ("," DagArg)*
+DagArg       ::=  Value [":" TokVarName] | TokVarName
+
+

This represents a DAG initializer (note the parentheses). The first +DagArg is called the “operator” of the DAG and must be a record. +See Directed acyclic graphs (DAGs) for more details.

+
+SimpleValue7 ::=  TokIdentifier
+
+

The resulting value is the value of the entity named by the identifier. The +possible identifiers are described here, but the descriptions will make more +sense after reading the remainder of this guide.

+
    +
  • A template argument of a class, such as the use of Bar in:

    +
    class Foo <int Bar> {
    +  int Baz = Bar;
    +}
    +
    +
    +
  • +
  • The implicit template argument NAME in a class or multiclass +definition (see NAME).

  • +
  • A field local to a class, such as the use of Bar in:

    +
    class Foo {
    +  int Bar = 5;
    +  int Baz = Bar;
    +}
    +
    +
    +
  • +
  • The name of a record definition, such as the use of Bar in the +definition of Foo:

    +
    def Bar : SomeClass {
    +  int X = 5;
    +}
    +
    +def Foo {
    +  SomeClass Baz = Bar;
    +}
    +
    +
    +
  • +
  • A field local to a record definition, such as the use of Bar in:

    +
    def Foo {
    +  int Bar = 5;
    +  int Baz = Bar;
    +}
    +
    +
    +

    Fields inherited from the record’s parent classes can be accessed the same way.

    +
  • +
  • A template argument of a multiclass, such as the use of Bar in:

    +
    multiclass Foo <int Bar> {
    +  def : SomeClass<Bar>;
    +}
    +
    +
    +
  • +
  • A variable defined with the defvar or defset statements.

  • +
  • The iteration variable of a foreach, such as the use of i in:

    +
    foreach i = 0...5 in
    +  def Foo#i;
    +
    +
    +
  • +
+
+SimpleValue8 ::=  ClassID "<" ValueListNE ">"
+
+

This form creates a new anonymous record definition (as would be created by an +unnamed def inheriting from the given class with the given template +arguments; see def) and the value is that record. A field of the record can be +obtained using a suffix; see Suffixed Values.

+

Invoking a class in this manner can provide a simple subroutine facility. +See Using Classes as Subroutines for more information.

+
+SimpleValue9 ::=  BangOperator ["<" Type ">"] "(" ValueListNE ")"
+                 | CondOperator "(" CondClause ("," CondClause)* ")"
+CondClause   ::=  Value ":" Value
+
+

The bang operators provide functions that are not available with the other +simple values. Except in the case of !cond, a bang operator takes a list +of arguments enclosed in parentheses and performs some function on those +arguments, producing a value for that bang operator. The !cond operator +takes a list of pairs of arguments separated by colons. See Appendix A: +Bang Operators for a description of each bang operator.

+
+
+

1.5.2 Suffixed values

+

The SimpleValue values described above can be specified with +certain suffixes. The purpose of a suffix is to obtain a subvalue of the +primary value. Here are the possible suffixes for some primary value.

+
+
value{17}

The final value is bit 17 of the integer value (note the braces).

+
+
value{8...15}

The final value is bits 8–15 of the integer value. The order of the +bits can be reversed by specifying {15...8}.

+
+
value[4]

The final value is element 4 of the list value (note the brackets). +In other words, the brackets act as a subscripting operator on the list. +This is the case only when a single element is specified.

+
+
value[4...7,17,2...3,4]

The final value is a new list that is a slice of the list value. +The new list contains elements 4, 5, 6, 7, 17, 2, 3, and 4. +Elements may be included multiple times and in any order. This is the result +only when more than one element is specified.

+
+
value.field

The final value is the value of the specified field in the specified +record value.

+
+
+
+
+

1.5.3 The paste operator

+

The paste operator (#) is the only infix operator available in TableGen +expressions. It allows you to concatenate strings or lists, but has a few +unusual features.

+

The paste operator can be used when specifying the record name in a +Def or Defm statement, in which case it must construct a +string. If an operand is an undefined name (TokIdentifier) or the +name of a global Defvar or Defset, it is treated as a +verbatim string of characters. The value of a global name is not used.

+

The paste operator can be used in all other value expressions, in which case +it can construct a string or a list. Rather oddly, but consistent with the +previous case, if the right-hand-side operand is an undefined name or a +global name, it is treated as a verbatim string of characters. The +left-hand-side operand is treated normally.

+

Appendix B: Paste Operator Examples presents examples of the behavior of +the paste operator.

+
+
+
+

1.6 Statements

+

The following statements may appear at the top level of TableGen source +files.

+
+TableGenFile ::=  Statement*
+Statement    ::=  Assert | Class | Def | Defm | Defset | Defvar
+                 | Foreach | If | Let | MultiClass
+
+

The following sections describe each of these top-level statements.

+
+

1.6.1 class — define an abstract record class

+

A class statement defines an abstract record class from which other +classes and records can inherit.

+
+Class           ::=  "class" ClassID [TemplateArgList] RecordBody
+TemplateArgList ::=  "<" TemplateArgDecl ("," TemplateArgDecl)* ">"
+TemplateArgDecl ::=  Type TokIdentifier ["=" Value]
+
+

A class can be parameterized by a list of “template arguments,” whose values +can be used in the class’s record body. These template arguments are +specified each time the class is inherited by another class or record.

+

If a template argument is not assigned a default value with =, it is +uninitialized (has the “value” ?) and must be specified in the template +argument list when the class is inherited (required argument). If an +argument is assigned a default value, then it need not be specified in the +argument list (optional argument). In the declaration, all required template +arguments must precede any optional arguments. The template argument default +values are evaluated from left to right.

+

The RecordBody is defined below. It can include a list of +parent classes from which the current class inherits, along with field +definitions and other statements. When a class C inherits from another +class D, the fields of D are effectively merged into the fields of +C.

+

A given class can only be defined once. A class statement is +considered to define the class if any of the following are true (the +RecordBody elements are described below).

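+
  • The TemplateArgList is present, or
  • The ParentClassList in the RecordBody is present, or
  • The Body in the RecordBody is present and not empty.
+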

You can declare an empty class by specifying an empty TemplateArgList +and an empty RecordBody. This can serve as a restricted form of +forward declaration. Note that records derived from a forward-declared +class will inherit no fields from it, because those records are built when +their declarations are parsed, and thus before the class is finally defined.

+

Every class has an implicit template argument named NAME (uppercase), +which is bound to the name of the Def or Defm inheriting +from the class. If the class is inherited by an anonymous record, the name +is unspecified but globally unique.

+

See Examples: classes and records for examples.

+
+

1.6.1.1 Record Bodies

+

Record bodies appear in both class and record definitions. A record body can +include a parent class list, which specifies the classes from which the +current class or record inherits fields. Such classes are called the +parent classes of the class or record. The record body also +includes the main body of the definition, which contains the specification +of the fields of the class or record.

+
+RecordBody        ::=  ParentClassList Body
+ParentClassList   ::=  [":" ParentClassListNE]
+ParentClassListNE ::=  ClassRef ("," ClassRef)*
+ClassRef          ::=  (ClassID | MultiClassID) ["<" [ValueList] ">"]
+
+

A ParentClassList containing a MultiClassID is valid only +in the class list of a defm statement. In that case, the ID must be the +name of a multiclass.

+
+Body     ::=  ";" | "{" BodyItem* "}"
+BodyItem ::=  (Type | "code") TokIdentifier ["=" Value] ";"
+             | "let" TokIdentifier ["{" RangeList "}"] "=" Value ";"
+             | "defvar" TokIdentifier "=" Value ";"
+             | Assert
+
+

A field definition in the body specifies a field to be included in the class +or record. If no initial value is specified, then the field’s value is +uninitialized. The type must be specified; TableGen will not infer it from +the value. The keyword code may be used to emphasize that the field +has a string value that is code.

+

The let form is used to reset a field to a new value. This can be done +for fields defined directly in the body or fields inherited from parent +classes. A RangeList can be specified to reset certain bits in a +bits<n> field.

+

The defvar form defines a variable whose value can be used in other +value expressions within the body. The variable is not a field: it does not +become a field of the class or record being defined. Variables are provided +to hold temporary values while processing the body. See Defvar in a Record +Body for more details.

+

When class C2 inherits from class C1, it acquires all the field +definitions of C1. As those definitions are merged into class C2, any +template arguments passed to C1 by C2 are substituted into the +definitions. In other words, the abstract record fields defined by C1 are +expanded with the template arguments before being merged into C2.

+
+
+
+

1.6.2 def — define a concrete record

+

A def statement defines a new concrete record.

+
+Def       ::=  "def" [NameValue] RecordBody
+NameValue ::=  Value (parsed in a special mode)
+
+

The name value is optional. If specified, it is parsed in a special mode +where undefined (unrecognized) identifiers are interpreted as literal +strings. In particular, global identifiers are considered unrecognized. +These include global variables defined by defvar and defset. A +record name can be the null string.

+

If no name value is given, the record is anonymous. The final name of an +anonymous record is unspecified but globally unique.

+

Special handling occurs if a def appears inside a multiclass +statement. See the multiclass section below for details.

+

A record can inherit from one or more classes by specifying the +ParentClassList clause at the beginning of its record body. All of +the fields in the parent classes are added to the record. If two or more +parent classes provide the same field, the record ends up with the field value +of the last parent class.

+

As a special case, the name of a record can be passed as a template argument +to that record’s parent classes. For example:

+
class A <dag d> {
+  dag the_dag = d;
+}
+
+def rec1 : A<(ops rec1)>;
+
+
+

The DAG (ops rec1) is passed as a template argument to class A. Notice +that the DAG includes rec1, the record being defined.

+

The steps taken to create a new record are somewhat complex. See How +records are built.

+

See Examples: classes and records for examples.

+
+
+

1.6.3 Examples: classes and records

+

Here is a simple TableGen file with one class and two record definitions.

+
class C {
+  bit V = true;
+}
+
+def X : C;
+def Y : C {
+  let V = false;
+  string Greeting = "Hello!";
+}
+
+
+

First, the abstract class C is defined. It has one field named V +that is a bit initialized to true.

+

Next, two records are defined, derived from class C; that is, with C +as their parent class. Thus they both inherit the V field. Record Y +also defines another string field, Greeting, which is initialized to +"Hello!". In addition, Y overrides the inherited V field, +setting it to false.

+

A class is useful for isolating the common features of multiple records in +one place. A class can initialize common fields to default values, but +records inheriting from that class can override the defaults.

+

TableGen supports the definition of parameterized classes as well as +nonparameterized ones. Parameterized classes specify a list of variable +declarations, which may optionally have defaults, that are bound when the +class is specified as a parent class of another class or record.

+
class FPFormat <bits<3> val> {
+  bits<3> Value = val;
+}
+
+def NotFP      : FPFormat<0>;
+def ZeroArgFP  : FPFormat<1>;
+def OneArgFP   : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP   : FPFormat<4>;
+def CompareFP  : FPFormat<5>;
+def CondMovFP  : FPFormat<6>;
+def SpecialFP  : FPFormat<7>;
+
+
+

The purpose of the FPFormat class is to act as a sort of enumerated +type. It provides a single field, Value, which holds a 3-bit number. Its +template argument, val, is used to set the Value field. Each of the +eight records is defined with FPFormat as its parent class. The +enumeration value is passed in angle brackets as the template argument. Each +record will inherit the Value field with the appropriate enumeration +value.

+

Here is a more complex example of classes with template arguments. First, we +define a class similar to the FPFormat class above. It takes a template +argument and uses it to initialize a field named Value. Then we define +four records that inherit the Value field with its four different +integer values.

+
class ModRefVal <bits<2> val> {
+  bits<2> Value = val;
+}
+
+def None   : ModRefVal<0>;
+def Mod    : ModRefVal<1>;
+def Ref    : ModRefVal<2>;
+def ModRef : ModRefVal<3>;
+
+
+

This is somewhat contrived, but let’s say we would like to examine the two +bits of the Value field independently. We can define a class that +accepts a ModRefVal record as a template argument and splits up its +value into two fields, one bit each. Then we can define records that inherit from +ModRefBits and so acquire two fields from it, one for each bit in the +ModRefVal record passed as the template argument.

+
class ModRefBits <ModRefVal mrv> {
+  // Break the value up into its bits, which can provide a nice
+  // interface to the ModRefVal values.
+  bit isMod = mrv.Value{0};
+  bit isRef = mrv.Value{1};
+}
+
+// Example uses.
+def foo   : ModRefBits<Mod>;
+def bar   : ModRefBits<Ref>;
+def snork : ModRefBits<ModRef>;
+
+
+

This illustrates how one class can be defined to reorganize the +fields in another class, thus hiding the internal representation of that +other class.

+

Running llvm-tblgen on the example prints the following definitions:

+
def bar {      // Value
+  bit isMod = 0;
+  bit isRef = 1;
+}
+def foo {      // Value
+  bit isMod = 1;
+  bit isRef = 0;
+}
+def snork {      // Value
+  bit isMod = 1;
+  bit isRef = 1;
+}
+
+
+
+
+

1.6.4 let — override fields in classes or records

+

A let statement collects a set of field values (sometimes called +bindings) and applies them to all the classes and records defined by +statements within the scope of the let.

+
+Let     ::=   "let" LetList "in" "{" Statement* "}"
+            | "let" LetList "in" Statement
+LetList ::=  LetItem ("," LetItem)*
+LetItem ::=  TokIdentifier ["<" RangeList ">"] "=" Value
+
+

The let statement establishes a scope, which is a sequence of statements +in braces or a single statement with no braces. The bindings in the +LetList apply to the statements in that scope.

+

The field names in the LetList must name fields in classes inherited by +the classes and records defined in the statements. The field values are +applied to the classes and records after the records inherit all the fields from +their parent classes. So the let acts to override inherited field +values. A let cannot override the value of a template argument.

+

Top-level let statements are often useful when a few fields need to be +overridden in several records. Here are two examples. Note that let +statements can be nested.

+
let isTerminator = true, isReturn = true, isBarrier = true, hasCtrlDep = true in
+  def RET : I<0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)]>;
+
+let isCall = true in
+  // All calls clobber the non-callee saved registers...
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2,
+              XMM3, XMM4, XMM5, XMM6, XMM7, EFLAGS] in {
+    def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst, variable_ops),
+                           "call\t${dst:call}", []>;
+    def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+                        "call\t{*}$dst", [(X86call GR32:$dst)]>;
+    def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+                        "call\t{*}$dst", []>;
+  }
+
+
+

Note that a top-level let will not override fields defined in the classes or records +themselves.

+
+
+

1.6.5 multiclass — define multiple records

+

While classes with template arguments are a good way to factor out commonality +between multiple records, multiclasses allow a convenient method for +defining many records at once. For example, consider a 3-address +instruction architecture whose instructions come in two formats: reg = reg +op reg and reg = reg op imm (e.g., SPARC). We would like to specify in +one place that these two common formats exist, then in a separate place +specify what all the operations are. The multiclass and defm +statements accomplish this goal. You can think of a multiclass as a macro or +template that expands into multiple records.

+
+MultiClass           ::=  "multiclass" TokIdentifier [TemplateArgList]
+                          [":" ParentMultiClassList]
+                          "{" MultiClassStatement+ "}"
+ParentMultiClassList ::=  MultiClassID ("," MultiClassID)*
+MultiClassID         ::=  TokIdentifier
+MultiClassStatement  ::=  Assert | Def | Defm | Defvar | Foreach | If | Let
+
+

As with regular classes, the multiclass has a name and can accept template +arguments. A multiclass can inherit from other multiclasses, which causes +the other multiclasses to be expanded and contribute to the record +definitions in the inheriting multiclass. The body of the multiclass +contains a series of statements that define records, using Def and +Defm. In addition, Defvar, Foreach, and +Let statements can be used to factor out even more common elements. +The If and Assert statements can also be used.

+

Also as with regular classes, the multiclass has the implicit template +argument NAME (see NAME). When a named (non-anonymous) record is +defined in a multiclass and the record’s name does not include a use of the +template argument NAME, such a use is automatically prepended +to the name. That is, the following are equivalent inside a multiclass:

+
def Foo ...
+def NAME # Foo ...
+
+
+

The records defined in a multiclass are created when the multiclass is +“instantiated” or “invoked” by a defm statement outside the multiclass +definition. Each def statement in the multiclass produces a record. As +with top-level def statements, these definitions can inherit from +multiple parent classes.

+

See Examples: multiclasses and defms for examples.

+
+
+

1.6.6 defm — invoke multiclasses to define multiple records

+

Once multiclasses have been defined, you use the defm statement to +“invoke” them and process the multiple record definitions in those +multiclasses. Those record definitions are specified by def +statements in the multiclasses, and indirectly by defm statements.

+
+Defm ::=  "defm" [NameValue] ParentClassList ";"
+
+

The optional NameValue is formed in the same way as the name of a +def. The ParentClassList is a colon followed by a list of at +least one multiclass and any number of regular classes. The multiclasses +must precede the regular classes. Note that the defm does not have a +body.

+

This statement instantiates all the records defined in all the specified +multiclasses, either directly by def statements or indirectly by +defm statements. These records also receive the fields defined in any +regular classes included in the parent class list. This is useful for adding +a common set of fields to all the records created by the defm.

+

The name is parsed in the same special mode used by def. If the name is +not included, an unspecified but globally unique name is provided. That is, +the following examples end up with different names:

+
defm    : SomeMultiClass<...>;   // A globally unique name.
+defm "" : SomeMultiClass<...>;   // An empty name.
+
+
+

The defm statement can be used in a multiclass body. When this occurs, +the second variant is equivalent to:

+
defm NAME : SomeMultiClass<...>;
+
+
+

More generally, when defm occurs in a multiclass and its name does not +include a use of the implicit template argument NAME, then NAME will +be prepended automatically. That is, the following are equivalent inside a +multiclass:

+
defm Foo        : SomeMultiClass<...>;
+defm NAME # Foo : SomeMultiClass<...>;
+
+
+

See Examples: multiclasses and defms for examples.

+
+
+

1.6.7 Examples: multiclasses and defms

+

Here is a simple example using multiclass and defm. Consider a +3-address instruction architecture whose instructions come in two formats: +reg = reg op reg and reg = reg op imm (immediate). The SPARC is an +example of such an architecture.

+
def ops;
+def GPR;
+def Imm;
+class inst <int opc, string asmstr, dag operandlist>;
+
+multiclass ri_inst <int opc, string asmstr> {
+  def _rr : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+                   (ops GPR:$dst, GPR:$src1, GPR:$src2)>;
+  def _ri : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+                   (ops GPR:$dst, GPR:$src1, Imm:$src2)>;
+}
+
+// Define records for each instruction in the RR and RI formats.
+defm ADD : ri_inst<0b111, "add">;
+defm SUB : ri_inst<0b101, "sub">;
+defm MUL : ri_inst<0b100, "mul">;
+
+
+

Each use of the ri_inst multiclass defines two records, one with the +_rr suffix and one with _ri. Recall that the name of the defm +that uses a multiclass is prepended to the names of the records defined in +that multiclass. So the resulting definitions are named:

+
ADD_rr, ADD_ri
+SUB_rr, SUB_ri
+MUL_rr, MUL_ri
+
+
+

Without the multiclass feature, the instructions would have to be +defined as follows.

+
def ops;
+def GPR;
+def Imm;
+class inst <int opc, string asmstr, dag operandlist>;
+
+class rrinst <int opc, string asmstr>
+  : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+           (ops GPR:$dst, GPR:$src1, GPR:$src2)>;
+
+class riinst <int opc, string asmstr>
+  : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+           (ops GPR:$dst, GPR:$src1, Imm:$src2)>;
+
+// Define records for each instruction in the RR and RI formats.
+def ADD_rr : rrinst<0b111, "add">;
+def ADD_ri : riinst<0b111, "add">;
+def SUB_rr : rrinst<0b101, "sub">;
+def SUB_ri : riinst<0b101, "sub">;
+def MUL_rr : rrinst<0b100, "mul">;
+def MUL_ri : riinst<0b100, "mul">;
+
+
+

A defm can be used in a multiclass to “invoke” other multiclasses and +create the records defined in those multiclasses in addition to the records +defined in the current multiclass. In the following example, the basic_s +and basic_p multiclasses contain defm statements that refer to the +basic_r multiclass. The basic_r multiclass contains only def +statements.

+
class Instruction <bits<4> opc, string Name> {
+  bits<4> opcode = opc;
+  string name = Name;
+}
+
+multiclass basic_r <bits<4> opc> {
+  def rr : Instruction<opc, "rr">;
+  def rm : Instruction<opc, "rm">;
+}
+
+multiclass basic_s <bits<4> opc> {
+  defm SS : basic_r<opc>;
+  defm SD : basic_r<opc>;
+  def X : Instruction<opc, "x">;
+}
+
+multiclass basic_p <bits<4> opc> {
+  defm PS : basic_r<opc>;
+  defm PD : basic_r<opc>;
+  def Y : Instruction<opc, "y">;
+}
+
+defm ADD : basic_s<0xf>, basic_p<0xf>;
+
+
+

The final defm creates the following records, five from the basic_s +multiclass and five from the basic_p multiclass:

+
ADDSSrr, ADDSSrm
+ADDSDrr, ADDSDrm
+ADDX
+ADDPSrr, ADDPSrm
+ADDPDrr, ADDPDrm
+ADDY
+
+
+

A defm statement, both at top level and in a multiclass, can inherit +from regular classes in addition to multiclasses. The rule is that the +regular classes must be listed after the multiclasses, and there must be at least +one multiclass.

+
class XD {
+  bits<4> Prefix = 11;
+}
+class XS {
+  bits<4> Prefix = 12;
+}
+class I <bits<4> op> {
+  bits<4> opcode = op;
+}
+
+multiclass R {
+  def rr : I<4>;
+  def rm : I<2>;
+}
+
+multiclass Y {
+  defm SS : R, XD;    // First multiclass R, then regular class XD.
+  defm SD : R, XS;
+}
+
+defm Instr : Y;
+
+
+

This example will create four records, shown here in alphabetical order with +their fields.

+
def InstrSDrm {
+  bits<4> opcode = { 0, 0, 1, 0 };
+  bits<4> Prefix = { 1, 1, 0, 0 };
+}
+
+def InstrSDrr {
+  bits<4> opcode = { 0, 1, 0, 0 };
+  bits<4> Prefix = { 1, 1, 0, 0 };
+}
+
+def InstrSSrm {
+  bits<4> opcode = { 0, 0, 1, 0 };
+  bits<4> Prefix = { 1, 0, 1, 1 };
+}
+
+def InstrSSrr {
+  bits<4> opcode = { 0, 1, 0, 0 };
+  bits<4> Prefix = { 1, 0, 1, 1 };
+}
+
+
+

It’s also possible to use let statements inside multiclasses, providing +another way to factor out commonality from the records, especially when +using several levels of multiclass instantiations.

+
multiclass basic_r <bits<4> opc> {
+  let Predicates = [HasSSE2] in {
+    def rr : Instruction<opc, "rr">;
+    def rm : Instruction<opc, "rm">;
+  }
+  let Predicates = [HasSSE3] in
+    def rx : Instruction<opc, "rx">;
+}
+
+multiclass basic_ss <bits<4> opc> {
+  let IsDouble = false in
+    defm SS : basic_r<opc>;
+
+  let IsDouble = true in
+    defm SD : basic_r<opc>;
+}
+
+defm ADD : basic_ss<0xf>;
+
+
+
+
+

1.6.8 defset — create a definition set

+

The defset statement is used to collect a set of records into a global +list of records.

+
+Defset ::=  "defset" Type TokIdentifier "=" "{" Statement* "}"
+
+

All records defined inside the braces via def and defm are defined +as usual, and they are also collected in a global list of the given name +(TokIdentifier).

+

The specified type must be list<class>, where class is some +record class. The defset statement establishes a scope for its +statements. It is an error to define a record in the scope of the +defset that is not of type class.

+

The defset statement can be nested. The inner defset adds the +records to its own set, and all those records are also added to the outer +set.

+

Anonymous records created inside initialization expressions using the +ClassID<...> syntax are not collected in the set.

+
+
+

1.6.9 defvar — define a variable

+

A defvar statement defines a global variable. Its value can be used +throughout the statements that follow the definition.

+
+Defvar ::=  "defvar" TokIdentifier "=" Value ";"
+
+

The identifier on the left of the = is defined to be a global variable +whose value is given by the value expression on the right of the =. The +type of the variable is automatically inferred.

+

Once a variable has been defined, it cannot be set to another value.

+

Variables defined in a top-level foreach go out of scope at the end of +each loop iteration, so their value in one iteration is not available in +the next iteration. The following defvar will not work:

+
defvar i = !add(i, 1);
+
+
+

Variables can also be defined with defvar in a record body. See +Defvar in a Record Body for more details.

+
+
+

1.6.10 foreach — iterate over a sequence of statements

+

The foreach statement iterates over a series of statements, varying a +variable over a sequence of values.

+
+Foreach         ::=  "foreach" ForeachIterator "in" "{" Statement* "}"
+                    | "foreach" ForeachIterator "in" Statement
+ForeachIterator ::=  TokIdentifier "=" ("{" RangeList "}" | RangePiece | Value)
+
+

The body of the foreach is a series of statements in braces or a +single statement with no braces. The statements are re-evaluated once for +each value in the range list, range piece, or single value. On each +iteration, the TokIdentifier variable is set to the value and can +be used in the statements.

+

The statement list establishes an inner scope. Variables local to a +foreach go out of scope at the end of each loop iteration, so their +values do not carry over from one iteration to the next. Foreach loops may +be nested.

+

The foreach statement can also be used in a record Body.

+
foreach i = [0, 1, 2, 3] in {
+  def R#i : Register<...>;
+  def F#i : Register<...>;
+}
+
+
+

This loop defines records named R0, R1, R2, and R3, along +with F0, F1, F2, and F3.

+
+
+

1.6.11 if — select statements based on a test

+

The if statement allows one of two statement groups to be selected based +on the value of an expression.

+
+If     ::=  "if" Value "then" IfBody
+           | "if" Value "then" IfBody "else" IfBody
+IfBody ::=  "{" Statement* "}" | Statement
+
+

The value expression is evaluated. If it evaluates to true (in the same +sense used by the bang operators), then the statements following the +then reserved word are processed. Otherwise, if there is an else +reserved word, the statements following the else are processed. If the +value is false and there is no else arm, no statements are processed.

+

Because the braces around the then statements are optional, this grammar rule +has the usual ambiguity with “dangling else” clauses, and it is resolved in +the usual way: in a case like if v1 then if v2 then {...} else {...}, the +else associates with the inner if rather than the outer one.

+

The IfBody of the then and else arms of the if establish an +inner scope. Any defvar variables defined in the bodies go out of scope +when the bodies are finished (see Defvar in a Record Body for more details).

+

The if statement can also be used in a record Body.

+
+
+

1.6.12 assert — check that a condition is true

+

The assert statement checks a boolean condition to be sure that it is true +and prints an error message if it is not.

+
+Assert ::=  "assert" condition "," message ";"
+
+

If the boolean condition is true, the statement does nothing. If the +condition is false, it prints a nonfatal error message. The message, which +can be an arbitrary string expression, is included in the error message as a +note. The exact behavior of the assert statement depends on its +placement.

+
    +
  • At top level, the assertion is checked immediately.

  • +
  • In a record definition, the statement is saved and all assertions are +checked after the record is completely built.

  • +
  • In a class definition, the assertions are saved and inherited by all +the subclasses and records that inherit from the class. The assertions are +then checked when the records are completely built.

  • +
  • In a multiclass definition, the assertions are saved with the other +components of the multiclass and then checked each time the multiclass +is instantiated with defm.

  • +
+

Using assertions in TableGen files can simplify record checking in TableGen +backends. Here is an example of an assert in two class definitions.

+
class PersonName<string name> {
+  assert !le(!size(name), 32), "person name is too long: " # name;
+  string Name = name;
+}
+
+class Person<string name, int age> : PersonName<name> {
+  assert !and(!ge(age, 1), !le(age, 120)), "person age is invalid: " # age;
+  int Age = age;
+}
+
+def Rec20 : Person<"Donald Knuth", 60> {
+  ...
+}
+
+
+
+
+
+

1.7 Additional Details

+
+

1.7.1 Directed acyclic graphs (DAGs)

+

A directed acyclic graph can be represented directly in TableGen using the +dag datatype. A DAG node consists of an operator and zero or more +arguments (or operands). Each argument can be of any desired type. By using +another DAG node as an argument, an arbitrary graph of DAG nodes can be +built.

+

The syntax of a dag instance is:

+
+

( operator argument1, argument2, … )

+
+

The operator must be present and must be a record. There can be zero or more +arguments, separated by commas. The operator and arguments can have three +formats.

+ ++++ + + + + + + + + + + + + + + + + +

Format

Meaning

value

argument value

value:name

argument value and associated name

name

argument name with unset (uninitialized) value

+

The value can be any TableGen value. The name, if present, must be a +TokVarName, which starts with a dollar sign ($). The purpose of +a name is to tag an operator or argument in a DAG with a particular meaning, +or to associate an argument in one DAG with a like-named argument in another +DAG.

+

The following bang operators are useful for working with DAGs: +!con, !dag, !empty, !foreach, !getdagop, !setdagop, !size.

+
+
+

1.7.2 Defvar in a record body

+

In addition to defining global variables, the defvar statement can +be used inside the Body of a class or record definition to define +local variables. The scope of the variable extends from the defvar +statement to the end of the body. It cannot be set to a different value +within its scope. The defvar statement can also be used in the statement +list of a foreach, which establishes a scope.

+

A variable named V in an inner scope shadows (hides) any variables V +in outer scopes. In particular, V in a record body shadows a global +V, and V in a foreach statement list shadows any V in +surrounding record or global scopes.

+

Variables defined in a foreach go out of scope at the end of +each loop iteration, so their value in one iteration is not available in +the next iteration. The following defvar will not work:

+
defvar i = !add(i, 1);
+
+
+
+
+

1.7.3 How records are built

+

The following steps are taken by TableGen when a record is built. Classes are simply +abstract records and so go through the same steps.

+
    +
  1. Build the record name (NameValue) and create an empty record.

  +
  2. Parse the parent classes in the ParentClassList from left to +right, visiting each parent class’s ancestor classes from top to bottom.

  +
+
+
    +
  a. Add the fields from the parent class to the record.

  +
  b. Substitute the template arguments into those fields.

  +
  c. Add the parent class to the record’s list of inherited classes.

  +
+
+
    +
  3. Apply any top-level let bindings to the record. Recall that top-level +bindings only apply to inherited fields.

  +
  4. Parse the body of the record.

  +
+
+
    +
  • Add any fields to the record.

  • +
  • Modify the values of fields according to local let statements.

  • +
  • Define any defvar variables.

  • +
+
+
    +
  5. Make a pass over all the fields to resolve any inter-field references.

  +
  6. Add the record to the master record list.

  +
+

Because references between fields are resolved (step 5) after let bindings are +applied (step 3), the let statement has unusual power. For example:

+
class C <int x> {
+  int Y = x;
+  int Yplus1 = !add(Y, 1);
+  int xplus1 = !add(x, 1);
+}
+
+let Y = 10 in {
+  def rec1 : C<5> {
+  }
+}
+
+def rec2 : C<5> {
+  let Y = 10;
+}
+
+
+

In both cases, one where a top-level let is used to bind Y and one +where a local let does the same thing, the results are:

+
def rec1 {      // C
+  int Y = 10;
+  int Yplus1 = 11;
+  int xplus1 = 6;
+}
+def rec2 {      // C
+  int Y = 10;
+  int Yplus1 = 11;
+  int xplus1 = 6;
+}
+
+
+

Yplus1 is 11 because the let Y is performed before the !add(Y, +1) is resolved. Use this power wisely.

+
+
+
+

1.8 Using Classes as Subroutines

+

As described in Simple values, a class can be invoked in an expression +and passed template arguments. This causes TableGen to create a new anonymous +record inheriting from that class. As usual, the record receives all the +fields defined in the class.

+

This feature can be employed as a simple subroutine facility. The class can +use the template arguments to define various variables and fields, which end +up in the anonymous record. Those fields can then be retrieved in the +expression invoking the class as follows. Assume that the field ret +contains the final value of the subroutine.

+
int Result = ... CalcValue<arg>.ret ...;
+
+
+

The CalcValue class is invoked with the template argument arg. It +calculates a value for the ret field, which is then retrieved at the +“point of call” in the initialization for the Result field. The anonymous +record created in this example serves no other purpose than to carry the +result value.

+

Here is a practical example. The class isValidSize determines whether a +specified number of bytes represents a valid data size. The bit ret is +set appropriately. The field ValidSize obtains its initial value by +invoking isValidSize with the data size and retrieving the ret field +from the resulting anonymous record.

+
class isValidSize<int size> {
+  bit ret = !cond(!eq(size,  1): 1,
+                  !eq(size,  2): 1,
+                  !eq(size,  4): 1,
+                  !eq(size,  8): 1,
+                  !eq(size, 16): 1,
+                  true: 0);
+}
+
+def Data1 {
+  int Size = ...;
+  bit ValidSize = isValidSize<Size>.ret;
+}
+
+
+
+
+

1.9 Preprocessing Facilities

+

The preprocessor embedded in TableGen is intended only for simple +conditional compilation. It supports the following directives, which are +specified somewhat informally.

+
+LineBegin              ::=  beginning of line
+LineEnd                ::=  newline | return | EOF
+WhiteSpace             ::=  space | tab
+CComment               ::=  "/*" ... "*/"
+BCPLComment            ::=  "//" ... LineEnd
+WhiteSpaceOrCComment   ::=  WhiteSpace | CComment
+WhiteSpaceOrAnyComment ::=  WhiteSpace | CComment | BCPLComment
+MacroName              ::=  ualpha (ualpha | "0"..."9")*
+PreDefine              ::=  LineBegin (WhiteSpaceOrCComment)*
+                            "#define" (WhiteSpace)+ MacroName
+                            (WhiteSpaceOrAnyComment)* LineEnd
+PreIfdef               ::=  LineBegin (WhiteSpaceOrCComment)*
+                            ("#ifdef" | "#ifndef") (WhiteSpace)+ MacroName
+                            (WhiteSpaceOrAnyComment)* LineEnd
+PreElse                ::=  LineBegin (WhiteSpaceOrCComment)*
+                            "#else" (WhiteSpaceOrAnyComment)* LineEnd
+PreEndif               ::=  LineBegin (WhiteSpaceOrCComment)*
+                            "#endif" (WhiteSpaceOrAnyComment)* LineEnd
+
+

A MacroName can be defined anywhere in a TableGen file. The name has +no value; it can only be tested to see whether it is defined.

+

A macro test region begins with an #ifdef or #ifndef directive. If +the macro name is defined (#ifdef) or undefined (#ifndef), then the +source code between the directive and the corresponding #else or +#endif is processed. If the test fails but there is an #else +clause, the source code between the #else and the #endif is +processed. If the test fails and there is no #else clause, then no +source code in the test region is processed.

+

Test regions may be nested, but they must be properly nested. A region +started in a file must end in that file; that is, it must have its +#endif in the same file.

+

A MacroName may be defined externally using the -D option on the +*-tblgen command line:

+
llvm-tblgen self-reference.td -Dmacro1 -Dmacro3
+
+
+
+
+

1.10 Appendix A: Bang Operators

+

Bang operators act as functions in value expressions. A bang operator takes +one or more arguments, operates on them, and produces a result. If the +operator produces a boolean result, the result value will be 1 for true or 0 +for false. When an operator tests a boolean argument, it interprets 0 as false +and non-0 as true.

+
+

Warning

+

The !getop and !setop bang operators are deprecated in favor of +!getdagop and !setdagop.

+
+
+
!add(a, b, ...)

This operator adds a, b, etc., and produces the sum.

+
+
!and(a, b, ...)

This operator does a bitwise AND on a, b, etc., and produces the +result. A logical AND can be performed if all the arguments are either +0 or 1.

+
+
!cast<type>(a)

This operator performs a cast on a and produces the result. +If a is not a string, then a straightforward cast is performed, say +between an int and a bit, or between record types. This allows +casting a record to a class. If a record is cast to string, the +record’s name is produced.

+

If a is a string, then it is treated as a record name and looked up in +the list of all defined records. The resulting record is expected to be of +the specified type.

+

For example, if !cast<type>(name) +appears in a multiclass definition, or in a +class instantiated inside a multiclass definition, and the name does not +reference any template arguments of the multiclass, then a record by +that name must have been instantiated earlier +in the source file. If name does reference +a template argument, then the lookup is delayed until defm statements +instantiating the multiclass (or later, if the defm occurs in another +multiclass and template arguments of the inner multiclass that are +referenced by name are substituted by values that themselves contain +references to template arguments of the outer multiclass).

+

If the type of a does not match type, TableGen raises an error.

+
+
!con(a, b, ...)

This operator concatenates the DAG nodes a, b, etc. Their operators +must be equal.

+

!con((op a1:$name1, a2:$name2), (op b1:$name3))

+

results in the DAG node (op a1:$name1, a2:$name2, b1:$name3).

+
+
!cond(cond1 : val1, cond2 : val2, ..., condn : valn)

This operator tests cond1 and returns val1 if the result is true. +If false, the operator tests cond2 and returns val2 if the result is +true. And so forth. An error is reported if no conditions are true.

+

This example produces the sign word for an integer:

+
!cond(!lt(x, 0) : "negative", !eq(x, 0) : "zero", true : "positive")
+
+
+
+
!dag(op, arguments, names)

This operator creates a DAG node with the given operator and +arguments. The arguments and names arguments must be lists +of equal length or uninitialized (?). The names argument +must be of type list<string>.

+

Due to limitations of the type system, arguments must be a list of items +of a common type. In practice, this means that they should either have the +same type or be records with a common parent class. Mixing dag and +non-dag items is not possible. However, ? can be used.

+

Example: !dag(op, [a1, a2, ?], ["name1", "name2", "name3"]) results in +(op a1-value:$name1, a2-value:$name2, ?:$name3).

+
+
!empty(a)

This operator produces 1 if the string, list, or DAG a is empty; 0 otherwise. +A dag is empty if it has no arguments; the operator does not count.

+
+
!eq(a, b)

This operator produces 1 if a is equal to b; 0 otherwise. +The arguments must be bit, bits, int, string, or +record values. Use !cast<string> to compare other types of objects.

+
+
+

!filter(var, list, predicate)

+
+

This operator creates a new list by filtering the elements in +list. To perform the filtering, TableGen binds the variable var to each +element and then evaluates the predicate expression, which presumably +refers to var. The predicate must +produce a boolean value (bit, bits, or int). The value is +interpreted as with !if: +if the value is 0, the element is not included in the new list. If the value +is anything else, the element is included.

+
+
+
!find(string1, string2[, start])

This operator searches for string2 in string1 and produces its +position. The starting position of the search may be specified by start, +which can range between 0 and the length of string1; the default is 0. +If the string is not found, the result is -1.

+
+
!foldl(init, list, acc, var, expr)

This operator performs a left-fold over the items in list. The +variable acc acts as the accumulator and is initialized to init. +The variable var is bound to each element in the list. The +expression is evaluated for each element and presumably uses acc and +var to calculate the accumulated value, which !foldl stores back in +acc. The type of acc is the same as init; the type of var is the +same as the elements of list; expr must have the same type as init.

+

The following example computes the total of the Number field in the +list of records in RecList:

+
int x = !foldl(0, RecList, total, rec, !add(total, rec.Number));
+
+
+

If your goal is to filter the list and produce a new list that includes only +some of the elements, see !filter.

+
+
!foreach(var, sequence, expr)

This operator creates a new list/dag in which each element is a +function of the corresponding element in the sequence list/dag. +To perform the function, TableGen binds the variable var to an element +and then evaluates the expression. The expression presumably refers +to the variable var and calculates the result value.

+

If you simply want to create a list of a certain length containing +the same value repeated multiple times, see !listsplat.

+
+
!ge(a, b)

This operator produces 1 if a is greater than or equal to b; 0 otherwise. +The arguments must be bit, bits, int, or string values.

+
+
!getdagop(dag) –or– !getdagop<type>(dag)

This operator produces the operator of the given dag node. +Example: !getdagop((foo 1, 2)) results in foo. Recall that +DAG operators are always records.

+

The result of !getdagop can be used directly in a context where +any record class at all is acceptable (typically placing it into +another dag value). But in other contexts, it must be explicitly +cast to a particular class. The <type> syntax is +provided to make this easy.

+

For example, to assign the result to a value of type BaseClass, you +could write either of these:

+
BaseClass b = !getdagop<BaseClass>(someDag);
+BaseClass b = !cast<BaseClass>(!getdagop(someDag));
+
+
+

But to create a new DAG node that reuses the operator from another, no +cast is necessary:

+
dag d = !dag(!getdagop(someDag), args, names);
+
+
+
+
!gt(a, b)

This operator produces 1 if a is greater than b; 0 otherwise. +The arguments must be bit, bits, int, or string values.

+
+
!head(a)

This operator produces the zeroth element of the list a. +(See also !tail.)

+
+
!if(test, then, else)

This operator evaluates the test, which must produce a bit or +int. If the result is not 0, the then expression is produced; otherwise +the else expression is produced.

+
+
!interleave(list, delim)

This operator concatenates the items in the list, interleaving the +delim string between each pair, and produces the resulting string. +The list can be a list of string, int, bits, or bit. An empty list +results in an empty string. The delimiter can be the empty string.

+
+
!isa<type>(a)

This operator produces 1 if the type of a is a subtype of the given type; 0 +otherwise.

+
+
!le(a, b)

This operator produces 1 if a is less than or equal to b; 0 otherwise. +The arguments must be bit, bits, int, or string values.

+
+
!listconcat(list1, list2, ...)

This operator concatenates the list arguments list1, list2, etc., and +produces the resulting list. The lists must have the same element type.

+
+
!listsplat(value, count)

This operator produces a list of length count whose elements are all +equal to the value. For example, !listsplat(42, 3) results in +[42, 42, 42].

+
+
!lt(a, b)

This operator produces 1 if a is less than b; 0 otherwise. +The arguments must be bit, bits, int, or string values.

+
+
!mul(a, b, ...)

This operator multiplies a, b, etc., and produces the product.

+
+
!ne(a, b)

This operator produces 1 if a is not equal to b; 0 otherwise. +The arguments must be bit, bits, int, string, +or record values. Use !cast<string> to compare other types of objects.

+
+
!not(a)

This operator performs a logical NOT on a, which must be +an integer. The argument 0 results in 1 (true); any other +argument results in 0 (false).

+
+
!or(a, b, ...)

This operator does a bitwise OR on a, b, etc., and produces the +result. A logical OR can be performed if all the arguments are either +0 or 1.

+
+
!setdagop(dag, op)

This operator produces a DAG node with the same arguments as dag, but with its +operator replaced with op.

+

Example: !setdagop((foo 1, 2), bar) results in (bar 1, 2).

+
+
!shl(a, count)

This operator shifts a left logically by count bits and produces the resulting +value. The operation is performed on a 64-bit integer; the result +is undefined for shift counts outside 0…63.

+
+
!size(a)

This operator produces the size of the string, list, or dag a. +The size of a DAG is the number of arguments; the operator does not count.

+
+
!sra(a, count)

This operator shifts a right arithmetically by count bits and produces the resulting +value. The operation is performed on a 64-bit integer; the result +is undefined for shift counts outside 0…63.

+
+
!srl(a, count)

This operator shifts a right logically by count bits and produces the resulting +value. The operation is performed on a 64-bit integer; the result +is undefined for shift counts outside 0…63.

+
+
!strconcat(str1, str2, ...)

This operator concatenates the string arguments str1, str2, etc., and +produces the resulting string.

+
+
!sub(a, b)

This operator subtracts b from a and produces the arithmetic difference.

+
+
!subst(target, repl, value)

This operator replaces all occurrences of the target in the value with +the repl and produces the resulting value. The value can +be a string, in which case substring substitution is performed.

+

The value can be a record name, in which case the operator produces the repl +record if the target record name equals the value record name; otherwise it +produces the value.

+
+
!substr(string, start[, length])

This operator extracts a substring of the given string. The starting +position of the substring is specified by start, which can range +between 0 and the length of the string. The length of the substring +is specified by length; if not specified, the rest of the string is +extracted. The start and length arguments must be integers.

+
+
!tail(a)

This operator produces a new list with all the elements +of the list a except for the zeroth one. (See also !head.)

+
+
!xor(a, b, ...)

This operator does a bitwise EXCLUSIVE OR on a, b, etc., and produces +the result. A logical XOR can be performed if all the arguments are either +0 or 1.

+
+
+
+
+

1.11 Appendix B: Paste Operator Examples

+

Here is an example illustrating the use of the paste operator in record names.

+
defvar suffix = "_suffstring";
+defvar some_ints = [0, 1, 2, 3];
+
+def name # suffix {
+}
+
+foreach i = [1, 2] in {
+def rec # i {
+}
+}
+
+
+

The first def does not use the value of the suffix variable. The +second def does use the value of the i iterator variable, because it is not a +global name. The following records are produced.

+
def namesuffix {
+}
+def rec1 {
+}
+def rec2 {
+}
+
+
+

Here is a second example illustrating the paste operator in field value expressions.

+
def test {
+  string strings = suffix # suffix;
+  list<int> integers = some_ints # [4, 5, 6];
+}
+
+
+

The strings field expression uses suffix on both sides of the paste +operator. It is evaluated normally on the left hand side, but taken verbatim +on the right hand side. The integers field expression uses the value of +the some_ints variable and a literal list. The following record is +produced.

+
def test {
+  string strings = "_suffstringsuffix";
+  list<int> integers = [0, 1, 2, 3, 4, 5, 6];
+}
+
+
+
+
+

1.12 Appendix C: Sample Record

+

One target machine supported by LLVM is the Intel x86. The following output +from TableGen shows the record that is created to represent the 32-bit +register-to-register ADD instruction.

+
def ADD32rr { // InstructionEncoding Instruction X86Inst I ITy Sched BinOpRR BinOpRR_RF
+  int Size = 0;
+  string DecoderNamespace = "";
+  list<Predicate> Predicates = [];
+  string DecoderMethod = "";
+  bit hasCompleteDecoder = 1;
+  string Namespace = "X86";
+  dag OutOperandList = (outs GR32:$dst);
+  dag InOperandList = (ins GR32:$src1, GR32:$src2);
+  string AsmString = "add{l}  {$src2, $src1|$src1, $src2}";
+  EncodingByHwMode EncodingInfos = ?;
+  list<dag> Pattern = [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1, GR32:$src2))];
+  list<Register> Uses = [];
+  list<Register> Defs = [EFLAGS];
+  int CodeSize = 3;
+  int AddedComplexity = 0;
+  bit isPreISelOpcode = 0;
+  bit isReturn = 0;
+  bit isBranch = 0;
+  bit isEHScopeReturn = 0;
+  bit isIndirectBranch = 0;
+  bit isCompare = 0;
+  bit isMoveImm = 0;
+  bit isMoveReg = 0;
+  bit isBitcast = 0;
+  bit isSelect = 0;
+  bit isBarrier = 0;
+  bit isCall = 0;
+  bit isAdd = 0;
+  bit isTrap = 0;
+  bit canFoldAsLoad = 0;
+  bit mayLoad = ?;
+  bit mayStore = ?;
+  bit mayRaiseFPException = 0;
+  bit isConvertibleToThreeAddress = 1;
+  bit isCommutable = 1;
+  bit isTerminator = 0;
+  bit isReMaterializable = 0;
+  bit isPredicable = 0;
+  bit isUnpredicable = 0;
+  bit hasDelaySlot = 0;
+  bit usesCustomInserter = 0;
+  bit hasPostISelHook = 0;
+  bit hasCtrlDep = 0;
+  bit isNotDuplicable = 0;
+  bit isConvergent = 0;
+  bit isAuthenticated = 0;
+  bit isAsCheapAsAMove = 0;
+  bit hasExtraSrcRegAllocReq = 0;
+  bit hasExtraDefRegAllocReq = 0;
+  bit isRegSequence = 0;
+  bit isPseudo = 0;
+  bit isExtractSubreg = 0;
+  bit isInsertSubreg = 0;
+  bit variadicOpsAreDefs = 0;
+  bit hasSideEffects = ?;
+  bit isCodeGenOnly = 0;
+  bit isAsmParserOnly = 0;
+  bit hasNoSchedulingInfo = 0;
+  InstrItinClass Itinerary = NoItinerary;
+  list<SchedReadWrite> SchedRW = [WriteALU];
+  string Constraints = "$src1 = $dst";
+  string DisableEncoding = "";
+  string PostEncoderMethod = "";
+  bits<64> TSFlags = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0 };
+  string AsmMatchConverter = "";
+  string TwoOperandAliasConstraint = "";
+  string AsmVariantName = "";
+  bit UseNamedOperandTable = 0;
+  bit FastISelShouldIgnore = 0;
+  bits<8> Opcode = { 0, 0, 0, 0, 0, 0, 0, 1 };
+  Format Form = MRMDestReg;
+  bits<7> FormBits = { 0, 1, 0, 1, 0, 0, 0 };
+  ImmType ImmT = NoImm;
+  bit ForceDisassemble = 0;
+  OperandSize OpSize = OpSize32;
+  bits<2> OpSizeBits = { 1, 0 };
+  AddressSize AdSize = AdSizeX;
+  bits<2> AdSizeBits = { 0, 0 };
+  Prefix OpPrefix = NoPrfx;
+  bits<3> OpPrefixBits = { 0, 0, 0 };
+  Map OpMap = OB;
+  bits<3> OpMapBits = { 0, 0, 0 };
+  bit hasREX_WPrefix = 0;
+  FPFormat FPForm = NotFP;
+  bit hasLockPrefix = 0;
+  Domain ExeDomain = GenericDomain;
+  bit hasREPPrefix = 0;
+  Encoding OpEnc = EncNormal;
+  bits<2> OpEncBits = { 0, 0 };
+  bit HasVEX_W = 0;
+  bit IgnoresVEX_W = 0;
+  bit EVEX_W1_VEX_W0 = 0;
+  bit hasVEX_4V = 0;
+  bit hasVEX_L = 0;
+  bit ignoresVEX_L = 0;
+  bit hasEVEX_K = 0;
+  bit hasEVEX_Z = 0;
+  bit hasEVEX_L2 = 0;
+  bit hasEVEX_B = 0;
+  bits<3> CD8_Form = { 0, 0, 0 };
+  int CD8_EltSize = 0;
+  bit hasEVEX_RC = 0;
+  bit hasNoTrackPrefix = 0;
+  bits<7> VectSize = { 0, 0, 1, 0, 0, 0, 0 };
+  bits<7> CD8_Scale = { 0, 0, 0, 0, 0, 0, 0 };
+  string FoldGenRegForm = ?;
+  string EVEX2VEXOverride = ?;
+  bit isMemoryFoldable = 1;
+  bit notEVEX2VEXConvertible = 0;
+}
+
+
+

On the first line of the record, you can see that the ADD32rr record +inherited from eight classes. Although the inheritance hierarchy is complex, +using parent classes is much simpler than specifying the 109 individual +fields for each instruction.

+

Here is the code fragment used to define ADD32rr and multiple other +ADD instructions:

+
defm ADD : ArithBinOp_RF<0x00, 0x02, 0x04, "add", MRM0r, MRM0m,
+                         X86add_flag, add, 1, 1, 1>;
+
+
+

The defm statement tells TableGen that ArithBinOp_RF is a +multiclass, which contains multiple concrete record definitions that inherit +from BinOpRR_RF. That class, in turn, inherits from BinOpRR, which +inherits from ITy and Sched, and so forth. The fields are inherited +from all the parent classes; for example, isIndirectBranch is inherited +from the Instruction class.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGenFundamentals.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGenFundamentals.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TableGenFundamentals.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TableGenFundamentals.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,148 @@ + + + + + + + + + TableGen Fundamentals — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

TableGen Fundamentals

+
+

Moved

+

The TableGen fundamentals documentation has moved to a directory on its own +and is now available at TableGen Overview. Please, change your links to +that page.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TestingGuide.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TestingGuide.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TestingGuide.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TestingGuide.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,720 @@ + + + + + + + + + LLVM Testing Infrastructure Guide — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Testing Infrastructure Guide

+ +
+
+
+

Overview

+

This document is the reference manual for the LLVM testing +infrastructure. It documents the structure of the LLVM testing +infrastructure, the tools needed to use it, and how to add and run +tests.

+
+
+

Requirements

+

In order to use the LLVM testing infrastructure, you will need all of the +software required to build LLVM, as well as Python 3.6 or +later.

+
+
+

LLVM Testing Infrastructure Organization

+

The LLVM testing infrastructure contains three major categories of tests: +unit tests, regression tests and whole programs. The unit tests and regression +tests are contained inside the LLVM repository itself under llvm/unittests +and llvm/test respectively and are expected to always pass – they should be +run before every commit.

+

The whole programs tests are referred to as the “LLVM test suite” (or +“test-suite”) and are in the test-suite module in subversion. For +historical reasons, these tests are also referred to as the “nightly +tests” in places, which is less ambiguous than “test-suite” and remains +in use although we run them much more often than nightly.

+
+

Unit tests

+

Unit tests are written using Google Test +and Google Mock +and are located in the llvm/unittests directory. +In general, unit tests are reserved for targeting the support library and other +generic data structures; we prefer relying on regression tests for testing +transformations and analyses on the IR.

+
+
+

Regression tests

+

The regression tests are small pieces of code that test a specific +feature of LLVM or trigger a specific bug in LLVM. The language they are +written in depends on the part of LLVM being tested. These tests are driven by +the Lit testing tool (which is part of LLVM), and +are located in the llvm/test directory.

+

Typically when a bug is found in LLVM, a regression test containing just +enough code to reproduce the problem should be written and placed +somewhere underneath this directory. For example, it can be a small +piece of LLVM IR distilled from an actual application or benchmark.

+
+
+

Testing Analysis

+

An analysis is a pass that infers properties of some part of the IR without +transforming it. Analyses are generally tested using the same infrastructure as the +regression tests, by creating a separate “Printer” pass to consume the analysis +result and print it on the standard output in a textual format suitable for +FileCheck. +See llvm/test/Analysis/BranchProbabilityInfo/loop.ll +for an example of such a test.

+
+
+

test-suite

+

The test suite contains whole programs, which are pieces of code which +can be compiled and linked into a stand-alone program that can be +executed. These programs are generally written in high level languages +such as C or C++.

+

These programs are compiled using a user specified compiler and set of +flags, and then executed to capture the program output and timing +information. The output of these programs is compared to a reference +output to ensure that the program is being compiled correctly.

+

In addition to compiling and executing programs, whole program tests +serve as a way of benchmarking LLVM performance, both in terms of the +efficiency of the programs generated as well as the speed with which +LLVM compiles, optimizes, and generates code.

+

The test-suite is located in the test-suite Subversion module.

+

See the test-suite Guide for details.

+
+
+

Debugging Information tests

+

The test suite contains tests to check the quality of debugging information. +The tests are written in C-based languages or in LLVM assembly language.

+

These tests are compiled and run under a debugger. The debugger output +is checked to validate the debugging information. See README.txt in the +test suite for more information. This test suite is located in the +cross-project-tests/debuginfo-tests directory.

+
+
+
+

Quick start

+

The tests are located in two separate Subversion modules. The unit and +regression tests are in the main “llvm” module under the directories +llvm/unittests and llvm/test (so you get these tests for free with the +main LLVM tree). Use make check-all to run the unit and regression tests +after building LLVM.

+

The test-suite module contains more comprehensive tests including whole C +and C++ programs. See the test-suite Guide for details.

+
+

Unit and Regression tests

+

To run all of the LLVM unit tests use the check-llvm-unit target:

+
% make check-llvm-unit
+
+
+

To run all of the LLVM regression tests use the check-llvm target:

+
% make check-llvm
+
+
+

In order to get reasonable testing performance, build LLVM and subprojects +in release mode, i.e.

+
% cmake -DCMAKE_BUILD_TYPE="Release" -DLLVM_ENABLE_ASSERTIONS=On
+
+
+

If you have Clang checked out and built, you +can run the LLVM and Clang tests simultaneously using:

+
% make check-all
+
+
+

To run the tests with Valgrind (Memcheck by default), use the LIT_ARGS make +variable to pass the required options to lit. For example, you can use:

+
% make check LIT_ARGS="-v --vg --vg-leak"
+
+
+

to enable testing with valgrind and with leak checking enabled.

+

To run individual tests or subsets of tests, you can use the llvm-lit +script which is built as part of LLVM. For example, to run the +Integer/BitPacked.ll test by itself you can run:

+
% llvm-lit ~/llvm/test/Integer/BitPacked.ll
+
+
+

or to run all of the ARM CodeGen tests:

+
% llvm-lit ~/llvm/test/CodeGen/ARM
+
+
+

The regression tests will use the Python psutil module only if installed in a +non-user location. Under Linux, install with sudo or within a virtual +environment. Under Windows, install Python for all users and then run +pip install psutil in an elevated command prompt.

+

For more information on using the lit tool, see llvm-lit --help +or the lit man page.

+
+
+

Debugging Information tests

+

To run debugging information tests simply add the debuginfo-tests +project to your LLVM_ENABLE_PROJECTS define on the cmake +command-line.

+
+
+
+

Regression test structure

+

The LLVM regression tests are driven by lit and are located in the +llvm/test directory.

+

This directory contains a large array of small tests that exercise +various features of LLVM and ensure that regressions do not occur. +The directory is broken into several sub-directories, each focused on a +particular area of LLVM.

+
+

Writing new regression tests

+

The regression test structure is very simple, but does require some +information to be set. This information is gathered via cmake +and is written to a file, test/lit.site.cfg in the build directory. +The llvm/test Makefile does this work for you.

+

In order for the regression tests to work, each directory of tests must +have a lit.local.cfg file. lit looks for this file to determine +how to run the tests. This file is just Python code and thus is very +flexible, but we’ve standardized it for the LLVM regression tests. If +you’re adding a directory of tests, just copy lit.local.cfg from +another directory to get running. The standard lit.local.cfg simply +specifies which files to look in for tests. Any directory that contains +only directories does not need the lit.local.cfg file. Read the Lit +documentation for more information.

+

Each test file must contain lines starting with “RUN:” that tell lit +how to run it. If there are no RUN lines, lit will issue an error +while running a test.

+

RUN lines are specified in the comments of the test program using the +keyword RUN followed by a colon, and lastly the command (pipeline) +to execute. Together, these lines form the “script” that lit +executes to run the test case. The syntax of the RUN lines is similar to a +shell’s syntax for pipelines including I/O redirection and variable +substitution. However, even though these lines may look like a shell +script, they are not. RUN lines are interpreted by lit. +Consequently, the syntax differs from shell in a few ways. You can specify +as many RUN lines as needed.

+

lit performs substitution on each RUN line to replace LLVM tool names +with the full paths to the executable built for each tool (in +$(LLVM_OBJ_ROOT)/$(BuildMode)/bin). This ensures that lit does +not invoke any stray LLVM tools in the user’s path during testing.

+

Each RUN line is executed on its own, distinct from other lines unless +its last character is \. This continuation character causes the RUN +line to be concatenated with the next one. In this way you can build up +long pipelines of commands without making huge line lengths. The lines +ending in \ are concatenated until a RUN line that doesn’t end in +\ is found. This concatenated set of RUN lines then constitutes one +execution. lit will substitute variables and arrange for the pipeline +to be executed. If any process in the pipeline fails, the entire line (and +test case) fails too.

+

Below is an example of legal RUN lines in a .ll file:

+
; RUN: llvm-as < %s | llvm-dis > %t1
+; RUN: llvm-dis < %s.bc-13 > %t2
+; RUN: diff %t1 %t2
+
+
+

As with a Unix shell, the RUN lines permit pipelines and I/O +redirection to be used.

+

There are some quoting rules that you must pay attention to when writing +your RUN lines. In general nothing needs to be quoted. lit won’t +strip off any quote characters so they will get passed to the invoked program. +To avoid this use curly braces to tell lit that it should treat +everything enclosed as one value.

+

In general, you should strive to keep your RUN lines as simple as possible, +using them only to run tools that generate textual output you can then examine. +The recommended way to examine output to figure out if the test passes is using +the FileCheck tool. [The usage of grep in RUN +lines is deprecated - please do not send or commit patches that use it.]

+

Put related tests into a single file rather than having a separate file per +test. Check if there are files already covering your feature and consider +adding your code there instead of creating a new file.

+
+
+

Extra files

+

If your test requires extra files besides the file containing the RUN: lines +and the extra files are small, consider specifying them in the same file and +using split-file to extract them. For example,

+
; RUN: split-file %s %t
+; RUN: llvm-link -S %t/a.ll %t/b.ll | FileCheck %s
+
+; CHECK: ...
+
+;--- a.ll
+...
+;--- b.ll
+...
+
+
+

The parts are separated by the regex ^(.|//)--- <part>. By default the +extracted content has leading empty lines to preserve line numbers. Specify +--no-leading-lines to drop leading lines.

+

If the extra files are large, the idiomatic place to put them is in a subdirectory Inputs. +You can then refer to the extra files as %S/Inputs/foo.bar.

+

For example, consider test/Linker/ident.ll. The directory structure is +as follows:

+
test/
+  Linker/
+    ident.ll
+    Inputs/
+      ident.a.ll
+      ident.b.ll
+
+
+

For convenience, these are the contents:

+
;;;;; ident.ll:
+
+; RUN: llvm-link %S/Inputs/ident.a.ll %S/Inputs/ident.b.ll -S | FileCheck %s
+
+; Verify that multiple input llvm.ident metadata are linked together.
+
+; CHECK-DAG: !llvm.ident = !{!0, !1, !2}
+; CHECK-DAG: "Compiler V1"
+; CHECK-DAG: "Compiler V2"
+; CHECK-DAG: "Compiler V3"
+
+;;;;; Inputs/ident.a.ll:
+
+!llvm.ident = !{!0, !1}
+!0 = metadata !{metadata !"Compiler V1"}
+!1 = metadata !{metadata !"Compiler V2"}
+
+;;;;; Inputs/ident.b.ll:
+
+!llvm.ident = !{!0}
+!0 = metadata !{metadata !"Compiler V3"}
+
+
+

For symmetry reasons, ident.ll is just a dummy file that doesn’t +actually participate in the test besides holding the RUN: lines.

+
+

Note

+

Some existing tests use RUN: true in extra files instead of just +putting the extra files in an Inputs/ directory. This pattern is +deprecated.

+
+
+
+

Fragile tests

+

It is easy to write a fragile test that would fail spuriously if the tool being +tested outputs a full path to the input file. For example, opt by +default outputs a ModuleID:

+
$ cat example.ll
+define i32 @main() nounwind {
+    ret i32 0
+}
+
+$ opt -S /path/to/example.ll
+; ModuleID = '/path/to/example.ll'
+
+define i32 @main() nounwind {
+    ret i32 0
+}
+
+
+

ModuleID can unexpectedly match against CHECK lines. For example:

+
; RUN: opt -S %s | FileCheck %s
+
+define i32 @main() nounwind {
+    ; CHECK-NOT: load
+    ret i32 0
+}
+
+
+

This test will fail if placed into a download directory.

+

To make your tests robust, always use opt ... < %s in the RUN line. +opt does not output a ModuleID when input comes from stdin.

+
+
+

Platform-Specific Tests

+

Whenever adding tests that require the knowledge of a specific platform, +either related to code generated, specific output or back-end features, +you must make sure to isolate the features, so that buildbots that +run on different architectures (and don’t even compile all back-ends), +don’t fail.

+

The first problem is to check for target-specific output, such as sizes +of structures, paths and architecture names. For example:

+
    +
  • Tests containing Windows paths will fail on Linux and vice-versa.

  • +
  • Tests that check for x86_64 somewhere in the text will fail anywhere else.

  • +
  • Tests where the debug information calculates the size of types and structures.

  • +
+

Also, if the test relies on any behaviour that is coded in any back-end, it must +go in its own directory. So, for instance, code generator tests for ARM go +into test/CodeGen/ARM and so on. Those directories contain a special +lit configuration file that ensures all tests in that directory will +only run if a specific back-end is compiled and available.

+

For instance, on test/CodeGen/ARM, the lit.local.cfg is:

+
config.suffixes = ['.ll', '.c', '.cpp', '.test']
+if not 'ARM' in config.root.targets:
+  config.unsupported = True
+
+
+

Other platform-specific tests are those that depend on a specific feature +of a specific sub-architecture, for example only on Intel chips that support AVX2.

+

For instance, test/CodeGen/X86/psubus.ll tests three sub-architecture +variants:

+
; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2
+; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
+; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+
+

And the checks are different:

+
; SSE2: @test1
+; SSE2: psubusw LCPI0_0(%rip), %xmm0
+; AVX1: @test1
+; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+; AVX2: @test1
+; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0
+
+
+

So, if you’re testing for a behaviour that you know is platform-specific or +depends on special features of sub-architectures, you must add the specific +triple, test with the specific FileCheck and put it into the specific +directory that will filter out all other architectures.

+
+
+

Constraining test execution

+

Some tests can be run only in specific configurations, such as +with debug builds or on particular platforms. Use REQUIRES +and UNSUPPORTED to control when the test is enabled.

+

Some tests are expected to fail. For example, there may be a known bug +that the test detects. Use XFAIL to mark a test as an expected failure. +An XFAIL test will be successful if its execution fails, and +will be a failure if its execution succeeds.

+
; This test will be only enabled in the build with asserts.
+; REQUIRES: asserts
+; This test is disabled on Linux.
+; UNSUPPORTED: -linux-
+; This test is expected to fail on PowerPC.
+; XFAIL: powerpc
+
+
+

REQUIRES and UNSUPPORTED and XFAIL all accept a comma-separated +list of boolean expressions. The values in each expression may be:

+
    +
  • Features added to config.available_features by configuration files such as lit.cfg. +String comparison of features is case-sensitive. Furthermore, a boolean expression can +contain any Python regular expression enclosed in {{ }}, in which case the boolean +expression is satisfied if any feature matches the regular expression. Regular +expressions can appear inside an identifier, so for example he{{l+}}o would match +helo, hello, helllo, and so on.

  • +
  • Substrings of the target triple (UNSUPPORTED and XFAIL only).

  • +
+
+
REQUIRES enables the test if all expressions are true.
+
UNSUPPORTED disables the test if any expression is true.
+
XFAIL expects the test to fail if any expression is true.
+
+

As a special case, XFAIL: * is expected to fail everywhere.

+
; This test is disabled on Windows,
+; and is disabled on Linux, except for Android Linux.
+; UNSUPPORTED: windows, linux && !android
+; This test is expected to fail on both PowerPC and ARM.
+; XFAIL: powerpc || arm
+
+
+
+
+

Substitutions

+

Besides replacing LLVM tool names the following substitutions are performed in +RUN lines:

+
+
%%

Replaced by a single %. This allows escaping other substitutions.

+
+
%s

File path to the test case’s source. This is suitable for passing on the +command line as the input to an LLVM tool.

+

Example: /home/user/llvm/test/MC/ELF/foo_test.s

+
+
%S

Directory path to the test case’s source.

+

Example: /home/user/llvm/test/MC/ELF

+
+
%t

File path to a temporary file name that could be used for this test case. +The file name won’t conflict with other test cases. You can append to it +if you need multiple temporaries. This is useful as the destination of +some redirected output.

+

Example: /home/user/llvm.build/test/MC/ELF/Output/foo_test.s.tmp

+
+
%T

Directory of %t. Deprecated. Shouldn’t be used, because it can be easily +misused and cause race conditions between tests.

+

Use rm -rf %t && mkdir %t instead if a temporary directory is necessary.

+

Example: /home/user/llvm.build/test/MC/ELF/Output

+
+
+

%{pathsep}

+
+

Expands to the path separator, i.e. : (or ; on Windows).

+
+

%/s, %/S, %/t, %/T:

+
+

Act like the corresponding substitution above but replace any \ +character with a /. This is useful to normalize path separators.

+
+

Example: %s:  C:\Desktop Files/foo_test.s.tmp

+

Example: %/s: C:/Desktop Files/foo_test.s.tmp

+
+
+

%:s, %:S, %:t, %:T:

+
+

Act like the corresponding substitution above but remove colons at +the beginning of Windows paths. This is useful to allow concatenation +of absolute paths on Windows to produce a legal path.

+
+

Example: %s:  C:\Desktop Files\foo_test.s.tmp

+

Example: %:s: C\Desktop Files\foo_test.s.tmp

+
+
+

%errc_<ERRCODE>

+
+

Some error messages may be substituted to allow different spellings +based on the host platform.

+
+

The following error codes are currently supported: +ENOENT, EISDIR, EINVAL, EACCES.

+

Example: Linux %errc_ENOENT: No such file or directory

+

Example: Windows %errc_ENOENT: no such file or directory

+
+
+

LLVM-specific substitutions:

+
+
%shlibext

The suffix for the host platform’s shared library files. This includes the +period as the first character.

+

Example: .so (Linux), .dylib (macOS), .dll (Windows)

+
+
%exeext

The suffix for the host platform’s executable files. This includes the +period as the first character.

+

Example: .exe (Windows), empty on Linux.

+
+
%(line), %(line+<number>), %(line-<number>)

The number of the line where this substitution is used, with an optional +integer offset. This can be used in tests with multiple RUN lines, which +reference the test file’s line numbers.

+
+
+

Clang-specific substitutions:

+
+
%clang

Invokes the Clang driver.

+
+
%clang_cpp

Invokes the Clang driver for C++.

+
+
%clang_cl

Invokes the CL-compatible Clang driver.

+
+
%clangxx

Invokes the G++-compatible Clang driver.

+
+
%clang_cc1

Invokes the Clang frontend.

+
+
%itanium_abi_triple, %ms_abi_triple

These substitutions can be used to get the current target triple adjusted to +the desired ABI. For example, if the test suite is running with the +i686-pc-win32 target, %itanium_abi_triple will expand to +i686-pc-mingw32. This allows a test to run with a specific ABI without +constraining it to a specific triple.

+
+
+

FileCheck-specific substitutions:

+
+
%ProtectFileCheckOutput

This should precede a FileCheck call if and only if the call’s textual +output affects test results. It’s usually easy to tell: just look for +redirection or piping of the FileCheck call’s stdout or stderr.

+
+
+

To add more substitutions, look at test/lit.cfg or lit.local.cfg.

+
+
+

Options

+

The llvm lit configuration allows some things to be customized with user options:

+
+
llc, opt, …

Substitute the respective llvm tool name with a custom command line. This +allows specifying custom paths and default arguments for these tools. +Example:

+

% llvm-lit "-Dllc=llc -verify-machineinstrs"

+
+
run_long_tests

Enable the execution of long running tests.

+
+
llvm_site_config

Load the specified lit configuration instead of the default one.

+
+
+
+
+

Other Features

+

To make RUN line writing easier, there are several helper programs. These +helpers are in the PATH when running tests, so you can just call them using +their name. For example:

+
+
not

This program runs its arguments and then inverts the result code from it. +Zero result codes become 1. Non-zero result codes become 0.

+
+
+

To make the output more useful, lit will scan +the lines of the test case for ones that contain a pattern that matches +PR[0-9]+. This is the syntax for specifying a PR (Problem Report) number +that is related to the test case. The number after “PR” specifies the +LLVM Bugzilla number. When a PR number is specified, it will be used in +the pass/fail reporting. This is useful to quickly get some context when +a test fails.

+

Finally, any line that contains “END.” will cause the special +interpretation of lines to terminate. This is generally done right after +the last RUN: line. This has two side effects:

+
    +
  1. it prevents special interpretation of lines that are part of the test +program, not the instructions to the test case, and

  2. +
  3. it speeds things up for really big test cases by avoiding +interpretation of the remainder of the file.

  4. +
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TestSuiteGuide.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TestSuiteGuide.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TestSuiteGuide.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TestSuiteGuide.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,514 @@ + + + + + + + + + test-suite Guide — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

test-suite Guide

+
+

Quickstart

+
    +
  1. The lit test runner is required to run the tests. You can either use one +from an LLVM build:

    +
    % <path to llvm build>/bin/llvm-lit --version
    +lit 0.8.0dev
    +
    +
    +

    An alternative is installing it as a python package in a python virtual +environment:

    +
    % mkdir venv
    +% virtualenv venv
    +% . venv/bin/activate
    +% pip install svn+https://llvm.org/svn/llvm-project/llvm/trunk/utils/lit
    +% lit --version
    +lit 0.8.0dev
    +
    +
    +
  2. +
  3. Check out the test-suite module with:

    +
    % git clone https://github.com/llvm/llvm-test-suite.git test-suite
    +
    +
    +
  4. +
  5. Create a build directory and use CMake to configure the suite. Use the +CMAKE_C_COMPILER option to specify the compiler to test. Use a cache file +to choose a typical build configuration:

    +
    % mkdir test-suite-build
    +% cd test-suite-build
    +% cmake -DCMAKE_C_COMPILER=<path to llvm build>/bin/clang \
    +        -C../test-suite/cmake/caches/O3.cmake \
    +        ../test-suite
    +
    +
    +
  6. +
+

NOTE! if you are using your built clang, and you want to build and run the +MicroBenchmarks/XRay microbenchmarks, you need to add compiler-rt to your +LLVM_ENABLE_RUNTIMES cmake flag.

+
    +
  1. Build the benchmarks:

    +
    % make
    +Scanning dependencies of target timeit-target
    +[  0%] Building C object tools/CMakeFiles/timeit-target.dir/timeit.c.o
    +[  0%] Linking C executable timeit-target
    +...
    +
    +
    +
  2. +
  3. Run the tests with lit:

    +
    % llvm-lit -v -j 1 -o results.json .
    +-- Testing: 474 tests, 1 threads --
    +PASS: test-suite :: MultiSource/Applications/ALAC/decode/alacconvert-decode.test (1 of 474)
    +********** TEST 'test-suite :: MultiSource/Applications/ALAC/decode/alacconvert-decode.test' RESULTS **********
    +compile_time: 0.2192
    +exec_time: 0.0462
    +hash: "59620e187c6ac38b36382685ccd2b63b"
    +size: 83348
    +**********
    +PASS: test-suite :: MultiSource/Applications/ALAC/encode/alacconvert-encode.test (2 of 474)
    +...
    +
    +
    +
  4. +
  5. Show and compare result files (optional):

    +
    # Make sure pandas and scipy are installed. Prepend `sudo` if necessary.
    +% pip install pandas scipy
    +# Show a single result file:
    +% test-suite/utils/compare.py results.json
    +# Compare two result files:
    +% test-suite/utils/compare.py results_a.json results_b.json
    +
    +
    +
  6. +
+
+
+

Structure

+

The test-suite contains benchmark and test programs. The programs come with +reference outputs so that their correctness can be checked. The suite comes +with tools to collect metrics such as benchmark runtime, compilation time and +code size.

+

The test-suite is divided into several directories:

+
    +
  • SingleSource/

    +

    Contains test programs that are only a single source file in size. A +subdirectory may contain several programs.

    +
  • +
  • MultiSource/

    +

    Contains subdirectories which contain entire programs with multiple source files. +Large benchmarks and whole applications go here.

    +
  • +
  • MicroBenchmarks/

    +

    Programs using the google-benchmark +library. The programs define functions that are run multiple times until the +measurement results are statistically significant.

    +
  • +
  • External/

    +

    Contains descriptions and test data for code that cannot be directly +distributed with the test-suite. The most prominent members of this +directory are the SPEC CPU benchmark suites. +See External Suites.

    +
  • +
  • Bitcode/

    +

    These tests are mostly written in LLVM bitcode.

    +
  • +
  • CTMark/

    +

    Contains symbolic links to other benchmarks forming a representative sample +for compilation performance measurements.

    +
  • +
+
+

Benchmarks

+

Every program can work as a correctness test. Some programs are unsuitable for +performance measurements. Setting the TEST_SUITE_BENCHMARKING_ONLY CMake +option to ON will disable them.

+
+
+
+

Configuration

+

The test-suite has configuration options to customize building and running the +benchmarks. CMake can print a list of them:

+
% cd test-suite-build
+# Print basic options:
+% cmake -LH
+# Print all options:
+% cmake -LAH
+
+
+
+

Common Configuration Options

+
    +
  • CMAKE_C_FLAGS

    +

    Specify extra flags to be passed to C compiler invocations. The flags are +also passed to the C++ compiler and linker invocations. See +https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_FLAGS.html

    +
  • +
  • CMAKE_C_COMPILER

    +

    Select the C compiler executable to be used. Note that the C++ compiler is +inferred automatically i.e. when specifying path/to/clang CMake will +automatically use path/to/clang++ as the C++ compiler. See +https://cmake.org/cmake/help/latest/variable/CMAKE_LANG_COMPILER.html

    +
  • +
  • CMAKE_Fortran_COMPILER

    +

    Select the Fortran compiler executable to be used. Not set by default and not +required unless running the Fortran Test Suite.

    +
  • +
  • CMAKE_BUILD_TYPE

    +

    Select a build type like OPTIMIZE or DEBUG selecting a set of predefined +compiler flags. These flags are applied regardless of the CMAKE_C_FLAGS +option and may be changed by modifying CMAKE_C_FLAGS_OPTIMIZE etc. See +https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html

    +
  • +
  • TEST_SUITE_FORTRAN

    +

    Activate the Fortran tests. This is a work in progress. More information can be +found in the Flang documentation.

    +
  • +
  • TEST_SUITE_RUN_UNDER

    +

    Prefix test invocations with the given tool. This is typically used to run +cross-compiled tests within a simulator tool.

    +
  • +
  • TEST_SUITE_BENCHMARKING_ONLY

    +

    Disable tests that are unsuitable for performance measurements. The disabled +tests either run for a very short time or are dominated by I/O performance +making them unsuitable as compiler performance tests.

    +
  • +
  • TEST_SUITE_SUBDIRS

    +

    Semicolon-separated list of directories to include. This can be used to only +build parts of the test-suite or to include external suites. This option +does not work reliably with deeper subdirectories as it skips intermediate +CMakeLists.txt files which may be required.

    +
  • +
  • TEST_SUITE_COLLECT_STATS

    +

    Collect internal LLVM statistics. Appends -save-stats=obj when invoking the +compiler and makes the lit runner collect and merge the statistic files.

    +
  • +
  • TEST_SUITE_RUN_BENCHMARKS

    +

    If this is set to OFF then lit will not actually run the tests but just +collect build statistics like compile time and code size.

    +
  • +
  • TEST_SUITE_USE_PERF

    +

    Use the perf tool for time measurement instead of the timeit tool that +comes with the test-suite. The perf tool is usually available on Linux systems.

    +
  • +
  • TEST_SUITE_SPEC2000_ROOT, TEST_SUITE_SPEC2006_ROOT, TEST_SUITE_SPEC2017_ROOT, …

    +

    Specify installation directories of external benchmark suites. You can find +more information about expected versions or usage in the README files in the +External directory (such as External/SPEC/README)

    +
  • +
+
+
+

Common CMake Flags

+
    +
  • -GNinja

    +

    Generate build files for the ninja build tool.

    +
  • +
  • -Ctest-suite/cmake/caches/<cachefile.cmake>

    +

    Use a CMake cache. The test-suite comes with several CMake caches which +predefine common or tricky build configurations.

    +
  • +
+
+
+
+

Displaying and Analyzing Results

+

The compare.py script displays and compares result files. A result file is +produced when invoking lit with the -o filename.json flag.

+

Example usage:

+
    +
  • Basic Usage:

    +
    % test-suite/utils/compare.py baseline.json
    +Warning: 'test-suite :: External/SPEC/CINT2006/403.gcc/403.gcc.test' has No metrics!
    +Tests: 508
    +Metric: exec_time
    +
    +Program                                         baseline
    +
    +INT2006/456.hmmer/456.hmmer                   1222.90
    +INT2006/464.h264ref/464.h264ref               928.70
    +...
    +             baseline
    +count  506.000000
    +mean   20.563098
    +std    111.423325
    +min    0.003400
    +25%    0.011200
    +50%    0.339450
    +75%    4.067200
    +max    1222.896800
    +
    +
    +
  • +
  • Show compile_time or text segment size metrics:

    +
    % test-suite/utils/compare.py -m compile_time baseline.json
    +% test-suite/utils/compare.py -m size.__text baseline.json
    +
    +
    +
  • +
  • Compare two result files and filter short running tests:

    +
    % test-suite/utils/compare.py --filter-short baseline.json experiment.json
    +...
    +Program                                         baseline  experiment  diff
    +
    +SingleSour.../Benchmarks/Linpack/linpack-pc     5.16      4.30        -16.5%
    +MultiSourc...erolling-dbl/LoopRerolling-dbl     7.01      7.86         12.2%
    +SingleSour...UnitTests/Vectorizer/gcc-loops     3.89      3.54        -9.0%
    +...
    +
    +
    +
  • +
  • Merge multiple baseline and experiment result files by taking the minimum +runtime each:

    +
    % test-suite/utils/compare.py base0.json base1.json base2.json vs exp0.json exp1.json exp2.json
    +
    +
    +
  • +
+
+

Continuous Tracking with LNT

+

LNT is a set of client and server tools for continuously monitoring +performance. You can find more information at +https://llvm.org/docs/lnt. The official LNT instance +of the LLVM project is hosted at http://lnt.llvm.org.

+
+
+
+

External Suites

+

External suites such as SPEC can be enabled by either

+
    +
  • placing (or linking) them into the test-suite/test-suite-externals/xxx directory (example: test-suite/test-suite-externals/speccpu2000)

  • +
  • using a configuration option such as -D TEST_SUITE_SPEC2000_ROOT=path/to/speccpu2000

  • +
+

You can find further information in the respective README files such as +test-suite/External/SPEC/README.

+

For the SPEC benchmarks you can switch between the test, train and +ref input datasets via the TEST_SUITE_RUN_TYPE configuration option. +The train dataset is used by default.

+
+
+

Custom Suites

+

You can build custom suites using the test-suite infrastructure. A custom suite +has a CMakeLists.txt file at the top directory. The CMakeLists.txt will be +picked up automatically if placed into a subdirectory of the test-suite or when +setting the TEST_SUITE_SUBDIRS variable:

+
% cmake -DTEST_SUITE_SUBDIRS=path/to/my/benchmark-suite ../test-suite
+
+
+
+
+

Profile Guided Optimization

+

Profile guided optimization requires compiling and running twice. First the +benchmark should be compiled with profile generation instrumentation enabled +and set up for training data. The lit runner will merge the profile files +using llvm-profdata so they can be used by the second compilation run.

+

Example:

+
# Profile generation run:
+% cmake -DTEST_SUITE_PROFILE_GENERATE=ON \
+        -DTEST_SUITE_RUN_TYPE=train \
+        ../test-suite
+% make
+% llvm-lit .
+# Use the profile data for compilation and actual benchmark run:
+% cmake -DTEST_SUITE_PROFILE_GENERATE=OFF \
+        -DTEST_SUITE_PROFILE_USE=ON \
+        -DTEST_SUITE_RUN_TYPE=ref \
+        .
+% make
+% llvm-lit -o result.json .
+
+
+

The TEST_SUITE_RUN_TYPE setting only affects the SPEC benchmark suites.

+
+
+

Cross Compilation and External Devices

+
+

Compilation

+

CMake allows cross compiling to a different target via toolchain files. More +information can be found here:

+ +

Cross compilation from macOS to iOS is possible with the +test-suite/cmake/caches/target-*-iphoneos-internal.cmake CMake cache +files; this requires an internal iOS SDK.

+
+
+

Running

+

There are two ways to run the tests in a cross compilation setting:

+
    +
  • Via SSH connection to an external device: The TEST_SUITE_REMOTE_HOST option +should be set to the SSH hostname. The executables and data files need to be +transferred to the device after compilation. This is typically done via the +rsync make target. After this, the lit runner can be used on the host +machine. It will prefix the benchmark and verification command lines with an +ssh command.

    +

    Example:

    +
    % cmake -G Ninja -D CMAKE_C_COMPILER=path/to/clang \
    +        -C ../test-suite/cmake/caches/target-arm64-iphoneos-internal.cmake \
    +        -D TEST_SUITE_REMOTE_HOST=mydevice \
    +        ../test-suite
    +% ninja
    +% ninja rsync
    +% llvm-lit -j1 -o result.json .
    +
    +
    +
  • +
  • You can specify a simulator for the target machine with the +TEST_SUITE_RUN_UNDER setting. The lit runner will prefix all benchmark +invocations with it.

  • +
+
+
+
+

Running the test-suite via LNT

+

The LNT tool can run the test-suite. Use this when submitting test results to +an LNT instance. See +https://llvm.org/docs/lnt/tests.html#llvm-cmake-test-suite +for details.

+
+
+

Running the test-suite via Makefiles (deprecated)

+

Note: The test-suite comes with a set of Makefiles that are considered +deprecated. They do not support newer testing modes like Bitcode or +Microbenchmarks and are harder to use.

+

Old documentation is available in the +test-suite Makefile Guide.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TestSuiteMakefileGuide.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TestSuiteMakefileGuide.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TestSuiteMakefileGuide.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TestSuiteMakefileGuide.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,314 @@ + + + + + + + + + test-suite Makefile Guide (deprecated) — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

test-suite Makefile Guide (deprecated)

+ +
+

Overview

+

First, all tests are executed within the LLVM object directory tree. +They are not executed inside of the LLVM source tree. This is because +the test suite creates temporary files during execution.

+

To run the test suite, you need to use the following steps:

+
    +
  1. Check out the test-suite module with:

    +
    % git clone https://github.com/llvm/llvm-test-suite.git test-suite
    +
    +
    +
  2. +
  3. FIXME: these directions are outdated and won’t work. Figure out +what the correct thing to do is, and write it down here.

  4. +
  5. Configure and build llvm.

  6. +
  7. Configure and build llvm-gcc.

  8. +
  9. Install llvm-gcc somewhere.

  10. +
  11. Re-configure llvm from the top level of each build tree (LLVM +object directory tree) in which you want to run the test suite, just +as you do before building LLVM.

    +

    During the re-configuration, you must either: (1) have llvm-gcc +you just built in your path, or (2) specify the directory where your +just-built llvm-gcc is installed using +--with-llvmgccdir=$LLVM_GCC_DIR.

    +

    You must also tell the configure machinery that the test suite is +available so it can be configured for your build tree:

    +
    % cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
    +
    +
    +

    [Remember that $LLVM_GCC_DIR is the directory where you +installed llvm-gcc, not its src or obj directory.]

    +
  12. +
  13. You can now run the test suite from your build tree as follows:

    +
    % cd $LLVM_OBJ_ROOT/projects/test-suite
    +% make
    +
    +
    +
  14. +
+

Note that the second and third steps only need to be done once. After +you have the suite checked out and configured, you don’t need to do it +again (unless the test code or configure script changes).

+
+
+

Configuring External Tests

+

In order to run the External tests in the test-suite module, you +must specify --with-externals. This must be done during the +re-configuration step (see above), and the llvm re-configuration +must recognize the previously-built llvm-gcc. If any of these is +missing or neglected, the External tests won’t work.

+
    +
  • --with-externals

  • +
  • --with-externals=<directory>

  • +
+

This tells LLVM where to find any external tests. They are expected to +be in specifically named subdirectories of <directory>. If +directory is left unspecified, configure uses the default value +/home/vadve/shared/benchmarks/speccpu2000/benchspec. Subdirectory +names known to LLVM include:

+
    +
  • spec95

  • +
  • speccpu2000

  • +
  • speccpu2006

  • +
  • povray31

  • +
+

Others are added from time to time, and can be determined from +configure.

+
+
+

Running Different Tests

+

In addition to the regular “whole program” tests, the test-suite +module also provides a mechanism for compiling the programs in different +ways. If the variable TEST is defined on the gmake command line, the +test system will include a Makefile named +TEST.<value of TEST variable>.Makefile. This Makefile can modify +build rules to yield different results.

+

For example, the LLVM nightly tester uses TEST.nightly.Makefile to +create the nightly test reports. To run the nightly tests, run +gmake TEST=nightly.

+

There are several TEST Makefiles available in the tree. Some of them are +designed for internal LLVM research and will not work outside of the +LLVM research group. They may still be valuable, however, as a guide to +writing your own TEST Makefile for any optimization or analysis passes +that you develop with LLVM.

+
+
+

Generating Test Output

+

There are a number of ways to run the tests and generate output. The +most simple one is simply running gmake with no arguments. This will +compile and run all programs in the tree using a number of different +methods and compare results. Any failures are reported in the output, +but are likely drowned in the other output. Passes are not reported +explicitly.

+

Somewhat better is running gmake TEST=sometest test, which runs the +specified test and usually adds per-program summaries to the output +(depending on which sometest you use). For example, the nightly test +explicitly outputs TEST-PASS or TEST-FAIL for every test after each +program. Though these lines are still drowned in the output, it’s easy +to grep the output logs in the Output directories.

+

Even better are the report and report.format targets (where +format is one of html, csv, text or graphs). The +exact contents of the report are dependent on which TEST you are +running, but the text results are always shown at the end of the run and +the results are always stored in the report.<type>.format file (when +running with TEST=<type>). The report also generates a file +called report.<type>.raw.out containing the output of the entire +test run.

+
+
+

Writing Custom Tests for the test-suite

+

Assuming you can run the test suite, (e.g. +“gmake TEST=nightly report” should work), it is really easy to run +optimizations or code generator components against every program in the +tree, collecting statistics or running custom checks for correctness. At +base, this is how the nightly tester works, it’s just one example of a +general framework.

+

Let’s say that you have an LLVM optimization pass, and you want to see +how many times it triggers. First thing you should do is add an LLVM +statistic to your pass, which will +tally counts of things you care about.

+

Following this, you can set up a test and a report that collects these +and formats them for easy viewing. This consists of two files, a +“test-suite/TEST.XXX.Makefile” fragment (where XXX is the name of +your test) and a “test-suite/TEST.XXX.report” file that indicates +how to format the output into a table. There are many example reports of +various levels of sophistication included with the test suite, and the +framework is very general.

+

If you are interested in testing an optimization pass, check out the +“libcalls” test as an example. It can be run like this:

+
% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
+% make TEST=libcalls report
+
+
+

This will do a bunch of stuff, then eventually print a table like this:

+
Name                                  | total | #exit |
+...
+FreeBench/analyzer/analyzer           | 51    | 6     |
+FreeBench/fourinarow/fourinarow       | 1     | 1     |
+FreeBench/neural/neural               | 19    | 9     |
+FreeBench/pifft/pifft                 | 5     | 3     |
+MallocBench/cfrac/cfrac               | 1     | *     |
+MallocBench/espresso/espresso         | 52    | 12    |
+MallocBench/gs/gs                     | 4     | *     |
+Prolangs-C/TimberWolfMC/timberwolfmc  | 302   | *     |
+Prolangs-C/agrep/agrep                | 33    | 12    |
+Prolangs-C/allroots/allroots          | *     | *     |
+Prolangs-C/assembler/assembler        | 47    | *     |
+Prolangs-C/bison/mybison              | 74    | *     |
+...
+
+
+

This basically is grepping the -stats output and displaying it in a +table. You can also use the “TEST=libcalls report.html” target to get +the table in HTML form, similarly for report.csv and report.tex.

+

The source for this is in test-suite/TEST.libcalls.*. The format is +pretty simple: the Makefile indicates how to run the test (in this case, +“opt -simplify-libcalls -stats”), and the report contains one line +for each column of the output. The first value is the header for the +column and the second is the regex to grep the output of the command +for. There are lots of example reports that can do fancy stuff.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TransformMetadata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TransformMetadata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TransformMetadata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TransformMetadata.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,533 @@ + + + + + + + + + Code Transformation Metadata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Code Transformation Metadata

+ +
+

Overview

+

LLVM transformation passes can be controlled by attaching metadata to +the code to transform. By default, transformation passes use heuristics +to determine whether or not to perform transformations, and when doing +so, other details of how the transformations are applied (e.g., which +vectorization factor to select). +Unless the optimizer is otherwise directed, transformations are applied +conservatively. This conservatism generally allows the optimizer to +avoid unprofitable transformations, but in practice, this results in the +optimizer not applying transformations that would be highly profitable.

+

Frontends can give additional hints to LLVM passes on which +transformations they should apply. This can be additional knowledge that +cannot be derived from the emitted IR, or directives passed from the +user/programmer. OpenMP pragmas are an example of the latter.

+

If any such metadata is dropped from the program, the code’s semantics +must not change.

+
+
+

Metadata on Loops

+

Attributes can be attached to loops as described in ‘llvm.loop’. +Attributes can describe properties of the loop, disable transformations, +force specific transformations and set transformation options.

+

Because metadata nodes are immutable (with the exception of +MDNode::replaceOperandWith which is dangerous to use on uniqued +metadata), in order to add or remove loop attributes, a new MDNode +must be created and assigned as the new llvm.loop metadata. Any +connection between the old MDNode and the loop is lost. The +llvm.loop node is also used as LoopID (Loop::getLoopID()), i.e. +the loop effectively gets a new identifier. For instance, +llvm.mem.parallel_loop_access references the LoopID. Therefore, if +the parallel access property is to be preserved after adding/removing +loop attributes, any llvm.mem.parallel_loop_access reference must be +updated to the new LoopID.

+
+
+

Transformation Metadata Structure

+

Some attributes describe code transformations (unrolling, vectorizing, +loop distribution, etc.). They can either be a hint to the optimizer +that a transformation might be beneficial, an instruction to use a specific +option, or convey a specific request from the user (such as +#pragma clang loop or #pragma omp simd).

+

If a transformation is forced but cannot be carried-out for any reason, +an optimization-missed warning must be emitted. Semantic information +such as a transformation being safe (e.g. +llvm.mem.parallel_loop_access) can be unused by the optimizer +without generating a warning.

+

Unless explicitly disabled, any optimization pass may heuristically +determine whether a transformation is beneficial and apply it. If +metadata for another transformation was specified, applying a different +transformation before it might be inadvertent due to being applied on a +different loop or the loop not existing anymore. To avoid having to +explicitly disable an unknown number of passes, the attribute +llvm.loop.disable_nonforced disables all optional, high-level, +restructuring transformations.

+

The following example avoids the loop being altered before being +vectorized, for instance being unrolled.

+
  br i1 %exitcond, label %for.exit, label %for.header, !llvm.loop !0
+...
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.disable_nonforced"}
+
+
+

After a transformation is applied, follow-up attributes are set on the +transformed and/or new loop(s). This allows additional attributes +including followup-transformations to be specified. Specifying multiple +transformations in the same metadata node is possible for compatibility +reasons, but their execution order is undefined. For instance, when +llvm.loop.vectorize.enable and llvm.loop.unroll.enable are +specified at the same time, unrolling may occur either before or after +vectorization.

+

As an example, the following instructs a loop to be vectorized and only +then unrolled.

+
!0 = distinct !{!0, !1, !2, !3}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.disable_nonforced"}
+!3 = !{!"llvm.loop.vectorize.followup_vectorized", !{"llvm.loop.unroll.enable"}}
+
+
+

If, and only if, no followup is specified, the pass may add attributes itself. +For instance, the vectorizer adds a llvm.loop.isvectorized attribute and +all attributes from the original loop excluding its loop vectorizer +attributes. To avoid this, an empty followup attribute can be used, e.g.

+
!3 = !{!"llvm.loop.vectorize.followup_vectorized"}
+
+
+

The followup attributes of a transformation that cannot be applied will +never be added to a loop and are therefore effectively ignored. This means +that any followup-transformation in such attributes requires that its +prior transformations are applied before the followup-transformation. +The user should receive a warning about the first transformation in the +transformation chain that could not be applied if it is a forced +transformation. All following transformations are skipped.

+
+
+

Pass-Specific Transformation Metadata

+

Transformation options are specific to each transformation. In the +following, we present the model for each LLVM loop optimization pass and +the metadata to influence them.

+
+

Loop Vectorization and Interleaving

+

Loop vectorization and interleaving is interpreted as a single +transformation. It is interpreted as forced if +!{"llvm.loop.vectorize.enable", i1 true} is set.

+

Assuming the pre-vectorization loop is

+
for (int i = 0; i < n; i+=1) // original loop
+  Stmt(i);
+
+
+

then the code after vectorization will be approximately (assuming an +SIMD width of 4):

+
int i = 0;
+if (rtc) {
+  for (; i + 3 < n; i+=4) // vectorized/interleaved loop
+    Stmt(i:i+3);
+}
+for (; i < n; i+=1) // epilogue loop
+  Stmt(i);
+
+
+

where rtc is a generated runtime check.

+

llvm.loop.vectorize.followup_vectorized will set the attributes for +the vectorized loop. If not specified, llvm.loop.isvectorized is +combined with the original loop’s attributes to avoid it being +vectorized multiple times.

+

llvm.loop.vectorize.followup_epilogue will set the attributes for +the remainder loop. If not specified, it will have the original loop’s +attributes combined with llvm.loop.isvectorized and +llvm.loop.unroll.runtime.disable (unless the original loop already +has unroll metadata).

+

The attributes specified by llvm.loop.vectorize.followup_all are +added to both loops.

+

When using a follow-up attribute, it replaces any automatically deduced +attributes for the generated loop in question. Therefore it is +recommended to add llvm.loop.isvectorized to +llvm.loop.vectorize.followup_all which avoids that the loop +vectorizer tries to optimize the loops again.

+
+
+

Loop Unrolling

+

Unrolling is interpreted as forced if any !{!"llvm.loop.unroll.enable"} +metadata or option (llvm.loop.unroll.count, llvm.loop.unroll.full) +is present. Unrolling can be full unrolling, partial unrolling of a loop +with constant trip count or runtime unrolling of a loop with a trip +count unknown at compile-time.

+

If the loop has been unrolled fully, there is no followup-loop. For +partial/runtime unrolling, the original loop of

+
for (int i = 0; i < n; i+=1) // original loop
+  Stmt(i);
+
+
+

is transformed into (using an unroll factor of 4):

+
int i = 0;
+for (; i + 3 < n; i+=4) { // unrolled loop
+  Stmt(i);
+  Stmt(i+1);
+  Stmt(i+2);
+  Stmt(i+3);
+}
+for (; i < n; i+=1) // remainder loop
+  Stmt(i);
+
+
+

llvm.loop.unroll.followup_unrolled will set the loop attributes of +the unrolled loop. If not specified, the attributes of the original loop +without the llvm.loop.unroll.* attributes are copied and +llvm.loop.unroll.disable added to it.

+

llvm.loop.unroll.followup_remainder defines the attributes of the +remainder loop. If not specified the remainder loop will have no +attributes. The remainder loop might not be present due to being fully +unrolled in which case this attribute has no effect.

+

Attributes defined in llvm.loop.unroll.followup_all are added to the +unrolled and remainder loops.

+

To avoid that the partially unrolled loop is unrolled again, it is +recommended to add llvm.loop.unroll.disable to +llvm.loop.unroll.followup_all. If no follow-up attribute is specified +for a generated loop, it is added automatically.

+
+
+

Unroll-And-Jam

+

Unroll-and-jam uses the following transformation model (here with an +unroll factor of 2). Currently, it does not support a fallback version +when the transformation is unsafe.

+
for (int i = 0; i < n; i+=1) { // original outer loop
+  Fore(i);
+  for (int j = 0; j < m; j+=1) // original inner loop
+    SubLoop(i, j);
+  Aft(i);
+}
+
+
+
int i = 0;
+for (; i + 1 < n; i+=2) { // unrolled outer loop
+  Fore(i);
+  Fore(i+1);
+  for (int j = 0; j < m; j+=1) { // unrolled inner loop
+    SubLoop(i, j);
+    SubLoop(i+1, j);
+  }
+  Aft(i);
+  Aft(i+1);
+}
+for (; i < n; i+=1) { // remainder outer loop
+  Fore(i);
+  for (int j = 0; j < m; j+=1) // remainder inner loop
+    SubLoop(i, j);
+  Aft(i);
+}
+
+
+

llvm.loop.unroll_and_jam.followup_outer will set the loop attributes +of the unrolled outer loop. If not specified, the attributes of the +original outer loop without the llvm.loop.unroll.* attributes are +copied and llvm.loop.unroll.disable added to it.

+

llvm.loop.unroll_and_jam.followup_inner will set the loop attributes +of the unrolled inner loop. If not specified, the attributes of the +original inner loop are used unchanged.

+

llvm.loop.unroll_and_jam.followup_remainder_outer sets the loop +attributes of the outer remainder loop. If not specified it will not +have any attributes. The remainder loop might not be present due to +being fully unrolled.

+

llvm.loop.unroll_and_jam.followup_remainder_inner sets the loop +attributes of the inner remainder loop. If not specified it will have +the attributes of the original inner loop. If the outer remainder loop +is unrolled, the inner remainder loop might be present multiple times.

+

Attributes defined in llvm.loop.unroll_and_jam.followup_all are +added to all of the aforementioned output loops.

+

To avoid that the unrolled loop is unrolled again, it is +recommended to add llvm.loop.unroll.disable to +llvm.loop.unroll_and_jam.followup_all. It suppresses unroll-and-jam +as well as an additional inner loop unrolling. If no follow-up +attribute is specified for a generated loop, it is added automatically.

+
+
+

Loop Distribution

+

The LoopDistribution pass tries to separate vectorizable parts of a loop +from the non-vectorizable part (which otherwise would make the entire +loop non-vectorizable). Conceptually, it transforms a loop such as

+
for (int i = 1; i < n; i+=1) { // original loop
+  A[i] = i;
+  B[i] = 2 + B[i];
+  C[i] = 3 + C[i - 1];
+}
+
+
+

into the following code:

+
if (rtc) {
+  for (int i = 1; i < n; i+=1) // coincident loop
+    A[i] = i;
+  for (int i = 1; i < n; i+=1) // coincident loop
+    B[i] = 2 + B[i];
+  for (int i = 1; i < n; i+=1) // sequential loop
+    C[i] = 3 + C[i - 1];
+} else {
+  for (int i = 1; i < n; i+=1) { // fallback loop
+    A[i] = i;
+    B[i] = 2 + B[i];
+    C[i] = 3 + C[i - 1];
+  }
+}
+
+
+

where rtc is a generated runtime check.

+

llvm.loop.distribute.followup_coincident sets the loop attributes of +all loops without loop-carried dependencies (i.e. vectorizable loops). +There might be more than one such loop. If not defined, the loops will +inherit the original loop’s attributes.

+

llvm.loop.distribute.followup_sequential sets the loop attributes of the +loop with potentially unsafe dependencies. There should be at most one +such loop. If not defined, the loop will inherit the original loop’s +attributes.

+

llvm.loop.distribute.followup_fallback defines the loop attributes +for the fallback loop, which is a copy of the original loop for when +loop versioning is required. If undefined, the fallback loop inherits +all attributes from the original loop.

+

Attributes defined in llvm.loop.distribute.followup_all are added to +all of the aforementioned output loops.

+

It is recommended to add llvm.loop.disable_nonforced to +llvm.loop.distribute.followup_fallback. This avoids that the +fallback version (which is likely never executed) is further optimized +which would increase the code size.

+
+
+

Versioning LICM

+

The pass hoists code out of loops that are only loop-invariant when +dynamic conditions apply. For instance, it transforms the loop

+
for (int i = 0; i < n; i+=1) // original loop
+  A[i] = B[0];
+
+
+

into:

+
if (rtc) {
+  auto b = B[0];
+  for (int i = 0; i < n; i+=1) // versioned loop
+    A[i] = b;
+} else {
+  for (int i = 0; i < n; i+=1) // unversioned loop
+    A[i] = B[0];
+}
+
+
+

The runtime condition (rtc) checks that the array A and the +element B[0] do not alias.

+

Currently, this transformation does not support followup-attributes.

+
+
+

Loop Interchange

+

Currently, the LoopInterchange pass does not use any metadata.

+
+
+
+

Ambiguous Transformation Order

+

If there are multiple transformations defined, the order in which they are +executed depends on the order in LLVM’s pass pipeline, which is subject +to change. The default optimization pipeline (anything higher than +-O0) has the following order.

+

When using the legacy pass manager:

+
+
    +
  • LoopInterchange (if enabled)

  • +
  • SimpleLoopUnroll/LoopFullUnroll (only performs full unrolling)

  • +
  • VersioningLICM (if enabled)

  • +
  • LoopDistribute

  • +
  • LoopVectorizer

  • +
  • LoopUnrollAndJam (if enabled)

  • +
  • LoopUnroll (partial and runtime unrolling)

  • +
+
+

When using the legacy pass manager with LTO:

+
+
    +
  • LoopInterchange (if enabled)

  • +
  • SimpleLoopUnroll/LoopFullUnroll (only performs full unrolling)

  • +
  • LoopVectorizer

  • +
  • LoopUnroll (partial and runtime unrolling)

  • +
+
+

When using the new pass manager:

+
+
    +
  • SimpleLoopUnroll/LoopFullUnroll (only performs full unrolling)

  • +
  • LoopDistribute

  • +
  • LoopVectorizer

  • +
  • LoopUnrollAndJam (if enabled)

  • +
  • LoopUnroll (partial and runtime unrolling)

  • +
+
+
+
+

Leftover Transformations

+

Forced transformations that have not been applied after the last +transformation pass should be reported to the user. The transformation +passes themselves cannot be responsible for this reporting because they +might not be in the pipeline, there might be multiple passes able to +apply a transformation (e.g. LoopInterchange and Polly) or a +transformation attribute may be ‘hidden’ inside another pass’s followup +attribute.

+

The pass -transform-warning (WarnMissedTransformationsPass) +emits such warnings. It should be placed after the last transformation +pass.

+

The current pass pipeline has a fixed order in which transformation +passes are executed. A transformation can be in the followup of a pass +that is executed later and thus leftover. For instance, a loop nest +cannot be distributed and then interchanged with the current pass +pipeline. The loop distribution will execute, but there is no loop +interchange pass following such that any loop interchange metadata will +be ignored. The -transform-warning pass should emit a warning in this +case.

+

Future versions of LLVM may fix this by executing transformations using +a dynamic ordering.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT1.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT1.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT1.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT1.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,563 @@ + + + + + + + + + 1. Building a JIT: Starting out with KaleidoscopeJIT — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

1. Building a JIT: Starting out with KaleidoscopeJIT

+ +
+

1.1. Chapter 1 Introduction

+

Warning: This tutorial is currently being updated to account for ORC API +changes. Only Chapters 1 and 2 are up-to-date.

+

Example code from Chapters 3 to 5 will compile and run, but has not been +updated.

+

Welcome to Chapter 1 of the “Building an ORC-based JIT in LLVM” tutorial. This +tutorial runs through the implementation of a JIT compiler using LLVM’s +On-Request-Compilation (ORC) APIs. It begins with a simplified version of the +KaleidoscopeJIT class used in the +Implementing a language with LLVM tutorials and then +introduces new features like concurrent compilation, optimization, lazy +compilation and remote execution.

+

The goal of this tutorial is to introduce you to LLVM’s ORC JIT APIs, show how +these APIs interact with other parts of LLVM, and to teach you how to recombine +them to build a custom JIT that is suited to your use-case.

+

The structure of the tutorial is:

+
    +
  • Chapter #1: Investigate the simple KaleidoscopeJIT class. This will +introduce some of the basic concepts of the ORC JIT APIs, including the +idea of an ORC Layer.

  • +
  • Chapter #2: Extend the basic KaleidoscopeJIT by adding +a new layer that will optimize IR and generated code.

  • +
  • Chapter #3: Further extend the JIT by adding a +Compile-On-Demand layer to lazily compile IR.

  • +
  • Chapter #4: Improve the laziness of our JIT by +replacing the Compile-On-Demand layer with a custom layer that uses the ORC +Compile Callbacks API directly to defer IR-generation until functions are +called.

  • +
  • Chapter #5: Add process isolation by JITing code into +a remote process with reduced privileges using the JIT Remote APIs.

  • +
+

To provide input for our JIT we will use a lightly modified version of the +Kaleidoscope REPL from Chapter 7 of the “Implementing a +language in LLVM tutorial”.

+

Finally, a word on API generations: ORC is the 3rd generation of LLVM JIT API. +It was preceded by MCJIT, and before that by the (now deleted) legacy JIT. +These tutorials don’t assume any experience with these earlier APIs, but +readers acquainted with them will see many familiar elements. Where appropriate +we will make this connection with the earlier APIs explicit to help people who +are transitioning from them to ORC.

+
+
+

1.2. JIT API Basics

+

The purpose of a JIT compiler is to compile code “on-the-fly” as it is needed, +rather than compiling whole programs to disk ahead of time as a traditional +compiler does. To support that aim our initial, bare-bones JIT API will have +just two functions:

+
    +
  1. Error addModule(std::unique_ptr<Module> M): Make the given IR module +available for execution.

  2. +
  3. Expected<JITEvaluatedSymbol> lookup(): Search for pointers to +symbols (functions or variables) that have been added to the JIT.

  4. +
+

A basic use-case for this API, executing the ‘main’ function from a module, +will look like:

+
JIT J;
+J.addModule(buildModule());
+auto *Main = (int(*)(int, char*[]))J.lookup("main").getAddress();
+int Result = Main();
+
+
+

The APIs that we build in these tutorials will all be variations on this simple +theme. Behind this API we will refine the implementation of the JIT to add +support for concurrent compilation, optimization and lazy compilation. +Eventually we will extend the API itself to allow higher-level program +representations (e.g. ASTs) to be added to the JIT.

+
+
+

1.3. KaleidoscopeJIT

+

In the previous section we described our API, now we examine a simple +implementation of it: The KaleidoscopeJIT class 1 that was used in the +Implementing a language with LLVM tutorials. We will use +the REPL code from Chapter 7 of that tutorial to supply the +input for our JIT: Each time the user enters an expression the REPL will add a +new IR module containing the code for that expression to the JIT. If the +expression is a top-level expression like ‘1+1’ or ‘sin(x)’, the REPL will also +use the lookup method of our JIT class to find and execute the code for the +expression. In later chapters of this tutorial we will modify the REPL to enable +new interactions with our JIT class, but for now we will take this setup for +granted and focus our attention on the implementation of our JIT itself.

+

Our KaleidoscopeJIT class is defined in the KaleidoscopeJIT.h header. After the +usual include guards and #includes 2, we get to the definition of our class:

+
#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include <memory>
+
+namespace llvm {
+namespace orc {
+
+class KaleidoscopeJIT {
+private:
+  ExecutionSession ES;
+  RTDyldObjectLinkingLayer ObjectLayer;
+  IRCompileLayer CompileLayer;
+
+  DataLayout DL;
+  MangleAndInterner Mangle;
+  ThreadSafeContext Ctx;
+
+public:
+  KaleidoscopeJIT(JITTargetMachineBuilder JTMB, DataLayout DL)
+      : ObjectLayer(ES,
+                    []() { return std::make_unique<SectionMemoryManager>(); }),
+        CompileLayer(ES, ObjectLayer, ConcurrentIRCompiler(std::move(JTMB))),
+        DL(std::move(DL)), Mangle(ES, this->DL),
+        Ctx(std::make_unique<LLVMContext>()) {
+    ES.getMainJITDylib().addGenerator(
+        cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(DL.getGlobalPrefix())));
+  }
+
+
+

Our class begins with six member variables: An ExecutionSession member, ES, +which provides context for our running JIT’d code (including the string pool, +global mutex, and error reporting facilities); An RTDyldObjectLinkingLayer, +ObjectLayer, that can be used to add object files to our JIT (though we will +not use it directly); An IRCompileLayer, CompileLayer, that can be used to +add LLVM Modules to our JIT (and which builds on the ObjectLayer), A DataLayout +and MangleAndInterner, DL and Mangle, that will be used for symbol mangling +(more on that later); and finally an LLVMContext that clients will use when +building IR files for the JIT.

+

Next up we have our class constructor, which takes a JITTargetMachineBuilder +that will be used by our IRCompiler, and a DataLayout that we will use to +initialize our DL member. The constructor begins by initializing our +ObjectLayer. The ObjectLayer requires a reference to the ExecutionSession, and +a function object that will build a JIT memory manager for each module that is +added (a JIT memory manager manages memory allocations, memory permissions, and +registration of exception handlers for JIT’d code). For this we use a lambda +that returns a SectionMemoryManager, an off-the-shelf utility that provides all +the basic memory management functionality required for this chapter. Next we +initialize our CompileLayer. The CompileLayer needs three things: (1) A +reference to the ExecutionSession, (2) A reference to our object layer, and (3) +a compiler instance to use to perform the actual compilation from IR to object +files. We use the off-the-shelf ConcurrentIRCompiler utility as our compiler, +which we construct using this constructor’s JITTargetMachineBuilder argument. +The ConcurrentIRCompiler utility will use the JITTargetMachineBuilder to build +llvm TargetMachines (which are not thread safe) as needed for compiles. After +this, we initialize our supporting members: DL, Mangle and Ctx with +the input DataLayout, the ExecutionSession and DL member, and a new default +constructed LLVMContext respectively. Now that our members have been initialized, +the one thing that remains to do is to tweak the configuration of the +JITDylib that we will store our code in. We want to modify this dylib to +contain not only the symbols that we add to it, but also the symbols from our +REPL process as well. We do this by attaching a +DynamicLibrarySearchGenerator instance using the +DynamicLibrarySearchGenerator::GetForCurrentProcess method.

+
static Expected<std::unique_ptr<KaleidoscopeJIT>> Create() {
+  auto JTMB = JITTargetMachineBuilder::detectHost();
+
+  if (!JTMB)
+    return JTMB.takeError();
+
+  auto DL = JTMB->getDefaultDataLayoutForTarget();
+  if (!DL)
+    return DL.takeError();
+
+  return std::make_unique<KaleidoscopeJIT>(std::move(*JTMB), std::move(*DL));
+}
+
+const DataLayout &getDataLayout() const { return DL; }
+
+LLVMContext &getContext() { return *Ctx.getContext(); }
+
+
+

Next we have a named constructor, Create, which will build a KaleidoscopeJIT +instance that is configured to generate code for our host process. It does this +by first generating a JITTargetMachineBuilder instance using that class’s +detectHost method and then using that instance to generate a datalayout for +the target process. Each of these operations can fail, so each returns its +result wrapped in an Expected value 3 that we must check for error before +continuing. If both operations succeed we can unwrap their results (using the +dereference operator) and pass them into KaleidoscopeJIT’s constructor on the +last line of the function.

+

Following the named constructor we have the getDataLayout() and +getContext() methods. These are used to make data structures created and +managed by the JIT (especially the LLVMContext) available to the REPL code that +will build our IR modules.

+
void addModule(std::unique_ptr<Module> M) {
+  cantFail(CompileLayer.add(ES.getMainJITDylib(),
+                            ThreadSafeModule(std::move(M), Ctx)));
+}
+
+Expected<JITEvaluatedSymbol> lookup(StringRef Name) {
+  return ES.lookup({&ES.getMainJITDylib()}, Mangle(Name.str()));
+}
+
+
+

Now we come to the first of our JIT API methods: addModule. This method is +responsible for adding IR to the JIT and making it available for execution. In +this initial implementation of our JIT we will make our modules “available for +execution” by adding them to the CompileLayer, which will in turn store the +Module in the main JITDylib. This process will create new symbol table entries +in the JITDylib for each definition in the module, and will defer compilation of +the module until any of its definitions is looked up. Note that this is not lazy +compilation: just referencing a definition, even if it is never used, will be +enough to trigger compilation. In later chapters we will teach our JIT to defer +compilation of functions until they’re actually called. To add our Module we +must first wrap it in a ThreadSafeModule instance, which manages the lifetime of +the Module’s LLVMContext (our Ctx member) in a thread-friendly way. In our +example, all modules will share the Ctx member, which will exist for the +duration of the JIT. Once we switch to concurrent compilation in later chapters +we will use a new context per module.

+

Our last method is lookup, which allows us to look up addresses for +function and variable definitions added to the JIT based on their symbol names. +As noted above, lookup will implicitly trigger compilation for any symbol +that has not already been compiled. Our lookup method calls through to +ExecutionSession::lookup, passing in a list of dylibs to search (in our case +just the main dylib), and the symbol name to search for, with a twist: We have +to mangle the name of the symbol we’re searching for first. The ORC JIT +components use mangled symbols internally the same way a static compiler and +linker would, rather than using plain IR symbol names. This allows JIT’d code +to interoperate easily with precompiled code in the application or shared +libraries. The kind of mangling will depend on the DataLayout, which in turn +depends on the target platform. To allow us to remain portable and search based +on the un-mangled name, we just re-produce this mangling ourselves using our +Mangle member function object.

+

This brings us to the end of Chapter 1 of Building a JIT. You now have a basic +but fully functioning JIT stack that you can use to take LLVM IR and make it +executable within the context of your JIT process. In the next chapter we’ll +look at how to extend this JIT to produce better quality code, and in the +process take a deeper look at the ORC layer concept.

+

Next: Extending the KaleidoscopeJIT

+
+
+

1.4. Full Code Listing

+

Here is the complete code listing for our running example. To build this +example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains a simple JIT definition for use in the kaleidoscope tutorials.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include <memory>
+
+namespace llvm {
+namespace orc {
+
+class KaleidoscopeJIT {
+private:
+  std::unique_ptr<ExecutionSession> ES;
+
+  DataLayout DL;
+  MangleAndInterner Mangle;
+
+  RTDyldObjectLinkingLayer ObjectLayer;
+  IRCompileLayer CompileLayer;
+
+  JITDylib &MainJD;
+
+public:
+  KaleidoscopeJIT(std::unique_ptr<ExecutionSession> ES,
+                  JITTargetMachineBuilder JTMB, DataLayout DL)
+      : ES(std::move(ES)), DL(std::move(DL)), Mangle(*this->ES, this->DL),
+        ObjectLayer(*this->ES,
+                    []() { return std::make_unique<SectionMemoryManager>(); }),
+        CompileLayer(*this->ES, ObjectLayer,
+                     std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
+        MainJD(this->ES->createBareJITDylib("<main>")) {
+    MainJD.addGenerator(
+        cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
+            DL.getGlobalPrefix())));
+  }
+
+  ~KaleidoscopeJIT() {
+    if (auto Err = ES->endSession())
+      ES->reportError(std::move(Err));
+  }
+
+  static Expected<std::unique_ptr<KaleidoscopeJIT>> Create() {
+    auto EPC = SelfExecutorProcessControl::Create();
+    if (!EPC)
+      return EPC.takeError();
+
+    auto ES = std::make_unique<ExecutionSession>(std::move(*EPC));
+
+    JITTargetMachineBuilder JTMB(
+        ES->getExecutorProcessControl().getTargetTriple());
+
+    auto DL = JTMB.getDefaultDataLayoutForTarget();
+    if (!DL)
+      return DL.takeError();
+
+    return std::make_unique<KaleidoscopeJIT>(std::move(ES), std::move(JTMB),
+                                             std::move(*DL));
+  }
+
+  const DataLayout &getDataLayout() const { return DL; }
+
+  JITDylib &getMainJITDylib() { return MainJD; }
+
+  Error addModule(ThreadSafeModule TSM, ResourceTrackerSP RT = nullptr) {
+    if (!RT)
+      RT = MainJD.getDefaultResourceTracker();
+    return CompileLayer.add(RT, std::move(TSM));
+  }
+
+  Expected<JITEvaluatedSymbol> lookup(StringRef Name) {
+    return ES->lookup({&MainJD}, Mangle(Name.str()));
+  }
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+
+
+
1
+

Actually we use a cut-down version of KaleidoscopeJIT that makes a +simplifying assumption: symbols cannot be re-defined. This will make it +impossible to re-define symbols in the REPL, but will make our symbol +lookup logic simpler. Re-introducing support for symbol redefinition is +left as an exercise for the reader. (The KaleidoscopeJIT.h used in the +original tutorials will be a helpful reference).

+
+
2
+
++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

File

Reason for inclusion

JITSymbol.h

Defines the lookup result type +JITEvaluatedSymbol

CompileUtils.h

Provides the SimpleCompiler class.

Core.h

Core utilities such as ExecutionSession and +JITDylib.

ExecutionUtils.h

Provides the DynamicLibrarySearchGenerator +class.

IRCompileLayer.h

Provides the IRCompileLayer class.

JITTargetMachineBuilder.h

Provides the JITTargetMachineBuilder class.

RTDyldObjectLinkingLayer.h

Provides the RTDyldObjectLinkingLayer class.

SectionMemoryManager.h

Provides the SectionMemoryManager class.

DataLayout.h

Provides the DataLayout class.

LLVMContext.h

Provides the LLVMContext class.

+
+
3
+

See the ErrorHandling section in the LLVM Programmer’s Manual +(https://llvm.org/docs/ProgrammersManual.html#error-handling)

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT2.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT2.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT2.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT2.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,526 @@ + + + + + + + + + 2. Building a JIT: Adding Optimizations – An introduction to ORC Layers — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

2. Building a JIT: Adding Optimizations – An introduction to ORC Layers

+ +

This tutorial is under active development. It is incomplete and details may +change frequently. Nonetheless we invite you to try it out as it stands, and +we welcome any feedback.

+
+

2.1. Chapter 2 Introduction

+

Warning: This tutorial is currently being updated to account for ORC API +changes. Only Chapters 1 and 2 are up-to-date.

+

Example code from Chapters 3 to 5 will compile and run, but has not been +updated.

+

Welcome to Chapter 2 of the “Building an ORC-based JIT in LLVM” tutorial. In +Chapter 1 of this series we examined a basic JIT +class, KaleidoscopeJIT, that could take LLVM IR modules as input and produce +executable code in memory. KaleidoscopeJIT was able to do this with relatively +little code by composing two off-the-shelf ORC layers: IRCompileLayer and +ObjectLinkingLayer, to do much of the heavy lifting.

+

In this chapter we’ll learn more about the ORC layer concept by using a new layer, +IRTransformLayer, to add IR optimization support to KaleidoscopeJIT.

+
+
+

2.2. Optimizing Modules using the IRTransformLayer

+

In Chapter 4 of the “Implementing a language with LLVM” +tutorial series the llvm FunctionPassManager is introduced as a means for +optimizing LLVM IR. Interested readers may read that chapter for details, but +in short: to optimize a Module we create an llvm::FunctionPassManager +instance, configure it with a set of optimizations, then run the PassManager on +a Module to mutate it into a (hopefully) more optimized but semantically +equivalent form. In the original tutorial series the FunctionPassManager was +created outside the KaleidoscopeJIT and modules were optimized before being +added to it. In this Chapter we will make optimization a phase of our JIT +instead. For now this will provide us a motivation to learn more about ORC +layers, but in the long term making optimization part of our JIT will yield an +important benefit: When we begin lazily compiling code (i.e. deferring +compilation of each function until the first time it’s run) having +optimization managed by our JIT will allow us to optimize lazily too, rather +than having to do all our optimization up-front.

+

To add optimization support to our JIT we will take the KaleidoscopeJIT from +Chapter 1 and compose an ORC IRTransformLayer on top. We will look at how the +IRTransformLayer works in more detail below, but the interface is simple: the +constructor for this layer takes a reference to the execution session and the +layer below (as all layers do) plus an IR optimization function that it will +apply to each Module that is added via addModule:

+
class KaleidoscopeJIT {
+private:
+  ExecutionSession ES;
+  RTDyldObjectLinkingLayer ObjectLayer;
+  IRCompileLayer CompileLayer;
+  IRTransformLayer TransformLayer;
+
+  DataLayout DL;
+  MangleAndInterner Mangle;
+  ThreadSafeContext Ctx;
+
+public:
+
+  KaleidoscopeJIT(JITTargetMachineBuilder JTMB, DataLayout DL)
+      : ObjectLayer(ES,
+                    []() { return std::make_unique<SectionMemoryManager>(); }),
+        CompileLayer(ES, ObjectLayer, ConcurrentIRCompiler(std::move(JTMB))),
+        TransformLayer(ES, CompileLayer, optimizeModule),
+        DL(std::move(DL)), Mangle(ES, this->DL),
+        Ctx(std::make_unique<LLVMContext>()) {
+    ES.getMainJITDylib().addGenerator(
+        cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(DL.getGlobalPrefix())));
+  }
+
+
+

Our extended KaleidoscopeJIT class starts out the same as it did in Chapter 1, +but after the CompileLayer we introduce a new member, TransformLayer, which sits +on top of our CompileLayer. We initialize our TransformLayer with a reference to +the ExecutionSession and output layer (standard practice for layers), along with +a transform function. For our transform function we supply our class’s +optimizeModule static method.

+
// ...
+return cantFail(OptimizeLayer.addModule(std::move(M),
+                                        std::move(Resolver)));
+// ...
+
+
+

Next we need to update our addModule method to replace the call to +CompileLayer::add with a call to OptimizeLayer::add instead.

+
static Expected<ThreadSafeModule>
+optimizeModule(ThreadSafeModule M, const MaterializationResponsibility &R) {
+  // Create a function pass manager.
+  auto FPM = std::make_unique<legacy::FunctionPassManager>(M.get());
+
+  // Add some optimizations.
+  FPM->add(createInstructionCombiningPass());
+  FPM->add(createReassociatePass());
+  FPM->add(createGVNPass());
+  FPM->add(createCFGSimplificationPass());
+  FPM->doInitialization();
+
+  // Run the optimizations over all functions in the module being added to
+  // the JIT.
+  for (auto &F : *M)
+    FPM->run(F);
+
+  return M;
+}
+
+
+

At the bottom of our JIT we add a private method to do the actual optimization: +optimizeModule. This function takes the module to be transformed as input (as +a ThreadSafeModule) along with a reference to a new class: +MaterializationResponsibility. The MaterializationResponsibility argument +can be used to query JIT state for the module being transformed, such as the set +of definitions in the module that JIT’d code is actively trying to call/access. +For now we will ignore this argument and use a standard optimization +pipeline. To do this we set up a FunctionPassManager, add some passes to it, run +it over every function in the module, and then return the mutated module. The +specific optimizations are the same ones used in Chapter 4 +of the “Implementing a language with LLVM” tutorial series. Readers may visit +that chapter for a more in-depth discussion of these, and of IR optimization in +general.

+

And that’s it in terms of changes to KaleidoscopeJIT: When a module is added via +addModule the OptimizeLayer will call our optimizeModule function before passing +the transformed module on to the CompileLayer below. Of course, we could have +called optimizeModule directly in our addModule function and not gone to the +bother of using the IRTransformLayer, but doing so gives us another opportunity +to see how layers compose. It also provides a neat entry point to the layer +concept itself, because IRTransformLayer is one of the simplest layers that +can be implemented.

+
// From IRTransformLayer.h:
+class IRTransformLayer : public IRLayer {
+public:
+  using TransformFunction = std::function<Expected<ThreadSafeModule>(
+      ThreadSafeModule, const MaterializationResponsibility &R)>;
+
+  IRTransformLayer(ExecutionSession &ES, IRLayer &BaseLayer,
+                   TransformFunction Transform = identityTransform);
+
+  void setTransform(TransformFunction Transform) {
+    this->Transform = std::move(Transform);
+  }
+
+  static ThreadSafeModule
+  identityTransform(ThreadSafeModule TSM,
+                    const MaterializationResponsibility &R) {
+    return TSM;
+  }
+
+  void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
+
+private:
+  IRLayer &BaseLayer;
+  TransformFunction Transform;
+};
+
+// From IRTransformLayer.cpp:
+
+IRTransformLayer::IRTransformLayer(ExecutionSession &ES,
+                                   IRLayer &BaseLayer,
+                                   TransformFunction Transform)
+    : IRLayer(ES), BaseLayer(BaseLayer), Transform(std::move(Transform)) {}
+
+void IRTransformLayer::emit(MaterializationResponsibility R,
+                            ThreadSafeModule TSM) {
+  assert(TSM.getModule() && "Module must not be null");
+
+  if (auto TransformedTSM = Transform(std::move(TSM), R))
+    BaseLayer.emit(std::move(R), std::move(*TransformedTSM));
+  else {
+    R.failMaterialization();
+    getExecutionSession().reportError(TransformedTSM.takeError());
+  }
+}
+
+
+

This is the whole definition of IRTransformLayer, from +llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h and +llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp. This class is concerned +with two very simple jobs: (1) Running every IR Module that is emitted via this +layer through the transform function object, and (2) implementing the ORC +IRLayer interface (which itself conforms to the general ORC Layer concept, +more on that below). Most of the class is straightforward: a typedef for the +transform function, a constructor to initialize the members, a setter for the +transform function value, and a default no-op transform. The most important +method is emit as this is half of our IRLayer interface. The emit method +applies our transform to each module that it is called on and, if the transform +succeeds, passes the transformed module to the base layer. If the transform +fails, our emit function calls +MaterializationResponsibility::failMaterialization (this lets JIT clients who +may be waiting on other threads know that the code they were waiting for has +failed to compile) and logs the error with the execution session before bailing +out.

+

The other half of the IRLayer interface we inherit unmodified from the IRLayer +class:

+
Error IRLayer::add(JITDylib &JD, ThreadSafeModule TSM, VModuleKey K) {
+  return JD.define(std::make_unique<BasicIRLayerMaterializationUnit>(
+      *this, std::move(K), std::move(TSM)));
+}
+
+
+

This code, from llvm/lib/ExecutionEngine/Orc/Layer.cpp, adds a +ThreadSafeModule to a given JITDylib by wrapping it up in a +MaterializationUnit (in this case a BasicIRLayerMaterializationUnit). +Most layers that derive from IRLayer can rely on this default implementation +of the add method.

+

These two operations, add and emit, together constitute the layer +concept: A layer is a way to wrap a part of a compiler pipeline (in this case +the “opt” phase of an LLVM compiler) whose API is opaque to ORC with an +interface that ORC can call as needed. The add method takes a +module in some input program representation (in this case an LLVM IR module) +and stores it in the target JITDylib, arranging for it to be passed back +to the layer’s emit method when any symbol defined by that module is requested. +Each layer can complete its own work by calling the emit method of its base +layer. For example, in this tutorial our IRTransformLayer calls through to +our IRCompileLayer to compile the transformed IR, and our IRCompileLayer in +turn calls our ObjectLayer to link the object file produced by our compiler.

+

So far we have learned how to optimize and compile our LLVM IR, but we have +not focused on when compilation happens. Our current REPL optimizes and +compiles each function as soon as it is referenced by any other code, +regardless of whether it is ever called at runtime. In the next chapter we +will introduce fully lazy compilation, in which functions are not compiled +until they are first called at run-time. At this point the trade-offs get much +more interesting: the lazier we are, the quicker we can start executing the +first function, but the more often we will have to pause to compile newly +encountered functions. If we only code-gen lazily, but optimize eagerly, we +will have a longer startup time (as everything is optimized at that time) but +relatively short pauses as each function just passes through code-gen. If we +both optimize and code-gen lazily we can start executing the first function +more quickly, but we will have longer pauses as each function has to be both +optimized and code-gen’d when it is first executed. Things become even more +interesting if we consider interprocedural optimizations like inlining, which +must be performed eagerly. These are complex trade-offs, and there is no +one-size-fits-all solution to them, but by providing composable layers we leave +the decisions to the person implementing the JIT, and make it easy for them to +experiment with different configurations.

+

Next: Adding Per-function Lazy Compilation

+
+
+

2.3. Full Code Listing

+

Here is the complete code listing for our running example with an +IRTransformLayer added to enable optimization. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains a simple JIT definition for use in the kaleidoscope tutorials.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include <memory>
+
+namespace llvm {
+namespace orc {
+
+/// A simple ORC-based JIT for the Kaleidoscope tutorials.
+///
+/// Modules passed to addModule() enter the stack at OptimizeLayer, which
+/// applies optimizeModule() and forwards to CompileLayer, which in turn
+/// forwards to ObjectLayer. All code is added to, and looked up in, the
+/// single "<main>" JITDylib.
+class KaleidoscopeJIT {
+private:
+  std::unique_ptr<ExecutionSession> ES;
+
+  DataLayout DL;
+  MangleAndInterner Mangle;
+
+  // The layer stack, lowest layer first. Each layer is constructed with a
+  // reference to the one declared just above it, so keep this order.
+  RTDyldObjectLinkingLayer ObjectLayer;
+  IRCompileLayer CompileLayer;
+  IRTransformLayer OptimizeLayer;
+
+  JITDylib &MainJD;
+
+public:
+  /// Build the layer stack on top of \p ES and create the "<main>" JITDylib.
+  ///
+  /// \param ES   Execution session; ownership is taken.
+  /// \param JTMB Target machine builder handed to the ConcurrentIRCompiler.
+  /// \param DL   Data layout, used for name mangling and the global prefix.
+  KaleidoscopeJIT(std::unique_ptr<ExecutionSession> ES,
+                  JITTargetMachineBuilder JTMB, DataLayout DL)
+      : ES(std::move(ES)), DL(std::move(DL)), Mangle(*this->ES, this->DL),
+        ObjectLayer(*this->ES,
+                    []() { return std::make_unique<SectionMemoryManager>(); }),
+        CompileLayer(*this->ES, ObjectLayer,
+                     std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
+        OptimizeLayer(*this->ES, CompileLayer, optimizeModule),
+        MainJD(this->ES->createBareJITDylib("<main>")) {
+    // Use the member here: the parameter DL shadows it and was already passed
+    // to std::move in the initializer list, so it must not be read again.
+    MainJD.addGenerator(
+        cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
+            this->DL.getGlobalPrefix())));
+  }
+
+  /// End the execution session, reporting any error through the session.
+  ~KaleidoscopeJIT() {
+    if (auto Err = ES->endSession())
+      ES->reportError(std::move(Err));
+  }
+
+  /// Create a JIT for the current process.
+  ///
+  /// \returns the new JIT, or the error produced while creating the
+  /// SelfExecutorProcessControl or while querying the default data layout.
+  static Expected<std::unique_ptr<KaleidoscopeJIT>> Create() {
+    auto EPC = SelfExecutorProcessControl::Create();
+    if (!EPC)
+      return EPC.takeError();
+
+    auto ES = std::make_unique<ExecutionSession>(std::move(*EPC));
+
+    JITTargetMachineBuilder JTMB(
+        ES->getExecutorProcessControl().getTargetTriple());
+
+    auto DL = JTMB.getDefaultDataLayoutForTarget();
+    if (!DL)
+      return DL.takeError();
+
+    return std::make_unique<KaleidoscopeJIT>(std::move(ES), std::move(JTMB),
+                                             std::move(*DL));
+  }
+
+  /// \returns the data layout this JIT was constructed with.
+  const DataLayout &getDataLayout() const { return DL; }
+
+  /// \returns the "<main>" JITDylib that all modules are added to.
+  JITDylib &getMainJITDylib() { return MainJD; }
+
+  /// Add \p TSM to the JIT via the top of the layer stack.
+  ///
+  /// \param RT Resource tracker to associate the module with; when null, the
+  ///           main JITDylib's default tracker is used.
+  Error addModule(ThreadSafeModule TSM, ResourceTrackerSP RT = nullptr) {
+    if (!RT)
+      RT = MainJD.getDefaultResourceTracker();
+
+    return OptimizeLayer.add(RT, std::move(TSM));
+  }
+
+  /// Look up \p Name, mangled with this JIT's mangler, in the main JITDylib.
+  Expected<JITEvaluatedSymbol> lookup(StringRef Name) {
+    return ES->lookup({&MainJD}, Mangle(Name.str()));
+  }
+
+private:
+  /// Transform installed on OptimizeLayer: runs a fixed set of function
+  /// passes over every function in \p TSM's module and returns the module.
+  /// \p R is not used.
+  static Expected<ThreadSafeModule>
+  optimizeModule(ThreadSafeModule TSM, const MaterializationResponsibility &R) {
+    TSM.withModuleDo([](Module &M) {
+      // Create a function pass manager.
+      auto FPM = std::make_unique<legacy::FunctionPassManager>(&M);
+
+      // Add some optimizations.
+      FPM->add(createInstructionCombiningPass());
+      FPM->add(createReassociatePass());
+      FPM->add(createGVNPass());
+      FPM->add(createCFGSimplificationPass());
+      FPM->doInitialization();
+
+      // Run the optimizations over all functions in the module being added to
+      // the JIT.
+      for (auto &F : M)
+        FPM->run(F);
+    });
+
+    return std::move(TSM);
+  }
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT3.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT3.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT3.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT3.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,473 @@ + + + + + + + + + 3. Building a JIT: Per-function Lazy Compilation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

3. Building a JIT: Per-function Lazy Compilation

+ +

This tutorial is under active development. It is incomplete and details may +change frequently. Nonetheless we invite you to try it out as it stands, and +we welcome any feedback.

+
+

3.1. Chapter 3 Introduction

+

Warning: This text is currently out of date due to ORC API updates.

+

The example code has been updated and can be used. The text will be updated +once the API churn dies down.

+

Welcome to Chapter 3 of the “Building an ORC-based JIT in LLVM” tutorial. This +chapter discusses lazy JITing and shows you how to enable it by adding an ORC +CompileOnDemand layer to the JIT from Chapter 2.

+
+
+

3.2. Lazy Compilation

+

When we add a module to the KaleidoscopeJIT class from Chapter 2 it is +immediately optimized, compiled and linked for us by the IRTransformLayer, +IRCompileLayer and RTDyldObjectLinkingLayer respectively. This scheme, where all the +work to make a Module executable is done up front, is simple to understand and +its performance characteristics are easy to reason about. However, it will lead +to very high startup times if the amount of code to be compiled is large, and +may also do a lot of unnecessary compilation if only a few compiled functions +are ever called at runtime. A truly “just-in-time” compiler should allow us to +defer the compilation of any given function until the moment that function is +first called, improving launch times and eliminating redundant work. In fact, +the ORC APIs provide us with a layer to lazily compile LLVM IR: +CompileOnDemandLayer.

+

The CompileOnDemandLayer class conforms to the layer interface described in +Chapter 2, but its addModule method behaves quite differently from the layers +we have seen so far: rather than doing any work up front, it just scans the +Modules being added and arranges for each function in them to be compiled the +first time it is called. To do this, the CompileOnDemandLayer creates two small +utilities for each function that it scans: a stub and a compile +callback. The stub is a pair of a function pointer (which will be pointed at +the function’s implementation once the function has been compiled) and an +indirect jump through the pointer. By fixing the address of the indirect jump +for the lifetime of the program we can give the function a permanent “effective +address”, one that can be safely used for indirection and function pointer +comparison even if the function’s implementation is never compiled, or if it is +compiled more than once (due to, for example, recompiling the function at a +higher optimization level) and changes address. The second utility, the compile +callback, represents a re-entry point from the program into the compiler that +will trigger compilation and then execution of a function. By initializing the +function’s stub to point at the function’s compile callback, we enable lazy +compilation: The first attempted call to the function will follow the function +pointer and trigger the compile callback instead. The compile callback will +compile the function, update the function pointer for the stub, then execute +the function. On all subsequent calls to the function, the function pointer +will point at the already-compiled function, so there is no further overhead +from the compiler. We will look at this process in more detail in the next +chapter of this tutorial, but for now we’ll trust the CompileOnDemandLayer to +set all the stubs and callbacks up for us. 
All we need to do is to add the +CompileOnDemandLayer to the top of our stack and we’ll get the benefits of +lazy compilation. We just need a few changes to the source:

+
...
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+...
+
+...
+class KaleidoscopeJIT {
+private:
+  std::unique_ptr<TargetMachine> TM;
+  const DataLayout DL;
+  RTDyldObjectLinkingLayer ObjectLayer;
+  IRCompileLayer<decltype(ObjectLayer), SimpleCompiler> CompileLayer;
+
+  using OptimizeFunction =
+      std::function<std::shared_ptr<Module>(std::shared_ptr<Module>)>;
+
+  IRTransformLayer<decltype(CompileLayer), OptimizeFunction> OptimizeLayer;
+
+  std::unique_ptr<JITCompileCallbackManager> CompileCallbackManager;
+  CompileOnDemandLayer<decltype(OptimizeLayer)> CODLayer;
+
+public:
+  using ModuleHandle = decltype(CODLayer)::ModuleHandleT;
+
+
+

First we need to include the CompileOnDemandLayer.h header, then add two new +members: a std::unique_ptr<JITCompileCallbackManager> and a CompileOnDemandLayer, +to our class. The CompileCallbackManager member is used by the CompileOnDemandLayer +to create the compile callback needed for each function.

+
KaleidoscopeJIT()
+    : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
+      ObjectLayer([]() { return std::make_shared<SectionMemoryManager>(); }),
+      CompileLayer(ObjectLayer, SimpleCompiler(*TM)),
+      OptimizeLayer(CompileLayer,
+                    [this](std::shared_ptr<Module> M) {
+                      return optimizeModule(std::move(M));
+                    }),
+      CompileCallbackManager(
+          orc::createLocalCompileCallbackManager(TM->getTargetTriple(), 0)),
+      CODLayer(OptimizeLayer,
+               [this](Function &F) { return std::set<Function*>({&F}); },
+               *CompileCallbackManager,
+               orc::createLocalIndirectStubsManagerBuilder(
+                 TM->getTargetTriple())) {
+  llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
+}
+
+
+

Next we have to update our constructor to initialize the new members. To create +an appropriate compile callback manager we use the +createLocalCompileCallbackManager function, which takes a target triple and a +JITTargetAddress to call if it receives a request to compile an unknown +function. In our simple JIT this situation is unlikely to come up, so we’ll +cheat and just pass ‘0’ here. In a production quality JIT you could give the +address of a function that throws an exception in order to unwind the JIT’d +code’s stack.

+

Now we can construct our CompileOnDemandLayer. Following the pattern from +previous layers we start by passing a reference to the next layer down in our +stack – the OptimizeLayer. Next we need to supply a ‘partitioning function’: +when a not-yet-compiled function is called, the CompileOnDemandLayer will call +this function to ask us what we would like to compile. At a minimum we need to +compile the function being called (given by the argument to the partitioning +function), but we could also request that the CompileOnDemandLayer compile other +functions that are unconditionally called (or highly likely to be called) from +the function being called. For KaleidoscopeJIT we’ll keep it simple and just +request compilation of the function that was called. Next we pass a reference to +our CompileCallbackManager. Finally, we need to supply an “indirect stubs +manager builder”: a utility function that constructs IndirectStubsManagers, which +are in turn used to build the stubs for the functions in each module. The +CompileOnDemandLayer will call the indirect stubs manager builder once for each +call to addModule, and use the resulting indirect stubs manager to create +stubs for all functions in all modules in the set. If/when the module set is +removed from the JIT the indirect stubs manager will be deleted, freeing any +memory allocated to the stubs. We supply this function by using the +createLocalIndirectStubsManagerBuilder utility.

+
// ...
+        if (auto Sym = CODLayer.findSymbol(Name, false))
+// ...
+return cantFail(CODLayer.addModule(std::move(Ms),
+                                   std::move(Resolver)));
+// ...
+
+// ...
+return CODLayer.findSymbol(MangledNameStream.str(), true);
+// ...
+
+// ...
+CODLayer.removeModule(H);
+// ...
+
+
+

Finally, we need to replace the references to OptimizeLayer in our addModule, +findSymbol, and removeModule methods. With that, we’re up and running.

+

To be done:

+

Chapter conclusion.

+
+
+

3.3. Full Code Listing

+

Here is the complete code listing for our running example with a CompileOnDemand +layer added to enable lazy function-at-a-time compilation. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains a simple JIT definition for use in the kaleidoscope tutorials.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include <memory>
+
+namespace llvm {
+namespace orc {
+
+/// A simple ORC-based JIT for the Kaleidoscope tutorials, with lazy
+/// function-at-a-time compilation.
+///
+/// Modules passed to addModule() enter the stack at CODLayer, which sits on
+/// top of OptimizeLayer, CompileLayer and ObjectLayer (in that order). All
+/// code is added to, and looked up in, the single "<main>" JITDylib.
+class KaleidoscopeJIT {
+private:
+  std::unique_ptr<ExecutionSession> ES;
+  std::unique_ptr<EPCIndirectionUtils> EPCIU;
+
+  DataLayout DL;
+  MangleAndInterner Mangle;
+
+  // The layer stack, lowest layer first. Each layer is constructed with a
+  // reference to the one declared just above it, so keep this order.
+  RTDyldObjectLinkingLayer ObjectLayer;
+  IRCompileLayer CompileLayer;
+  IRTransformLayer OptimizeLayer;
+  CompileOnDemandLayer CODLayer;
+
+  JITDylib &MainJD;
+
+  // Passed (as an address) to createLazyCallThroughManager() in Create().
+  // Prints a message and terminates the process.
+  static void handleLazyCallThroughError() {
+    errs() << "LazyCallThrough error: Could not find function body";
+    exit(1);
+  }
+
+public:
+  /// Build the layer stack on top of \p ES and create the "<main>" JITDylib.
+  ///
+  /// \param ES    Execution session; ownership is taken.
+  /// \param EPCIU Indirection utilities providing the lazy call-through
+  ///              manager and indirect stubs managers used by CODLayer;
+  ///              ownership is taken.
+  /// \param JTMB  Target machine builder handed to the ConcurrentIRCompiler.
+  /// \param DL    Data layout, used for name mangling and the global prefix.
+  KaleidoscopeJIT(std::unique_ptr<ExecutionSession> ES,
+                  std::unique_ptr<EPCIndirectionUtils> EPCIU,
+                  JITTargetMachineBuilder JTMB, DataLayout DL)
+      : ES(std::move(ES)), EPCIU(std::move(EPCIU)), DL(std::move(DL)),
+        Mangle(*this->ES, this->DL),
+        ObjectLayer(*this->ES,
+                    []() { return std::make_unique<SectionMemoryManager>(); }),
+        CompileLayer(*this->ES, ObjectLayer,
+                     std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
+        OptimizeLayer(*this->ES, CompileLayer, optimizeModule),
+        CODLayer(*this->ES, OptimizeLayer,
+                 this->EPCIU->getLazyCallThroughManager(),
+                 [this] { return this->EPCIU->createIndirectStubsManager(); }),
+        MainJD(this->ES->createBareJITDylib("<main>")) {
+    // Use the member here: the parameter DL shadows it and was already passed
+    // to std::move in the initializer list, so it must not be read again.
+    MainJD.addGenerator(
+        cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
+            this->DL.getGlobalPrefix())));
+  }
+
+  /// End the execution session, then clean up the indirection utilities;
+  /// errors from either step are reported through the session.
+  ~KaleidoscopeJIT() {
+    if (auto Err = ES->endSession())
+      ES->reportError(std::move(Err));
+    if (auto Err = EPCIU->cleanup())
+      ES->reportError(std::move(Err));
+  }
+
+  /// Create a JIT for the current process.
+  ///
+  /// \returns the new JIT, or the first error produced while creating the
+  /// SelfExecutorProcessControl, the EPCIndirectionUtils, the lazy
+  /// call-through re-entry, or the default data layout.
+  static Expected<std::unique_ptr<KaleidoscopeJIT>> Create() {
+    auto EPC = SelfExecutorProcessControl::Create();
+    if (!EPC)
+      return EPC.takeError();
+
+    auto ES = std::make_unique<ExecutionSession>(std::move(*EPC));
+
+    auto EPCIU = EPCIndirectionUtils::Create(ES->getExecutorProcessControl());
+    if (!EPCIU)
+      return EPCIU.takeError();
+
+    (*EPCIU)->createLazyCallThroughManager(
+        *ES, pointerToJITTargetAddress(&handleLazyCallThroughError));
+
+    if (auto Err = setUpInProcessLCTMReentryViaEPCIU(**EPCIU))
+      return std::move(Err);
+
+    JITTargetMachineBuilder JTMB(
+        ES->getExecutorProcessControl().getTargetTriple());
+
+    auto DL = JTMB.getDefaultDataLayoutForTarget();
+    if (!DL)
+      return DL.takeError();
+
+    return std::make_unique<KaleidoscopeJIT>(std::move(ES), std::move(*EPCIU),
+                                             std::move(JTMB), std::move(*DL));
+  }
+
+  /// \returns the data layout this JIT was constructed with.
+  const DataLayout &getDataLayout() const { return DL; }
+
+  /// \returns the "<main>" JITDylib that all modules are added to.
+  JITDylib &getMainJITDylib() { return MainJD; }
+
+  /// Add \p TSM to the JIT via the top of the layer stack.
+  ///
+  /// \param RT Resource tracker to associate the module with; when null, the
+  ///           main JITDylib's default tracker is used.
+  Error addModule(ThreadSafeModule TSM, ResourceTrackerSP RT = nullptr) {
+    if (!RT)
+      RT = MainJD.getDefaultResourceTracker();
+
+    // Add through CODLayer, the top of the stack. Adding directly to
+    // OptimizeLayer would bypass the CompileOnDemandLayer entirely.
+    return CODLayer.add(RT, std::move(TSM));
+  }
+
+  /// Look up \p Name, mangled with this JIT's mangler, in the main JITDylib.
+  Expected<JITEvaluatedSymbol> lookup(StringRef Name) {
+    return ES->lookup({&MainJD}, Mangle(Name.str()));
+  }
+
+private:
+  /// Transform installed on OptimizeLayer: runs a fixed set of function
+  /// passes over every function in \p TSM's module and returns the module.
+  /// \p R is not used.
+  static Expected<ThreadSafeModule>
+  optimizeModule(ThreadSafeModule TSM, const MaterializationResponsibility &R) {
+    TSM.withModuleDo([](Module &M) {
+      // Create a function pass manager.
+      auto FPM = std::make_unique<legacy::FunctionPassManager>(&M);
+
+      // Add some optimizations.
+      FPM->add(createInstructionCombiningPass());
+      FPM->add(createReassociatePass());
+      FPM->add(createGVNPass());
+      FPM->add(createCFGSimplificationPass());
+      FPM->doInitialization();
+
+      // Run the optimizations over all functions in the module being added to
+      // the JIT.
+      for (auto &F : M)
+        FPM->run(F);
+    });
+
+    return std::move(TSM);
+  }
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+
+

Next: Extreme Laziness – Using Compile Callbacks to JIT directly from ASTs

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT4.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT4.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/BuildingAJIT4.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/BuildingAJIT4.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,428 @@ + + + + + + + + + 4. Building a JIT: Extreme Laziness - Using LazyReexports to JIT from ASTs — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

4. Building a JIT: Extreme Laziness - Using LazyReexports to JIT from ASTs

+ +

This tutorial is under active development. It is incomplete and details may +change frequently. Nonetheless we invite you to try it out as it stands, and +we welcome any feedback.

+
+

4.1. Chapter 4 Introduction

+

Welcome to Chapter 4 of the “Building an ORC-based JIT in LLVM” tutorial. This +chapter introduces custom MaterializationUnits and Layers, and the lazy +reexports API. Together these will be used to replace the CompileOnDemandLayer +from Chapter 3 with a custom lazy-JITing scheme that JITs +directly from Kaleidoscope ASTs.

+

To be done:

+

(1) Describe the drawbacks of JITing from IR (have to compile to IR first, +which reduces the benefits of laziness).

+

(2) Describe CompileCallbackManagers and IndirectStubsManagers in detail.

+

(3) Run through the implementation of addFunctionAST.

+
+
+

4.2. Full Code Listing

+

Here is the complete code listing for our running example that JITs lazily from +Kaleidoscope ASTs. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains a simple JIT definition for use in the kaleidoscope tutorials.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h"
+#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
+#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include <memory>
+
+class PrototypeAST;
+class ExprAST;
+
+/// FunctionAST - A function definition: owns the function's prototype and the
+/// expression that forms its body.
+class FunctionAST {
+public:
+  /// Take ownership of the prototype and the body expression.
+  FunctionAST(std::unique_ptr<PrototypeAST> ProtoIn,
+              std::unique_ptr<ExprAST> BodyIn)
+      : Proto(std::move(ProtoIn)), Body(std::move(BodyIn)) {}
+
+  // Declared here only; the definitions are not part of this header.
+  const PrototypeAST &getProto() const;
+  const std::string &getName() const;
+  llvm::Function *codegen();
+
+private:
+  std::unique_ptr<PrototypeAST> Proto;
+  std::unique_ptr<ExprAST> Body;
+};
+
+/// This will compile FnAST to IR, rename the function to add the given
+/// suffix (needed to prevent a name-clash with the function's stub),
+/// and then take ownership of the module that the function was compiled
+/// into.
+llvm::orc::ThreadSafeModule irgenAndTakeOwnership(FunctionAST &FnAST,
+                                                  const std::string &Suffix);
+
+namespace llvm {
+namespace orc {
+
+class KaleidoscopeASTLayer;
+class KaleidoscopeJIT;
+
+/// MaterializationUnit that holds one Kaleidoscope FunctionAST together with
+/// the KaleidoscopeASTLayer it was created by.
+class KaleidoscopeASTMaterializationUnit : public MaterializationUnit {
+  KaleidoscopeASTLayer &L;
+  std::unique_ptr<FunctionAST> F;
+
+  // Discarding a symbol is treated as a programming error for this unit.
+  void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {
+    llvm_unreachable("Kaleidoscope functions are not overridable");
+  }
+
+public:
+  /// Defined out of line, after KaleidoscopeASTLayer is complete.
+  KaleidoscopeASTMaterializationUnit(KaleidoscopeASTLayer &L,
+                                     std::unique_ptr<FunctionAST> F);
+
+  /// Fixed, human-readable name of this kind of materialization unit.
+  StringRef getName() const override {
+    return "KaleidoscopeASTMaterializationUnit";
+  }
+
+  /// Defined out of line, after KaleidoscopeASTLayer is complete.
+  void materialize(std::unique_ptr<MaterializationResponsibility> R) override;
+};
+
+/// Layer that accepts Kaleidoscope FunctionASTs rather than IR. add() wraps
+/// the AST in a KaleidoscopeASTMaterializationUnit; emit() lowers it with
+/// irgenAndTakeOwnership() and hands the result to the base IR layer.
+class KaleidoscopeASTLayer {
+  IRLayer &BaseLayer;
+  const DataLayout &DL;
+
+public:
+  KaleidoscopeASTLayer(IRLayer &BaseLayer, const DataLayout &DL)
+      : BaseLayer(BaseLayer), DL(DL) {}
+
+  /// Define \p F in the JITDylib that \p RT belongs to, tracked by \p RT.
+  Error add(ResourceTrackerSP RT, std::unique_ptr<FunctionAST> F) {
+    auto MU = std::make_unique<KaleidoscopeASTMaterializationUnit>(
+        *this, std::move(F));
+    return RT->getJITDylib().define(std::move(MU), RT);
+  }
+
+  /// Generate IR for \p F (with an empty name suffix) and emit it through the
+  /// base layer under responsibility \p MR.
+  void emit(std::unique_ptr<MaterializationResponsibility> MR,
+            std::unique_ptr<FunctionAST> F) {
+    ThreadSafeModule TSM = irgenAndTakeOwnership(*F, "");
+    BaseLayer.emit(std::move(MR), std::move(TSM));
+  }
+
+  /// Build the symbol table for \p F: a single entry, the mangled function
+  /// name, flagged as exported and callable.
+  SymbolFlagsMap getInterface(FunctionAST &F) {
+    MangleAndInterner Mangler(BaseLayer.getExecutionSession(), DL);
+    const JITSymbolFlags Flags(JITSymbolFlags::Exported |
+                               JITSymbolFlags::Callable);
+
+    SymbolFlagsMap Interface;
+    Interface[Mangler(F.getName())] = Flags;
+    return Interface;
+  }
+};
+
+// Constructs the unit with the interface computed by the layer for F. The
+// base class is initialized from *F before F is moved into the member.
+// Marked inline because this definition lives in a header; without it,
+// including the header from more than one translation unit would produce
+// duplicate definitions at link time.
+inline KaleidoscopeASTMaterializationUnit::KaleidoscopeASTMaterializationUnit(
+    KaleidoscopeASTLayer &L, std::unique_ptr<FunctionAST> F)
+    : MaterializationUnit(L.getInterface(*F), nullptr), L(L), F(std::move(F)) {}
+
+// Hands the stored AST and the materialization responsibility to the owning
+// layer's emit(). Marked inline because this definition lives in a header;
+// without it, including the header from more than one translation unit would
+// produce duplicate definitions at link time.
+inline void KaleidoscopeASTMaterializationUnit::materialize(
+    std::unique_ptr<MaterializationResponsibility> R) {
+  L.emit(std::move(R), std::move(F));
+}
+
+/// A simple ORC-based JIT for the Kaleidoscope tutorials that accepts both
+/// IR modules (addModule) and Kaleidoscope function ASTs (addAST).
+///
+/// IR modules enter the stack at OptimizeLayer, which forwards to
+/// CompileLayer and then ObjectLayer; ASTs enter at ASTLayer, which sits on
+/// top of OptimizeLayer. All code is added to, and looked up in, the single
+/// "<main>" JITDylib.
+class KaleidoscopeJIT {
+private:
+  std::unique_ptr<ExecutionSession> ES;
+  std::unique_ptr<EPCIndirectionUtils> EPCIU;
+
+  DataLayout DL;
+  MangleAndInterner Mangle;
+
+  // The layer stack, lowest layer first. Each layer is constructed with a
+  // reference to the one declared just above it, so keep this order.
+  RTDyldObjectLinkingLayer ObjectLayer;
+  IRCompileLayer CompileLayer;
+  IRTransformLayer OptimizeLayer;
+  KaleidoscopeASTLayer ASTLayer;
+
+  JITDylib &MainJD;
+
+  // Passed (as an address) to createLazyCallThroughManager() in Create().
+  // Prints a message and terminates the process.
+  static void handleLazyCallThroughError() {
+    errs() << "LazyCallThrough error: Could not find function body";
+    exit(1);
+  }
+
+public:
+  /// Build the layer stack on top of \p ES and create the "<main>" JITDylib.
+  ///
+  /// \param ES    Execution session; ownership is taken.
+  /// \param EPCIU Indirection utilities; ownership is taken and cleanup() is
+  ///              called on them in the destructor.
+  /// \param JTMB  Target machine builder handed to the ConcurrentIRCompiler.
+  /// \param DL    Data layout, used for name mangling and the global prefix.
+  KaleidoscopeJIT(std::unique_ptr<ExecutionSession> ES,
+                  std::unique_ptr<EPCIndirectionUtils> EPCIU,
+                  JITTargetMachineBuilder JTMB, DataLayout DL)
+      : ES(std::move(ES)), EPCIU(std::move(EPCIU)), DL(std::move(DL)),
+        Mangle(*this->ES, this->DL),
+        ObjectLayer(*this->ES,
+                    []() { return std::make_unique<SectionMemoryManager>(); }),
+        CompileLayer(*this->ES, ObjectLayer,
+                     std::make_unique<ConcurrentIRCompiler>(std::move(JTMB))),
+        OptimizeLayer(*this->ES, CompileLayer, optimizeModule),
+        ASTLayer(OptimizeLayer, this->DL),
+        MainJD(this->ES->createBareJITDylib("<main>")) {
+    // Use the member here: the parameter DL shadows it and was already passed
+    // to std::move in the initializer list, so it must not be read again.
+    MainJD.addGenerator(
+        cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess(
+            this->DL.getGlobalPrefix())));
+  }
+
+  /// End the execution session, then clean up the indirection utilities;
+  /// errors from either step are reported through the session.
+  ~KaleidoscopeJIT() {
+    if (auto Err = ES->endSession())
+      ES->reportError(std::move(Err));
+    if (auto Err = EPCIU->cleanup())
+      ES->reportError(std::move(Err));
+  }
+
+  /// Create a JIT for the current process.
+  ///
+  /// \returns the new JIT, or the first error produced while creating the
+  /// SelfExecutorProcessControl, the EPCIndirectionUtils, the lazy
+  /// call-through re-entry, or the default data layout.
+  static Expected<std::unique_ptr<KaleidoscopeJIT>> Create() {
+    auto EPC = SelfExecutorProcessControl::Create();
+    if (!EPC)
+      return EPC.takeError();
+
+    auto ES = std::make_unique<ExecutionSession>(std::move(*EPC));
+
+    auto EPCIU = EPCIndirectionUtils::Create(ES->getExecutorProcessControl());
+    if (!EPCIU)
+      return EPCIU.takeError();
+
+    (*EPCIU)->createLazyCallThroughManager(
+        *ES, pointerToJITTargetAddress(&handleLazyCallThroughError));
+
+    if (auto Err = setUpInProcessLCTMReentryViaEPCIU(**EPCIU))
+      return std::move(Err);
+
+    // *EPC was moved into the ExecutionSession above, so it must not be
+    // dereferenced again; ask the session for its ExecutorProcessControl.
+    JITTargetMachineBuilder JTMB(
+        ES->getExecutorProcessControl().getTargetTriple());
+
+    auto DL = JTMB.getDefaultDataLayoutForTarget();
+    if (!DL)
+      return DL.takeError();
+
+    return std::make_unique<KaleidoscopeJIT>(std::move(ES), std::move(*EPCIU),
+                                             std::move(JTMB), std::move(*DL));
+  }
+
+  /// \returns the data layout this JIT was constructed with.
+  const DataLayout &getDataLayout() const { return DL; }
+
+  /// \returns the "<main>" JITDylib that all code is added to.
+  JITDylib &getMainJITDylib() { return MainJD; }
+
+  /// Add the IR module \p TSM to the JIT via OptimizeLayer.
+  ///
+  /// \param RT Resource tracker to associate the module with; when null, the
+  ///           main JITDylib's default tracker is used.
+  Error addModule(ThreadSafeModule TSM, ResourceTrackerSP RT = nullptr) {
+    if (!RT)
+      RT = MainJD.getDefaultResourceTracker();
+
+    return OptimizeLayer.add(RT, std::move(TSM));
+  }
+
+  /// Add the function AST \p F to the JIT via ASTLayer.
+  ///
+  /// \param RT Resource tracker to associate the function with; when null,
+  ///           the main JITDylib's default tracker is used.
+  Error addAST(std::unique_ptr<FunctionAST> F, ResourceTrackerSP RT = nullptr) {
+    if (!RT)
+      RT = MainJD.getDefaultResourceTracker();
+    return ASTLayer.add(RT, std::move(F));
+  }
+
+  /// Look up \p Name, mangled with this JIT's mangler, in the main JITDylib.
+  Expected<JITEvaluatedSymbol> lookup(StringRef Name) {
+    return ES->lookup({&MainJD}, Mangle(Name.str()));
+  }
+
+private:
+  /// Transform installed on OptimizeLayer: runs a fixed set of function
+  /// passes over every function in \p TSM's module and returns the module.
+  /// \p R is not used.
+  static Expected<ThreadSafeModule>
+  optimizeModule(ThreadSafeModule TSM, const MaterializationResponsibility &R) {
+    TSM.withModuleDo([](Module &M) {
+      // Create a function pass manager.
+      auto FPM = std::make_unique<legacy::FunctionPassManager>(&M);
+
+      // Add some optimizations.
+      FPM->add(createInstructionCombiningPass());
+      FPM->add(createReassociatePass());
+      FPM->add(createGVNPass());
+      FPM->add(createCFGSimplificationPass());
+      FPM->doInitialization();
+
+      // Run the optimizations over all functions in the module being added to
+      // the JIT.
+      for (auto &F : M)
+        FPM->run(F);
+    });
+
+    return std::move(TSM);
+  }
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H
+
+
+

Next: Remote-JITing – Process-isolation and laziness-at-a-distance

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/index.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,195 @@ + + + + + + + + + LLVM Tutorial: Table of Contents — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

LLVM Tutorial: Table of Contents

+ + +
+

External Tutorials

+
+
Tutorial: Creating an LLVM Backend for the Cpu0 Architecture

A step-by-step tutorial for developing an LLVM backend. Under +active development at https://github.com/Jonathan2251/lbd (please +contribute!).

+
+
Howto: Implementing LLVM Integrated Assembler

A simple guide for how to implement an LLVM integrated assembler for an +architecture.

+
+
+
+
+

Advanced Topics

+
    +
  1. Writing an Optimization for LLVM

  2. +
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl01.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl01.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl01.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl01.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl02.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl02.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl02.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl02.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl03.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl03.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl03.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl03.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl04.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl04.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl04.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl04.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl05.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl05.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl05.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl05.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl06.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl06.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl06.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl06.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl07.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl07.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl07.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl07.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl08.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl08.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl08.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl08.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl09.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl09.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl09.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl09.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/LangImpl10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/LangImpl10.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,127 @@ + + + + + + + + + Kaleidoscope Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Kaleidoscope Tutorial

+

The Kaleidoscope Tutorial has moved to My First Language Frontend with LLVM Tutorial.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/index.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/index.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/index.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/index.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,220 @@ + + + + + + + + + My First Language Frontend with LLVM Tutorial — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

My First Language Frontend with LLVM Tutorial

+
+
+

Requirements: This tutorial assumes you know C++, but no previous +compiler experience is necessary.

+

Welcome to the “My First Language Frontend with LLVM” tutorial. Here we +run through the implementation of a simple language, showing +how fun and easy it can be. This tutorial will get you up and running +fast and show a concrete example of something that uses LLVM to generate +code.

+

This tutorial introduces the simple “Kaleidoscope” language, building it +iteratively over the course of several chapters, showing how it is built +over time. This lets us cover a range of language design and LLVM-specific +ideas, showing and explaining the code for it all along the way, +and reduces the overwhelming amount of details up front. We strongly +encourage you to work with this code - make a copy and hack it up and +experiment.

+

Warning: In order to focus on teaching compiler techniques and LLVM +specifically, +this tutorial does not show best practices in software engineering +principles. For example, the code uses global variables +pervasively, doesn’t use +visitors, etc… but +instead keeps things simple and focuses on the topics at hand.

+

This tutorial is structured into chapters covering individual topics, +allowing you to skip ahead as you wish:

+
    +
  • Chapter #1: Kaleidoscope language and Lexer - +This shows where we are +going and the basic functionality that we want to build. A lexer +is also the first part of building a parser for a language, and we +use a simple C++ lexer which is easy to understand.

  • +
  • Chapter #2: Implementing a Parser and AST - +With the lexer in place, we can talk about parsing techniques and +basic AST construction. This tutorial describes recursive descent +parsing and operator precedence parsing.

  • +
  • Chapter #3: Code generation to LLVM IR - with +the AST ready, we show how easy it is to generate LLVM IR, and show +a simple way to incorporate LLVM into your project.

  • +
  • Chapter #4: Adding JIT and Optimizer Support - +One great thing about LLVM is its support for JIT compilation, so +we’ll dive right into it and show you the 3 lines it takes to add JIT +support. Later chapters show how to generate .o files.

  • +
  • Chapter #5: Extending the Language: Control Flow - With +the basic language up and running, we show how to extend +it with control flow operations (‘if’ statement and a ‘for’ loop). This +gives us a chance to talk about SSA construction and control +flow.

  • +
  • Chapter #6: Extending the Language: User-defined Operators - This chapter extends the language to let +users define arbitrary unary and binary operators - with assignable +precedence! This allows us to build a significant piece of the +“language” as library routines.

  • +
  • Chapter #7: Extending the Language: Mutable Variables - This chapter talks about adding user-defined local +variables along with an assignment operator. This shows how easy it is +to construct SSA form in LLVM: LLVM does not require your front-end +to construct SSA form in order to use it!

  • +
  • Chapter #8: Compiling to Object Files - This +chapter explains how to take LLVM IR and compile it down to object +files, like a static compiler does.

  • +
  • Chapter #9: Debug Information - A real language +needs to support debuggers, so we +add debug information that allows setting breakpoints in Kaleidoscope +functions, printing out argument variables, and calling functions!

  • +
  • Chapter #10: Conclusion and other tidbits - This +chapter wraps up the series by discussing ways to extend the language +and includes pointers to info on “special topics” like adding garbage +collection support, exceptions, debugging, support for “spaghetti +stacks”, etc.

  • +
+

By the end of the tutorial, we’ll have written a bit less than 1000 +(non-comment, non-blank) lines of code. With this small amount of +code, we’ll have built up a nice little compiler for a non-trivial +language including a hand-written lexer, parser, AST, as well as code +generation support - both static and JIT! The breadth of this is a great +testament to the strengths of LLVM and shows why it is such a popular +target for language designers and others who need high performance code +generation.

+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl01.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl01.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl01.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl01.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,316 @@ + + + + + + + + + 1. Kaleidoscope: Kaleidoscope Introduction and the Lexer — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

1. Kaleidoscope: Kaleidoscope Introduction and the Lexer

+ +
+

1.1. The Kaleidoscope Language

+

This tutorial is illustrated with a toy language called +“Kaleidoscope” (a name derived +from words meaning “beautiful, form, and view”). Kaleidoscope is a procedural +language that allows you to define functions, use conditionals, math, +etc. Over the course of the tutorial, we’ll extend Kaleidoscope to +support the if/then/else construct, a for loop, user defined operators, +JIT compilation with a simple command line interface, debug info, etc.

+

We want to keep things simple, so the only datatype in Kaleidoscope +is a 64-bit floating point type (aka ‘double’ in C parlance). As such, +all values are implicitly double precision and the language doesn’t +require type declarations. This gives the language a very nice and +simple syntax. For example, the following simple example computes +Fibonacci numbers:

+
# Compute the x'th fibonacci number.
+def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2)
+
+# This expression will compute the 40th number.
+fib(40)
+
+
+

We also allow Kaleidoscope to call into standard library functions - the +LLVM JIT makes this really easy. This means that you can use the +‘extern’ keyword to define a function before you use it (this is also +useful for mutually recursive functions). For example:

+
extern sin(arg);
+extern cos(arg);
+extern atan2(arg1 arg2);
+
+atan2(sin(.4), cos(42))
+
+
+

A more interesting example is included in Chapter 6 where we write a +little Kaleidoscope application that displays a Mandelbrot +Set at various levels of magnification.

+

Let’s dive into the implementation of this language!

+
+
+

1.2. The Lexer

+

When it comes to implementing a language, the first thing needed is the +ability to process a text file and recognize what it says. The +traditional way to do this is to use a +“lexer” (aka +‘scanner’) to break the input up into “tokens”. Each token returned by +the lexer includes a token code and potentially some metadata (e.g. the +numeric value of a number). First, we define the possibilities:

+
// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2,
+  tok_extern = -3,
+
+  // primary
+  tok_identifier = -4,
+  tok_number = -5,
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+
+

Each token returned by our lexer will either be one of the Token enum +values or it will be an ‘unknown’ character like ‘+’, which is returned +as its ASCII value. If the current token is an identifier, the +IdentifierStr global variable holds the name of the identifier. If +the current token is a numeric literal (like 1.0), NumVal holds its +value. We use global variables for simplicity, but this is not the +best choice for a real language implementation :).

+

The actual implementation of the lexer is a single function named +gettok. The gettok function is called to return the next token +from standard input. Its definition starts as:

+
/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+
+

gettok works by calling the C getchar() function to read +characters one at a time from standard input. It eats them as it +recognizes them and stores the last character read, but not processed, +in LastChar. The first thing that it has to do is ignore whitespace +between tokens. This is accomplished with the loop above.

+

The next thing gettok needs to do is recognize identifiers and +specific keywords like “def”. Kaleidoscope does this with this simple +loop:

+
if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+  IdentifierStr = LastChar;
+  while (isalnum((LastChar = getchar())))
+    IdentifierStr += LastChar;
+
+  if (IdentifierStr == "def")
+    return tok_def;
+  if (IdentifierStr == "extern")
+    return tok_extern;
+  return tok_identifier;
+}
+
+
+

Note that this code sets the ‘IdentifierStr’ global whenever it +lexes an identifier. Also, since language keywords are matched by the +same loop, we handle them here inline. Numeric values are similar:

+
if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+  std::string NumStr;
+  do {
+    NumStr += LastChar;
+    LastChar = getchar();
+  } while (isdigit(LastChar) || LastChar == '.');
+
+  NumVal = strtod(NumStr.c_str(), 0);
+  return tok_number;
+}
+
+
+

This is all pretty straightforward code for processing input. When +reading a numeric value from input, we use the C strtod function to +convert it to a numeric value that we store in NumVal. Note that +this isn’t doing sufficient error checking: it will incorrectly read +“1.23.45.67” and handle it as if you typed in “1.23”. Feel free to +extend it! Next we handle comments:

+
if (LastChar == '#') {
+  // Comment until end of line.
+  do
+    LastChar = getchar();
+  while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+  if (LastChar != EOF)
+    return gettok();
+}
+
+
+

We handle comments by skipping to the end of the line and then return +the next token. Finally, if the input doesn’t match one of the above +cases, it is either an operator character like ‘+’ or the end of the +file. These are handled with this code:

+
  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+
+

With this, we have the complete lexer for the basic Kaleidoscope +language (the full code listing for the Lexer +is available in the next chapter of the tutorial). +Next we’ll build a simple parser that uses this to build an Abstract +Syntax Tree. When we have that, we’ll include a +driver so that you can use the lexer and parser together.

+

Next: Implementing a Parser and AST

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl02.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl02.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl02.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl02.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,1256 @@ + + + + + + + + + 2. Kaleidoscope: Implementing a Parser and AST — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

2. Kaleidoscope: Implementing a Parser and AST

+ +
+

2.1. Chapter 2 Introduction

+

Welcome to Chapter 2 of the “Implementing a language with +LLVM” tutorial. This chapter shows you how to use the +lexer, built in Chapter 1, to build a full +parser for our Kaleidoscope +language. Once we have a parser, we’ll define and build an Abstract +Syntax Tree (AST).

+

The parser we will build uses a combination of Recursive Descent +Parsing and +Operator-Precedence +Parsing to +parse the Kaleidoscope language (the latter for binary expressions and +the former for everything else). Before we get to parsing though, let’s +talk about the output of the parser: the Abstract Syntax Tree.

+
+
+

2.2. The Abstract Syntax Tree (AST)

+

The AST for a program captures its behavior in such a way that it is +easy for later stages of the compiler (e.g. code generation) to +interpret. We basically want one object for each construct in the +language, and the AST should closely model the language. In +Kaleidoscope, we have expressions, a prototype, and a function object. +We’ll start with expressions first:

+
/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+};
+
+
+

The code above shows the definition of the base ExprAST class and one +subclass which we use for numeric literals. The important thing to note +about this code is that the NumberExprAST class captures the numeric +value of the literal as an instance variable. This allows later phases +of the compiler to know what the stored numeric value is.

+

Right now we only create the AST, so there are no useful accessor +methods on them. It would be very easy to add a virtual method to pretty +print the code, for example. Here are the other expression AST node +definitions that we’ll use in the basic form of the Kaleidoscope +language:

+
/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+
+public:
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  std::unique_ptr<ExprAST> LHS, RHS;
+
+public:
+  BinaryExprAST(char op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+    : Op(op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<std::unique_ptr<ExprAST>> Args;
+
+public:
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+    : Callee(Callee), Args(std::move(Args)) {}
+};
+
+
+

This is all (intentionally) rather straight-forward: variables capture +the variable name, binary operators capture their opcode (e.g. ‘+’), and +calls capture a function name as well as a list of any argument +expressions. One thing that is nice about our AST is that it captures +the language features without talking about the syntax of the language. +Note that there is no discussion about precedence of binary operators, +lexical structure, etc.

+

For our basic language, these are all of the expression nodes we’ll +define. Because it doesn’t have conditional control flow, it isn’t +Turing-complete; we’ll fix that in a later installment. The two things +we need next are a way to talk about the interface to a function, and a +way to talk about functions themselves:

+
/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+
+public:
+  PrototypeAST(const std::string &name, std::vector<std::string> Args)
+    : Name(name), Args(std::move(Args)) {}
+
+  const std::string &getName() const { return Name; }
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto;
+  std::unique_ptr<ExprAST> Body;
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+    : Proto(std::move(Proto)), Body(std::move(Body)) {}
+};
+
+
+

In Kaleidoscope, functions are typed with just a count of their +arguments. Since all values are double precision floating point, the +type of each argument doesn’t need to be stored anywhere. In a more +aggressive and realistic language, the “ExprAST” class would probably +have a type field.

+

With this scaffolding, we can now talk about parsing expressions and +function bodies in Kaleidoscope.

+
+
+

2.3. Parser Basics

+

Now that we have an AST to build, we need to define the parser code to +build it. The idea here is that we want to parse something like “x+y” +(which is returned as three tokens by the lexer) into an AST that could +be generated with calls like this:

+
auto LHS = std::make_unique<VariableExprAST>("x");
+auto RHS = std::make_unique<VariableExprAST>("y");
+auto Result = std::make_unique<BinaryExprAST>('+', std::move(LHS),
+                                              std::move(RHS));
+
+
+

In order to do this, we’ll start by defining some basic helper routines:

+
/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+
+

This implements a simple token buffer around the lexer. This allows us +to look one token ahead at what the lexer is returning. Every function +in our parser will assume that CurTok is the current token that needs to +be parsed.

+
/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "LogError: %s\n", Str);
+  return nullptr;
+}
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+
+

The LogError routines are simple helper routines that our parser will +use to handle errors. The error recovery in our parser will not be the +best and is not particularly user-friendly, but it will be enough for our +tutorial. These routines make it easier to handle errors in routines +that have various return types: they always return null.

+

With these basic helper functions, we can implement the first piece of +our grammar: numeric literals.

+
+
+

2.4. Basic Expression Parsing

+

We start with numeric literals, because they are the simplest to +process. For each production in our grammar, we’ll define a function +which parses that production. For numeric literals, we have:

+
/// numberexpr ::= number
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  auto Result = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // consume the number
+  return std::move(Result);
+}
+
+
+

This routine is very simple: it expects to be called when the current +token is a tok_number token. It takes the current number value, +creates a NumberExprAST node, advances the lexer to the next token, +and finally returns.

+

There are some interesting aspects to this. The most important one is +that this routine eats all of the tokens that correspond to the +production and returns the lexer buffer with the next token (which is +not part of the grammar production) ready to go. This is a fairly +standard way to go for recursive descent parsers. For a better example, +the parenthesis operator is defined like this:

+
/// parenexpr ::= '(' expression ')'
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // eat (.
+  auto V = ParseExpression();
+  if (!V)
+    return nullptr;
+
+  if (CurTok != ')')
+    return LogError("expected ')'");
+  getNextToken(); // eat ).
+  return V;
+}
+
+
+

This function illustrates a number of interesting things about the +parser:

+

1) It shows how we use the LogError routines. When called, this function +expects that the current token is a ‘(’ token, but after parsing the +subexpression, it is possible that there is no ‘)’ waiting. For example, +if the user types in “(4 x” instead of “(4)”, the parser should emit an +error. Because errors can occur, the parser needs a way to indicate that +they happened: in our parser, we return null on an error.

+

2) Another interesting aspect of this function is that it uses recursion +by calling ParseExpression (we will soon see that +ParseExpression can call ParseParenExpr). This is powerful +because it allows us to handle recursive grammars, and keeps each +production very simple. Note that parentheses do not cause construction +of AST nodes themselves. While we could do it this way, the most +important role of parentheses is to guide the parser and provide +grouping. Once the parser constructs the AST, parentheses are not +needed.

+

The next simple production is for handling variable references and +function calls:

+
/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return std::make_unique<VariableExprAST>(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<std::unique_ptr<ExprAST>> Args;
+  if (CurTok != ')') {
+    while (1) {
+      if (auto Arg = ParseExpression())
+        Args.push_back(std::move(Arg));
+      else
+        return nullptr;
+
+      if (CurTok == ')')
+        break;
+
+      if (CurTok != ',')
+        return LogError("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return std::make_unique<CallExprAST>(IdName, std::move(Args));
+}
+
+
+

This routine follows the same style as the other routines. (It expects +to be called if the current token is a tok_identifier token). It +also has recursion and error handling. One interesting aspect of this is +that it uses look-ahead to determine if the current identifier is a +standalone variable reference or if it is a function call expression. +It handles this by checking to see if the token after the identifier is +a ‘(’ token, constructing either a VariableExprAST or +CallExprAST node as appropriate.

+

Now that we have all of our simple expression-parsing logic in place, we +can define a helper function to wrap it together into one entry point. +We call this class of expressions “primary” expressions, for reasons +that will become more clear later in the +tutorial. In order to parse an arbitrary +primary expression, we need to determine what sort of expression it is:

+
/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  default:
+    return LogError("unknown token when expecting an expression");
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case '(':
+    return ParseParenExpr();
+  }
+}
+
+
+

Now that you see the definition of this function, it is more obvious why +we can assume the state of CurTok in the various functions. This uses +look-ahead to determine which sort of expression is being inspected, and +then parses it with a function call.

+

Now that basic expressions are handled, we need to handle binary +expressions. They are a bit more complex.

+
+
+

2.5. Binary Expression Parsing

+

Binary expressions are significantly harder to parse because they are +often ambiguous. For example, when given the string “x+y*z”, the parser +can choose to parse it as either “(x+y)*z” or “x+(y*z)”. With common +definitions from mathematics, we expect the latter parse, because “*” +(multiplication) has higher precedence than “+” (addition).

+

There are many ways to handle this, but an elegant and efficient way is +to use Operator-Precedence +Parsing. +This parsing technique uses the precedence of binary operators to guide +recursion. To start with, we need a table of precedences:

+
/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+int main() {
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+  ...
+}
+
+
+

For the basic form of Kaleidoscope, we will only support 4 binary +operators (this can obviously be extended by you, our brave and intrepid +reader). The GetTokPrecedence function returns the precedence for +the current token, or -1 if the token is not a binary operator. Having a +map makes it easy to add new operators and makes it clear that the +algorithm doesn’t depend on the specific operators involved, but it +would be easy enough to eliminate the map and do the comparisons in the +GetTokPrecedence function. (Or just use a fixed-size array).

+

With the helper above defined, we can now start parsing binary +expressions. The basic idea of operator precedence parsing is to break +down an expression with potentially ambiguous binary operators into +pieces. Consider, for example, the expression “a+b+(c+d)*e*f+g”. +Operator precedence parsing considers this as a stream of primary +expressions separated by binary operators. As such, it will first parse +the leading primary expression “a”, then it will see the pairs [+, b] +[+, (c+d)] [*, e] [*, f] and [+, g]. Note that because parentheses are +primary expressions, the binary expression parser doesn’t need to worry +about nested subexpressions like (c+d) at all.

+

To start, an expression is a primary expression potentially followed by +a sequence of [binop,primaryexpr] pairs:

+
/// expression
+///   ::= primary binoprhs
+///
+static std::unique_ptr<ExprAST> ParseExpression() {
+  // Parse the leading primary, then hand it to the binop parser with the
+  // lowest minimum precedence (0). A null primary is passed up as null.
+  if (auto LHS = ParsePrimary())
+    return ParseBinOpRHS(0, std::move(LHS));
+
+  return nullptr;
+}
+
+
+

ParseBinOpRHS is the function that parses the sequence of pairs for +us. It takes a precedence and a pointer to an expression for the part +that has been parsed so far. Note that “x” is a perfectly valid +expression: As such, “binoprhs” is allowed to be empty, in which case it +returns the expression that is passed into it. In our example above, the +code passes the expression for “a” into ParseBinOpRHS and the +current token is “+”.

+

The precedence value passed into ParseBinOpRHS indicates the +minimal operator precedence that the function is allowed to eat. For +example, if the current pair stream is [+, x] and ParseBinOpRHS is +passed in a precedence of 40, it will not consume any tokens (because +the precedence of ‘+’ is only 20). With this in mind, ParseBinOpRHS +starts with:

+
/// binoprhs
+///   ::= ('+' primary)*
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+
+

This code gets the precedence of the current token and checks to see if +it is too low. Because we defined invalid tokens to have a precedence of +-1, this check implicitly knows that the pair-stream ends when the token +stream runs out of binary operators. If this check succeeds, we know +that the token is a binary operator and that it will be included in this +expression:

+
// Okay, we know this is a binop.
+int BinOp = CurTok;
+getNextToken();  // eat binop
+
+// Parse the primary expression after the binary operator.
+auto RHS = ParsePrimary();
+if (!RHS)
+  return nullptr;
+
+
+

As such, this code eats (and remembers) the binary operator and then +parses the primary expression that follows. This builds up the whole +pair, the first of which is [+, b] for the running example.

+

Now that we parsed the left-hand side of an expression and one pair of +the RHS sequence, we have to decide which way the expression associates. +In particular, we could have “(a+b) binop unparsed” or “a + (b binop +unparsed)”. To determine this, we look ahead at “binop” to determine its +precedence and compare it to BinOp’s precedence (which is ‘+’ in this +case):

+
// If BinOp binds less tightly with RHS than the operator after RHS, let
+// the pending operator take RHS as its LHS.
+int NextPrec = GetTokPrecedence();
+if (TokPrec < NextPrec) {
+
+
+

If the precedence of the binop to the right of “RHS” is lower or equal +to the precedence of our current operator, then we know that the +parentheses associate as “(a+b) binop …”. In our example, the current +operator is “+” and the next operator is “+”, so we know that they have the +same precedence. In this case we’ll create the AST node for “a+b”, and +then continue parsing:

+
      ... if body omitted ...
+    }
+
+    // Merge LHS/RHS.
+    LHS = std::make_unique<BinaryExprAST>(BinOp, std::move(LHS),
+                                           std::move(RHS));
+  }  // loop around to the top of the while loop.
+}
+
+
+

In our example above, this will turn “a+b+” into “(a+b)” and execute the +next iteration of the loop, with “+” as the current token. The code +above will eat, remember, and parse “(c+d)” as the primary expression, +which makes the current pair equal to [+, (c+d)]. It will then evaluate +the ‘if’ conditional above with “*” as the binop to the right of the +primary. In this case, the precedence of “*” is higher than the +precedence of “+” so the if condition will be entered.

+

The critical question left here is “how can the if condition parse the +right hand side in full”? In particular, to build the AST correctly for +our example, it needs to get all of “(c+d)*e*f” as the RHS expression +variable. The code to do this is surprisingly simple (code from the +above two blocks duplicated for context):

+
    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, std::move(RHS));
+      if (!RHS)
+        return nullptr;
+    }
+    // Merge LHS/RHS.
+    LHS = std::make_unique<BinaryExprAST>(BinOp, std::move(LHS),
+                                           std::move(RHS));
+  }  // loop around to the top of the while loop.
+}
+
+
+

At this point, we know that the binary operator to the RHS of our +primary has higher precedence than the binop we are currently parsing. +As such, we know that any sequence of pairs whose operators are all +higher precedence than “+” should be parsed together and returned as +“RHS”. To do this, we recursively invoke the ParseBinOpRHS function +specifying “TokPrec+1” as the minimum precedence required for it to +continue. In our example above, this will cause it to return the AST +node for “(c+d)*e*f” as RHS, which is then set as the RHS of the ‘+’ +expression.

+

Finally, on the next iteration of the while loop, the “+g” piece is +parsed and added to the AST. With this little bit of code (14 +non-trivial lines), we correctly handle fully general binary expression +parsing in a very elegant way. This was a whirlwind tour of this code, +and it is somewhat subtle. I recommend running through it with a few +tough examples to see how it works.

+

This wraps up handling of expressions. At this point, we can point the +parser at an arbitrary token stream and build an expression from it, +stopping at the first token that is not part of the expression. Next up +we need to handle function definitions, etc.

+
+
+

2.6. Parsing the Rest

+

The next thing missing is handling of function prototypes. In +Kaleidoscope, these are used both for ‘extern’ function declarations as +well as function body definitions. The code to do this is +straight-forward and not very interesting (once you’ve survived +expressions):

+
/// prototype
+///   ::= id '(' id* ')'
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return LogErrorP("Expected function name in prototype");
+
+  // Copy the name now; IdentifierStr is overwritten by the next identifier.
+  const std::string FnName = IdentifierStr;
+
+  if (getNextToken() != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Read the list of argument names: consecutive identifier tokens.
+  std::vector<std::string> Params;
+  for (getNextToken(); CurTok == tok_identifier; getNextToken())
+    Params.push_back(IdentifierStr);
+
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  getNextToken();  // success: eat ')'.
+
+  return std::make_unique<PrototypeAST>(FnName, std::move(Params));
+}
+
+
+

Given this, a function definition is very simple, just a prototype plus +an expression to implement the body:

+
/// definition ::= 'def' prototype expression
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken();  // eat def.
+
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
+
+  // The function body is a single expression.
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(Body));
+}
+
+
+

In addition, we support ‘extern’ to declare functions like ‘sin’ and +‘cos’ as well as to support forward declaration of user functions. These +‘extern’s are just prototypes with no body:

+
/// external ::= 'extern' prototype
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken();  // eat extern.
+  // An extern is just a prototype with no body.
+  auto Proto = ParsePrototype();
+  return Proto;
+}
+
+
+

Finally, we’ll also let the user type in arbitrary top-level expressions +and evaluate them on the fly. We will handle this by defining anonymous +nullary (zero argument) functions for them:

+
/// toplevelexpr ::= expression
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  auto E = ParseExpression();
+  if (!E)
+    return nullptr;
+
+  // Wrap the expression in an anonymous, zero-argument function.
+  auto Proto = std::make_unique<PrototypeAST>("", std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+}
+
+
+

Now that we have all the pieces, let’s build a little driver that will +let us actually execute this code we’ve built!

+
+
+

2.7. The Driver

+

The driver for this simply invokes all of the parsing pieces with a +top-level dispatch loop. There isn’t much interesting here, so I’ll just +include the top-level loop. See below for full code in the +“Top-Level Parsing” section.

+
/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  for (;;) {
+    fprintf(stderr, "ready> ");
+
+    // End of input terminates the loop.
+    if (CurTok == tok_eof)
+      return;
+
+    switch (CurTok) {
+    case tok_def:
+      HandleDefinition();
+      break;
+    case tok_extern:
+      HandleExtern();
+      break;
+    case ';': // ignore top-level semicolons.
+      getNextToken();
+      break;
+    default:
+      HandleTopLevelExpression();
+      break;
+    }
+  }
+}
+
+
+

The most interesting part of this is that we ignore top-level +semicolons. Why is this, you ask? The basic reason is that if you type +“4 + 5” at the command line, the parser doesn’t know whether that is the +end of what you will type or not. For example, on the next line you +could type “def foo…” in which case 4+5 is the end of a top-level +expression. Alternatively you could type “* 6”, which would continue +the expression. Having top-level semicolons allows you to type “4+5;”, +and the parser will know you are done.

+
+
+

2.8. Conclusions

+

With just under 400 lines of commented code (240 lines of non-comment, +non-blank code), we fully defined our minimal language, including a +lexer, parser, and AST builder. With this done, the executable will +validate Kaleidoscope code and tell us if it is grammatically invalid. +For example, here is a sample interaction:

+
$ ./a.out
+ready> def foo(x y) x+foo(y, 4.0);
+Parsed a function definition.
+ready> def foo(x y) x+y y;
+Parsed a function definition.
+Parsed a top-level expr
+ready> def foo(x y) x+y );
+Parsed a function definition.
+Error: unknown token when expecting an expression
+ready> extern sin(a);
+ready> Parsed an extern
+ready> ^D
+$
+
+
+

There is a lot of room for extension here. You can define new AST nodes, +extend the language in many ways, etc. In the next +installment, we will describe how to generate LLVM +Intermediate Representation (IR) from the AST.

+
+
+

2.9. Full Code Listing

+

Here is the complete code listing for our running example. Because this +uses the LLVM libraries, we need to link them in. To do this, we use the +llvm-config tool to inform +our makefile/command line about which options to use:

+
# Compile
+clang++ -g -O3 toy.cpp `llvm-config --cxxflags`
+# Run
+./a.out
+
+
+

Here is the code:

+
#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns the character code [0-255] for an unknown character;
+// known tokens use the negative values below.
+enum Token {
+  tok_eof = -1, // end of input
+
+  // commands
+  tok_def = -2,    // the "def" keyword
+  tok_extern = -3, // the "extern" keyword
+
+  // primary
+  tok_identifier = -4, // identifier; its text is stored in IdentifierStr
+  tok_number = -5      // numeric literal; its value is stored in NumVal
+};
+
+static std::string IdentifierStr; // Filled in by gettok() for tok_identifier
+static double NumVal;             // Filled in by gettok() for tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  // One character of lookahead, kept across calls.
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  // identifier: [a-zA-Z][a-zA-Z0-9]*
+  if (isalpha(LastChar)) {
+    IdentifierStr = LastChar;
+    for (LastChar = getchar(); isalnum(LastChar); LastChar = getchar())
+      IdentifierStr += LastChar;
+
+    // Keywords are lexed as identifiers first and then recognized by name.
+    if (IdentifierStr == "def")
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    return tok_identifier;
+  }
+
+  // Number: [0-9.]+
+  if (isdigit(LastChar) || LastChar == '.') {
+    std::string NumStr;
+    while (isdigit(LastChar) || LastChar == '.') {
+      NumStr += LastChar;
+      LastChar = getchar();
+    }
+
+    NumVal = strtod(NumStr.c_str(), nullptr);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    for (;;) {
+      LastChar = getchar();
+      if (LastChar == EOF || LastChar == '\n' || LastChar == '\r')
+        break;
+    }
+
+    // Lex whatever follows the comment, unless the input ended inside it.
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value, and advance the
+  // lookahead past it.
+  const int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExprAST - Common base of every expression node.
+class ExprAST {
+public:
+  virtual ~ExprAST() = default;
+};
+
+/// NumberExprAST - A numeric literal such as "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+
+public:
+  NumberExprAST(double V) : Val(V) {}
+};
+
+/// VariableExprAST - A reference to a variable such as "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+
+public:
+  VariableExprAST(const std::string &N) : Name(N) {}
+};
+
+/// BinaryExprAST - A binary operator applied to two operand expressions.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  std::unique_ptr<ExprAST> LHS, RHS;
+
+public:
+  BinaryExprAST(char Opcode, std::unique_ptr<ExprAST> Left,
+                std::unique_ptr<ExprAST> Right)
+      : Op(Opcode), LHS(std::move(Left)), RHS(std::move(Right)) {}
+};
+
+/// CallExprAST - A function call: callee name plus argument expressions.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<std::unique_ptr<ExprAST>> Args;
+
+public:
+  CallExprAST(const std::string &CalleeName,
+              std::vector<std::unique_ptr<ExprAST>> CallArgs)
+      : Callee(CalleeName), Args(std::move(CallArgs)) {}
+};
+
+/// PrototypeAST - The "prototype" of a function: its name and the names of
+/// its arguments (and therefore, implicitly, how many arguments it takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+
+public:
+  PrototypeAST(const std::string &FnName, std::vector<std::string> ArgNames)
+      : Name(FnName), Args(std::move(ArgNames)) {}
+
+  const std::string &getName() const { return Name; }
+};
+
+/// FunctionAST - A function definition: a prototype together with the
+/// expression that forms its body.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto;
+  std::unique_ptr<ExprAST> Body;
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> P, std::unique_ptr<ExprAST> B)
+      : Proto(std::move(P)), Body(std::move(B)) {}
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - A one-token lookahead buffer.  CurTok holds the
+/// token the parser is currently looking at; getNextToken pulls the next
+/// token from the lexer, stores it in CurTok and returns it.
+static int CurTok;
+static int getNextToken() {
+  CurTok = gettok();
+  return CurTok;
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+/// Returns -1 when CurTok is not a declared binary operator.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop. find() is used instead of operator[] so
+  // that probing an undeclared token does not insert a zero entry in the map.
+  auto It = BinopPrecedence.find(static_cast<char>(CurTok));
+  if (It == BinopPrecedence.end() || It->second <= 0)
+    return -1;
+  return It->second;
+}
+
+/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  // Build the node from NumVal before advancing the lexer.
+  std::unique_ptr<ExprAST> Result = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // eat (.
+
+  if (auto Inner = ParseExpression()) {
+    if (CurTok != ')')
+      return LogError("expected ')'");
+    getNextToken(); // eat ).
+    // No node is created for the parentheses; the inner expression is returned.
+    return Inner;
+  }
+
+  return nullptr;
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  const std::string IdName = IdentifierStr;
+
+  // Eat the identifier; anything other than '(' means a simple variable ref.
+  if (getNextToken() != '(')
+    return std::make_unique<VariableExprAST>(IdName);
+
+  // Call.
+  std::vector<std::unique_ptr<ExprAST>> Args;
+  getNextToken(); // eat (
+  if (CurTok != ')') {
+    for (;;) {
+      auto Arg = ParseExpression();
+      if (!Arg)
+        return nullptr;
+      Args.push_back(std::move(Arg));
+
+      // After an argument, only ')' (done) or ',' (another argument) is valid.
+      if (CurTok == ')')
+        break;
+      if (CurTok != ',')
+        return LogError("Expected ')' or ',' in argument list");
+
+      getNextToken(); // eat ','
+    }
+  }
+
+  getNextToken(); // Eat the ')'.
+
+  return std::make_unique<CallExprAST>(IdName, std::move(Args));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  // Dispatch on the current token to the matching sub-parser.
+  if (CurTok == tok_identifier)
+    return ParseIdentifierExpr();
+  if (CurTok == tok_number)
+    return ParseNumberExpr();
+  if (CurTok == '(')
+    return ParseParenExpr();
+
+  return LogError("unknown token when expecting an expression");
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+///
+/// ExprPrec is the minimal operator precedence this call is allowed to
+/// consume; LHS is the expression parsed so far.
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  for (;;) {
+    // Precedence of the pending token (-1 if it is not a binop).
+    const int TokPrec = GetTokPrecedence();
+
+    // Stop unless the pending binop binds at least as tightly as required.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop: remember it and eat it.
+    const int BinOp = CurTok;
+    getNextToken();
+
+    // Parse the primary expression after the binary operator.
+    auto RHS = ParsePrimary();
+    if (!RHS)
+      return nullptr;
+
+    // If the operator after RHS binds more tightly than BinOp, let that
+    // pending operator take RHS as its LHS.
+    const int NextPrec = GetTokPrecedence();
+    if (NextPrec > TokPrec) {
+      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
+      if (!RHS)
+        return nullptr;
+    }
+
+    // Merge LHS/RHS and loop around for the next pair.
+    LHS =
+        std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static std::unique_ptr<ExprAST> ParseExpression() {
+  // Parse the leading primary, then hand it to the binop parser with the
+  // lowest minimum precedence (0). A null primary is passed up as null.
+  if (auto LHS = ParsePrimary())
+    return ParseBinOpRHS(0, std::move(LHS));
+
+  return nullptr;
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return LogErrorP("Expected function name in prototype");
+
+  // Copy the name now; IdentifierStr is overwritten by the next identifier.
+  const std::string FnName = IdentifierStr;
+
+  if (getNextToken() != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Argument names: consecutive identifier tokens.
+  std::vector<std::string> Params;
+  for (getNextToken(); CurTok == tok_identifier; getNextToken())
+    Params.push_back(IdentifierStr);
+
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  getNextToken(); // success: eat ')'.
+
+  return std::make_unique<PrototypeAST>(FnName, std::move(Params));
+}
+
+/// definition ::= 'def' prototype expression
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // eat def.
+
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
+
+  // The function body is a single expression.
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(Body));
+}
+
+/// toplevelexpr ::= expression
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  auto E = ParseExpression();
+  if (!E)
+    return nullptr;
+
+  // Wrap the expression in a zero-argument function named "__anon_expr".
+  auto Proto = std::make_unique<PrototypeAST>("__anon_expr",
+                                              std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+}
+
+/// external ::= 'extern' prototype
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // eat extern.
+  // An extern is just a prototype with no body.
+  auto Proto = ParsePrototype();
+  return Proto;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing
+//===----------------------------------------------------------------------===//
+
+/// Parse a 'def' at top level and report success on stderr.
+static void HandleDefinition() {
+  if (!ParseDefinition()) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+  fprintf(stderr, "Parsed a function definition.\n");
+}
+
+/// Parse an 'extern' at top level and report success on stderr.
+static void HandleExtern() {
+  if (!ParseExtern()) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+  fprintf(stderr, "Parsed an extern\n");
+}
+
+/// Parse a top-level expression (wrapped in an anonymous function) and
+/// report success on stderr.
+static void HandleTopLevelExpression() {
+  if (!ParseTopLevelExpr()) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+  fprintf(stderr, "Parsed a top-level expr\n");
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  for (;;) {
+    fprintf(stderr, "ready> ");
+
+    // End of input terminates the loop.
+    if (CurTok == tok_eof)
+      return;
+
+    switch (CurTok) {
+    case tok_def:
+      HandleDefinition();
+      break;
+    case tok_extern:
+      HandleExtern();
+      break;
+    case ';': // ignore top-level semicolons.
+      getNextToken();
+      break;
+    default:
+      HandleTopLevelExpression();
+      break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  // Install standard binary operators; a larger number binds more tightly
+  // and 1 is the lowest precedence.
+  BinopPrecedence['*'] = 40; // highest.
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['<'] = 10;
+
+  // Show the first prompt and prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Run the main "interpreter loop" now; it returns at end of input.
+  MainLoop();
+
+  return 0;
+}
+
+
+

Next: Implementing Code Generation to LLVM IR

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl03.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl03.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl03.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl03.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,1271 @@ + + + + + + + + + 3. Kaleidoscope: Code generation to LLVM IR — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

3. Kaleidoscope: Code generation to LLVM IR

+ +
+

3.1. Chapter 3 Introduction

+

Welcome to Chapter 3 of the “Implementing a language with +LLVM” tutorial. This chapter shows you how to transform +the Abstract Syntax Tree, built in Chapter 2, into +LLVM IR. This will teach you a little bit about how LLVM does things, as +well as demonstrate how easy it is to use. It’s much more work to build +a lexer and parser than it is to generate LLVM IR code. :)

+

Please note: the code in this chapter and later require LLVM 3.7 or +later. LLVM 3.6 and before will not work with it. Also note that you +need to use a version of this tutorial that matches your LLVM release: +If you are using an official LLVM release, use the version of the +documentation included with your release or on the llvm.org releases +page.

+
+
+

3.2. Code Generation Setup

+

In order to generate LLVM IR, we want some simple setup to get started. +First we define virtual code generation (codegen) methods in each AST +class:

+
/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() = default;
+  /// Emit IR for this node; returns null on error.
+  virtual Value *codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+  // 'override' makes the compiler check this against ExprAST::codegen.
+  Value *codegen() override;
+};
+...
+
+
+

The codegen() method says to emit IR for that AST node along with all +the things it depends on, and they all return an LLVM Value object. +“Value” is the class used to represent a “Static Single Assignment +(SSA) +register” or “SSA value” in LLVM. The most distinct aspect of SSA values +is that their value is computed as the related instruction executes, and +it does not get a new value until (and if) the instruction re-executes. +In other words, there is no way to “change” an SSA value. For more +information, please read up on Static Single +Assignment +- the concepts are really quite natural once you grok them.

+

Note that instead of adding virtual methods to the ExprAST class +hierarchy, it could also make sense to use a visitor +pattern or some other +way to model this. Again, this tutorial won’t dwell on good software +engineering practices: for our purposes, adding a virtual method is +simplest.

+

The second thing we want is a “LogError” method like we used for the +parser, which will be used to report errors found during code generation +(for example, use of an undeclared parameter):

+
static LLVMContext TheContext; // single context instance passed to the LLVM APIs
+static IRBuilder<> Builder(TheContext); // built from TheContext, so declared after it
+static std::unique_ptr<Module> TheModule; // module the generated functions are looked up in
+static std::map<std::string, Value *> NamedValues; // symbol table: name -> IR value
+
+Value *LogErrorV(const char *Str) {
+  LogError(Str); // report through the parser's helper
+  return nullptr; // null Value* lets codegen callers propagate the failure
+}
+
+
+

The static variables will be used during code generation. TheContext +is an opaque object that owns a lot of core LLVM data structures, such as +the type and constant value tables. We don’t need to understand it in +detail, we just need a single instance to pass into APIs that require it.

+

The Builder object is a helper object that makes it easy to generate +LLVM instructions. Instances of the +IRBuilder +class template keep track of the current place to insert instructions +and have methods to create new instructions.

+

TheModule is an LLVM construct that contains functions and global +variables. In many ways, it is the top-level structure that the LLVM IR +uses to contain code. It will own the memory for all of the IR that we +generate, which is why the codegen() method returns a raw Value*, +rather than a unique_ptr<Value>.

+

The NamedValues map keeps track of which values are defined in the +current scope and what their LLVM representation is. (In other words, it +is a symbol table for the code). In this form of Kaleidoscope, the only +things that can be referenced are function parameters. As such, function +parameters will be in this map when generating code for their function +body.

+

With these basics in place, we can start talking about how to generate +code for each expression. Note that this assumes that the Builder +has been set up to generate code into something. For now, we’ll assume +that this has already been done, and we’ll just use it to emit code.

+
+
+

3.3. Expression Code Generation

+

Generating LLVM code for expression nodes is very straightforward: less +than 45 lines of commented code for all four of our expression nodes. +First we’ll do numeric literals:

+
Value *NumberExprAST::codegen() {
+  // Wrap the literal in an APFloat and fetch the matching constant.
+  APFloat Literal(Val);
+  return ConstantFP::get(TheContext, Literal);
+}
+
+
+

In the LLVM IR, numeric constants are represented with the +ConstantFP class, which holds the numeric value in an APFloat +internally (APFloat has the capability of holding floating point +constants of Arbitrary Precision). This code basically just creates +and returns a ConstantFP. Note that in the LLVM IR, constants +are all uniqued together and shared. For this reason, the API uses the +“foo::get(…)” idiom instead of “new foo(..)” or “foo::Create(..)”.

+
Value *VariableExprAST::codegen() {
+  // Look this variable up in the function. find() is used instead of
+  // operator[] so an unknown name does not add a null entry to NamedValues.
+  auto It = NamedValues.find(Name);
+  if (It == NamedValues.end() || !It->second)
+    return LogErrorV("Unknown variable name");
+  return It->second;
+}
+
+
+

References to variables are also quite simple using LLVM. In the simple +version of Kaleidoscope, we assume that the variable has already been +emitted somewhere and its value is available. In practice, the only +values that can be in the NamedValues map are function arguments. +This code simply checks to see that the specified name is in the map (if +not, an unknown variable is being referenced) and returns the value for +it. In future chapters, we’ll add support for loop induction +variables in the symbol table, and for local +variables.

+
Value *BinaryExprAST::codegen() {
+  // Emit the left operand, then the right one; either may have failed.
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (L == nullptr || R == nullptr)
+    return nullptr;
+
+  switch (Op) {
+  default:
+    return LogErrorV("invalid binary operator");
+  case '+':
+    return Builder.CreateFAdd(L, R, "addtmp");
+  case '-':
+    return Builder.CreateFSub(L, R, "subtmp");
+  case '*':
+    return Builder.CreateFMul(L, R, "multmp");
+  case '<': {
+    Value *Cmp = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(Cmp, Type::getDoubleTy(TheContext),
+                                "booltmp");
+  }
+  }
+}
+
+
+

Binary operators start to get more interesting. The basic idea here is +that we recursively emit code for the left-hand side of the expression, +then the right-hand side, then we compute the result of the binary +expression. In this code, we do a simple switch on the opcode to create +the right LLVM instruction.

+

In the example above, the LLVM builder class is starting to show its +value. IRBuilder knows where to insert the newly created instruction, +all you have to do is specify what instruction to create (e.g. with +CreateFAdd), which operands to use (L and R here) and +optionally provide a name for the generated instruction.

+

One nice thing about LLVM is that the name is just a hint. For instance, +if the code above emits multiple “addtmp” variables, LLVM will +automatically provide each one with an increasing, unique numeric +suffix. Local value names for instructions are purely optional, but it +makes it much easier to read the IR dumps.

+

LLVM instructions are constrained by strict +rules: for example, the Left and Right operands of an add +instruction must have the same type, and the +result type of the add must match the operand types. Because all values +in Kaleidoscope are doubles, this makes for very simple code for add, +sub and mul.

+

On the other hand, LLVM specifies that the fcmp +instruction always returns an ‘i1’ value (a +one bit integer). The problem with this is that Kaleidoscope wants the +value to be a 0.0 or 1.0 value. In order to get these semantics, we +combine the fcmp instruction with a uitofp +instruction. This instruction converts its +input integer into a floating point value by treating the input as an +unsigned value. In contrast, if we used the sitofp +instruction, the Kaleidoscope ‘<’ operator +would return 0.0 and -1.0, depending on the input value.

+
Value *CallExprAST::codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == nullptr)
+    return LogErrorV("Unknown function referenced");
+
+  // The call must pass exactly as many arguments as the callee declares.
+  if (CalleeF->arg_size() != Args.size())
+    return LogErrorV("Incorrect # arguments passed");
+
+  // Emit each argument in order; give up on the first one that fails.
+  std::vector<Value *> ArgsV;
+  for (const auto &Arg : Args) {
+    Value *ArgV = Arg->codegen();
+    if (!ArgV)
+      return nullptr;
+    ArgsV.push_back(ArgV);
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+}
+
+
+

Code generation for function calls is quite straightforward with LLVM. The code +above initially does a function name lookup in the LLVM Module’s symbol table. +Recall that the LLVM Module is the container that holds the functions we are +JIT’ing. By giving each function the same name as what the user specifies, we +can use the LLVM symbol table to resolve function names for us.

+

Once we have the function to call, we recursively codegen each argument +that is to be passed in, and create an LLVM call +instruction. Note that LLVM uses the native C +calling conventions by default, allowing these calls to also call into +standard library functions like “sin” and “cos”, with no additional +effort.

+

This wraps up our handling of the four basic expressions that we have so +far in Kaleidoscope. Feel free to go in and add some more. For example, +by browsing the LLVM language reference you’ll find +several other interesting instructions that are really easy to plug into +our basic framework.

+
+
+

3.4. Function Code Generation

+

Code generation for prototypes and functions must handle a number of +details, which make their code less beautiful than expression code +generation, but allows us to illustrate some important points. First, +let’s talk about code generation for prototypes: they are used both for +function bodies and external function declarations. The code starts +with:

+
Function *PrototypeAST::codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<Type*> Doubles(Args.size(),
+                             Type::getDoubleTy(TheContext));
+  FunctionType *FT =
+    FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false);
+
+  Function *F =
+    Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
+
+
+

This code packs a lot of power into a few lines. Note first that this +function returns a “Function*” instead of a “Value*”. Because a +“prototype” really talks about the external interface for a function +(not the value computed by an expression), it makes sense for it to +return the LLVM Function it corresponds to when codegen’d.

+

The call to FunctionType::get creates the FunctionType that +should be used for a given Prototype. Since all function arguments in +Kaleidoscope are of type double, the first line creates a vector of “N” +LLVM double types. It then uses the FunctionType::get method to +create a function type that takes “N” doubles as arguments, returns one +double as a result, and that is not vararg (the false parameter +indicates this). Note that Types in LLVM are uniqued just like Constants +are, so you don’t “new” a type, you “get” it.

+

The final line above actually creates the IR Function corresponding to +the Prototype. This indicates the type, linkage and name to use, as +well as which module to insert into. “external +linkage” means that the function may be +defined outside the current module and/or that it is callable by +functions outside the module. The Name passed in is the name the user +specified: since “TheModule” is specified, this name is registered +in “TheModule”s symbol table.

+
// Set names for all arguments.
+unsigned Idx = 0;
+for (auto &Arg : F->args())
+  Arg.setName(Args[Idx++]);
+
+return F;
+
+
+

Finally, we set the name of each of the function’s arguments according to the +names given in the Prototype. This step isn’t strictly necessary, but keeping +the names consistent makes the IR more readable, and allows subsequent code to +refer directly to the arguments for their names, rather than having to look +them up in the Prototype AST.

+

At this point we have a function prototype with no body. This is how LLVM IR +represents function declarations. For extern statements in Kaleidoscope, this +is as far as we need to go. For function definitions however, we need to +codegen and attach a function body.

+
Function *FunctionAST::codegen() {
+    // First, check for an existing function from a previous 'extern' declaration.
+  Function *TheFunction = TheModule->getFunction(Proto->getName());
+
+  if (!TheFunction)
+    TheFunction = Proto->codegen();
+
+  if (!TheFunction)
+    return nullptr;
+
+  if (!TheFunction->empty())
+    return (Function*)LogErrorV("Function cannot be redefined.");
+
+
+

For function definitions, we start by searching TheModule’s symbol table for an +existing version of this function, in case one has already been created using an +‘extern’ statement. If Module::getFunction returns null then no previous version +exists, so we’ll codegen one from the Prototype. In either case, we want to +assert that the function is empty (i.e. has no body yet) before we start.

+
// Create a new basic block to start insertion into.
+BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
+Builder.SetInsertPoint(BB);
+
+// Record the function arguments in the NamedValues map.
+NamedValues.clear();
+for (auto &Arg : TheFunction->args())
+  NamedValues[Arg.getName()] = &Arg;
+
+
+

Now we get to the point where the Builder is set up. The first line +creates a new basic block +(named “entry”), which is inserted into TheFunction. The second line +then tells the builder that new instructions should be inserted into the +end of the new basic block. Basic blocks in LLVM are an important part +of functions that define the Control Flow +Graph. Since we +don’t have any control flow, our functions will only contain one block +at this point. We’ll fix this in Chapter 5 :).

+

Next we add the function arguments to the NamedValues map (after first clearing +it out) so that they’re accessible to VariableExprAST nodes.

+
if (Value *RetVal = Body->codegen()) {
+  // Finish off the function.
+  Builder.CreateRet(RetVal);
+
+  // Validate the generated code, checking for consistency.
+  verifyFunction(*TheFunction);
+
+  return TheFunction;
+}
+
+
+

Once the insertion point has been set up and the NamedValues map populated, +we call the codegen() method for the root expression of the function. If no +error happens, this emits code to compute the expression into the entry block +and returns the value that was computed. Assuming no error, we then create an +LLVM ret instruction, which completes the function. +Once the function is built, we call verifyFunction, which is +provided by LLVM. This function does a variety of consistency checks on +the generated code, to determine if our compiler is doing everything +right. Using this is important: it can catch a lot of bugs. Once the +function is finished and validated, we return it.

+
  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return nullptr;
+}
+
+
+

The only piece left here is handling of the error case. For simplicity, +we handle this by merely deleting the function we produced with the +eraseFromParent method. This allows the user to redefine a function +that they incorrectly typed in before: if we didn’t delete it, it would +live in the symbol table, with a body, preventing future redefinition.

+

This code does have a bug, though: If the FunctionAST::codegen() method +finds an existing IR Function, it does not validate its signature against the +definition’s own prototype. This means that an earlier ‘extern’ declaration will +take precedence over the function definition’s signature, which can cause +codegen to fail, for instance if the function arguments are named differently. +There are a number of ways to fix this bug, see what you can come up with! Here +is a testcase:

+
extern foo(a);     # ok, defines foo.
+def foo(b) b;      # Error: Unknown variable name. (decl using 'a' takes precedence).
+
+
+
+
+

3.5. Driver Changes and Closing Thoughts

+

For now, code generation to LLVM doesn’t really get us much, except that +we can look at the pretty IR calls. The sample code inserts calls to +codegen into the “HandleDefinition”, “HandleExtern” etc +functions, and then dumps out the LLVM IR. This gives a nice way to look +at the LLVM IR for simple functions. For example:

+
ready> 4+5;
+Read top-level expression:
+define double @0() {
+entry:
+  ret double 9.000000e+00
+}
+
+
+

Note how the parser turns the top-level expression into anonymous +functions for us. This will be handy when we add JIT +support in the next chapter. Also note that the +code is very literally transcribed, no optimizations are being performed +except simple constant folding done by IRBuilder. We will add +optimizations explicitly in the next +chapter.

+
ready> def foo(a b) a*a + 2*a*b + b*b;
+Read function definition:
+define double @foo(double %a, double %b) {
+entry:
+  %multmp = fmul double %a, %a
+  %multmp1 = fmul double 2.000000e+00, %a
+  %multmp2 = fmul double %multmp1, %b
+  %addtmp = fadd double %multmp, %multmp2
+  %multmp3 = fmul double %b, %b
+  %addtmp4 = fadd double %addtmp, %multmp3
+  ret double %addtmp4
+}
+
+
+

This shows some simple arithmetic. Notice the striking similarity to the +LLVM builder calls that we use to create the instructions.

+
ready> def bar(a) foo(a, 4.0) + bar(31337);
+Read function definition:
+define double @bar(double %a) {
+entry:
+  %calltmp = call double @foo(double %a, double 4.000000e+00)
+  %calltmp1 = call double @bar(double 3.133700e+04)
+  %addtmp = fadd double %calltmp, %calltmp1
+  ret double %addtmp
+}
+
+
+

This shows some function calls. Note that this function will take a long +time to execute if you call it. In the future we’ll add conditional +control flow to actually make recursion useful :).

+
ready> extern cos(x);
+Read extern:
+declare double @cos(double)
+
+ready> cos(1.234);
+Read top-level expression:
+define double @1() {
+entry:
+  %calltmp = call double @cos(double 1.234000e+00)
+  ret double %calltmp
+}
+
+
+

This shows an extern for the libm “cos” function, and a call to it.

+
ready> ^D
+; ModuleID = 'my cool jit'
+
+define double @0() {
+entry:
+  %addtmp = fadd double 4.000000e+00, 5.000000e+00
+  ret double %addtmp
+}
+
+define double @foo(double %a, double %b) {
+entry:
+  %multmp = fmul double %a, %a
+  %multmp1 = fmul double 2.000000e+00, %a
+  %multmp2 = fmul double %multmp1, %b
+  %addtmp = fadd double %multmp, %multmp2
+  %multmp3 = fmul double %b, %b
+  %addtmp4 = fadd double %addtmp, %multmp3
+  ret double %addtmp4
+}
+
+define double @bar(double %a) {
+entry:
+  %calltmp = call double @foo(double %a, double 4.000000e+00)
+  %calltmp1 = call double @bar(double 3.133700e+04)
+  %addtmp = fadd double %calltmp, %calltmp1
+  ret double %addtmp
+}
+
+declare double @cos(double)
+
+define double @1() {
+entry:
+  %calltmp = call double @cos(double 1.234000e+00)
+  ret double %calltmp
+}
+
+
+

When you quit the current demo (by sending an EOF via CTRL+D on Linux +or CTRL+Z and ENTER on Windows), it dumps out the IR for the entire +module generated. Here you can see the big picture with all the +functions referencing each other.

+

This wraps up the third chapter of the Kaleidoscope tutorial. Up next, +we’ll describe how to add JIT codegen and optimizer +support to this so we can actually start running +code!

+
+
+

3.6. Full Code Listing

+

Here is the complete code listing for our running example, enhanced with +the LLVM code generator. Because this uses the LLVM libraries, we need +to link them in. To do this, we use the +llvm-config tool to inform +our makefile/command line about which options to use:

+
# Compile
+clang++ -g -O3 toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core` -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include <algorithm>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2,
+  tok_extern = -3,
+
+  // primary
+  tok_identifier = -4,
+  tok_number = -5
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() { // Reads stdin and returns a Token value or a raw character code.
+  static int LastChar = ' '; // one-character lookahead, preserved between calls
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") // keywords are recognized only after the whole identifier is read
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), nullptr); // no end-pointer check: strtod converts the longest valid prefix, so "1.2.3" silently becomes 1.2
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do
+      LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok(); // restart lexing after the comment; at EOF fall through to the check below
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar(); // advance the lookahead past the character being returned
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() = default;
+
+  virtual Value *codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+
+  Value *codegen() override;
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+
+public:
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+
+  Value *codegen() override;
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  std::unique_ptr<ExprAST> LHS, RHS;
+
+public:
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+
+  Value *codegen() override;
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<std::unique_ptr<ExprAST>> Args;
+
+public:
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : Callee(Callee), Args(std::move(Args)) {}
+
+  Value *codegen() override;
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+
+public:
+  PrototypeAST(const std::string &Name, std::vector<std::string> Args)
+      : Name(Name), Args(std::move(Args)) {}
+
+  Function *codegen();
+  const std::string &getName() const { return Name; }
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto;
+  std::unique_ptr<ExprAST> Body;
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+
+  Function *codegen();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() { return CurTok = gettok(); }
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  // Returns the precedence registered for the binary operator in CurTok, or -1
+  // when CurTok is not an ASCII character or is not a declared operator.
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop. Use find() rather than operator[]: the
+  // subscript form would insert a zero-precedence entry into BinopPrecedence
+  // for every non-operator token that is looked up.
+  auto It = BinopPrecedence.find(CurTok);
+  if (It == BinopPrecedence.end() || It->second <= 0)
+    return -1;
+  return It->second;
+}
+
+/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  auto Result = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // consume the number
+  return std::move(Result);
+}
+
+/// parenexpr ::= '(' expression ')'
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // eat (.
+  auto V = ParseExpression();
+  if (!V)
+    return nullptr;
+
+  if (CurTok != ')')
+    return LogError("expected ')'");
+  getNextToken(); // eat ).
+  return V;
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken(); // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return std::make_unique<VariableExprAST>(IdName);
+
+  // Call.
+  getNextToken(); // eat (
+  std::vector<std::unique_ptr<ExprAST>> Args;
+  if (CurTok != ')') {
+    while (true) {
+      if (auto Arg = ParseExpression())
+        Args.push_back(std::move(Arg));
+      else
+        return nullptr;
+
+      if (CurTok == ')')
+        break;
+
+      if (CurTok != ',')
+        return LogError("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return std::make_unique<CallExprAST>(IdName, std::move(Args));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  default:
+    return LogError("unknown token when expecting an expression");
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case '(':
+    return ParseParenExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, // minimum precedence an operator needs to be consumed here
+                                              std::unique_ptr<ExprAST> LHS) { // expression parsed so far
+  // If this is a binop, find its precedence.
+  while (true) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec) // GetTokPrecedence yields -1 for non-operators, so they end the loop too
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken(); // eat binop
+
+    // Parse the primary expression after the binary operator.
+    auto RHS = ParsePrimary();
+    if (!RHS)
+      return nullptr;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); // +1: only strictly tighter operators are folded into RHS
+      if (!RHS)
+        return nullptr;
+    }
+
+    // Merge LHS/RHS.
+    LHS =
+        std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); // the merged node becomes LHS for the next iteration
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static std::unique_ptr<ExprAST> ParseExpression() {
+  auto LHS = ParsePrimary();
+  if (!LHS)
+    return nullptr;
+
+  return ParseBinOpRHS(0, std::move(LHS));
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static std::unique_ptr<PrototypeAST> ParsePrototype() { // Parses "name(arg arg ...)"; returns nullptr after logging on a syntax error.
+  if (CurTok != tok_identifier)
+    return LogErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr; // copy now: the next identifier token overwrites IdentifierStr
+  getNextToken();
+
+  if (CurTok != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier) // first call eats '('; argument names are separated by whitespace, not commas
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')') // any non-identifier other than ')' (a comma, for instance) lands here
+    return LogErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken(); // eat ')'.
+
+  return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames));
+}
+
+/// definition ::= 'def' prototype expression
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // eat def.
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
+
+  if (auto E = ParseExpression())
+    return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+  return nullptr;
+}
+
+/// toplevelexpr ::= expression
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  if (auto E = ParseExpression()) {
+    // Make an anonymous proto.
+    auto Proto = std::make_unique<PrototypeAST>("__anon_expr",
+                                                 std::vector<std::string>());
+    return std::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+  }
+  return nullptr;
+}
+
+/// external ::= 'extern' prototype
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static std::unique_ptr<LLVMContext> TheContext;
+static std::unique_ptr<Module> TheModule;
+static std::unique_ptr<IRBuilder<>> Builder;
+static std::map<std::string, Value *> NamedValues;
+
+Value *LogErrorV(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+Value *NumberExprAST::codegen() {
+  return ConstantFP::get(*TheContext, APFloat(Val));
+}
+
+Value *VariableExprAST::codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  if (!V)
+    return LogErrorV("Unknown variable name");
+  return V;
+}
+
+Value *BinaryExprAST::codegen() {
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
+
+  switch (Op) {
+  case '+':
+    return Builder->CreateFAdd(L, R, "addtmp");
+  case '-':
+    return Builder->CreateFSub(L, R, "subtmp");
+  case '*':
+    return Builder->CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder->CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
+  default:
+    return LogErrorV("invalid binary operator");
+  }
+}
+
+Value *CallExprAST::codegen() { // Emits a call to Callee; returns nullptr if the callee or any argument fails.
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (!CalleeF) // the callee needs an earlier 'extern' or 'def' in this module
+    return LogErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size()) // only the count is compared
+    return LogErrorV("Incorrect # arguments passed");
+
+  std::vector<Value *> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->codegen()); // arguments are emitted in source order
+    if (!ArgsV.back()) // a failed argument aborts the whole call
+      return nullptr;
+  }
+
+  return Builder->CreateCall(CalleeF, ArgsV, "calltmp");
+}
+
+Function *PrototypeAST::codegen() { // Creates the body-less IR Function for this prototype in TheModule.
+  // Make the function type:  double(double,double) etc.
+  std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(*TheContext)); // one double per declared argument
+  FunctionType *FT =
+      FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false); // returns double; false = not vararg
+
+  Function *F =
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); // passing TheModule registers Name in its symbol table
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (auto &Arg : F->args())
+    Arg.setName(Args[Idx++]); // FunctionAST::codegen later finds arguments by these names
+
+  return F;
+}
+
+Function *FunctionAST::codegen() {
+  // Emits the IR for a function definition: reuses or creates the declaration,
+  // generates the body into a fresh "entry" block and returns the Function.
+  // Returns nullptr if the prototype or body fails to generate, or if the
+  // function already has a body.
+
+  // First, check for an existing function from a previous 'extern' declaration.
+  // NOTE: when one exists, its argument names are used, not the names written
+  // in this definition's own prototype.
+  Function *TheFunction = TheModule->getFunction(Proto->getName());
+
+  if (!TheFunction)
+    TheFunction = Proto->codegen();
+
+  if (!TheFunction)
+    return nullptr;
+
+  // Refuse to redefine a function that already has a body; without this guard
+  // a repeated 'def' would add another "entry" block to the existing function.
+  if (!TheFunction->empty()) {
+    LogErrorV("Function cannot be redefined.");
+    return nullptr;
+  }
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
+  Builder->SetInsertPoint(BB);
+
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args())
+    NamedValues[std::string(Arg.getName())] = &Arg;
+
+  if (Value *RetVal = Body->codegen()) {
+    // Finish off the function.
+    Builder->CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static void InitializeModule() {
+  // Open a new context and module.
+  TheContext = std::make_unique<LLVMContext>();
+  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
+
+  // Create a new builder for the module.
+  Builder = std::make_unique<IRBuilder<>>(*TheContext);
+}
+
+static void HandleDefinition() {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
+      fprintf(stderr, "Read function definition:");
+      FnIR->print(errs());
+      fprintf(stderr, "\n");
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
+      fprintf(stderr, "Read extern: ");
+      FnIR->print(errs());
+      fprintf(stderr, "\n");
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() { // Parses, emits and prints one top-level expression.
+  // Evaluate a top-level expression into an anonymous function.
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (auto *FnIR = FnAST->codegen()) {
+      fprintf(stderr, "Read top-level expression:");
+      FnIR->print(errs());
+      fprintf(stderr, "\n");
+
+      // Remove the anonymous expression.
+      FnIR->eraseFromParent(); // so the next "__anon_expr" does not find this one in the module
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (true) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:
+      return;
+    case ';': // ignore top-level semicolons.
+      getNextToken();
+      break;
+    case tok_def:
+      HandleDefinition();
+      break;
+    case tok_extern:
+      HandleExtern();
+      break;
+    default:
+      HandleTopLevelExpression();
+      break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40; // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  InitializeModule();
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  // Print out all of the generated code.
+  TheModule->print(errs(), nullptr);
+
+  return 0;
+}
+
+
+

Next: Adding JIT and Optimizer Support

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl04.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl04.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl04.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl04.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,1441 @@ + + + + + + + + + 4. Kaleidoscope: Adding JIT and Optimizer Support — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

4. Kaleidoscope: Adding JIT and Optimizer Support

+ +
+

4.1. Chapter 4 Introduction

+

Welcome to Chapter 4 of the “Implementing a language with +LLVM” tutorial. Chapters 1-3 described the implementation +of a simple language and added support for generating LLVM IR. This +chapter describes two new techniques: adding optimizer support to your +language, and adding JIT compiler support. These additions will +demonstrate how to get nice, efficient code for the Kaleidoscope +language.

+
+
+

4.2. Trivial Constant Folding

+

Our demonstration for Chapter 3 is elegant and easy to extend. +Unfortunately, it does not produce wonderful code. The IRBuilder, +however, does give us obvious optimizations when compiling simple code:

+
ready> def test(x) 1+2+x;
+Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 3.000000e+00, %x
+        ret double %addtmp
+}
+
+
+

This code is not a literal transcription of the AST built by parsing the +input. That would be:

+
ready> def test(x) 1+2+x;
+Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 2.000000e+00, 1.000000e+00
+        %addtmp1 = fadd double %addtmp, %x
+        ret double %addtmp1
+}
+
+
+

Constant folding, as seen above, in particular, is a very common and +very important optimization: so much so that many language implementors +implement constant folding support in their AST representation.

+

With LLVM, you don’t need this support in the AST. Since all calls to +build LLVM IR go through the LLVM IR builder, the builder itself checks +to see if there is a constant folding opportunity when you call it. If +so, it just does the constant fold and returns the constant instead of +creating an instruction.

+

Well, that was easy :). In practice, we recommend always using +IRBuilder when generating code like this. It has no “syntactic +overhead” for its use (you don’t have to uglify your compiler with +constant checks everywhere) and it can dramatically reduce the amount of +LLVM IR that is generated in some cases (particularly for languages with a +macro preprocessor or that use a lot of constants).

+

On the other hand, the IRBuilder is limited by the fact that it does +all of its analysis inline with the code as it is built. If you take a +slightly more complex example:

+
ready> def test(x) (1+2+x)*(x+(1+2));
+ready> Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 3.000000e+00, %x
+        %addtmp1 = fadd double %x, 3.000000e+00
+        %multmp = fmul double %addtmp, %addtmp1
+        ret double %multmp
+}
+
+
+

In this case, the LHS and RHS of the multiplication are the same value. +We’d really like to see this generate “tmp = x+3; result = tmp*tmp;” +instead of computing “x+3” twice.

+

Unfortunately, no amount of local analysis will be able to detect and +correct this. This requires two transformations: reassociation of +expressions (to make the add’s lexically identical) and Common +Subexpression Elimination (CSE) to delete the redundant add instruction. +Fortunately, LLVM provides a broad range of optimizations that you can +use, in the form of “passes”.

+
+
+

4.3. LLVM Optimization Passes

+
+

Warning

+

Due to the transition to the new PassManager infrastructure, this tutorial +is based on llvm::legacy::FunctionPassManager which can be found in +LegacyPassManager.h. +For the purpose of this tutorial the above should be used until +the pass manager transition is complete.

+
+

LLVM provides many optimization passes, which do many different sorts of +things and have different tradeoffs. Unlike other systems, LLVM doesn’t +hold to the mistaken notion that one set of optimizations is right for +all languages and for all situations. LLVM allows a compiler implementor +to make complete decisions about what optimizations to use, in which +order, and in what situation.

+

As a concrete example, LLVM supports both “whole module” passes, which +look across as large a body of code as they can (often a whole file, +but if run at link time, this can be a substantial portion of the whole +program). It also supports and includes “per-function” passes which just +operate on a single function at a time, without looking at other +functions. For more information on passes and how they are run, see the +How to Write a Pass document and the +List of LLVM Passes.

+

For Kaleidoscope, we are currently generating functions on the fly, one +at a time, as the user types them in. We aren’t shooting for the +ultimate optimization experience in this setting, but we also want to +catch the easy and quick stuff where possible. As such, we will choose +to run a few per-function optimizations as the user types the function +in. If we wanted to make a “static Kaleidoscope compiler”, we would use +exactly the code we have now, except that we would defer running the +optimizer until the entire file has been parsed.

+

In order to get per-function optimizations going, we need to set up a +FunctionPassManager to hold +and organize the LLVM optimizations that we want to run. Once we have +that, we can add a set of optimizations to run. We’ll need a new +FunctionPassManager for each module that we want to optimize, so we’ll +write a function to create and initialize both the module and pass manager +for us:

+
// Create a fresh module together with the function pass manager that optimizes it.
+void InitializeModuleAndPassManager(void) {
+  // Open a new module.
+  TheModule = std::make_unique<Module>("my cool jit", TheContext);
+
+  // Create a new pass manager attached to it.
+  TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // The passes below are added in the order shown.
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  // Initialize the pass manager once every pass has been added.
+  TheFPM->doInitialization();
+}
+
+
+

This code initializes the global module TheModule, and the function pass +manager TheFPM, which is attached to TheModule. Once the pass manager is +set up, we use a series of “add” calls to add a bunch of LLVM passes.

+

In this case, we choose to add four optimization passes. +The passes we choose here are a pretty standard set +of “cleanup” optimizations that are useful for a wide variety of code. I won’t +delve into what they do but, believe me, they are a good starting place :).

+

Once the PassManager is set up, we need to make use of it. We do this by +running it after our newly created function is constructed (in +FunctionAST::codegen()), but before it is returned to the client:

+
if (Value *RetVal = Body->codegen()) {
+  // Finish off the function.
+  Builder.CreateRet(RetVal);
+
+  // Validate the generated code, checking for consistency.
+  verifyFunction(*TheFunction);
+
+  // Optimize the function.
+  TheFPM->run(*TheFunction);
+
+  return TheFunction;
+}
+
+
+

As you can see, this is pretty straightforward. The +FunctionPassManager optimizes and updates the LLVM Function* in +place, improving (hopefully) its body. With this in place, we can try +our test above again:

+
ready> def test(x) (1+2+x)*(x+(1+2));
+ready> Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double %x, 3.000000e+00
+        %multmp = fmul double %addtmp, %addtmp
+        ret double %multmp
+}
+
+
+

As expected, we now get our nicely optimized code, saving a floating +point add instruction from every execution of this function.

+

LLVM provides a wide variety of optimizations that can be used in +certain circumstances. Some documentation about the various +passes is available, but it isn’t very complete. +Another good source of ideas can come from looking at the passes that +Clang runs to get started. The “opt” tool allows you to +experiment with passes from the command line, so you can see if they do +anything.

+

Now that we have reasonable code coming out of our front-end, let’s talk +about executing it!

+
+
+

4.4. Adding a JIT Compiler

+

Code that is available in LLVM IR can have a wide variety of tools +applied to it. For example, you can run optimizations on it (as we did +above), you can dump it out in textual or binary forms, you can compile +the code to an assembly file (.s) for some target, or you can JIT +compile it. The nice thing about the LLVM IR representation is that it +is the “common currency” between many different parts of the compiler.

+

In this section, we’ll add JIT compiler support to our interpreter. The +basic idea that we want for Kaleidoscope is to have the user enter +function bodies as they do now, but immediately evaluate the top-level +expressions they type in. For example, if they type in “1 + 2;”, we +should evaluate and print out 3. If they define a function, they should +be able to call it from the command line.

+

In order to do this, we first prepare the environment to create code for +the current native target and declare and initialize the JIT. This is +done by calling some InitializeNativeTarget\* functions and +adding a global variable TheJIT, and initializing it in +main:

+
static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+...
+// Program entry point: prepares the native target, the operator table and the
+// JIT, then hands control to the interactive loop.
+int main() {
+  // Set up code generation for the native target before the JIT is created.
+  InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
+  InitializeNativeTargetAsmParser();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40; // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Create the JIT that top-level expressions are evaluated with.
+  TheJIT = std::make_unique<KaleidoscopeJIT>();
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  return 0;
+}
+
+
+

We also need to set up the data layout for the JIT:

+
void InitializeModuleAndPassManager(void) {
+  // Open a new module.
+  TheModule = std::make_unique<Module>("my cool jit", TheContext);
+  TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
+
+  // Create a new pass manager attached to it.
+  TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get());
+  ...
+
+
+

The KaleidoscopeJIT class is a simple JIT built specifically for these +tutorials, available inside the LLVM source code +at llvm-src/examples/Kaleidoscope/include/KaleidoscopeJIT.h. +In later chapters we will look at how it works and extend it with +new features, but for now we will take it as given. Its API is very simple: +addModule adds an LLVM IR module to the JIT, making its functions +available for execution; removeModule removes a module, freeing any +memory associated with the code in that module; and findSymbol allows us +to look up pointers to the compiled code.

+

We can take this simple API and change our code that parses top-level expressions to +look like this:

+
static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {
+
+      // JIT the module containing the anonymous expression, keeping a handle so
+      // we can free it later.
+      auto H = TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+      assert(ExprSymbol && "Function not found");
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
+      fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      TheJIT->removeModule(H);
+    }
+
+
+

If parsing and codegen succeed, the next step is to add the module containing +the top-level expression to the JIT. We do this by calling addModule, which +triggers code generation for all the functions in the module, and returns a +handle that can be used to remove the module from the JIT later. Once the module +has been added to the JIT it can no longer be modified, so we also open a new +module to hold subsequent code by calling InitializeModuleAndPassManager().

+

Once we’ve added the module to the JIT we need to get a pointer to the final +generated code. We do this by calling the JIT’s findSymbol method, and passing +the name of the top-level expression function: __anon_expr. Since we just +added this function, we assert that findSymbol returned a result.

+

Next, we get the in-memory address of the __anon_expr function by calling +getAddress() on the symbol. Recall that we compile top-level expressions +into a self-contained LLVM function that takes no arguments and returns the +computed double. Because the LLVM JIT compiler matches the native platform ABI, +this means that you can just cast the result pointer to a function pointer of +that type and call it directly. This means there is no difference between JIT +compiled code and native machine code that is statically linked into your +application.

+

Finally, since we don’t support re-evaluation of top-level expressions, we +remove the module from the JIT when we’re done to free the associated memory. +Recall, however, that the module we created a few lines earlier (via +InitializeModuleAndPassManager) is still open and waiting for new code to be +added.

+

With just these two changes, let’s see how Kaleidoscope works now!

+
ready> 4+5;
+Read top-level expression:
+define double @0() {
+entry:
+  ret double 9.000000e+00
+}
+
+Evaluated to 9.000000
+
+
+

Well this looks like it is basically working. The dump of the function +shows the “no argument function that always returns double” that we +synthesize for each top-level expression that is typed in. This +demonstrates very basic functionality, but can we do more?

+
ready> def testfunc(x y) x + y*2;
+Read function definition:
+define double @testfunc(double %x, double %y) {
+entry:
+  %multmp = fmul double %y, 2.000000e+00
+  %addtmp = fadd double %multmp, %x
+  ret double %addtmp
+}
+
+ready> testfunc(4, 10);
+Read top-level expression:
+define double @1() {
+entry:
+  %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
+  ret double %calltmp
+}
+
+Evaluated to 24.000000
+
+ready> testfunc(5, 10);
+ready> LLVM ERROR: Program used external function 'testfunc' which could not be resolved!
+
+
+

Function definitions and calls also work, but something went very wrong on that +last line. The call looks valid, so what happened? As you may have guessed from +the API, a Module is a unit of allocation for the JIT, and testfunc was part +of the same module that contained the anonymous expression. When we removed that +module from the JIT to free the memory for the anonymous expression, we deleted +the definition of testfunc along with it. Then, when we tried to call +testfunc a second time, the JIT could no longer find it.

+

The easiest way to fix this is to put the anonymous expression in a separate +module from the rest of the function definitions. The JIT will happily resolve +function calls across module boundaries, as long as each of the functions called +has a prototype, and is added to the JIT before it is called. By putting the +anonymous expression in a different module we can delete it without affecting +the rest of the functions.

+

In fact, we’re going to go a step further and put every function in its own +module. Doing so allows us to exploit a useful property of the KaleidoscopeJIT +that will make our environment more REPL-like: Functions can be added to the +JIT more than once (unlike a module where every function must have a unique +definition). When you look up a symbol in KaleidoscopeJIT it will always return +the most recent definition:

+
ready> def foo(x) x + 1;
+Read function definition:
+define double @foo(double %x) {
+entry:
+  %addtmp = fadd double %x, 1.000000e+00
+  ret double %addtmp
+}
+
+ready> foo(2);
+Evaluated to 3.000000
+
+ready> def foo(x) x + 2;
+define double @foo(double %x) {
+entry:
+  %addtmp = fadd double %x, 2.000000e+00
+  ret double %addtmp
+}
+
+ready> foo(2);
+Evaluated to 4.000000
+
+
+

To allow each function to live in its own module we’ll need a way to +re-generate previous function declarations into each new module we open:

+
static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+
+...
+
+Function *getFunction(std::string Name) {
+  // First, see if the function has already been added to the current module.
+  if (auto *F = TheModule->getFunction(Name))
+    return F;
+
+  // If not, check whether we can codegen the declaration from some existing
+  // prototype.
+  auto FI = FunctionProtos.find(Name);
+  if (FI != FunctionProtos.end())
+    return FI->second->codegen();
+
+  // If no existing prototype exists, return null.
+  return nullptr;
+}
+
+...
+
+Value *CallExprAST::codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = getFunction(Callee);
+
+...
+
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
+
+
+

To enable this, we’ll start by adding a new global, FunctionProtos, that +holds the most recent prototype for each function. We’ll also add a convenience +method, getFunction(), to replace calls to TheModule->getFunction(). +Our convenience method searches TheModule for an existing function +declaration, falling back to generating a new declaration from FunctionProtos if +it doesn’t find one. In CallExprAST::codegen() we just need to replace the +call to TheModule->getFunction(). In FunctionAST::codegen() we need to +update the FunctionProtos map first, then call getFunction(). With this +done, we can always obtain a function declaration in the current module for any +previously declared function.

+

We also need to update HandleDefinition and HandleExtern:

+
static void HandleDefinition() {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
+      fprintf(stderr, "Read function definition:");
+      FnIR->print(errs());
+      fprintf(stderr, "\n");
+      TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+    }
+  } else {
+    // Skip token for error recovery.
+     getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  auto ProtoAST = ParseExtern();
+  if (!ProtoAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  // Nothing to report or record when code generation fails.
+  auto *FnIR = ProtoAST->codegen();
+  if (!FnIR)
+    return;
+
+  fprintf(stderr, "Read extern: ");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // Keep the prototype so getFunction() can re-declare it in later modules.
+  FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+}
+
+
+

In HandleDefinition, we add two lines to transfer the newly defined function to +the JIT and open a new module. In HandleExtern, we just need to add one line to +add the prototype to FunctionProtos.

+

With these changes made, let’s try our REPL again (I removed the dump of the +anonymous functions this time; you should get the idea by now :) ):

+
ready> def foo(x) x + 1;
+ready> foo(2);
+Evaluated to 3.000000
+
+ready> def foo(x) x + 2;
+ready> foo(2);
+Evaluated to 4.000000
+
+
+

It works!

+

Even with this simple code, we get some surprisingly powerful capabilities - +check this out:

+
ready> extern sin(x);
+Read extern:
+declare double @sin(double)
+
+ready> extern cos(x);
+Read extern:
+declare double @cos(double)
+
+ready> sin(1.0);
+Read top-level expression:
+define double @2() {
+entry:
+  ret double 0x3FEAED548F090CEE
+}
+
+Evaluated to 0.841471
+
+ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
+Read function definition:
+define double @foo(double %x) {
+entry:
+  %calltmp = call double @sin(double %x)
+  %multmp = fmul double %calltmp, %calltmp
+  %calltmp2 = call double @cos(double %x)
+  %multmp4 = fmul double %calltmp2, %calltmp2
+  %addtmp = fadd double %multmp, %multmp4
+  ret double %addtmp
+}
+
+ready> foo(4.0);
+Read top-level expression:
+define double @3() {
+entry:
+  %calltmp = call double @foo(double 4.000000e+00)
+  ret double %calltmp
+}
+
+Evaluated to 1.000000
+
+
+

Whoa, how does the JIT know about sin and cos? The answer is surprisingly +simple: The KaleidoscopeJIT has a straightforward symbol resolution rule that +it uses to find symbols that aren’t available in any given module: First +it searches all the modules that have already been added to the JIT, from the +most recent to the oldest, to find the newest definition. If no definition is +found inside the JIT, it falls back to calling “dlsym("sin")” on the +Kaleidoscope process itself. Since “sin” is defined within the JIT’s +address space, it simply patches up calls in the module to call the libm +version of sin directly. But in some cases this even goes further: +as sin and cos are names of standard math functions, the constant folder +will directly evaluate the function calls to the correct result when called +with constants like in the “sin(1.0)” above.

+

In the future we’ll see how tweaking this symbol resolution rule can be used to +enable all sorts of useful features, from security (restricting the set of +symbols available to JIT’d code), to dynamic code generation based on symbol +names, and even lazy compilation.

+

One immediate benefit of the symbol resolution rule is that we can now extend +the language by writing arbitrary C++ code to implement operations. For example, +if we add:

+
#ifdef _WIN32
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" DLLEXPORT double putchard(double X) {
+  fputc((char)X, stderr);
+  return 0;
+}
+
+
+

Note that for Windows we need to actually export the functions because +the dynamic symbol loader will use GetProcAddress to find the symbols.

+

Now we can produce simple output to the console by using things like: +“extern putchard(x); putchard(120);”, which prints a lowercase ‘x’ +on the console (120 is the ASCII code for ‘x’). Similar code could be +used to implement file I/O, console input, and many other capabilities +in Kaleidoscope.

+

This completes the JIT and optimizer chapter of the Kaleidoscope +tutorial. At this point, we can compile a non-Turing-complete +programming language, optimize and JIT compile it in a user-driven way. +Next up we’ll look into extending the language with control flow +constructs, tackling some interesting LLVM IR issues +along the way.

+
+
+

4.5. Full Code Listing

+

Here is the complete code listing for our running example, enhanced with +the LLVM JIT and optimizer. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

If you are compiling this on Linux, make sure to add the “-rdynamic” +option as well. This makes sure that the external functions are resolved +properly at runtime.

+

Here is the code:

+
#include "../include/KaleidoscopeJIT.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::orc;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things. All named tokens are negative, so they cannot
+// collide with a character value.
+enum Token {
+  // end of input
+  tok_eof = -1,
+
+  // commands: the keywords "def" and "extern"
+  tok_def = -2,
+  tok_extern = -3,
+
+  // primary: identifiers and numeric literals
+  tok_identifier = -4,
+  tok_number = -5
+};
+
+// Side channels written by gettok() to carry the payload of the last token.
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+/// gettok - Lex and return the next token from standard input.
+///
+/// Returns one of the negative Token values for end of file, keywords,
+/// identifiers and numbers (filling in IdentifierStr or NumVal as needed);
+/// any other character is returned as its own character code.
+static int gettok() {
+  // One character of lookahead, kept across calls.
+  static int Pending = ' ';
+
+  // Discard leading whitespace.
+  while (isspace(Pending))
+    Pending = getchar();
+
+  // Identifiers and keywords: [a-zA-Z][a-zA-Z0-9]*
+  if (isalpha(Pending)) {
+    IdentifierStr = Pending;
+    for (Pending = getchar(); isalnum(Pending); Pending = getchar())
+      IdentifierStr += Pending;
+
+    if (IdentifierStr == "def")
+      return tok_def;
+    return IdentifierStr == "extern" ? tok_extern : tok_identifier;
+  }
+
+  // Numeric literals: [0-9.]+
+  if (isdigit(Pending) || Pending == '.') {
+    std::string Digits;
+    do {
+      Digits += Pending;
+      Pending = getchar();
+    } while (isdigit(Pending) || Pending == '.');
+
+    NumVal = strtod(Digits.c_str(), nullptr);
+    return tok_number;
+  }
+
+  // '#' starts a comment that runs to the end of the line.
+  if (Pending == '#') {
+    do
+      Pending = getchar();
+    while (!(Pending == EOF || Pending == '\n' || Pending == '\r'));
+
+    // Lex whatever follows the comment, unless the input ended inside it.
+    if (Pending != EOF)
+      return gettok();
+  }
+
+  // End of input: report it without consuming the EOF.
+  if (Pending == EOF)
+    return tok_eof;
+
+  // Anything else is returned as its own character code.
+  const int Current = Pending;
+  Pending = getchar();
+  return Current;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() = default;
+
+  /// Emit LLVM IR for this expression. The implementations in this file
+  /// return null when code generation fails.
+  virtual Value *codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val; // The literal's value.
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+
+  Value *codegen() override;
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name; // Name of the referenced variable.
+
+public:
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+
+  Value *codegen() override;
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op; // The operator character, e.g. '+'.
+  std::unique_ptr<ExprAST> LHS, RHS; // Owned operand expressions.
+
+public:
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+
+  Value *codegen() override;
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee; // Name of the function being called.
+  std::vector<std::unique_ptr<ExprAST>> Args; // Owned argument expressions.
+
+public:
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : Callee(Callee), Args(std::move(Args)) {}
+
+  Value *codegen() override;
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name; // Function name.
+  std::vector<std::string> Args; // Argument names, in declaration order.
+
+public:
+  PrototypeAST(const std::string &Name, std::vector<std::string> Args)
+      : Name(Name), Args(std::move(Args)) {}
+
+  /// Create the function declaration for this prototype in TheModule.
+  Function *codegen();
+  const std::string &getName() const { return Name; }
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto; // Moved into FunctionProtos by codegen().
+  std::unique_ptr<ExprAST> Body; // The expression the function evaluates.
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+
+  /// Emit the function; returns null when code generation fails.
+  Function *codegen();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok - The token the parser is currently looking at; together with
+/// getNextToken it forms a simple one-token buffer.
+static int CurTok;
+
+/// getNextToken - Read another token from the lexer, store it in CurTok and
+/// return it.
+static int getNextToken() {
+  CurTok = gettok();
+  return CurTok;
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+///
+/// Returns the precedence registered in BinopPrecedence for CurTok, or -1 when
+/// CurTok is not a 7-bit ASCII character or has no positive precedence
+/// registered.
+static int GetTokPrecedence() {
+  // Only plain ASCII character tokens can be operators; this also rejects the
+  // negative tok_* values. (Portable equivalent of the POSIX isascii().)
+  if (CurTok < 0 || CurTok > 127)
+    return -1;
+
+  // Make sure it's a declared binop. Use find() rather than operator[], which
+  // would insert a zero entry for every non-operator character queried.
+  auto Entry = BinopPrecedence.find(static_cast<char>(CurTok));
+  if (Entry == BinopPrecedence.end() || Entry->second <= 0)
+    return -1;
+  return Entry->second;
+}
+
+/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+///
+/// Wraps the value most recently lexed into NumVal and advances past it.
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  std::unique_ptr<ExprAST> Literal = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // step past the number
+  return Literal;
+}
+
+/// parenexpr ::= '(' expression ')'
+///
+/// Returns the inner expression, or null when it fails to parse or the
+/// closing parenthesis is missing.
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // skip '('
+  std::unique_ptr<ExprAST> Inner = ParseExpression();
+  if (Inner) {
+    if (CurTok != ')')
+      return LogError("expected ')'");
+    getNextToken(); // skip ')'
+  }
+  return Inner;
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+///
+/// Produces a variable reference, or a call when the identifier is followed
+/// by '('. Returns null when an argument or the argument list is malformed.
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  const std::string Ident = IdentifierStr;
+  getNextToken(); // skip the identifier
+
+  // No '(' follows: this is a plain variable reference.
+  if (CurTok != '(')
+    return std::make_unique<VariableExprAST>(Ident);
+
+  // Otherwise it is a call; collect the comma-separated arguments.
+  getNextToken(); // skip '('
+  std::vector<std::unique_ptr<ExprAST>> CallArgs;
+  if (CurTok != ')') {
+    for (;;) {
+      auto Arg = ParseExpression();
+      if (!Arg)
+        return nullptr;
+      CallArgs.push_back(std::move(Arg));
+
+      if (CurTok == ')')
+        break;
+
+      if (CurTok != ',')
+        return LogError("Expected ')' or ',' in argument list");
+      getNextToken(); // skip ','
+    }
+  }
+
+  getNextToken(); // skip ')'
+
+  return std::make_unique<CallExprAST>(Ident, std::move(CallArgs));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///
+/// Dispatches on the current token; any other token is reported as an error.
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  if (CurTok == tok_identifier)
+    return ParseIdentifierExpr();
+  if (CurTok == tok_number)
+    return ParseNumberExpr();
+  if (CurTok == '(')
+    return ParseParenExpr();
+  return LogError("unknown token when expecting an expression");
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+///
+/// ExprPrec is the lowest operator precedence this call may consume and LHS
+/// is the expression parsed so far. Returns the combined expression, or null
+/// when an operand fails to parse.
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  for (;;) {
+    // Precedence of the pending token (-1 when it is not a declared binop).
+    const int OpPrec = GetTokPrecedence();
+
+    // A token that binds less tightly than ExprPrec ends this sequence.
+    if (OpPrec < ExprPrec)
+      return LHS;
+
+    // Consume the operator itself.
+    const int Operator = CurTok;
+    getNextToken();
+
+    // Parse the operand that follows it.
+    std::unique_ptr<ExprAST> Operand = ParsePrimary();
+    if (!Operand)
+      return nullptr;
+
+    // If the operator after the operand binds more tightly, let it take the
+    // operand as its own left-hand side first.
+    if (OpPrec < GetTokPrecedence()) {
+      Operand = ParseBinOpRHS(OpPrec + 1, std::move(Operand));
+      if (!Operand)
+        return nullptr;
+    }
+
+    // Fold the pair into the new left-hand side.
+    LHS = std::make_unique<BinaryExprAST>(Operator, std::move(LHS),
+                                          std::move(Operand));
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+/// Parses a primary, then any binary operators that follow it.
+static std::unique_ptr<ExprAST> ParseExpression() {
+  if (auto First = ParsePrimary())
+    return ParseBinOpRHS(0, std::move(First));
+  return nullptr;
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///
+/// Returns the parsed prototype, or null after reporting which part of the
+/// prototype was missing.
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return LogErrorP("Expected function name in prototype");
+
+  const std::string FnName = IdentifierStr;
+  if (getNextToken() != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Argument names are plain identifiers with no separators between them.
+  std::vector<std::string> Params;
+  for (getNextToken(); CurTok == tok_identifier; getNextToken())
+    Params.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  getNextToken(); // skip ')'
+
+  return std::make_unique<PrototypeAST>(FnName, std::move(Params));
+}
+
+/// definition ::= 'def' prototype expression
+///
+/// Returns null when either the prototype or the body fails to parse.
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // skip 'def'
+
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
+
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(Body));
+}
+
+/// toplevelexpr ::= expression
+///
+/// Wraps the expression in a function named "__anon_expr" that takes no
+/// arguments.
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  // Make an anonymous proto.
+  auto AnonProto = std::make_unique<PrototypeAST>("__anon_expr",
+                                                  std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(AnonProto), std::move(Body));
+}
+
+/// external ::= 'extern' prototype
+///
+/// Skips the 'extern' keyword and parses the prototype that follows it.
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // skip 'extern'
+  auto Proto = ParsePrototype();
+  return Proto;
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+// Global code-generation state shared by the codegen() methods below.
+// Context handed to the type, constant and basic-block factory calls.
+static std::unique_ptr<LLVMContext> TheContext;
+// Module that newly generated functions are created in.
+static std::unique_ptr<Module> TheModule;
+// Builder used to emit instructions at the current insertion point.
+static std::unique_ptr<IRBuilder<>> Builder;
+// Maps each argument name of the function being generated to its Value;
+// cleared and refilled at the start of FunctionAST::codegen().
+static std::map<std::string, Value *> NamedValues;
+// Function pass manager run on each function after it has been verified.
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+// The JIT; in this part of the file it is used to obtain the data layout.
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+// Most recent prototype recorded for each function name; getFunction() uses
+// it to re-create declarations in the current module.
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
+// NOTE(review): not used in the visible part of the listing - presumably
+// unwraps llvm::Error results in the driver code; confirm.
+static ExitOnError ExitOnErr;
+
+/// LogErrorV - Reports Str through LogError and yields a null Value pointer.
+Value *LogErrorV(const char *Str) {
+  Value *None = nullptr;
+  LogError(Str);
+  return None;
+}
+
+/// getFunction - Find the function called Name in the current module,
+/// declaring it from its recorded prototype when the module does not have it
+/// yet. Returns null when no prototype is known for Name.
+Function *getFunction(std::string Name) {
+  // Already present in the current module?
+  Function *Existing = TheModule->getFunction(Name);
+  if (Existing)
+    return Existing;
+
+  // Otherwise emit a declaration from the recorded prototype, if there is one.
+  auto Known = FunctionProtos.find(Name);
+  return Known == FunctionProtos.end() ? nullptr : Known->second->codegen();
+}
+
+/// Emit the literal as a floating-point constant.
+Value *NumberExprAST::codegen() {
+  APFloat Literal(Val);
+  return ConstantFP::get(*TheContext, Literal);
+}
+
+/// Look up the Value bound to this variable name in the current function.
+/// Reports "Unknown variable name" and returns null when there is none.
+Value *VariableExprAST::codegen() {
+  // Look this variable up in the function. Use find() rather than
+  // operator[] so that an unknown name does not insert a null entry into
+  // NamedValues.
+  auto Found = NamedValues.find(Name);
+  if (Found == NamedValues.end() || !Found->second)
+    return LogErrorV("Unknown variable name");
+  return Found->second;
+}
+
+/// Emit both operands, then the instruction for the operator. Returns null
+/// when an operand fails or the operator is not one of + - * <.
+Value *BinaryExprAST::codegen() {
+  // Both sides are generated before either result is checked.
+  Value *Left = LHS->codegen();
+  Value *Right = RHS->codegen();
+  if (!(Left && Right))
+    return nullptr;
+
+  if (Op == '+')
+    return Builder->CreateFAdd(Left, Right, "addtmp");
+  if (Op == '-')
+    return Builder->CreateFSub(Left, Right, "subtmp");
+  if (Op == '*')
+    return Builder->CreateFMul(Left, Right, "multmp");
+  if (Op == '<') {
+    Value *Cmp = Builder->CreateFCmpULT(Left, Right, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder->CreateUIToFP(Cmp, Type::getDoubleTy(*TheContext),
+                                 "booltmp");
+  }
+  return LogErrorV("invalid binary operator");
+}
+
+/// Emit a call to the named function. Returns null when the callee is
+/// unknown, the argument count does not match, or an argument fails.
+Value *CallExprAST::codegen() {
+  // Resolve the callee through getFunction().
+  Function *Target = getFunction(Callee);
+  if (!Target)
+    return LogErrorV("Unknown function referenced");
+
+  // The call must pass exactly as many arguments as the callee declares.
+  if (Target->arg_size() != Args.size())
+    return LogErrorV("Incorrect # arguments passed");
+
+  // Generate the arguments in order, stopping at the first failure.
+  std::vector<Value *> ArgValues;
+  for (const auto &Arg : Args) {
+    Value *Generated = Arg->codegen();
+    if (!Generated)
+      return nullptr;
+    ArgValues.push_back(Generated);
+  }
+
+  return Builder->CreateCall(Target, ArgValues, "calltmp");
+}
+
+/// Create a function with external linkage in TheModule that takes one
+/// double per argument and returns a double, and name its arguments.
+Function *PrototypeAST::codegen() {
+  // Make the function type:  double(double,double) etc.
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  std::vector<Type *> ParamTys(Args.size(), DoubleTy);
+  FunctionType *FT = FunctionType::get(DoubleTy, ParamTys, false);
+
+  Function *F =
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
+
+  // Give each argument the name recorded in the prototype, in order.
+  auto NameIt = Args.begin();
+  for (auto &Arg : F->args())
+    Arg.setName(*NameIt++);
+
+  return F;
+}
+
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
+  Builder->SetInsertPoint(BB);
+
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args())
+    NamedValues[std::string(Arg.getName())] = &Arg;
+
+  if (Value *RetVal = Body->codegen()) {
+    // Finish off the function.
+    Builder->CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Run the optimizer on the function.
+    TheFPM->run(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static void InitializeModuleAndPassManager() {
+  // Open a new context and module.
+  TheContext = std::make_unique<LLVMContext>();
+  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
+  TheModule->setDataLayout(TheJIT->getDataLayout());
+
+  // Create a new builder for the module.
+  Builder = std::make_unique<IRBuilder<>>(*TheContext);
+
+  // Create a new pass manager attached to it.
+  TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
+
+static void HandleDefinition() {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
+      fprintf(stderr, "Read function definition:");
+      FnIR->print(errs());
+      fprintf(stderr, "\n");
+      ExitOnErr(TheJIT->addModule(
+          ThreadSafeModule(std::move(TheModule), std::move(TheContext))));
+      InitializeModuleAndPassManager();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
+      fprintf(stderr, "Read extern: ");
+      FnIR->print(errs());
+      fprintf(stderr, "\n");
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {
+      // Create a ResourceTracker to track JIT'd memory allocated to our
+      // anonymous expression -- that way we can free it after executing.
+      auto RT = TheJIT->getMainJITDylib().createResourceTracker();
+
+      auto TSM = ThreadSafeModule(std::move(TheModule), std::move(TheContext));
+      ExitOnErr(TheJIT->addModule(std::move(TSM), RT));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = ExitOnErr(TheJIT->lookup("__anon_expr"));
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
+      fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      ExitOnErr(RT->remove());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (true) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:
+      return;
+    case ';': // ignore top-level semicolons.
+      getNextToken();
+      break;
+    case tok_def:
+      HandleDefinition();
+      break;
+    case tok_extern:
+      HandleExtern();
+      break;
+    default:
+      HandleTopLevelExpression();
+      break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" DLLEXPORT double putchard(double X) {
+  fputc((char)X, stderr);
+  return 0;
+}
+
+/// printd - printf that takes a double prints it as "%f\n", returning 0.
+extern "C" DLLEXPORT double printd(double X) {
+  fprintf(stderr, "%f\n", X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
+  InitializeNativeTargetAsmParser();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40; // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  TheJIT = ExitOnErr(KaleidoscopeJIT::Create());
+
+  InitializeModuleAndPassManager();
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  return 0;
+}
+
+
+

Next: Extending the language: control flow

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl05.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl05.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl05.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl05.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,1871 @@ + + + + + + + + + 5. Kaleidoscope: Extending the Language: Control Flow — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

5. Kaleidoscope: Extending the Language: Control Flow

+ +
+

5.1. Chapter 5 Introduction

+

Welcome to Chapter 5 of the “Implementing a language with +LLVM” tutorial. Parts 1-4 described the implementation of +the simple Kaleidoscope language and included support for generating +LLVM IR, followed by optimizations and a JIT compiler. Unfortunately, as +presented, Kaleidoscope is mostly useless: it has no control flow other +than call and return. This means that you can’t have conditional +branches in the code, significantly limiting its power. In this episode +of “build that compiler”, we’ll extend Kaleidoscope to have an +if/then/else expression plus a simple ‘for’ loop.

+
+
+

5.2. If/Then/Else

+

Extending Kaleidoscope to support if/then/else is quite straightforward. +It basically requires adding support for this “new” concept to the +lexer, parser, AST, and LLVM code emitter. This example is nice, because +it shows how easy it is to “grow” a language over time, incrementally +extending it as new ideas are discovered.

+

Before we get going on “how” we add this extension, let’s talk about +“what” we want. The basic idea is that we want to be able to write this +sort of thing:

+
def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2);
+
+
+

In Kaleidoscope, every construct is an expression: there are no +statements. As such, the if/then/else expression needs to return a value +like any other. Since we’re using a mostly functional form, we’ll have +it evaluate its conditional, then return the ‘then’ or ‘else’ value +based on how the condition was resolved. This is very similar to the C +“?:” expression.

+

The semantics of the if/then/else expression are that it evaluates the +condition and interprets the result as a boolean: 0.0 is considered to be false and +everything else is considered to be true. If the condition is true, the +first subexpression is evaluated and returned; if the condition is +false, the second subexpression is evaluated and returned. Since +Kaleidoscope allows side effects, this behavior is important to nail +down.

+

Now that we know what we “want”, let’s break this down into its +constituent pieces.

+
+

5.2.1. Lexer Extensions for If/Then/Else

+

The lexer extensions are straightforward. First we add new enum values +for the relevant tokens:

+
// control
+tok_if = -6,
+tok_then = -7,
+tok_else = -8,
+
+
+

Once we have that, we recognize the new keywords in the lexer. This is +pretty simple stuff:

+
...
+if (IdentifierStr == "def")
+  return tok_def;
+if (IdentifierStr == "extern")
+  return tok_extern;
+if (IdentifierStr == "if")
+  return tok_if;
+if (IdentifierStr == "then")
+  return tok_then;
+if (IdentifierStr == "else")
+  return tok_else;
+return tok_identifier;
+
+
+
+
+

5.2.2. AST Extensions for If/Then/Else

+

To represent the new expression we add a new AST node for it:

+
/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  std::unique_ptr<ExprAST> Cond, Then, Else;
+
+public:
+  IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+            std::unique_ptr<ExprAST> Else)
+    : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+
+  Value *codegen() override;
+};
+
+
+

The AST node just has pointers to the various subexpressions.

+
+
+

5.2.3. Parser Extensions for If/Then/Else

+

Now that we have the relevant tokens coming from the lexer and we have +the AST node to build, our parsing logic is relatively straightforward. +First we define a new parsing function:

+
/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static std::unique_ptr<ExprAST> ParseIfExpr() {
+  getNextToken();  // eat the if.
+
+  // condition.
+  auto Cond = ParseExpression();
+  if (!Cond)
+    return nullptr;
+
+  if (CurTok != tok_then)
+    return LogError("expected then");
+  getNextToken();  // eat the then
+
+  auto Then = ParseExpression();
+  if (!Then)
+    return nullptr;
+
+  if (CurTok != tok_else)
+    return LogError("expected else");
+
+  getNextToken();
+
+  auto Else = ParseExpression();
+  if (!Else)
+    return nullptr;
+
+  return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
+                                      std::move(Else));
+}
+
+
+

Next we hook it up as a primary expression:

+
static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  default:
+    return LogError("unknown token when expecting an expression");
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case '(':
+    return ParseParenExpr();
+  case tok_if:
+    return ParseIfExpr();
+  }
+}
+
+
+
+
+

5.2.4. LLVM IR for If/Then/Else

+

Now that we have it parsing and building the AST, the final piece is +adding LLVM code generation support. This is the most interesting part +of the if/then/else example, because this is where it starts to +introduce new concepts. All of the code above has been thoroughly +described in previous chapters.

+

To motivate the code we want to produce, let’s take a look at a simple +example. Consider:

+
extern foo();
+extern bar();
+def baz(x) if x then foo() else bar();
+
+
+

If you disable optimizations, the code you’ll (soon) get from +Kaleidoscope looks like this:

+
declare double @foo()
+
+declare double @bar()
+
+define double @baz(double %x) {
+entry:
+  %ifcond = fcmp one double %x, 0.000000e+00
+  br i1 %ifcond, label %then, label %else
+
+then:       ; preds = %entry
+  %calltmp = call double @foo()
+  br label %ifcont
+
+else:       ; preds = %entry
+  %calltmp1 = call double @bar()
+  br label %ifcont
+
+ifcont:     ; preds = %else, %then
+  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
+  ret double %iftmp
+}
+
+
+

To visualize the control flow graph, you can use a nifty feature of the +LLVM ‘opt’ tool. If you put this LLVM +IR into “t.ll” and run “llvm-as < t.ll | opt -analyze -view-cfg”, a +window will pop up and you’ll +see this graph:

+
+Example CFG +

Example CFG

+
+

Another way to get this is to call “F->viewCFG()” or +“F->viewCFGOnly()” (where F is a “Function*”) either by +inserting actual calls into the code and recompiling or by calling these +in the debugger. LLVM has many nice features for visualizing various +graphs.

+

Getting back to the generated code, it is fairly simple: the entry block +evaluates the conditional expression (“x” in our case here) and compares +the result to 0.0 with the “fcmp one” instruction (‘one’ is “Ordered +and Not Equal”). Based on the result of this expression, the code jumps +to either the “then” or “else” blocks, which contain the expressions for +the true/false cases.

+

Once the then/else blocks are finished executing, they both branch back +to the ‘ifcont’ block to execute the code that happens after the +if/then/else. In this case the only thing left to do is to return to the +caller of the function. The question then becomes: how does the code +know which expression to return?

+

The answer to this question involves an important SSA operation: the +Phi +operation. +If you’re not familiar with SSA, the Wikipedia +article +is a good introduction and there are various other introductions to it +available on your favorite search engine. The short version is that +“execution” of the Phi operation requires “remembering” which block +control came from. The Phi operation takes on the value corresponding to +the input control block. In this case, if control comes in from the +“then” block, it gets the value of “calltmp”. If control comes from the +“else” block, it gets the value of “calltmp1”.

+

At this point, you are probably starting to think “Oh no! This means my +simple and elegant front-end will have to start generating SSA form in +order to use LLVM!”. Fortunately, this is not the case, and we strongly +advise not implementing an SSA construction algorithm in your +front-end unless there is an amazingly good reason to do so. In +practice, there are two sorts of values that float around in code +written for your average imperative programming language that might need +Phi nodes:

+
    +
  1. Code that involves user variables: x = 1; x = x + 1;

  2. +
  3. Values that are implicit in the structure of your AST, such as the +Phi node in this case.

  4. +
+

In Chapter 7 of this tutorial (“mutable variables”), +we’ll talk about #1 in depth. For now, just believe me that you don’t +need SSA construction to handle this case. For #2, you have the choice +of using the techniques that we will describe for #1, or you can insert +Phi nodes directly, if convenient. In this case, it is really +easy to generate the Phi node, so we choose to do it directly.

+

Okay, enough of the motivation and overview, let’s generate code!

+
+
+

5.2.5. Code Generation for If/Then/Else

+

In order to generate code for this, we implement the codegen method +for IfExprAST:

+
Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  CondV = Builder.CreateFCmpONE(
+      CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
+
+
+

This code is straightforward and similar to what we saw before. We emit +the expression for the condition, then compare that value to zero to get +a truth value as a 1-bit (bool) value.

+
Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+// Create blocks for the then and else cases.  Insert the 'then' block at the
+// end of the function.
+BasicBlock *ThenBB =
+    BasicBlock::Create(TheContext, "then", TheFunction);
+BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else");
+BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont");
+
+Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+
+

This code creates the basic blocks that are related to the if/then/else +statement, and correspond directly to the blocks in the example above. +The first line gets the current Function object that is being built. It +gets this by asking the builder for the current BasicBlock, and asking +that block for its “parent” (the function it is currently embedded +into).

+

Once it has that, it creates three blocks. Note that it passes +“TheFunction” into the constructor for the “then” block. This causes the +constructor to automatically insert the new block into the end of the +specified function. The other two blocks are created, but aren’t yet +inserted into the function.

+

Once the blocks are created, we can emit the conditional branch that +chooses between them. Note that creating new blocks does not implicitly +affect the IRBuilder, so it is still inserting into the block that the +condition went into. Also note that it is creating a branch to the +“then” block and the “else” block, even though the “else” block isn’t +inserted into the function yet. This is all ok: it is the standard way +that LLVM supports forward references.

+
// Emit then value.
+Builder.SetInsertPoint(ThenBB);
+
+Value *ThenV = Then->codegen();
+if (!ThenV)
+  return nullptr;
+
+Builder.CreateBr(MergeBB);
+// Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+ThenBB = Builder.GetInsertBlock();
+
+
+

After the conditional branch is inserted, we move the builder to start +inserting into the “then” block. Strictly speaking, this call moves the +insertion point to be at the end of the specified block. However, since +the “then” block is empty, it also starts out by inserting at the +beginning of the block. :)

+

Once the insertion point is set, we recursively codegen the “then” +expression from the AST. To finish off the “then” block, we create an +unconditional branch to the merge block. One interesting (and very +important) aspect of the LLVM IR is that it requires all basic +blocks to be “terminated” with a control +flow instruction such as return or branch. This means +that all control flow, including fall throughs must be made explicit +in the LLVM IR. If you violate this rule, the verifier will emit an +error.

+

The final line here is quite subtle, but is very important. The basic +issue is that when we create the Phi node in the merge block, we need to +set up the block/value pairs that indicate how the Phi will work. +Importantly, the Phi node expects to have an entry for each predecessor +of the block in the CFG. Why, then, are we getting the current block when +we just set it to ThenBB 5 lines above? The problem is that the “Then” +expression may actually itself change the block that the Builder is +emitting into if, for example, it contains a nested “if/then/else” +expression. Because calling codegen() recursively could arbitrarily change +the notion of the current block, we are required to get an up-to-date +value for code that will set up the Phi node.

+
// Emit else block.
+TheFunction->getBasicBlockList().push_back(ElseBB);
+Builder.SetInsertPoint(ElseBB);
+
+Value *ElseV = Else->codegen();
+if (!ElseV)
+  return nullptr;
+
+Builder.CreateBr(MergeBB);
+// codegen of 'Else' can change the current block, update ElseBB for the PHI.
+ElseBB = Builder.GetInsertBlock();
+
+
+

Code generation for the ‘else’ block is basically identical to codegen +for the ‘then’ block. The only significant difference is the first line, +which adds the ‘else’ block to the function. Recall previously that the +‘else’ block was created, but not added to the function. Now that the +‘then’ and ‘else’ blocks are emitted, we can finish up with the merge +code:

+
  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN =
+    Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp");
+
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+
+

The first two lines here are now familiar: the first adds the “merge” +block to the Function object (it was previously floating, like the else +block above). The second changes the insertion point so that newly +created code will go into the “merge” block. Once that is done, we need +to create the PHI node and set up the block/value pairs for the PHI.

+

Finally, the CodeGen function returns the phi node as the value computed +by the if/then/else expression. In our example above, this returned +value will feed into the code for the top-level function, which will +create the return instruction.

+

Overall, we now have the ability to execute conditional code in +Kaleidoscope. With this extension, Kaleidoscope is a fairly complete +language that can calculate a wide variety of numeric functions. Next up +we’ll add another useful expression that is familiar from non-functional +languages…

+
+
+
+

5.3. ‘for’ Loop Expression

+

Now that we know how to add basic control flow constructs to the +language, we have the tools to add more powerful things. Let’s add +something more aggressive, a ‘for’ expression:

+
extern putchard(char);
+def printstar(n)
+  for i = 1, i < n, 1.0 in
+    putchard(42);  # ascii 42 = '*'
+
+# print 100 '*' characters
+printstar(100);
+
+
+

This expression defines a new variable (“i” in this case) which iterates +from a starting value, while the condition (“i < n” in this case) is +true, incrementing by an optional step value (“1.0” in this case). If +the step value is omitted, it defaults to 1.0. While the condition is true, +the loop executes its body expression. Because we don’t have anything better +to return, we’ll just define the loop as always returning 0.0. In the +future when we have mutable variables, it will get more useful.

+

As before, let’s talk about the changes that we need to Kaleidoscope to +support this.

+
+

5.3.1. Lexer Extensions for the ‘for’ Loop

+

The lexer extensions are the same sort of thing as for if/then/else:

+
... in enum Token ...
+// control
+tok_if = -6, tok_then = -7, tok_else = -8,
+tok_for = -9, tok_in = -10
+
+... in gettok ...
+if (IdentifierStr == "def")
+  return tok_def;
+if (IdentifierStr == "extern")
+  return tok_extern;
+if (IdentifierStr == "if")
+  return tok_if;
+if (IdentifierStr == "then")
+  return tok_then;
+if (IdentifierStr == "else")
+  return tok_else;
+if (IdentifierStr == "for")
+  return tok_for;
+if (IdentifierStr == "in")
+  return tok_in;
+return tok_identifier;
+
+
+
+
+

5.3.2. AST Extensions for the ‘for’ Loop

+

The AST node is just as simple. It basically boils down to capturing the +variable name and the constituent expressions in the node.

+
/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  std::unique_ptr<ExprAST> Start, End, Step, Body;
+
+public:
+  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+             std::unique_ptr<ExprAST> Body)
+    : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+      Step(std::move(Step)), Body(std::move(Body)) {}
+
+  Value *codegen() override;
+};
+
+
+
+
+

5.3.3. Parser Extensions for the ‘for’ Loop

+

The parser code is also fairly standard. The only interesting thing here +is handling of the optional step value. The parser code handles it by +checking to see if the second comma is present. If not, it sets the step +value to null in the AST node:

+
/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static std::unique_ptr<ExprAST> ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after for");
+
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '=')
+    return LogError("expected '=' after for");
+  getNextToken();  // eat '='.
+
+
+  auto Start = ParseExpression();
+  if (!Start)
+    return nullptr;
+  if (CurTok != ',')
+    return LogError("expected ',' after for start value");
+  getNextToken();
+
+  auto End = ParseExpression();
+  if (!End)
+    return nullptr;
+
+  // The step value is optional.
+  std::unique_ptr<ExprAST> Step;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (!Step)
+      return nullptr;
+  }
+
+  if (CurTok != tok_in)
+    return LogError("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  return std::make_unique<ForExprAST>(IdName, std::move(Start),
+                                       std::move(End), std::move(Step),
+                                       std::move(Body));
+}
+
+
+

And again we hook it up as a primary expression:

+
static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  default:
+    return LogError("unknown token when expecting an expression");
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case '(':
+    return ParseParenExpr();
+  case tok_if:
+    return ParseIfExpr();
+  case tok_for:
+    return ParseForExpr();
+  }
+}
+
+
+
+
+

5.3.4. LLVM IR for the ‘for’ Loop

+

Now we get to the good part: the LLVM IR we want to generate for this +thing. With the simple example above, we get this LLVM IR (note that +this dump is generated with optimizations disabled for clarity):

+
declare double @putchard(double)
+
+define double @printstar(double %n) {
+entry:
+  ; initial value = 1.0 (inlined into phi)
+  br label %loop
+
+loop:       ; preds = %loop, %entry
+  %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
+  ; body
+  %calltmp = call double @putchard(double 4.200000e+01)
+  ; increment
+  %nextvar = fadd double %i, 1.000000e+00
+
+  ; termination test
+  %cmptmp = fcmp ult double %i, %n
+  %booltmp = uitofp i1 %cmptmp to double
+  %loopcond = fcmp one double %booltmp, 0.000000e+00
+  br i1 %loopcond, label %loop, label %afterloop
+
+afterloop:      ; preds = %loop
+  ; loop always returns 0.0
+  ret double 0.000000e+00
+}
+
+
+

This loop contains all the same constructs we saw before: a phi node, +several expressions, and some basic blocks. Let’s see how this fits +together.

+
+
+

5.3.5. Code Generation for the ‘for’ Loop

+

The first part of codegen is very simple: we just output the start +expression for the loop value:

+
Value *ForExprAST::codegen() {
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
+
+
+

With this out of the way, the next step is to set up the LLVM basic +block for the start of the loop body. In the case above, the whole loop +body is one block, but remember that the body code itself could consist +of multiple blocks (e.g. if it contains an if/then/else or a for/in +expression).

+
// Make the new basic block for the loop header, inserting after current
+// block.
+Function *TheFunction = Builder.GetInsertBlock()->getParent();
+BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+BasicBlock *LoopBB =
+    BasicBlock::Create(TheContext, "loop", TheFunction);
+
+// Insert an explicit fall through from the current block to the LoopBB.
+Builder.CreateBr(LoopBB);
+
+
+

This code is similar to what we saw for if/then/else. Because we will +need it to create the Phi node, we remember the block that falls through +into the loop. Once we have that, we create the actual block that starts +the loop and create an unconditional branch for the fall-through between +the two blocks.

+
// Start insertion in LoopBB.
+Builder.SetInsertPoint(LoopBB);
+
+// Start the PHI node with an entry for Start.
+PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(TheContext),
+                                      2, VarName.c_str());
+Variable->addIncoming(StartVal, PreheaderBB);
+
+
+

Now that the “preheader” for the loop is set up, we switch to emitting +code for the loop body. To begin with, we move the insertion point and +create the PHI node for the loop induction variable. Since we already +know the incoming value for the starting value, we add it to the Phi +node. Note that the Phi will eventually get a second value for the +backedge, but we can’t set it up yet (because it doesn’t exist!).

+
// Within the loop, the variable is defined equal to the PHI node.  If it
+// shadows an existing variable, we have to restore it, so save it now.
+Value *OldVal = NamedValues[VarName];
+NamedValues[VarName] = Variable;
+
+// Emit the body of the loop.  This, like any other expr, can change the
+// current BB.  Note that we ignore the value computed by the body, but don't
+// allow an error.
+if (!Body->codegen())
+  return nullptr;
+
+
+

Now the code starts to get more interesting. Our ‘for’ loop introduces a +new variable to the symbol table. This means that our symbol table can +now contain either function arguments or loop variables. To handle this, +before we codegen the body of the loop, we add the loop variable as the +current value for its name. Note that it is possible that there is a +variable of the same name in the outer scope. It would be easy to make +this an error (emit an error and return null if there is already an +entry for VarName) but we choose to allow shadowing of variables. In +order to handle this correctly, we remember the Value that we are +potentially shadowing in OldVal (which will be null if there is no +shadowed variable).

+

Once the loop variable is set into the symbol table, the code +recursively codegen’s the body. This allows the body to use the loop +variable: any references to it will naturally find it in the symbol +table.

+
// Emit the step value.
+Value *StepVal = nullptr;
+if (Step) {
+  StepVal = Step->codegen();
+  if (!StepVal)
+    return nullptr;
+} else {
+  // If not specified, use 1.0.
+  StepVal = ConstantFP::get(TheContext, APFloat(1.0));
+}
+
+Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+
+

Now that the body is emitted, we compute the next value of the iteration +variable by adding the step value, or 1.0 if it isn’t present. +‘NextVar’ will be the value of the loop variable on the next +iteration of the loop.

+
// Compute the end condition.
+Value *EndCond = End->codegen();
+if (!EndCond)
+  return nullptr;
+
+// Convert condition to a bool by comparing non-equal to 0.0.
+EndCond = Builder.CreateFCmpONE(
+    EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
+
+
+

Finally, we evaluate the exit condition of the loop, to determine whether +the loop should exit. This mirrors the condition evaluation for the +if/then/else expression.

+
// Create the "after loop" block and insert it.  The current insert block is
+// remembered first because it is the predecessor for the PHI backedge.
+BasicBlock *LoopEndBB = Builder->GetInsertBlock();
+BasicBlock *AfterBB =
+    BasicBlock::Create(*TheContext, "afterloop", TheFunction);
+
+// Insert the conditional branch into the end of LoopEndBB.
+Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
+
+// Any new code will be inserted in AfterBB.
+Builder->SetInsertPoint(AfterBB);
+
+
+

With the code for the body of the loop complete, we just need to finish +up the control flow for it. This code remembers the end block (for the +phi node), then creates the block for the loop exit (“afterloop”). Based +on the value of the exit condition, it creates a conditional branch that +chooses between executing the loop again and exiting the loop. Any +future code is emitted in the “afterloop” block, so it sets the +insertion position to it.

+
  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+
+  // Restore the unshadowed variable, or drop the loop variable entirely if
+  // nothing was shadowed, so it is not visible after the loop.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  // for expr always returns 0.0.  (TheContext is a std::unique_ptr in the
+  // full listing, hence the dereference.)
+  return Constant::getNullValue(Type::getDoubleTy(*TheContext));
+}
+
+
+

The final code handles various cleanups: now that we have the “NextVar” +value, we can add the incoming value to the loop PHI node. After that, +we remove the loop variable from the symbol table, so that it isn’t in +scope after the for loop. Finally, code generation of the for loop +always returns 0.0, so that is what we return from +ForExprAST::codegen().

+

With this, we conclude the “adding control flow to Kaleidoscope” chapter +of the tutorial. In this chapter we added two control flow constructs, +and used them to motivate a couple of aspects of the LLVM IR that are +important for front-end implementors to know. In the next chapter of our +saga, we will get a bit crazier and add user-defined +operators to our poor innocent language.

+
+
+
+

5.4. Full Code Listing

+

Here is the complete code listing for our running example, enhanced with +the if/then/else and for expressions. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
#include "../include/KaleidoscopeJIT.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::orc;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.  Every named token is negative, so it cannot be
+// confused with a raw character value.
+enum Token {
+  tok_eof = -1, // end of input
+
+  // commands
+  tok_def = -2,    // keyword "def"
+  tok_extern = -3, // keyword "extern"
+
+  // primary
+  tok_identifier = -4, // spelling is left in IdentifierStr
+  tok_number = -5,     // value is left in NumVal
+
+  // control
+  tok_if = -6,   // keyword "if"
+  tok_then = -7, // keyword "then"
+  tok_else = -8, // keyword "else"
+  tok_for = -9,  // keyword "for"
+  tok_in = -10   // keyword "in"
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+/// gettok - Read and classify the next token from standard input.
+///
+/// Returns one of the negative Token values for keywords, identifiers, numbers
+/// and end of file, or the raw character value for anything else.  As a side
+/// effect IdentifierStr is set for identifiers/keywords and NumVal for numbers.
+/// One character of lookahead is kept between calls in LastChar.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Discard leading whitespace.
+  for (; isspace(LastChar); LastChar = getchar())
+    ;
+
+  // identifier or keyword: [a-zA-Z][a-zA-Z0-9]*
+  if (isalpha(LastChar)) {
+    IdentifierStr.assign(1, static_cast<char>(LastChar));
+    for (LastChar = getchar(); isalnum(LastChar); LastChar = getchar())
+      IdentifierStr += LastChar;
+
+    // Reserved words and the token each one maps to.
+    static const struct {
+      const char *Spelling;
+      int Tok;
+    } Keywords[] = {
+        {"def", tok_def},   {"extern", tok_extern}, {"if", tok_if},
+        {"then", tok_then}, {"else", tok_else},     {"for", tok_for},
+        {"in", tok_in},
+    };
+    for (const auto &KW : Keywords)
+      if (IdentifierStr == KW.Spelling)
+        return KW.Tok;
+    return tok_identifier;
+  }
+
+  // Number: [0-9.]+  (the text is handed to strtod as-is).
+  if (isdigit(LastChar) || LastChar == '.') {
+    std::string Digits;
+    for (; isdigit(LastChar) || LastChar == '.'; LastChar = getchar())
+      Digits += LastChar;
+
+    NumVal = strtod(Digits.c_str(), nullptr);
+    return tok_number;
+  }
+
+  // '#' starts a comment that runs to the end of the line.
+  if (LastChar == '#') {
+    for (;;) {
+      LastChar = getchar();
+      if (LastChar == EOF || LastChar == '\n' || LastChar == '\r')
+        break;
+    }
+
+    // Lex whatever follows the comment, unless input ended inside it.
+    if (LastChar == EOF)
+      return tok_eof;
+    return gettok();
+  }
+
+  // End of file is reported without consuming it, so it is seen again on the
+  // next call.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Any other character is its own token; advance the lookahead past it.
+  const int Current = LastChar;
+  LastChar = getchar();
+  return Current;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExprAST - Base class for all expression nodes.  Every concrete node
+/// implements codegen(), which returns the llvm::Value for the expression or
+/// nullptr on error.
+class ExprAST {
+public:
+  virtual ~ExprAST() = default;
+
+  virtual Value *codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val; // the literal's value
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+
+  Value *codegen() override;
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name; // name looked up in NamedValues during codegen
+
+public:
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+
+  Value *codegen() override;
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op; // operator character, e.g. '+'
+  std::unique_ptr<ExprAST> LHS, RHS;
+
+public:
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+
+  Value *codegen() override;
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;                         // name of the called function
+  std::vector<std::unique_ptr<ExprAST>> Args; // argument expressions, in order
+
+public:
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : Callee(Callee), Args(std::move(Args)) {}
+
+  Value *codegen() override;
+};
+
+/// IfExprAST - Expression class for if/then/else.  All three sub-expressions
+/// are required by the parser.
+class IfExprAST : public ExprAST {
+  std::unique_ptr<ExprAST> Cond, Then, Else;
+
+public:
+  IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+            std::unique_ptr<ExprAST> Else)
+      : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+
+  Value *codegen() override;
+};
+
+/// ForExprAST - Expression class for for/in.  Step may be null, in which case
+/// codegen uses a step of 1.0.
+class ForExprAST : public ExprAST {
+  std::string VarName; // name of the loop induction variable
+  std::unique_ptr<ExprAST> Start, End, Step, Body;
+
+public:
+  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+             std::unique_ptr<ExprAST> Body)
+      : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+        Step(std::move(Step)), Body(std::move(Body)) {}
+
+  Value *codegen() override;
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+
+public:
+  PrototypeAST(const std::string &Name, std::vector<std::string> Args)
+      : Name(Name), Args(std::move(Args)) {}
+
+  Function *codegen();
+  const std::string &getName() const { return Name; }
+};
+
+/// FunctionAST - This class represents a function definition itself: a
+/// prototype plus the single expression that forms the body.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto;
+  std::unique_ptr<ExprAST> Body;
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+
+  Function *codegen();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - A one-token lookahead buffer.  CurTok holds the token
+/// the parser is currently examining; getNextToken pulls the next token from
+/// the lexer, stores it in CurTok and returns it.
+static int CurTok;
+static int getNextToken() {
+  CurTok = gettok();
+  return CurTok;
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+///
+/// Returns the precedence registered for CurTok in BinopPrecedence, or -1 when
+/// CurTok is not an ASCII character, is not registered, or has a non-positive
+/// precedence.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Use find() rather than operator[]: operator[] would insert a zero entry
+  // for every non-operator character that is ever queried.
+  auto It = BinopPrecedence.find(static_cast<char>(CurTok));
+  if (It == BinopPrecedence.end() || It->second <= 0)
+    return -1;
+  return It->second;
+}
+
+/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+///
+/// Wraps the value the lexer left in NumVal and advances past the literal.
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  std::unique_ptr<ExprAST> Literal = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // step over the number
+  return Literal;
+}
+
+/// parenexpr ::= '(' expression ')'
+///
+/// Returns the inner expression itself; parentheses produce no AST node.
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // step over '('
+
+  std::unique_ptr<ExprAST> Inner = ParseExpression();
+  if (Inner == nullptr)
+    return nullptr;
+
+  if (CurTok == ')') {
+    getNextToken(); // step over ')'
+    return Inner;
+  }
+  return LogError("expected ')'");
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+///
+/// Produces a variable reference, or a call when the identifier is followed by
+/// a parenthesised, comma-separated argument list.
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  const std::string IdName = IdentifierStr;
+  getNextToken(); // step over the identifier
+
+  // No '(' means this is a plain variable reference.
+  if (CurTok != '(')
+    return std::make_unique<VariableExprAST>(IdName);
+
+  getNextToken(); // step over '('
+
+  std::vector<std::unique_ptr<ExprAST>> CallArgs;
+  if (CurTok != ')') {
+    for (;;) {
+      std::unique_ptr<ExprAST> Arg = ParseExpression();
+      if (!Arg)
+        return nullptr;
+      CallArgs.push_back(std::move(Arg));
+
+      if (CurTok == ')')
+        break;
+      if (CurTok != ',')
+        return LogError("Expected ')' or ',' in argument list");
+
+      getNextToken(); // step over ','; another argument must follow
+    }
+  }
+
+  getNextToken(); // step over ')'
+
+  return std::make_unique<CallExprAST>(IdName, std::move(CallArgs));
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+///
+/// All three sub-expressions are mandatory; a missing 'then' or 'else' keyword
+/// is reported through LogError.
+static std::unique_ptr<ExprAST> ParseIfExpr() {
+  getNextToken(); // step over 'if'
+
+  std::unique_ptr<ExprAST> CondExpr = ParseExpression();
+  if (CondExpr == nullptr)
+    return nullptr;
+
+  if (CurTok != tok_then)
+    return LogError("expected then");
+  getNextToken(); // step over 'then'
+
+  std::unique_ptr<ExprAST> ThenExpr = ParseExpression();
+  if (ThenExpr == nullptr)
+    return nullptr;
+
+  if (CurTok != tok_else)
+    return LogError("expected else");
+  getNextToken(); // step over 'else'
+
+  std::unique_ptr<ExprAST> ElseExpr = ParseExpression();
+  if (ElseExpr == nullptr)
+    return nullptr;
+
+  return std::make_unique<IfExprAST>(std::move(CondExpr), std::move(ThenExpr),
+                                      std::move(ElseExpr));
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+///
+/// The step expression is optional; when it is absent the node is built with a
+/// null Step.
+static std::unique_ptr<ExprAST> ParseForExpr() {
+  getNextToken(); // step over 'for'
+
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after for");
+  const std::string LoopVar = IdentifierStr;
+  getNextToken(); // step over the identifier
+
+  if (CurTok != '=')
+    return LogError("expected '=' after for");
+  getNextToken(); // step over '='
+
+  std::unique_ptr<ExprAST> StartExpr = ParseExpression();
+  if (StartExpr == nullptr)
+    return nullptr;
+
+  if (CurTok != ',')
+    return LogError("expected ',' after for start value");
+  getNextToken(); // step over ','
+
+  std::unique_ptr<ExprAST> EndExpr = ParseExpression();
+  if (EndExpr == nullptr)
+    return nullptr;
+
+  // A second comma introduces the optional step expression.
+  std::unique_ptr<ExprAST> StepExpr;
+  if (CurTok == ',') {
+    getNextToken(); // step over ','
+    StepExpr = ParseExpression();
+    if (StepExpr == nullptr)
+      return nullptr;
+  }
+
+  if (CurTok != tok_in)
+    return LogError("expected 'in' after for");
+  getNextToken(); // step over 'in'
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (BodyExpr == nullptr)
+    return nullptr;
+
+  return std::make_unique<ForExprAST>(LoopVar, std::move(StartExpr),
+                                       std::move(EndExpr), std::move(StepExpr),
+                                       std::move(BodyExpr));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///
+/// Dispatches on the current token; any other token is reported as an error.
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  if (CurTok == tok_identifier)
+    return ParseIdentifierExpr();
+  if (CurTok == tok_number)
+    return ParseNumberExpr();
+  if (CurTok == '(')
+    return ParseParenExpr();
+  if (CurTok == tok_if)
+    return ParseIfExpr();
+  if (CurTok == tok_for)
+    return ParseForExpr();
+
+  return LogError("unknown token when expecting an expression");
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+///
+/// Operator-precedence parse of the operator/operand pairs following LHS.
+/// ExprPrec is the minimum precedence an operator must have to be consumed
+/// here; the function returns as soon as the pending token binds less tightly.
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  for (;;) {
+    const int OpPrec = GetTokPrecedence();
+
+    // Not an operator (precedence -1), or one that binds too loosely: done.
+    if (OpPrec < ExprPrec)
+      return LHS;
+
+    const int Operator = CurTok;
+    getNextToken(); // step over the operator
+
+    std::unique_ptr<ExprAST> Operand = ParsePrimary();
+    if (!Operand)
+      return nullptr;
+
+    // If the operator that follows binds tighter than this one, it claims
+    // Operand as its own left-hand side first.
+    if (OpPrec < GetTokPrecedence()) {
+      Operand = ParseBinOpRHS(OpPrec + 1, std::move(Operand));
+      if (!Operand)
+        return nullptr;
+    }
+
+    // Fold the pair into a new left-hand side and keep scanning.
+    LHS = std::make_unique<BinaryExprAST>(Operator, std::move(LHS),
+                                           std::move(Operand));
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+/// Parses one primary and then any binary-operator tail that follows it.
+static std::unique_ptr<ExprAST> ParseExpression() {
+  std::unique_ptr<ExprAST> First = ParsePrimary();
+  if (First == nullptr)
+    return nullptr;
+
+  // Precedence 0 accepts every registered operator.
+  return ParseBinOpRHS(0, std::move(First));
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///
+/// Parameter names are separated by whitespace only; the grammar has no commas
+/// here.
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return LogErrorP("Expected function name in prototype");
+  const std::string FnName = IdentifierStr;
+  getNextToken(); // step over the function name
+
+  if (CurTok != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Collect identifiers until the first token that is not one.
+  std::vector<std::string> Params;
+  for (int Tok = getNextToken(); Tok == tok_identifier; Tok = getNextToken())
+    Params.push_back(IdentifierStr);
+
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+  getNextToken(); // step over ')'
+
+  return std::make_unique<PrototypeAST>(FnName, std::move(Params));
+}
+
+/// definition ::= 'def' prototype expression
+///
+/// Returns null if either the prototype or the body fails to parse.
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // step over 'def'
+
+  std::unique_ptr<PrototypeAST> Signature = ParsePrototype();
+  if (Signature == nullptr)
+    return nullptr;
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (BodyExpr == nullptr)
+    return nullptr;
+
+  return std::make_unique<FunctionAST>(std::move(Signature),
+                                       std::move(BodyExpr));
+}
+
+/// toplevelexpr ::= expression
+///
+/// Wraps a bare expression in a zero-argument function named "__anon_expr".
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  std::unique_ptr<ExprAST> Expr = ParseExpression();
+  if (Expr == nullptr)
+    return nullptr;
+
+  // Anonymous prototype with an empty parameter list.
+  std::unique_ptr<PrototypeAST> AnonProto = std::make_unique<PrototypeAST>(
+      "__anon_expr", std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(AnonProto), std::move(Expr));
+}
+
+/// external ::= 'extern' prototype
+///
+/// An extern declaration is just a prototype with no body.
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // step over 'extern'
+  std::unique_ptr<PrototypeAST> Declared = ParsePrototype();
+  return Declared;
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static std::unique_ptr<LLVMContext> TheContext;
+static std::unique_ptr<Module> TheModule;
+static std::unique_ptr<IRBuilder<>> Builder;
+static std::map<std::string, Value *> NamedValues;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
+static ExitOnError ExitOnErr;
+
+/// LogErrorV - Report Str through LogError and return a null Value pointer, for
+/// use on codegen error paths.
+Value *LogErrorV(const char *Str) {
+  (void)LogError(Str);
+  return nullptr;
+}
+
+/// getFunction - Resolve Name to a Function in the current module.
+///
+/// Returns the function if the module already has it; otherwise emits a fresh
+/// declaration from the prototype recorded in FunctionProtos; otherwise null.
+Function *getFunction(std::string Name) {
+  // Already present in the module being built?
+  Function *Existing = TheModule->getFunction(Name);
+  if (Existing != nullptr)
+    return Existing;
+
+  // Otherwise re-create the declaration from a known prototype, if any.
+  const auto ProtoIt = FunctionProtos.find(Name);
+  if (ProtoIt == FunctionProtos.end())
+    return nullptr;
+  return ProtoIt->second->codegen();
+}
+
+/// Emit the literal as a floating-point constant.
+Value *NumberExprAST::codegen() {
+  const APFloat Literal(Val);
+  return ConstantFP::get(*TheContext, Literal);
+}
+
+/// Look the variable up in NamedValues and return its current Value.
+///
+/// Returns null (after logging "Unknown variable name") when the name is not
+/// bound or is bound to a null Value.
+Value *VariableExprAST::codegen() {
+  // Use find() rather than operator[]: operator[] would insert a null entry
+  // into the symbol table for every unknown name that is referenced.
+  auto It = NamedValues.find(Name);
+  if (It == NamedValues.end() || !It->second)
+    return LogErrorV("Unknown variable name");
+  return It->second;
+}
+
+/// Emit both operands, then the instruction for the operator.  Only the
+/// operators handled below are supported; any other operator character is
+/// reported as an error.
+Value *BinaryExprAST::codegen() {
+  // Both sides are emitted before either result is checked.
+  Value *Left = LHS->codegen();
+  Value *Right = RHS->codegen();
+  if (Left == nullptr || Right == nullptr)
+    return nullptr;
+
+  if (Op == '+')
+    return Builder->CreateFAdd(Left, Right, "addtmp");
+  if (Op == '-')
+    return Builder->CreateFSub(Left, Right, "subtmp");
+  if (Op == '*')
+    return Builder->CreateFMul(Left, Right, "multmp");
+  if (Op == '<') {
+    Value *Cmp = Builder->CreateFCmpULT(Left, Right, "cmptmp");
+    // Widen the boolean comparison result to double 0.0 or 1.0.
+    return Builder->CreateUIToFP(Cmp, Type::getDoubleTy(*TheContext),
+                                 "booltmp");
+  }
+
+  return LogErrorV("invalid binary operator");
+}
+
+/// Emit a call to Callee.  Fails if the callee is unknown, the argument count
+/// differs from the callee's, or any argument fails to emit.
+Value *CallExprAST::codegen() {
+  // Resolve the callee by name.
+  Function *Target = getFunction(Callee);
+  if (Target == nullptr)
+    return LogErrorV("Unknown function referenced");
+
+  // Arity must match exactly.
+  if (Target->arg_size() != Args.size())
+    return LogErrorV("Incorrect # arguments passed");
+
+  // Emit arguments left to right, stopping at the first failure.
+  std::vector<Value *> Emitted;
+  for (const auto &Arg : Args) {
+    Value *ArgV = Arg->codegen();
+    if (ArgV == nullptr)
+      return nullptr;
+    Emitted.push_back(ArgV);
+  }
+
+  return Builder->CreateCall(Target, Emitted, "calltmp");
+}
+
+/// Emit an if/then/else expression.
+///
+/// Evaluates the condition, branches to a 'then' or 'else' block, and joins the
+/// two results in a PHI node in the 'ifcont' block.  Returns the PHI node, or
+/// null if any sub-expression fails to emit.
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  CondV = Builder->CreateFCmpONE(
+      CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.  'else' and 'ifcont' are created without a parent
+  // here and are appended to the function further down.
+  BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
+  BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
+
+  Builder->CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder->SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->codegen();
+  if (!ThenV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder->GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder->SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder->GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder->SetInsertPoint(MergeBB);
+  PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
+
+  // One incoming value per predecessor: the blocks that actually end in the
+  // branch to MergeBB, which may differ from the blocks created above.
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+// Output for-loop as:
+//   ...
+//   start = startexpr
+//   goto loop
+// loop:
+//   variable = phi [start, loopheader], [nextvariable, loopend]
+//   ...
+//   bodyexpr
+//   ...
+// loopend:
+//   step = stepexpr
+//   nextvariable = variable + step
+//   endcond = endexpr
+//   br endcond, loop, outloop
+// outloop:
+//
+// In the emitted IR the blocks are named "loop" and "afterloop".  The body is
+// emitted before the end condition is evaluated.  Returns the constant 0.0, or
+// null if any sub-expression fails to emit.
+Value *ForExprAST::codegen() {
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder->GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder->CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder->SetInsertPoint(LoopBB);
+
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable =
+      Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, VarName);
+  Variable->addIncoming(StartVal, PreheaderBB);
+
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (!Body->codegen())
+    return nullptr;
+
+  // Emit the step value.
+  Value *StepVal = nullptr;
+  if (Step) {
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
+  }
+
+  Value *NextVar = Builder->CreateFAdd(Variable, StepVal, "nextvar");
+
+  // Compute the end condition.
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  EndCond = Builder->CreateFCmpONE(
+      EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
+
+  // Create the "after loop" block and insert it.  The current block is
+  // captured first: it is the predecessor used for the PHI backedge below.
+  BasicBlock *LoopEndBB = Builder->GetInsertBlock();
+  BasicBlock *AfterBB =
+      BasicBlock::Create(*TheContext, "afterloop", TheFunction);
+
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder->SetInsertPoint(AfterBB);
+
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(*TheContext));
+}
+
+/// Declare the function in the current module with external linkage.  Every
+/// parameter and the return value have type double; the IR arguments are named
+/// after the prototype's parameter names.
+Function *PrototypeAST::codegen() {
+  // Signature: double(double, ..., double), one double per parameter.
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  std::vector<Type *> ParamTypes(Args.size(), DoubleTy);
+  FunctionType *Signature =
+      FunctionType::get(DoubleTy, ParamTypes, /*isVarArg=*/false);
+
+  Function *F = Function::Create(Signature, Function::ExternalLinkage, Name,
+                                 TheModule.get());
+
+  // Give each IR argument the name of the matching prototype parameter.
+  auto NameIt = Args.begin();
+  for (auto &Arg : F->args())
+    Arg.setName(*NameIt++);
+
+  return F;
+}
+
+/// Emit the function: register its prototype, build the body in an "entry"
+/// block, verify it and run the function pass manager over it.  On a body
+/// error the partially built function is erased and null is returned.
+Function *FunctionAST::codegen() {
+  // Ownership of the prototype moves into FunctionProtos; keep a reference so
+  // it can still be used afterwards.
+  PrototypeAST &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+
+  Function *TheFunction = getFunction(P.getName());
+  if (TheFunction == nullptr)
+    return nullptr;
+
+  // All code for the body starts in a fresh entry block.
+  BasicBlock *EntryBB = BasicBlock::Create(*TheContext, "entry", TheFunction);
+  Builder->SetInsertPoint(EntryBB);
+
+  // The symbol table starts out holding exactly the function's arguments.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args())
+    NamedValues[std::string(Arg.getName())] = &Arg;
+
+  Value *RetVal = Body->codegen();
+  if (RetVal == nullptr) {
+    // The body failed to emit: drop the function again.
+    TheFunction->eraseFromParent();
+    return nullptr;
+  }
+
+  // Finish off the function.
+  Builder->CreateRet(RetVal);
+
+  // Validate the generated code, checking for consistency.
+  verifyFunction(*TheFunction);
+
+  // Run the optimizer on the function.
+  TheFPM->run(*TheFunction);
+
+  return TheFunction;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+/// Replace TheContext, TheModule, Builder and TheFPM with fresh instances.
+///
+/// TheJIT is dereferenced for the data layout, so it must already have been
+/// created.  The handlers call this again after handing a module to the JIT.
+static void InitializeModuleAndPassManager() {
+  // Open a new module.
+  TheContext = std::make_unique<LLVMContext>();
+  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
+  TheModule->setDataLayout(TheJIT->getDataLayout());
+
+  // Create a new builder for the module.
+  Builder = std::make_unique<IRBuilder<>>(*TheContext);
+
+  // Create a new pass manager attached to it.
+  TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
+
+/// Handle a top-level 'def': parse it, emit it, print the IR, hand the module
+/// to the JIT and start a fresh module.  On a parse error one token is skipped
+/// for recovery; on a codegen error nothing further happens.
+static void HandleDefinition() {
+  auto FnAST = ParseDefinition();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  Function *FnIR = FnAST->codegen();
+  if (FnIR == nullptr)
+    return;
+
+  fprintf(stderr, "Read function definition:");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // The JIT takes over the module and context, so new ones are needed.
+  ExitOnErr(TheJIT->addModule(
+      ThreadSafeModule(std::move(TheModule), std::move(TheContext))));
+  InitializeModuleAndPassManager();
+}
+
+/// Handle a top-level 'extern': parse the prototype, emit its declaration,
+/// print it, and record the prototype in FunctionProtos.  On a parse error one
+/// token is skipped for recovery.
+static void HandleExtern() {
+  auto ProtoAST = ParseExtern();
+  if (!ProtoAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  Function *FnIR = ProtoAST->codegen();
+  if (FnIR == nullptr)
+    return;
+
+  fprintf(stderr, "Read extern: ");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // Keep the prototype so getFunction can re-declare it later.
+  FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+}
+
+/// Evaluate a top-level expression: wrap it in an anonymous function, JIT it,
+/// call it, print the result, and remove its module from the JIT again.  On a
+/// parse error one token is skipped for recovery.
+static void HandleTopLevelExpression() {
+  auto FnAST = ParseTopLevelExpr();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  if (!FnAST->codegen())
+    return;
+
+  // A ResourceTracker covers the JIT'd memory of this anonymous expression so
+  // it can be released once the expression has run.
+  auto RT = TheJIT->getMainJITDylib().createResourceTracker();
+
+  auto TSM = ThreadSafeModule(std::move(TheModule), std::move(TheContext));
+  ExitOnErr(TheJIT->addModule(std::move(TSM), RT));
+  InitializeModuleAndPassManager();
+
+  // Find the freshly JIT'd anonymous function.
+  auto ExprSymbol = ExitOnErr(TheJIT->lookup("__anon_expr"));
+
+  // Its address is cast to "double (*)()" so it can be called natively.
+  double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
+  fprintf(stderr, "Evaluated to %f\n", FP());
+
+  // Remove the anonymous expression's module from the JIT.
+  ExitOnErr(RT->remove());
+}
+
+/// top ::= definition | external | expression | ';'
+///
+/// Prints the prompt and dispatches on the current token until end of file.
+static void MainLoop() {
+  for (;;) {
+    fprintf(stderr, "ready> ");
+
+    if (CurTok == tok_eof)
+      return;
+
+    if (CurTok == ';') {
+      getNextToken(); // top-level semicolons are ignored
+      continue;
+    }
+
+    if (CurTok == tok_def)
+      HandleDefinition();
+    else if (CurTok == tok_extern)
+      HandleExtern();
+    else
+      HandleTopLevelExpression();
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+
+/// putchard - Write X, converted to a char, to stderr; always returns 0.
+extern "C" DLLEXPORT double putchard(double X) {
+  const char Ch = (char)X;
+  fputc(Ch, stderr);
+  return 0;
+}
+
+/// printd - Print X to stderr as "%f\n"; always returns 0.
+extern "C" DLLEXPORT double printd(double X) {
+  const double Printed = X;
+  fprintf(stderr, "%f\n", Printed);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+/// Program entry point: initialise the native target, register the built-in
+/// binary operators, create the JIT and the first module, then run the
+/// interpreter loop until end of input.
+int main() {
+  InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
+  InitializeNativeTargetAsmParser();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40; // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // The JIT must exist before InitializeModuleAndPassManager, which reads its
+  // data layout.
+  TheJIT = ExitOnErr(KaleidoscopeJIT::Create());
+
+  InitializeModuleAndPassManager();
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  return 0;
+}
+
+
+

Next: Extending the language: user-defined operators

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl06.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl06.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl06.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl06.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,1961 @@ + + + + + + + + + 6. Kaleidoscope: Extending the Language: User-defined Operators — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

6. Kaleidoscope: Extending the Language: User-defined Operators

+ +
+

6.1. Chapter 6 Introduction

+

Welcome to Chapter 6 of the “Implementing a language with +LLVM” tutorial. At this point in our tutorial, we now +have a fully functional language that is fairly minimal, but also +useful. There is still one big problem with it, however. Our language +doesn’t have many useful operators (like division, logical negation, or +even any comparisons besides less-than).

+

This chapter of the tutorial takes a wild digression into adding +user-defined operators to the simple and beautiful Kaleidoscope +language. This digression now gives us a simple and ugly language in +some ways, but also a powerful one at the same time. One of the great +things about creating your own language is that you get to decide what +is good or bad. In this tutorial we’ll assume that it is okay to use +this as a way to show some interesting parsing techniques.

+

At the end of this tutorial, we’ll run through an example Kaleidoscope +application that renders the Mandelbrot set. This gives an +example of what you can build with Kaleidoscope and its feature set.

+
+
+

6.2. User-defined Operators: the Idea

+

The “operator overloading” that we will add to Kaleidoscope is more +general than in languages like C++. In C++, you are only allowed to +redefine existing operators: you can’t programmatically change the +grammar, introduce new operators, change precedence levels, etc. In this +chapter, we will add this capability to Kaleidoscope, which will let the +user round out the set of operators that are supported.

+

The point of going into user-defined operators in a tutorial like this +is to show the power and flexibility of using a hand-written parser. +Thus far, the parser we have been implementing uses recursive descent +for most parts of the grammar and operator precedence parsing for the +expressions. See Chapter 2 for details. By +using operator precedence parsing, it is very easy to allow +the programmer to introduce new operators into the grammar: the grammar +is dynamically extensible as the JIT runs.

+

The two specific features we’ll add are programmable unary operators +(right now, Kaleidoscope has no unary operators at all) as well as +binary operators. An example of this is:

+
# Logical unary not.
+def unary!(v)
+  if v then
+    0
+  else
+    1;
+
+# Define > with the same precedence as <.
+def binary> 10 (LHS RHS)
+  RHS < LHS;
+
+# Binary "logical or", (note that it does not "short circuit")
+def binary| 5 (LHS RHS)
+  if LHS then
+    1
+  else if RHS then
+    1
+  else
+    0;
+
+# Define = with slightly lower precedence than relationals.
+def binary= 9 (LHS RHS)
+  !(LHS < RHS | LHS > RHS);
+
+
+

Many languages aspire to being able to implement their standard runtime +library in the language itself. In Kaleidoscope, we can implement +significant parts of the language in the library!

+

We will break down implementation of these features into two parts: +implementing support for user-defined binary operators and adding unary +operators.

+
+
+

6.3. User-defined Binary Operators

+

Adding support for user-defined binary operators is pretty simple with +our current framework. We’ll first add support for the unary/binary +keywords:

+
enum Token {
+  ...
+  // operators
+  tok_binary = -11,
+  tok_unary = -12
+};
+...
+static int gettok() {
+...
+    if (IdentifierStr == "for")
+      return tok_for;
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
+    return tok_identifier;
+
+
+

This just adds lexer support for the unary and binary keywords, like we +did in previous chapters. One nice thing +about our current AST is that we represent binary operators with full +generalisation by using their ASCII code as the opcode. For our extended +operators, we’ll use this same representation, so we don’t need any new +AST or parser support.

+

On the other hand, we have to be able to represent the definitions of +these new operators, in the “def binary| 5” part of the function +definition. In our grammar so far, the “name” for the function +definition is parsed as the “prototype” production and into the +PrototypeAST AST node. To represent our new user-defined operators +as prototypes, we have to extend the PrototypeAST AST node like +this:

+
/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, its argument names, and whether it is an operator.
+class PrototypeAST {
+  std::string Name;  // Function name; operators are named "unary<op>" / "binary<op>".
+  std::vector<std::string> Args;  // Argument names, in declaration order.
+  bool IsOperator;  // True when this prototype defines a user-defined operator.
+  unsigned Precedence;  // Precedence if a binary op; not consulted otherwise.
+
+public:
+  PrototypeAST(const std::string &name, std::vector<std::string> Args,
+               bool IsOperator = false, unsigned Prec = 0)
+  : Name(name), Args(std::move(Args)), IsOperator(IsOperator),
+    Precedence(Prec) {}
+
+  Function *codegen();
+  const std::string &getName() const { return Name; }
+
+  bool isUnaryOp() const { return IsOperator && Args.size() == 1; }  // operator with one operand
+  bool isBinaryOp() const { return IsOperator && Args.size() == 2; }  // operator with two operands
+
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());  // only valid on operator prototypes
+    return Name[Name.size() - 1];  // the operator character is the last character of Name
+  }
+
+  unsigned getBinaryPrecedence() const { return Precedence; }
+};
+
+
+

Basically, in addition to knowing a name for the prototype, we now keep +track of whether it was an operator, and if it was, what precedence +level the operator is at. The precedence is only used for binary +operators (as you’ll see below, it just doesn’t apply for unary +operators). Now that we have a way to represent the prototype for a +user-defined operator, we need to parse it:

+
/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? '(' id id ')'
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;  // Used when the definition gives no precedence.
+
+  switch (CurTok) {
+  default:
+    return LogErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();  // Move past 'binary'; CurTok should now be the operator character.
+    if (!isascii(CurTok))
+      return LogErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;  // e.g. "binary|" for a user-defined '|'.
+    Kind = 2;
+    getNextToken();
+
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return LogErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;  // The cast drops any fractional part.
+      getNextToken();
+    }
+    break;
+  }
+
+  if (CurTok != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;  // Names are whitespace-separated; a ',' fails the ')' check below.
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)  // Kind doubles as the required operand count.
+    return LogErrorP("Invalid number of operands for operator");
+
+  return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames), Kind != 0,
+                                         BinaryPrecedence);
+}
+
+
+

This is all fairly straightforward parsing code, and we have already +seen a lot of similar code in the past. One interesting part about the +code above is the couple lines that set up FnName for binary +operators. This builds names like “binary@” for a newly defined “@” +operator. It then takes advantage of the fact that symbol names in the +LLVM symbol table are allowed to have any character in them, including +embedded nul characters.

+

The next interesting thing to add is codegen support for these binary +operators. Given our current structure, this is a simple addition of a +default case for our existing binary operator node:

+
Value *BinaryExprAST::codegen() {  // Emit IR for a builtin or user-defined binary operator.
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;  // Operand codegen returned null.
+
+  switch (Op) {
+  case '+':
+    return Builder.CreateFAdd(L, R, "addtmp");
+  case '-':
+    return Builder.CreateFSub(L, R, "subtmp");
+  case '*':
+    return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext),
+                                "booltmp");
+  default:
+    break;  // Not a builtin operator: handled by the lookup below.
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = getFunction(std::string("binary") + Op);  // e.g. "binary|".
+  assert(F && "binary operator not found!");  // NOTE(review): compiled out under NDEBUG; a null F would then reach CreateCall.
+
+  Value *Ops[2] = { L, R };
+  return Builder.CreateCall(F, Ops, "binop");
+}
+
+
+

As you can see above, the new code is actually really simple. It just +does a lookup for the appropriate operator in the symbol table and +generates a function call to it. Since user-defined operators are just +built as normal functions (because the “prototype” boils down to a +function with the right name) everything falls into place.

+

The final piece of code we are missing is a bit of top-level magic:

+
Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
+
+  // If this is an operator, install it.
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
+  ...
+
+
+

Basically, before codegening a function, if it is a user-defined +operator, we register it in the precedence table. This allows the binary +operator parsing logic we already have in place to handle it. Since we +are working on a fully-general operator precedence parser, this is all +we need to do to “extend the grammar”.

+

Now we have useful user-defined binary operators. This builds a lot on +the previous framework we built for other operators. Adding unary +operators is a bit more challenging, because we don’t have any framework +for it yet - let’s see what it takes.

+
+
+

6.4. User-defined Unary Operators

+

Since we don’t currently support unary operators in the Kaleidoscope +language, we’ll need to add everything to support them. Above, we added +simple support for the ‘unary’ keyword to the lexer. In addition to +that, we need an AST node:

+
/// UnaryExprAST - Expression class for a prefix unary operator applied to one operand.
+class UnaryExprAST : public ExprAST {
+  char Opcode;  // The operator character, e.g. '!'.
+  std::unique_ptr<ExprAST> Operand;  // The single child expression.
+
+public:
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+    : Opcode(Opcode), Operand(std::move(Operand)) {}
+
+  Value *codegen() override;
+};
+
+
+

This AST node is very simple and obvious by now. It directly mirrors the +binary operator AST node, except that it only has one child. With this, +we need to add the parsing logic. Parsing a unary operator is pretty +simple: we’ll add a new function to do it:

+
/// unary
+///   ::= primary
+///   ::= '!' unary   ('!' stands for any operator character)
+static std::unique_ptr<ExprAST> ParseUnary() {
+  // Non-ASCII tokens (the tok_* codes), '(' and ',' are not operators: parse a primary expr.
+  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+    return ParsePrimary();
+
+  // If this is a unary operator, read it.
+  int Opc = CurTok;
+  getNextToken();
+  if (auto Operand = ParseUnary())  // Recursing here is what lets operators stack, e.g. "!!x".
+    return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+  return nullptr;  // The operand failed to parse.
+}
+
+
+

The grammar we add is pretty straightforward here. If we see a unary +operator where a primary expression is expected, we eat the operator as a +prefix and parse the remaining piece as another unary expression. This +allows us to handle multiple unary operators (e.g. “!!x”). Note that +unary operators can’t have ambiguous parses like binary operators can, +so there is no need for precedence information.

+

The problem with this function is that we need to call ParseUnary from +somewhere. To do this, we change previous callers of ParsePrimary to +call ParseUnary instead:

+
/// binoprhs
+///   ::= ('+' unary)*
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  ...
+    // Parse the unary expression after the binary operator.
+    auto RHS = ParseUnary();
+    if (!RHS)
+      return nullptr;
+  ...
+}
+/// expression
+///   ::= unary binoprhs
+///
+static std::unique_ptr<ExprAST> ParseExpression() {
+  auto LHS = ParseUnary();  // Now ParseUnary rather than ParsePrimary, so a leading operator is accepted.
+  if (!LHS)
+    return nullptr;
+
+  return ParseBinOpRHS(0, std::move(LHS));  // 0 is passed as ExprPrec.
+}
+
+
+

With these two simple changes, we are now able to parse unary operators +and build the AST for them. Next up, we need to add parser support for +prototypes, to parse the unary operator prototype. We extend the binary +operator code above with:

+
/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? '(' id id ')'
+///   ::= unary LETTER '(' id ')'
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  default:
+    return LogErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return LogErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    ...
+
+
+

As with binary operators, we name unary operators with a name that +includes the operator character. This assists us at code generation +time. Speaking of, the final piece we need to add is codegen support for +unary operators. It looks like this:

+
Value *UnaryExprAST::codegen() {  // Emit a call to the user-defined function for this operator.
+  Value *OperandV = Operand->codegen();
+  if (!OperandV)
+    return nullptr;  // Operand codegen returned null.
+
+  Function *F = getFunction(std::string("unary") + Opcode);  // e.g. "unary!".
+  if (!F)
+    return LogErrorV("Unknown unary operator");  // No function with that name was found.
+
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+
+

This code is similar to, but simpler than, the code for binary +operators. It is simpler primarily because it doesn’t need to handle any +predefined operators.

+
+
+

6.5. Kicking the Tires

+

It is somewhat hard to believe, but with a few simple extensions we’ve +covered in the last chapters, we have grown a real-ish language. With +this, we can do a lot of interesting things, including I/O, math, and a +bunch of other things. For example, we can now add a nice sequencing +operator (printd is defined to print out the specified value and a +newline):

+
ready> extern printd(x);
+Read extern:
+declare double @printd(double)
+
+ready> def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.
+...
+ready> printd(123) : printd(456) : printd(789);
+123.000000
+456.000000
+789.000000
+Evaluated to 0.000000
+
+
+

We can also define a bunch of other “primitive” operations, such as:

+
# Logical unary not.
+def unary!(v)
+  if v then
+    0
+  else
+    1;
+
+# Unary negate.
+def unary-(v)
+  0-v;
+
+# Define > with the same precedence as <.
+def binary> 10 (LHS RHS)
+  RHS < LHS;
+
+# Binary logical or, which does not short circuit.
+def binary| 5 (LHS RHS)
+  if LHS then
+    1
+  else if RHS then
+    1
+  else
+    0;
+
+# Binary logical and, which does not short circuit.
+def binary& 6 (LHS RHS)
+  if !LHS then
+    0
+  else
+    !!RHS;
+
+# Define = with slightly lower precedence than relationals.
+def binary = 9 (LHS RHS)
+  !(LHS < RHS | LHS > RHS);
+
+# Define ':' for sequencing: as a low-precedence operator that ignores operands
+# and just returns the RHS.
+def binary : 1 (x y) y;
+
+
+

Given the previous if/then/else support, we can also define interesting +functions for I/O. For example, the following prints out a character +whose “density” reflects the value passed in: the lower the value, the +denser the character:

+
ready> extern putchard(char);
+...
+ready> def printdensity(d)
+  if d > 8 then
+    putchard(32)  # ' '
+  else if d > 4 then
+    putchard(46)  # '.'
+  else if d > 2 then
+    putchard(43)  # '+'
+  else
+    putchard(42); # '*'
+...
+ready> printdensity(1): printdensity(2): printdensity(3):
+       printdensity(4): printdensity(5): printdensity(9):
+       putchard(10);
+**++.
+Evaluated to 0.000000
+
+
+

Based on these simple primitive operations, we can start to define more +interesting things. For example, here’s a little function that determines +the number of iterations it takes for a certain function in the complex +plane to diverge:

+
# Determine whether the specific location diverges.
+# Solve for z = z^2 + c in the complex plane.
+def mandelconverger(real imag iters creal cimag)
+  if iters > 255 | (real*real + imag*imag > 4) then
+    iters
+  else
+    mandelconverger(real*real - imag*imag + creal,
+                    2*real*imag + cimag,
+                    iters+1, creal, cimag);
+
+# Return the number of iterations required for the iteration to escape
+def mandelconverge(real imag)
+  mandelconverger(real, imag, 0, real, imag);
+
+
+

This “z = z^2 + c” function is a beautiful little creature that is +the basis for computation of the Mandelbrot +Set. Our +mandelconverge function returns the number of iterations that it +takes for a complex orbit to escape, saturating to 255. This is not a +very useful function by itself, but if you plot its value over a +two-dimensional plane, you can see the Mandelbrot set. Given that we are +limited to using putchard here, our amazing graphical output is limited, +but we can whip together something using the density plotter above:

+
# Compute and plot the mandelbrot set with the specified 2 dimensional range
+# info.
+def mandelhelp(xmin xmax xstep   ymin ymax ystep)
+  for y = ymin, y < ymax, ystep in (
+    (for x = xmin, x < xmax, xstep in
+       printdensity(mandelconverge(x,y)))
+    : putchard(10)
+  )
+
+# mandel - This is a convenient helper function for plotting the mandelbrot set
+# from the specified position with the specified Magnification.
+def mandel(realstart imagstart realmag imagmag)
+  mandelhelp(realstart, realstart+realmag*78, realmag,
+             imagstart, imagstart+imagmag*40, imagmag);
+
+
+

Given this, we can try plotting out the mandelbrot set! Let’s try it out:

+
ready> mandel(-2.3, -1.3, 0.05, 0.07);
+*******************************+++++++++++*************************************
+*************************+++++++++++++++++++++++*******************************
+**********************+++++++++++++++++++++++++++++****************************
+*******************+++++++++++++++++++++.. ...++++++++*************************
+*****************++++++++++++++++++++++.... ...+++++++++***********************
+***************+++++++++++++++++++++++.....   ...+++++++++*********************
+**************+++++++++++++++++++++++....     ....+++++++++********************
+*************++++++++++++++++++++++......      .....++++++++*******************
+************+++++++++++++++++++++.......       .......+++++++******************
+***********+++++++++++++++++++....                ... .+++++++*****************
+**********+++++++++++++++++.......                     .+++++++****************
+*********++++++++++++++...........                    ...+++++++***************
+********++++++++++++............                      ...++++++++**************
+********++++++++++... ..........                        .++++++++**************
+*******+++++++++.....                                   .+++++++++*************
+*******++++++++......                                  ..+++++++++*************
+*******++++++.......                                   ..+++++++++*************
+*******+++++......                                     ..+++++++++*************
+*******.... ....                                      ...+++++++++*************
+*******.... .                                         ...+++++++++*************
+*******+++++......                                    ...+++++++++*************
+*******++++++.......                                   ..+++++++++*************
+*******++++++++......                                   .+++++++++*************
+*******+++++++++.....                                  ..+++++++++*************
+********++++++++++... ..........                        .++++++++**************
+********++++++++++++............                      ...++++++++**************
+*********++++++++++++++..........                     ...+++++++***************
+**********++++++++++++++++........                     .+++++++****************
+**********++++++++++++++++++++....                ... ..+++++++****************
+***********++++++++++++++++++++++.......       .......++++++++*****************
+************+++++++++++++++++++++++......      ......++++++++******************
+**************+++++++++++++++++++++++....      ....++++++++********************
+***************+++++++++++++++++++++++.....   ...+++++++++*********************
+*****************++++++++++++++++++++++....  ...++++++++***********************
+*******************+++++++++++++++++++++......++++++++*************************
+*********************++++++++++++++++++++++.++++++++***************************
+*************************+++++++++++++++++++++++*******************************
+******************************+++++++++++++************************************
+*******************************************************************************
+*******************************************************************************
+*******************************************************************************
+Evaluated to 0.000000
+ready> mandel(-2, -1, 0.02, 0.04);
+**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
+***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
+*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
+*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
+***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
+**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
+************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
+***********++++++++++++++++++++++++++++++++++++++++++++++++++........        .
+**********++++++++++++++++++++++++++++++++++++++++++++++.............
+********+++++++++++++++++++++++++++++++++++++++++++..................
+*******+++++++++++++++++++++++++++++++++++++++.......................
+******+++++++++++++++++++++++++++++++++++...........................
+*****++++++++++++++++++++++++++++++++............................
+*****++++++++++++++++++++++++++++...............................
+****++++++++++++++++++++++++++......   .........................
+***++++++++++++++++++++++++.........     ......    ...........
+***++++++++++++++++++++++............
+**+++++++++++++++++++++..............
+**+++++++++++++++++++................
+*++++++++++++++++++.................
+*++++++++++++++++............ ...
+*++++++++++++++..............
+*+++....++++................
+*..........  ...........
+*
+*..........  ...........
+*+++....++++................
+*++++++++++++++..............
+*++++++++++++++++............ ...
+*++++++++++++++++++.................
+**+++++++++++++++++++................
+**+++++++++++++++++++++..............
+***++++++++++++++++++++++............
+***++++++++++++++++++++++++.........     ......    ...........
+****++++++++++++++++++++++++++......   .........................
+*****++++++++++++++++++++++++++++...............................
+*****++++++++++++++++++++++++++++++++............................
+******+++++++++++++++++++++++++++++++++++...........................
+*******+++++++++++++++++++++++++++++++++++++++.......................
+********+++++++++++++++++++++++++++++++++++++++++++..................
+Evaluated to 0.000000
+ready> mandel(-0.9, -1.4, 0.02, 0.03);
+*******************************************************************************
+*******************************************************************************
+*******************************************************************************
+**********+++++++++++++++++++++************************************************
+*+++++++++++++++++++++++++++++++++++++++***************************************
++++++++++++++++++++++++++++++++++++++++++++++**********************************
+++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
+++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
++++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
++++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
++++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
+++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
++++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
+++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
+++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
++++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
+++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
+++++++++++++++++++++...........                .........++++++++++++++++++++++*
+++++++++++++++++++............                  ...........++++++++++++++++++++
+++++++++++++++++...............                 .............++++++++++++++++++
+++++++++++++++.................                 ...............++++++++++++++++
+++++++++++++..................                  .................++++++++++++++
++++++++++..................                      .................+++++++++++++
+++++++........        .                               .........  ..++++++++++++
+++............                                         ......    ....++++++++++
+..............                                                    ...++++++++++
+..............                                                    ....+++++++++
+..............                                                    .....++++++++
+.............                                                    ......++++++++
+...........                                                     .......++++++++
+.........                                                       ........+++++++
+.........                                                       ........+++++++
+.........                                                           ....+++++++
+........                                                             ...+++++++
+.......                                                              ...+++++++
+                                                                    ....+++++++
+                                                                   .....+++++++
+                                                                    ....+++++++
+                                                                    ....+++++++
+                                                                    ....+++++++
+Evaluated to 0.000000
+ready> ^D
+
+
+

At this point, you may be starting to realize that Kaleidoscope is a +real and powerful language. It may not be self-similar :), but it can be +used to plot things that are!

+

With this, we conclude the “adding user-defined operators” chapter of +the tutorial. We have successfully augmented our language, adding the +ability to extend the language in the library, and we have shown how +this can be used to build a simple but interesting end-user application +in Kaleidoscope. At this point, Kaleidoscope can build a variety of +applications that are functional and can call functions with +side-effects, but it can’t actually define and mutate a variable itself.

+

Strikingly, variable mutation is an important feature of some languages, +and it is not at all obvious how to add support for mutable +variables without having to add an “SSA construction” +phase to your front-end. In the next chapter, we will describe how you +can add variable mutation without building SSA in your front-end.

+
+
+

6.6. Full Code Listing

+

Here is the complete code listing for our running example, enhanced with +the support for user-defined operators. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

On some platforms, you will need to specify -rdynamic or +-Wl,--export-dynamic when linking. This ensures that symbols defined in +the main executable are exported to the dynamic linker and so are +available for symbol resolution at run time. This is not needed if you +compile your support code into a shared library, although doing that +will cause problems on Windows.

+

Here is the code:

+
#include "../include/KaleidoscopeJIT.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::orc;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these (all negative) for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2,
+  tok_extern = -3,
+
+  // primary
+  tok_identifier = -4,
+  tok_number = -5,
+
+  // control
+  tok_if = -6,
+  tok_then = -7,
+  tok_else = -8,
+  tok_for = -9,
+  tok_in = -10,
+
+  // operators (keywords that introduce user-defined operator definitions)
+  tok_binary = -11,
+  tok_unary = -12
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+/// gettok - Return the next token from standard input: a Token value, or the character's own code.
+static int gettok() {
+  static int LastChar = ' ';  // One character of lookahead, kept across calls.
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def")
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    if (IdentifierStr == "if")
+      return tok_if;
+    if (IdentifierStr == "then")
+      return tok_then;
+    if (IdentifierStr == "else")
+      return tok_else;
+    if (IdentifierStr == "for")
+      return tok_for;
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
+    return tok_identifier;  // Not a keyword; the text is left in IdentifierStr.
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), nullptr); // NOTE(review): text like "1.2.3" is lexed as one number; strtod converts only its leading valid prefix.
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do
+      LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();  // Resume lexing after the comment.
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();  // Advance the lookahead past the returned character.
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() = default;
+
+  virtual Value *codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+
+  Value *codegen() override;
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+
+public:
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+
+  Value *codegen() override;
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  std::unique_ptr<ExprAST> Operand;
+
+public:
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+
+  Value *codegen() override;
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  std::unique_ptr<ExprAST> LHS, RHS;
+
+public:
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+
+  Value *codegen() override;
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<std::unique_ptr<ExprAST>> Args;
+
+public:
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : Callee(Callee), Args(std::move(Args)) {}
+
+  Value *codegen() override;
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  std::unique_ptr<ExprAST> Cond, Then, Else;
+
+public:
+  IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+            std::unique_ptr<ExprAST> Else)
+      : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+
+  Value *codegen() override;
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName; // Name of the loop induction variable.
+  // Start value, end condition, optional step (may be null) and loop body.
+  std::unique_ptr<ExprAST> Start, End, Step, Body;
+
+public:
+  /// Build a for/in node; the node takes ownership of the sub-expressions.
+  /// \p Step may be null, in which case codegen uses a step of 1.0.
+  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+             std::unique_ptr<ExprAST> Body)
+      : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+        Step(std::move(Step)), Body(std::move(Body)) {}
+
+  /// Emit IR for the loop; defined out of line.
+  Value *codegen() override;
+};
+
+/// PrototypeAST - Describes the signature of a function: its name and the
+/// names of its arguments (and therefore its arity).  A prototype may also
+/// describe a user-defined unary or binary operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool IsOperator;
+  unsigned Precedence; // Only meaningful for binary operators.
+
+public:
+  PrototypeAST(const std::string &Name, std::vector<std::string> Args,
+               bool IsOperator = false, unsigned Prec = 0)
+      : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
+        Precedence(Prec) {}
+
+  Function *codegen();
+
+  const std::string &getName() const { return Name; }
+  unsigned getBinaryPrecedence() const { return Precedence; }
+
+  /// An operator prototype with exactly one argument is a unary operator.
+  bool isUnaryOp() const {
+    if (!IsOperator)
+      return false;
+    return Args.size() == 1;
+  }
+
+  /// An operator prototype with exactly two arguments is a binary operator.
+  bool isBinaryOp() const {
+    if (!IsOperator)
+      return false;
+    return Args.size() == 2;
+  }
+
+  /// The operator character is the last character of the prototype's name.
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name.back();
+  }
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto; // The function's signature.
+  std::unique_ptr<ExprAST> Body;       // The expression forming the body.
+
+public:
+  /// Build a definition from a prototype and a body expression; the node takes
+  /// ownership of both.
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+
+  /// Emit IR for the whole function; defined out of line.
+  Function *codegen();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok is the parser's one-token lookahead buffer: the token currently
+/// being examined.  getNextToken pulls the next token from the lexer, stores
+/// it in CurTok and returns it.
+static int CurTok;
+static int getNextToken() {
+  CurTok = gettok();
+  return CurTok;
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+///
+/// Returns the positive precedence registered for CurTok in BinopPrecedence,
+/// or -1 when CurTok is not an ASCII character or is not a declared binary
+/// operator.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Look the token up with find() instead of operator[]: operator[] would
+  // silently insert a zero entry for every non-operator character queried.
+  auto It = BinopPrecedence.find(static_cast<char>(CurTok));
+  if (It == BinopPrecedence.end() || It->second <= 0)
+    return -1;
+  return It->second;
+}
+
+/// Error* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+
+/// LogErrorP - Report \p Str through LogError and return a null prototype.
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return std::unique_ptr<PrototypeAST>();
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+///
+/// Builds a literal node from the lexer's current NumVal and advances past the
+/// number token.
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  // Capture the value first: fetching the next token may overwrite NumVal.
+  const double Literal = NumVal;
+  getNextToken(); // Move past the number.
+  return std::make_unique<NumberExprAST>(Literal);
+}
+
+/// parenexpr ::= '(' expression ')'
+///
+/// Returns the inner expression itself; no dedicated AST node is created for
+/// the parentheses.
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // Skip the opening '('.
+
+  std::unique_ptr<ExprAST> Inner = ParseExpression();
+  if (!Inner)
+    return nullptr;
+
+  if (CurTok == ')') {
+    getNextToken(); // Skip the closing ')'.
+    return Inner;
+  }
+  return LogError("expected ')'");
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+///
+/// Produces a variable reference, or a call when the identifier is followed
+/// by a parenthesised, comma-separated argument list.
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  const std::string Ident = IdentifierStr;
+  getNextToken(); // Consume the identifier.
+
+  // Anything other than '(' means this is a plain variable reference.
+  if (CurTok != '(')
+    return std::make_unique<VariableExprAST>(Ident);
+
+  getNextToken(); // Consume '('.
+
+  std::vector<std::unique_ptr<ExprAST>> CallArgs;
+  bool MoreArgs = CurTok != ')';
+  while (MoreArgs) {
+    std::unique_ptr<ExprAST> Arg = ParseExpression();
+    if (!Arg)
+      return nullptr;
+    CallArgs.push_back(std::move(Arg));
+
+    if (CurTok == ')') {
+      MoreArgs = false;
+    } else if (CurTok == ',') {
+      getNextToken(); // Consume ','; another argument must follow.
+    } else {
+      return LogError("Expected ')' or ',' in argument list");
+    }
+  }
+
+  getNextToken(); // Consume ')'.
+
+  return std::make_unique<CallExprAST>(Ident, std::move(CallArgs));
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+///
+/// All three sub-expressions are mandatory; a missing keyword is reported via
+/// LogError and yields null.
+static std::unique_ptr<ExprAST> ParseIfExpr() {
+  getNextToken(); // Consume 'if'.
+
+  std::unique_ptr<ExprAST> CondExpr = ParseExpression();
+  if (!CondExpr)
+    return nullptr;
+  if (CurTok != tok_then)
+    return LogError("expected then");
+
+  getNextToken(); // Consume 'then'.
+  std::unique_ptr<ExprAST> ThenExpr = ParseExpression();
+  if (!ThenExpr)
+    return nullptr;
+  if (CurTok != tok_else)
+    return LogError("expected else");
+
+  getNextToken(); // Consume 'else'.
+  std::unique_ptr<ExprAST> ElseExpr = ParseExpression();
+  if (!ElseExpr)
+    return nullptr;
+
+  return std::make_unique<IfExprAST>(std::move(CondExpr), std::move(ThenExpr),
+                                      std::move(ElseExpr));
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+///
+/// Parses the induction variable name, its start expression, the end
+/// condition, an optional step expression and the loop body.  Returns null
+/// (after logging where the grammar is violated) on any error.
+static std::unique_ptr<ExprAST> ParseForExpr() {
+  getNextToken(); // eat the for.
+
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after for");
+
+  // Copy the name now; the lexer's IdentifierStr may change on the next token.
+  std::string IdName = IdentifierStr;
+  getNextToken(); // eat identifier.
+
+  if (CurTok != '=')
+    return LogError("expected '=' after for");
+  getNextToken(); // eat '='.
+
+  auto Start = ParseExpression();
+  if (!Start)
+    return nullptr;
+  if (CurTok != ',')
+    return LogError("expected ',' after for start value");
+  getNextToken(); // eat ','.
+
+  auto End = ParseExpression();
+  if (!End)
+    return nullptr;
+
+  // The step value is optional; Step stays null when it is omitted.
+  std::unique_ptr<ExprAST> Step;
+  if (CurTok == ',') {
+    getNextToken(); // eat ','.
+    Step = ParseExpression();
+    if (!Step)
+      return nullptr;
+  }
+
+  if (CurTok != tok_in)
+    return LogError("expected 'in' after for");
+  getNextToken(); // eat 'in'.
+
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  return std::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
+                                       std::move(Step), std::move(Body));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///
+/// Dispatches on the current token to the matching sub-parser; any other
+/// token is reported as an error.
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  if (CurTok == tok_identifier)
+    return ParseIdentifierExpr();
+  if (CurTok == tok_number)
+    return ParseNumberExpr();
+  if (CurTok == '(')
+    return ParseParenExpr();
+  if (CurTok == tok_if)
+    return ParseIfExpr();
+  if (CurTok == tok_for)
+    return ParseForExpr();
+
+  return LogError("unknown token when expecting an expression");
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+///
+/// Any ASCII token other than '(' and ',' is treated as a unary operator
+/// character applied to the unary expression that follows it.
+static std::unique_ptr<ExprAST> ParseUnary() {
+  const bool IsOperatorChar =
+      isascii(CurTok) && CurTok != '(' && CurTok != ',';
+  if (!IsOperatorChar)
+    return ParsePrimary();
+
+  // Remember the operator, then parse its (possibly nested) operand.
+  const int Opc = CurTok;
+  getNextToken();
+
+  std::unique_ptr<ExprAST> Operand = ParseUnary();
+  if (!Operand)
+    return nullptr;
+  return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+///
+/// Parses a sequence of [binop, unary] pairs that follows \p LHS.
+/// \p ExprPrec is the minimum operator precedence this call may consume;
+/// operators that bind less tightly are left for the caller.  Returns the
+/// combined expression, or null if an operand fails to parse.
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  // If this is a binop, find its precedence.
+  while (true) {
+    // GetTokPrecedence yields -1 for non-operators, which ends the loop below
+    // whenever ExprPrec is non-negative.
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken(); // eat binop
+
+    // Parse the unary expression after the binary operator.
+    auto RHS = ParseUnary();
+    if (!RHS)
+      return nullptr;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      // Recurse with TokPrec + 1 so only strictly tighter operators are folded
+      // into RHS; operators of equal precedence are handled by this loop.
+      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
+      if (!RHS)
+        return nullptr;
+    }
+
+    // Merge LHS/RHS.
+    LHS =
+        std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+/// Parses the leading unary expression and then any binary operators that
+/// follow it, starting from the lowest precedence (0).
+static std::unique_ptr<ExprAST> ParseExpression() {
+  if (auto FirstOperand = ParseUnary())
+    return ParseBinOpRHS(0, std::move(FirstOperand));
+  return nullptr;
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+///
+/// Parses a function or operator prototype.  Operator prototypes are named
+/// "unary<c>" / "binary<c>", where <c> is the operator character.  A binary
+/// operator may carry an explicit precedence in 1..100 (default 30).  Returns
+/// null after logging an error if the prototype is malformed.
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  default:
+    return LogErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return LogErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return LogErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return LogErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+
+  if (CurTok != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Argument names are separated by whitespace only; the loop stops at the
+  // first token that is not an identifier.
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken(); // eat ')'.
+
+  // Verify right number of names for operator.  Kind doubles as the required
+  // operand count (1 for unary, 2 for binary).
+  if (Kind && ArgNames.size() != Kind)
+    return LogErrorP("Invalid number of operands for operator");
+
+  // ArgNames is not used again, so hand it over instead of copying it.
+  return std::make_unique<PrototypeAST>(FnName, std::move(ArgNames), Kind != 0,
+                                         BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+///
+/// Returns the parsed function, or null if either the prototype or the body
+/// fails to parse.
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // Skip the 'def' keyword.
+
+  std::unique_ptr<PrototypeAST> Signature = ParsePrototype();
+  if (!Signature)
+    return nullptr;
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (!BodyExpr)
+    return nullptr;
+
+  return std::make_unique<FunctionAST>(std::move(Signature),
+                                       std::move(BodyExpr));
+}
+
+/// toplevelexpr ::= expression
+///
+/// Wraps a bare expression in a zero-argument function named "__anon_expr".
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  std::unique_ptr<ExprAST> Expr = ParseExpression();
+  if (!Expr)
+    return nullptr;
+
+  // The anonymous prototype takes no arguments.
+  auto AnonProto = std::make_unique<PrototypeAST>("__anon_expr",
+                                                  std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(AnonProto), std::move(Expr));
+}
+
+/// external ::= 'extern' prototype
+///
+/// An extern declaration is just a prototype with no body.
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // Skip the 'extern' keyword.
+  std::unique_ptr<PrototypeAST> Declared = ParsePrototype();
+  return Declared;
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+// Shared code-generation state.  TheContext, TheModule, Builder and TheFPM
+// are (re)created by InitializeModuleAndPassManager().
+static std::unique_ptr<LLVMContext> TheContext; // Context passed to type/constant/block creation.
+static std::unique_ptr<Module> TheModule;       // Module currently receiving new functions.
+static std::unique_ptr<IRBuilder<>> Builder;    // Emits instructions at the current insert point.
+// Values in scope while a function body is generated, keyed by name; cleared
+// at the start of each FunctionAST::codegen().
+static std::map<std::string, Value *> NamedValues;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM; // Per-function optimization pipeline.
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;             // JIT that finished modules are added to.
+// Prototype recorded for each function name; getFunction() uses it to emit a
+// declaration when the current module does not contain the function.
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
+static ExitOnError ExitOnErr; // Wraps JIT calls that return an error or expected value.
+
+/// LogErrorV - Report \p Str through LogError and return a null Value, for use
+/// in code generation routines.
+Value *LogErrorV(const char *Str) {
+  LogError(Str);
+  Value *NoValue = nullptr;
+  return NoValue;
+}
+
+/// getFunction - Find the IR function called \p Name.
+///
+/// Prefers a function already present in the current module; otherwise emits
+/// a declaration from the prototype recorded in FunctionProtos.  Returns null
+/// when neither exists.
+Function *getFunction(std::string Name) {
+  Function *Existing = TheModule->getFunction(Name);
+  if (Existing)
+    return Existing;
+
+  auto ProtoIt = FunctionProtos.find(Name);
+  if (ProtoIt == FunctionProtos.end())
+    return nullptr; // No prototype is known under this name.
+
+  // Emit a declaration into the current module from the saved prototype.
+  return ProtoIt->second->codegen();
+}
+
+/// Emit the literal as a floating-point constant built from Val.
+Value *NumberExprAST::codegen() {
+  const APFloat Literal(Val);
+  return ConstantFP::get(*TheContext, Literal);
+}
+
+/// Resolve the variable to the value bound to its name in NamedValues.
+///
+/// Returns null after logging "Unknown variable name" when no value is bound.
+Value *VariableExprAST::codegen() {
+  // Look this variable up in the function.  find() is used rather than
+  // operator[] so that a miss does not insert a null entry into the table.
+  auto It = NamedValues.find(Name);
+  if (It == NamedValues.end() || !It->second)
+    return LogErrorV("Unknown variable name");
+  return It->second;
+}
+
+/// Emit a call to the user-defined function "unary<Opcode>" on the operand.
+///
+/// Returns null if the operand fails to generate or no such function exists.
+Value *UnaryExprAST::codegen() {
+  Value *Arg = Operand->codegen();
+  if (!Arg)
+    return nullptr;
+
+  std::string OpFnName("unary");
+  OpFnName += Opcode;
+
+  if (Function *OpFn = getFunction(OpFnName))
+    return Builder->CreateCall(OpFn, Arg, "unop");
+  return LogErrorV("Unknown unary operator");
+}
+
+/// Emit IR for a binary expression.
+///
+/// '+', '-', '*' and '<' are emitted as floating-point instructions; any other
+/// operator becomes a call to the user-defined function "binary<Op>".  Returns
+/// null if an operand fails to generate or the operator function is unknown.
+Value *BinaryExprAST::codegen() {
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
+
+  switch (Op) {
+  case '+':
+    return Builder->CreateFAdd(L, R, "addtmp");
+  case '-':
+    return Builder->CreateFSub(L, R, "subtmp");
+  case '*':
+    return Builder->CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder->CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
+  default:
+    break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = getFunction(std::string("binary") + Op);
+  // Report a missing operator the same way the unary path does.  An assert
+  // alone would vanish in NDEBUG builds and let a null callee reach
+  // CreateCall.
+  if (!F)
+    return LogErrorV("Unknown binary operator");
+
+  Value *Ops[] = {L, R};
+  return Builder->CreateCall(F, Ops, "binop");
+}
+
+/// Emit a call to the function named Callee.
+///
+/// Fails (returning null) when the callee is unknown, when the argument count
+/// does not match, or when any argument expression fails to generate.
+Value *CallExprAST::codegen() {
+  Function *Target = getFunction(Callee);
+  if (!Target)
+    return LogErrorV("Unknown function referenced");
+
+  if (Target->arg_size() != Args.size())
+    return LogErrorV("Incorrect # arguments passed");
+
+  // Generate the arguments left to right, stopping at the first failure.
+  std::vector<Value *> ArgValues;
+  for (const auto &ArgExpr : Args) {
+    Value *ArgV = ArgExpr->codegen();
+    if (!ArgV)
+      return nullptr;
+    ArgValues.push_back(ArgV);
+  }
+
+  return Builder->CreateCall(Target, ArgValues, "calltmp");
+}
+
+/// Emit IR for if/then/else.
+///
+/// The condition is compared against 0.0 and branches to a 'then' or 'else'
+/// block; both arms branch to a merge block whose PHI node selects the value
+/// of the arm that ran.  Returns the PHI node, or null if any sub-expression
+/// fails to generate.
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  CondV = Builder->CreateFCmpONE(
+      CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.  The 'else' and merge blocks are created without a
+  // parent here and are appended to the function further down.
+  BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
+  BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
+
+  Builder->CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder->SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->codegen();
+  if (!ThenV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder->GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder->SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder->GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder->SetInsertPoint(MergeBB);
+  PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
+
+  // One incoming value per arm, paired with the block each arm ended in.
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+// Output for-loop as:
+//   ...
+//   start = startexpr
+//   goto loop
+// loop:
+//   variable = phi [start, preheader], [nextvariable, loopend]
+//   ...
+//   bodyexpr
+//   ...
+// loopend:
+//   step = stepexpr
+//   nextvariable = variable + step
+//   endcond = endexpr
+//   br endcond, loop, afterloop
+// afterloop:
+//
+// "loopend" above is not a separately created block: it is whatever block the
+// builder is positioned in once the body has been emitted.  The expression
+// itself always evaluates to 0.0.  Returns null if any sub-expression fails
+// to generate.
+Value *ForExprAST::codegen() {
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder->GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder->CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder->SetInsertPoint(LoopBB);
+
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable =
+      Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, VarName);
+  Variable->addIncoming(StartVal, PreheaderBB);
+
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (!Body->codegen())
+    return nullptr;
+
+  // Emit the step value.
+  Value *StepVal = nullptr;
+  if (Step) {
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
+  }
+
+  Value *NextVar = Builder->CreateFAdd(Variable, StepVal, "nextvar");
+
+  // Compute the end condition.
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  EndCond = Builder->CreateFCmpONE(
+      EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
+
+  // Create the "after loop" block and insert it.  LoopEndBB is the block the
+  // builder is currently in, i.e. where the backedge originates.
+  BasicBlock *LoopEndBB = Builder->GetInsertBlock();
+  BasicBlock *AfterBB =
+      BasicBlock::Create(*TheContext, "afterloop", TheFunction);
+
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder->SetInsertPoint(AfterBB);
+
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(*TheContext));
+}
+
+/// Emit a declaration for this prototype into the current module.
+///
+/// Every parameter and the return value have type double; the IR arguments
+/// are given the names stored in Args.
+Function *PrototypeAST::codegen() {
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+
+  // Function type: double(double, double, ...), one double per argument.
+  std::vector<Type *> ParamTys(Args.size(), DoubleTy);
+  FunctionType *Signature =
+      FunctionType::get(DoubleTy, ParamTys, /*isVarArg=*/false);
+
+  Function *Fn = Function::Create(Signature, Function::ExternalLinkage, Name,
+                                  TheModule.get());
+
+  // Name the IR arguments after the prototype's argument names, in order.
+  auto NameIt = Args.begin();
+  for (auto &Param : Fn->args())
+    Param.setName(*NameIt++);
+
+  return Fn;
+}
+
+/// Emit IR for a complete function definition.
+///
+/// Registers the prototype in FunctionProtos, installs the precedence of a
+/// binary operator definition, generates the body into a fresh entry block,
+/// then verifies and optimizes the function.  On a body error the function is
+/// erased from the module and null is returned.
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
+
+  // If this is an operator, install it.  This happens before the body is
+  // generated.
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
+  Builder->SetInsertPoint(BB);
+
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args())
+    NamedValues[std::string(Arg.getName())] = &Arg;
+
+  if (Value *RetVal = Body->codegen()) {
+    // Finish off the function.
+    Builder->CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    // NOTE(review): the verifier's result is not inspected here.
+    verifyFunction(*TheFunction);
+
+    // Run the optimizer on the function.
+    TheFPM->run(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+
+  // Undo the operator installation done above.  The prototype itself stays in
+  // FunctionProtos.
+  if (P.isBinaryOp())
+    BinopPrecedence.erase(P.getOperatorName());
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+/// Create a fresh context, module, IR builder and function pass manager.
+///
+/// Reads TheJIT for the data layout, so TheJIT must already be set when this
+/// runs.  The Handle* functions call it again after moving the current module
+/// into the JIT.
+static void InitializeModuleAndPassManager() {
+  // Open a new module.
+  TheContext = std::make_unique<LLVMContext>();
+  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
+  TheModule->setDataLayout(TheJIT->getDataLayout());
+
+  // Create a new builder for the module.
+  Builder = std::make_unique<IRBuilder<>>(*TheContext);
+
+  // Create a new pass manager attached to it.
+  TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
+
+/// Parse a 'def', generate its IR, print it, and hand the finished module to
+/// the JIT before starting a fresh module.  A parse failure skips one token.
+static void HandleDefinition() {
+  auto FnAST = ParseDefinition();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  Function *FnIR = FnAST->codegen();
+  if (!FnIR)
+    return;
+
+  fprintf(stderr, "Read function definition:");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // The module now belongs to the JIT, so open a new one for later input.
+  ExitOnErr(TheJIT->addModule(
+      ThreadSafeModule(std::move(TheModule), std::move(TheContext))));
+  InitializeModuleAndPassManager();
+}
+
+/// Parse an 'extern', emit and print its declaration, and record the
+/// prototype in FunctionProtos.  A parse failure skips one token.
+static void HandleExtern() {
+  auto ProtoAST = ParseExtern();
+  if (!ProtoAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  Function *FnIR = ProtoAST->codegen();
+  if (!FnIR)
+    return;
+
+  fprintf(stderr, "Read extern: ");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // Keep the prototype so later modules can re-declare the function.
+  FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+}
+
+/// Evaluate a top-level expression: wrap it in an anonymous function, JIT it,
+/// call it, print the result, and remove its module from the JIT again.
+/// A parse failure skips one token.
+static void HandleTopLevelExpression() {
+  auto FnAST = ParseTopLevelExpr();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  if (!FnAST->codegen())
+    return;
+
+  // A ResourceTracker tracks the JIT'd memory allocated to the anonymous
+  // expression, so it can be freed once the expression has run.
+  auto RT = TheJIT->getMainJITDylib().createResourceTracker();
+
+  auto AnonModule =
+      ThreadSafeModule(std::move(TheModule), std::move(TheContext));
+  ExitOnErr(TheJIT->addModule(std::move(AnonModule), RT));
+  InitializeModuleAndPassManager();
+
+  // Find the anonymous function in the JIT.
+  auto ExprSymbol = ExitOnErr(TheJIT->lookup("__anon_expr"));
+
+  // Treat the symbol's address as a native function taking no arguments and
+  // returning a double, then call it.
+  auto *Entry = (double (*)())(intptr_t)ExprSymbol.getAddress();
+  fprintf(stderr, "Evaluated to %f\n", Entry());
+
+  // Delete the anonymous expression module from the JIT.
+  ExitOnErr(RT->remove());
+}
+
+/// top ::= definition | external | expression | ';'
+///
+/// Prints a prompt and dispatches on the current token until end of input.
+static void MainLoop() {
+  for (;;) {
+    fprintf(stderr, "ready> ");
+
+    if (CurTok == tok_eof)
+      return;
+
+    if (CurTok == ';')
+      getNextToken(); // Top-level semicolons are ignored.
+    else if (CurTok == tok_def)
+      HandleDefinition();
+    else if (CurTok == tok_extern)
+      HandleExtern();
+    else
+      HandleTopLevelExpression();
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+// DLLEXPORT expands to __declspec(dllexport) when _WIN32 is defined and to
+// nothing otherwise; it decorates the library functions below.
+#ifdef _WIN32
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+
+/// putchard - Write \p X, converted to a character, to stderr.  Always
+/// returns 0.
+extern "C" DLLEXPORT double putchard(double X) {
+  const char Ch = (char)X;
+  fputc(Ch, stderr);
+  return 0.0;
+}
+
+/// printd - Print \p X to stderr using the format "%f\n".  Always returns 0.
+extern "C" DLLEXPORT double printd(double X) {
+  static const char Format[] = "%f\n";
+  fprintf(stderr, Format, X);
+  return 0.0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+/// Program entry point: initializes the native target, registers the built-in
+/// operator precedences, creates the JIT and the first module, then runs the
+/// interactive loop until end of input.
+int main() {
+  InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
+  InitializeNativeTargetAsmParser();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40; // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // The JIT must exist before InitializeModuleAndPassManager(), which reads
+  // its data layout.
+  TheJIT = ExitOnErr(KaleidoscopeJIT::Create());
+
+  InitializeModuleAndPassManager();
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  return 0;
+}
+
+
+

Next: Extending the language: mutable variables / SSA construction

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl07.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl07.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl07.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl07.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,2221 @@ + + + + + + + + + 7. Kaleidoscope: Extending the Language: Mutable Variables — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

7. Kaleidoscope: Extending the Language: Mutable Variables

+ +
+

7.1. Chapter 7 Introduction

+

Welcome to Chapter 7 of the “Implementing a language with LLVM” tutorial. In chapters 1 through 6, we’ve built a very respectable, albeit simple, functional programming language. In our journey, we learned some parsing techniques, how to build and represent an AST, how to build LLVM IR, and how to optimize the resultant code as well as JIT compile it.

+

While Kaleidoscope is interesting as a functional language, the fact that it is functional makes it “too easy” to generate LLVM IR for it. In particular, a functional language makes it very easy to build LLVM IR directly in SSA form. Since LLVM requires that the input code be in SSA form, this is a very nice property and it is often unclear to newcomers how to generate code for an imperative language with mutable variables.

+

The short (and happy) summary of this chapter is that there is no need for your front-end to build SSA form: LLVM provides highly tuned and well tested support for this, though the way it works is a bit unexpected for some.

+
+
+

7.2. Why is this a hard problem?

+

To understand why mutable variables cause complexities in SSA construction, consider this extremely simple C example:

+
int G, H;
+int test(_Bool Condition) {
+  int X;
+  if (Condition)
+    X = G;
+  else
+    X = H;
+  return X;
+}
+
+
+

In this case, we have the variable “X”, whose value depends on the path executed in the program. Because there are two different possible values for X before the return instruction, a PHI node is inserted to merge the two values. The LLVM IR that we want for this example looks like this:

+
@G = weak global i32 0   ; type of @G is i32*
+@H = weak global i32 0   ; type of @H is i32*
+
+define i32 @test(i1 %Condition) {
+entry:
+  br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+  %X.0 = load i32, i32* @G
+  br label %cond_next
+
+cond_false:
+  %X.1 = load i32, i32* @H
+  br label %cond_next
+
+cond_next:
+  %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+  ret i32 %X.2
+}
+
+
+

In this example, the loads from the G and H global variables are explicit in the LLVM IR, and they live in the then/else branches of the if statement (cond_true/cond_false). In order to merge the incoming values, the X.2 phi node in the cond_next block selects the right value to use based on where control flow is coming from: if control flow comes from the cond_false block, X.2 gets the value of X.1. Alternatively, if control flow comes from cond_true, it gets the value of X.0. The intent of this chapter is not to explain the details of SSA form. For more information, see one of the many online references.

+

The question for this article is “who places the phi nodes when lowering assignments to mutable variables?”. The issue here is that LLVM requires that its IR be in SSA form: there is no “non-ssa” mode for it. However, SSA construction requires non-trivial algorithms and data structures, so it is inconvenient and wasteful for every front-end to have to reproduce this logic.

+
+
+

7.3. Memory in LLVM

+

The ‘trick’ here is that while LLVM does require all register values to be in SSA form, it does not require (or permit) memory objects to be in SSA form. In the example above, note that the loads from G and H are direct accesses to G and H: they are not renamed or versioned. This differs from some other compiler systems, which do try to version memory objects. In LLVM, instead of encoding dataflow analysis of memory into the LLVM IR, it is handled with Analysis Passes which are computed on demand.

+

With this in mind, the high-level idea is that we want to make a stack variable (which lives in memory, because it is on the stack) for each mutable object in a function. To take advantage of this trick, we need to talk about how LLVM represents stack variables.

+

In LLVM, all memory accesses are explicit with load/store instructions, and it is carefully designed not to have (or need) an “address-of” operator. Notice how the type of the @G/@H global variables is actually “i32*” even though the variable is defined as “i32”. What this means is that @G defines space for an i32 in the global data area, but its name actually refers to the address for that space. Stack variables work the same way, except that instead of being declared with global variable definitions, they are declared with the LLVM alloca instruction:

+
define i32 @example() {
+entry:
+  %X = alloca i32           ; type of %X is i32*.
+  ...
+  %tmp = load i32, i32* %X  ; load the stack value %X from the stack.
+  %tmp2 = add i32 %tmp, 1   ; increment it
+  store i32 %tmp2, i32* %X  ; store it back
+  ...
+
+
+

This code shows an example of how you can declare and manipulate a stack variable in the LLVM IR. Stack memory allocated with the alloca instruction is fully general: you can pass the address of the stack slot to functions, you can store it in other variables, etc. In our example above, we could rewrite the example to use the alloca technique to avoid using a PHI node:

+
@G = weak global i32 0   ; type of @G is i32*
+@H = weak global i32 0   ; type of @H is i32*
+
+define i32 @test(i1 %Condition) {
+entry:
+  %X = alloca i32           ; type of %X is i32*.
+  br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+  %X.0 = load i32, i32* @G
+  store i32 %X.0, i32* %X   ; Update X
+  br label %cond_next
+
+cond_false:
+  %X.1 = load i32, i32* @H
+  store i32 %X.1, i32* %X   ; Update X
+  br label %cond_next
+
+cond_next:
+  %X.2 = load i32, i32* %X  ; Read X
+  ret i32 %X.2
+}
+
+
+

With this, we have discovered a way to handle arbitrary mutable +variables without the need to create Phi nodes at all:

+
    +
  1. Each mutable variable becomes a stack allocation.

  2. +
  3. Each read of the variable becomes a load from the stack.

  4. +
  5. Each update of the variable becomes a store to the stack.

  6. +
  7. Taking the address of a variable just uses the stack address +directly.

  8. +
+

While this solution has solved our immediate problem, it introduced +another one: we have now apparently introduced a lot of stack traffic +for very simple and common operations, a major performance problem. +Fortunately for us, the LLVM optimizer has a highly-tuned optimization +pass named “mem2reg” that handles this case, promoting allocas like this +into SSA registers, inserting Phi nodes as appropriate. If you run this +example through the pass, for example, you’ll get:

+
$ llvm-as < example.ll | opt -mem2reg | llvm-dis
+@G = weak global i32 0
+@H = weak global i32 0
+
+define i32 @test(i1 %Condition) {
+entry:
+  br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+  %X.0 = load i32, i32* @G
+  br label %cond_next
+
+cond_false:
+  %X.1 = load i32, i32* @H
+  br label %cond_next
+
+cond_next:
+  %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+  ret i32 %X.01
+}
+
+
+

The mem2reg pass implements the standard “iterated dominance frontier” +algorithm for constructing SSA form and has a number of optimizations +that speed up (very common) degenerate cases. The mem2reg optimization +pass is the answer to dealing with mutable variables, and we highly +recommend that you depend on it. Note that mem2reg only works on +variables in certain circumstances:

+
    +
  1. mem2reg is alloca-driven: it looks for allocas and if it can handle +them, it promotes them. It does not apply to global variables or heap +allocations.

  2. +
  3. mem2reg only looks for alloca instructions in the entry block of the +function. Being in the entry block guarantees that the alloca is only +executed once, which makes analysis simpler.

  4. +
  5. mem2reg only promotes allocas whose uses are direct loads and stores. +If the address of the stack object is passed to a function, or if any +funny pointer arithmetic is involved, the alloca will not be +promoted.

  6. +
  7. mem2reg only works on allocas of first +class values (such as pointers, +scalars and vectors), and only if the array size of the allocation is +1 (or missing in the .ll file). mem2reg is not capable of promoting +structs or arrays to registers. Note that the “sroa” pass is +more powerful and can promote structs, “unions”, and arrays in many +cases.

  8. +
+

All of these properties are easy to satisfy for most imperative +languages, and we’ll illustrate it below with Kaleidoscope. The final +question you may be asking is: should I bother with this nonsense for my +front-end? Wouldn’t it be better if I just did SSA construction +directly, avoiding use of the mem2reg optimization pass? In short, we +strongly recommend that you use this technique for building SSA form, +unless there is an extremely good reason not to. Using this technique +is:

+
    +
  • Proven and well tested: clang uses this technique +for local mutable variables. As such, the most common clients of LLVM +are using this to handle the bulk of their variables. You can be sure +that bugs are found fast and fixed early.

  • +
  • Extremely Fast: mem2reg has a number of special cases that make it +fast in common cases as well as fully general. For example, it has +fast-paths for variables that are only used in a single block, +variables that only have one assignment point, good heuristics to +avoid insertion of unneeded phi nodes, etc.

  • +
  • Needed for debug info generation: Debug information in +LLVM relies on having the address of +the variable exposed so that debug info can be attached to it. This +technique dovetails very naturally with this style of debug info.

  • +
+

If nothing else, this makes it much easier to get your front-end up and +running, and is very simple to implement. Let’s extend Kaleidoscope with +mutable variables now!

+
+
+

7.4. Mutable Variables in Kaleidoscope

+

Now that we know the sort of problem we want to tackle, let’s see what +this looks like in the context of our little Kaleidoscope language. +We’re going to add two features:

+
    +
  1. The ability to mutate variables with the ‘=’ operator.

  2. +
  3. The ability to define new variables.

  4. +
+

While the first item is really what this is about, we only have +variables for incoming arguments as well as for induction variables, and +redefining those only goes so far :). Also, the ability to define new +variables is a useful thing regardless of whether you will be mutating +them. Here’s a motivating example that shows how we could use these:

+
# Define ':' for sequencing: as a low-precedence operator that ignores operands
+# and just returns the RHS.
+def binary : 1 (x y) y;
+
+# Recursive fib, we could do this before.
+def fib(x)
+  if (x < 3) then
+    1
+  else
+    fib(x-1)+fib(x-2);
+
+# Iterative fib.
+def fibi(x)
+  var a = 1, b = 1, c in
+  (for i = 3, i < x in
+     c = a + b :
+     a = b :
+     b = c) :
+  b;
+
+# Call it.
+fibi(10);
+
+
+

In order to mutate variables, we have to change our existing variables +to use the “alloca trick”. Once we have that, we’ll add our new +operator, then extend Kaleidoscope to support new variable definitions.

+
+
+

7.5. Adjusting Existing Variables for Mutation

+

The symbol table in Kaleidoscope is managed at code generation time by +the ‘NamedValues’ map. This map currently keeps track of the LLVM +“Value*” that holds the double value for the named variable. In order +to support mutation, we need to change this slightly, so that +NamedValues holds the memory location of the variable in question. +Note that this change is a refactoring: it changes the structure of the +code, but does not (by itself) change the behavior of the compiler. All +of these changes are isolated in the Kaleidoscope code generator.

+

At this point in Kaleidoscope’s development, it only supports variables +for two things: incoming arguments to functions and the induction +variable of ‘for’ loops. For consistency, we’ll allow mutation of these +variables in addition to other user-defined variables. This means that +these will both need memory locations.

+

To start our transformation of Kaleidoscope, we’ll change the +NamedValues map so that it maps to AllocaInst* instead of Value*. Once +we do this, the C++ compiler will tell us what parts of the code we need +to update:

+
static std::map<std::string, AllocaInst*> NamedValues;
+
+
+

Also, since we will need to create these allocas, we’ll use a helper +function that ensures that the allocas are created in the entry block of +the function:

+
/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+/// the function.  This is used for mutable variables etc.  The alloca is named
+/// VarName, is inserted at the start of the entry block, and has type double.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          const std::string &VarName) {
+  // A separate builder is used so the global Builder is not repositioned.
+  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+                   TheFunction->getEntryBlock().begin());
+  // Pass nullptr (not 0) for the array size so the argument cannot be misread
+  // as an address-space number.
+  return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), nullptr,
+                           VarName.c_str());
+}
+
+
+

This funny looking code creates an IRBuilder object that is pointing at +the first instruction (.begin()) of the entry block. It then creates an +alloca with the expected name and returns it. Because all values in +Kaleidoscope are doubles, there is no need to pass in a type to use.

+

With this in place, the first functionality change we want to make belongs to +variable references. In our new scheme, variables live on the stack, so +code generating a reference to them actually needs to produce a load +from the stack slot:

+
Value *VariableExprAST::codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  if (!V)
+    return LogErrorV("Unknown variable name");
+
+  // Load the value.
+  return Builder.CreateLoad(V, Name.c_str());
+}
+
+
+

As you can see, this is pretty straightforward. Now we need to update +the things that define the variables to set up the alloca. We’ll start +with ForExprAST::codegen() (see the full code listing for +the unabridged code):

+
Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+// Create an alloca for the variable in the entry block.
+AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+// Emit the start code first, without 'variable' in scope.
+Value *StartVal = Start->codegen();
+if (!StartVal)
+  return nullptr;
+
+// Store the value into the alloca.
+Builder.CreateStore(StartVal, Alloca);
+...
+
+// Compute the end condition.
+Value *EndCond = End->codegen();
+if (!EndCond)
+  return nullptr;
+
+// Reload, increment, and restore the alloca.  This handles the case where
+// the body of the loop mutates the variable.
+Value *CurVar = Builder.CreateLoad(Alloca);
+Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
+Builder.CreateStore(NextVar, Alloca);
+...
+
+
+

This code is virtually identical to the code before we allowed mutable +variables. The big difference is that we +no longer have to construct a PHI node, and we use load/store to access +the variable as needed.

+

To support mutable argument variables, we need to also make allocas for +them. The code for this is also pretty simple:

+
Function *FunctionAST::codegen() {
+  ...
+  Builder.SetInsertPoint(BB);
+
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args()) {
+    // Arg.getName() yields a StringRef, which does not convert to std::string
+    // implicitly; materialize the name once for the helper and the map key.
+    const std::string ArgName = Arg.getName().str();
+
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, ArgName);
+
+    // Store the initial value into the alloca.
+    Builder.CreateStore(&Arg, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[ArgName] = Alloca;
+  }
+
+  if (Value *RetVal = Body->codegen()) {
+    ...
+
+
+

For each argument, we make an alloca, store the input value to the +function into the alloca, and register the alloca as the memory location +for the argument. This loop runs inside FunctionAST::codegen(), +right after it sets up the entry block for the function.

+

The final missing piece is adding the mem2reg pass, which allows us to +get good codegen once again:

+
// Promote allocas to registers.
+TheFPM->add(createPromoteMemoryToRegisterPass());
+// Do simple "peephole" optimizations and bit-twiddling optzns.
+TheFPM->add(createInstructionCombiningPass());
+// Reassociate expressions.
+TheFPM->add(createReassociatePass());
+...
+
+
+

It is interesting to see what the code looks like before and after the +mem2reg optimization runs. For example, this is the before/after code +for our recursive fib function. Before the optimization:

+
define double @fib(double %x) {
+entry:
+  %x1 = alloca double
+  store double %x, double* %x1
+  %x2 = load double, double* %x1
+  %cmptmp = fcmp ult double %x2, 3.000000e+00
+  %booltmp = uitofp i1 %cmptmp to double
+  %ifcond = fcmp one double %booltmp, 0.000000e+00
+  br i1 %ifcond, label %then, label %else
+
+then:       ; preds = %entry
+  br label %ifcont
+
+else:       ; preds = %entry
+  %x3 = load double, double* %x1
+  %subtmp = fsub double %x3, 1.000000e+00
+  %calltmp = call double @fib(double %subtmp)
+  %x4 = load double, double* %x1
+  %subtmp5 = fsub double %x4, 2.000000e+00
+  %calltmp6 = call double @fib(double %subtmp5)
+  %addtmp = fadd double %calltmp, %calltmp6
+  br label %ifcont
+
+ifcont:     ; preds = %else, %then
+  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+  ret double %iftmp
+}
+
+
+

Here there is only one variable (x, the input argument) but you can +still see the extremely simple-minded code generation strategy we are +using. In the entry block, an alloca is created, and the initial input +value is stored into it. Each reference to the variable does a reload +from the stack. Also, note that we didn’t modify the if/then/else +expression, so it still inserts a PHI node. While we could make an +alloca for it, it is actually easier to create a PHI node for it, so we +still just make the PHI.

+

Here is the code after the mem2reg pass runs:

+
define double @fib(double %x) {
+entry:
+  %cmptmp = fcmp ult double %x, 3.000000e+00
+  %booltmp = uitofp i1 %cmptmp to double
+  %ifcond = fcmp one double %booltmp, 0.000000e+00
+  br i1 %ifcond, label %then, label %else
+
+then:
+  br label %ifcont
+
+else:
+  %subtmp = fsub double %x, 1.000000e+00
+  %calltmp = call double @fib(double %subtmp)
+  %subtmp5 = fsub double %x, 2.000000e+00
+  %calltmp6 = call double @fib(double %subtmp5)
+  %addtmp = fadd double %calltmp, %calltmp6
+  br label %ifcont
+
+ifcont:     ; preds = %else, %then
+  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+  ret double %iftmp
+}
+
+
+

This is a trivial case for mem2reg, since there are no redefinitions of +the variable. The point of showing this is to calm your tension about +inserting such blatant inefficiencies :).

+

After the rest of the optimizers run, we get:

+
define double @fib(double %x) {
+entry:
+  %cmptmp = fcmp ult double %x, 3.000000e+00
+  %booltmp = uitofp i1 %cmptmp to double
+  %ifcond = fcmp ueq double %booltmp, 0.000000e+00
+  br i1 %ifcond, label %else, label %ifcont
+
+else:
+  %subtmp = fsub double %x, 1.000000e+00
+  %calltmp = call double @fib(double %subtmp)
+  %subtmp5 = fsub double %x, 2.000000e+00
+  %calltmp6 = call double @fib(double %subtmp5)
+  %addtmp = fadd double %calltmp, %calltmp6
+  ret double %addtmp
+
+ifcont:
+  ret double 1.000000e+00
+}
+
+
+

Here we see that the simplifycfg pass decided to clone the return +instruction into the end of the ‘else’ block. This allowed it to +eliminate some branches and the PHI node.

+

Now that all symbol table references are updated to use stack variables, +we’ll add the assignment operator.

+
+
+

7.6. New Assignment Operator

+

With our current framework, adding a new assignment operator is really +simple. We will parse it just like any other binary operator, but handle +it internally (instead of allowing the user to define it). The first +step is to set a precedence:

+
int main() {
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['='] = 2;
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+
+
+

Now that the parser knows the precedence of the binary operator, it +takes care of all the parsing and AST generation. We just need to +implement codegen for the assignment operator. This looks like:

+
Value *BinaryExprAST::codegen() {
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS.get());
+    if (!LHSE)
+      return LogErrorV("destination of '=' must be a variable");
+
+
+

Unlike the rest of the binary operators, our assignment operator doesn’t +follow the “emit LHS, emit RHS, do computation” model. As such, it is +handled as a special case before the other binary operators are handled. +The other strange thing is that it requires the LHS to be a variable. It +is invalid to have “(x+1) = expr” - only things like “x = expr” are +allowed.

+
  // Codegen the RHS.
+  Value *Val = RHS->codegen();
+  if (!Val)
+    return nullptr;
+
+  // Look up the name.
+  Value *Variable = NamedValues[LHSE->getName()];
+  if (!Variable)
+    return LogErrorV("Unknown variable name");
+
+  Builder.CreateStore(Val, Variable);
+  return Val;
+}
+...
+
+
+

Once we have the variable, codegen’ing the assignment is +straightforward: we emit the RHS of the assignment, create a store, and +return the computed value. Returning a value allows for chained +assignments like “X = (Y = Z)”.

+

Now that we have an assignment operator, we can mutate loop variables +and arguments. For example, we can now run code like this:

+
# Function to print a double.
+extern printd(x);
+
+# Define ':' for sequencing: as a low-precedence operator that ignores operands
+# and just returns the RHS.
+def binary : 1 (x y) y;
+
+def test(x)
+  printd(x) :
+  x = 4 :
+  printd(x);
+
+test(123);
+
+
+

When run, this example prints “123” and then “4”, showing that we did +actually mutate the value! Okay, we have now officially implemented our +goal: getting this to work requires SSA construction in the general +case. However, to be really useful, we want the ability to define our +own local variables; let’s add this next!

+
+
+

7.7. User-defined Local Variables

+

Adding var/in is just like any other extension we made to +Kaleidoscope: we extend the lexer, the parser, the AST and the code +generator. The first step for adding our new ‘var/in’ construct is to +extend the lexer. As before, this is pretty trivial; the code looks like +this:

+
enum Token {
+  ...
+  // var definition
+  tok_var = -13
+...
+}
+...
+static int gettok() {
+...
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
+    if (IdentifierStr == "var")
+      return tok_var;
+    return tok_identifier;
+...
+
+
+

The next step is to define the AST node that we will construct. For +var/in, it looks like this:

+
/// VarExprAST - Expression class for var/in
+class VarExprAST : public ExprAST {
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+  std::unique_ptr<ExprAST> Body;
+
+public:
+  VarExprAST(std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
+             std::unique_ptr<ExprAST> Body)
+    : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
+
+  Value *codegen() override;
+};
+
+
+

var/in allows a list of names to be defined all at once, and each name +can optionally have an initializer value. As such, we capture this +information in the VarNames vector. Also, var/in has a body, and this body +is allowed to access the variables defined by the var/in.

+

With this in place, we can define the parser pieces. The first thing we +do is add it as a primary expression:

+
/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  default:
+    return LogError("unknown token when expecting an expression");
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case '(':
+    return ParseParenExpr();
+  case tok_if:
+    return ParseIfExpr();
+  case tok_for:
+    return ParseForExpr();
+  case tok_var:
+    return ParseVarExpr();
+  }
+}
+
+
+

Next we define ParseVarExpr:

+
/// varexpr ::= 'var' identifier ('=' expression)?
+///                   (',' identifier ('=' expression)?)* 'in' expression
+static std::unique_ptr<ExprAST> ParseVarExpr() {
+  getNextToken();  // eat the var.
+
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+
+  // At least one variable name is required.
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after var");
+
+
+

The first part of this code parses the list of identifier/expr pairs +into the local VarNames vector.

+
while (1) {
+  std::string Name = IdentifierStr;
+  getNextToken();  // eat identifier.
+
+  // Read the optional initializer.
+  std::unique_ptr<ExprAST> Init;
+  if (CurTok == '=') {
+    getNextToken(); // eat the '='.
+
+    Init = ParseExpression();
+    if (!Init) return nullptr;
+  }
+
+  VarNames.push_back(std::make_pair(Name, std::move(Init)));
+
+  // End of var list, exit loop.
+  if (CurTok != ',') break;
+  getNextToken(); // eat the ','.
+
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier list after var");
+}
+
+
+

Once all the variables are parsed, we then parse the body and create the +AST node:

+
  // At this point, we have to have 'in'.
+  if (CurTok != tok_in)
+    return LogError("expected 'in' keyword after 'var'");
+  getNextToken();  // eat 'in'.
+
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  return std::make_unique<VarExprAST>(std::move(VarNames),
+                                       std::move(Body));
+}
+
+
+

Now that we can parse and represent the code, we need to support +emission of LLVM IR for it. This code starts out with:

+
Value *VarExprAST::codegen() {
+  std::vector<AllocaInst *> OldBindings;
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+    const std::string &VarName = VarNames[i].first;
+    ExprAST *Init = VarNames[i].second.get();
+
+
+

Basically it loops over all the variables, installing them one at a +time. For each variable we put into the symbol table, we remember the +previous value that we replace in OldBindings.

+
  // Emit the initializer before adding the variable to scope, this prevents
+  // the initializer from referencing the variable itself, and permits stuff
+  // like this:
+  //  var a = 1 in
+  //    var a = a in ...   # refers to outer 'a'.
+  Value *InitVal;
+  if (Init) {
+    InitVal = Init->codegen();
+    if (!InitVal)
+      return nullptr;
+  } else { // If not specified, use 0.0.
+    InitVal = ConstantFP::get(TheContext, APFloat(0.0));
+  }
+
+  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+  Builder.CreateStore(InitVal, Alloca);
+
+  // Remember the old variable binding so that we can restore the binding when
+  // we unrecurse.
+  OldBindings.push_back(NamedValues[VarName]);
+
+  // Remember this binding.
+  NamedValues[VarName] = Alloca;
+}
+
+
+

There are more comments here than code. The basic idea is that we emit +the initializer, create the alloca, then update the symbol table to +point to it. Once all the variables are installed in the symbol table, +we evaluate the body of the var/in expression:

+
// Codegen the body, now that all vars are in scope.
+Value *BodyVal = Body->codegen();
+if (!BodyVal)
+  return nullptr;
+
+
+

Finally, before returning, we restore the previous variable bindings:

+
  // Pop all our variables from scope.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+    NamedValues[VarNames[i].first] = OldBindings[i];
+
+  // Return the body computation.
+  return BodyVal;
+}
+
+
+

The end result of all of this is that we get properly scoped variable +definitions, and we even (trivially) allow mutation of them :).

+

With this, we completed what we set out to do. Our nice iterative fib +example from the intro compiles and runs just fine. The mem2reg pass +optimizes all of our stack variables into SSA registers, inserting PHI +nodes where needed, and our front-end remains simple: no “iterated +dominance frontier” computation anywhere in sight.

+
+
+

7.8. Full Code Listing

+

Here is the complete code listing for our running example, enhanced with +mutable variables and var/in support. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
#include "../include/KaleidoscopeJIT.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::orc;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns the character code [0-255] for any character it does not
+// recognize as a known token; otherwise it returns one of these negative codes.
+enum Token {
+  tok_eof = -1, // end of input
+
+  // commands
+  tok_def = -2,    // keyword "def"
+  tok_extern = -3, // keyword "extern"
+
+  // primary
+  tok_identifier = -4, // name is left in IdentifierStr
+  tok_number = -5,     // value is left in NumVal
+
+  // control
+  tok_if = -6,   // keyword "if"
+  tok_then = -7, // keyword "then"
+  tok_else = -8, // keyword "else"
+  tok_for = -9,  // keyword "for"
+  tok_in = -10,  // keyword "in"
+
+  // operators
+  tok_binary = -11, // keyword "binary"
+  tok_unary = -12,  // keyword "unary"
+
+  // var definition
+  tok_var = -13 // keyword "var"
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+/// gettok - Lex and return the next token from standard input.
+///
+/// Keywords, identifiers, numbers and end of file are reported with the
+/// negative Token codes; any other character is returned as its own value.
+/// IdentifierStr is filled in for identifiers and keywords, NumVal for numbers.
+static int gettok() {
+  // One character of lookahead, kept across calls.
+  static int LastChar = ' ';
+
+  // Discard leading whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  // Identifier or keyword: [a-zA-Z][a-zA-Z0-9]*
+  if (isalpha(LastChar)) {
+    IdentifierStr = LastChar;
+    for (LastChar = getchar(); isalnum(LastChar); LastChar = getchar())
+      IdentifierStr += LastChar;
+
+    // Reserved words and the token each one maps to.
+    static const struct {
+      const char *Spelling;
+      int Tok;
+    } Keywords[] = {
+        {"def", tok_def},       {"extern", tok_extern}, {"if", tok_if},
+        {"then", tok_then},     {"else", tok_else},     {"for", tok_for},
+        {"in", tok_in},         {"binary", tok_binary}, {"unary", tok_unary},
+        {"var", tok_var},
+    };
+    for (const auto &Keyword : Keywords)
+      if (IdentifierStr == Keyword.Spelling)
+        return Keyword.Tok;
+    return tok_identifier;
+  }
+
+  // Number: [0-9.]+
+  if (isdigit(LastChar) || LastChar == '.') {
+    std::string NumStr;
+    while (isdigit(LastChar) || LastChar == '.') {
+      NumStr += LastChar;
+      LastChar = getchar();
+    }
+
+    NumVal = strtod(NumStr.c_str(), nullptr);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment: skip everything up to the end of the line (or end of file).
+    LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r')
+      LastChar = getchar();
+
+    // More input follows the comment, so lex the next real token.
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // End of file is reported without consuming it.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Anything else is handed back as its character value.
+  const int Current = LastChar;
+  LastChar = getchar();
+  return Current;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExprAST - Abstract base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() = default; // Virtual: nodes are held through ExprAST pointers.
+
+  virtual Value *codegen() = 0; // Overridden by each concrete expression node.
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val; // The literal's value.
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+
+  Value *codegen() override;
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name; // Name of the referenced variable.
+
+public:
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+
+  Value *codegen() override;
+  const std::string &getName() const { return Name; } // Read-only access to Name.
+};
+
+/// UnaryExprAST - Expression class for a unary operator applied to one operand.
+class UnaryExprAST : public ExprAST {
+  char Opcode;                      // The operator character.
+  std::unique_ptr<ExprAST> Operand; // Owned operand expression.
+
+public:
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+
+  Value *codegen() override;
+};
+
+/// BinaryExprAST - Expression class for a binary operator and its two operands.
+class BinaryExprAST : public ExprAST {
+  char Op;                           // The operator character.
+  std::unique_ptr<ExprAST> LHS, RHS; // Owned left and right operand expressions.
+
+public:
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+
+  Value *codegen() override;
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;                         // Name of the function being called.
+  std::vector<std::unique_ptr<ExprAST>> Args; // Owned argument expressions.
+
+public:
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : Callee(Callee), Args(std::move(Args)) {}
+
+  Value *codegen() override;
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  std::unique_ptr<ExprAST> Cond, Then, Else; // Owned condition and branch expressions.
+
+public:
+  IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+            std::unique_ptr<ExprAST> Else)
+      : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+
+  Value *codegen() override;
+};
+
+/// ForExprAST - Expression class for for/in loops.
+class ForExprAST : public ExprAST {
+  std::string VarName;                             // Name of the loop variable.
+  std::unique_ptr<ExprAST> Start, End, Step, Body; // Owned sub-expressions.
+
+public:
+  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+             std::unique_ptr<ExprAST> Body)
+      : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+        Step(std::move(Step)), Body(std::move(Body)) {}
+
+  Value *codegen() override;
+};
+
+/// VarExprAST - Expression class for var/in: (name, initializer) pairs + body.
+class VarExprAST : public ExprAST {
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+  std::unique_ptr<ExprAST> Body; // Owned body expression of the var/in.
+
+public:
+  VarExprAST(
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
+      std::unique_ptr<ExprAST> Body)
+      : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
+
+  Value *codegen() override;
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as if it is an operator.
+class PrototypeAST {
+  std::string Name;              // Function name.
+  std::vector<std::string> Args; // Argument names, in declaration order.
+  bool IsOperator;               // Whether this prototype declares an operator.
+  unsigned Precedence; // Precedence if a binary op.
+
+public:
+  PrototypeAST(const std::string &Name, std::vector<std::string> Args,
+               bool IsOperator = false, unsigned Prec = 0)
+      : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
+        Precedence(Prec) {}
+
+  Function *codegen();
+  const std::string &getName() const { return Name; }
+
+  bool isUnaryOp() const { return IsOperator && Args.size() == 1; } // Operator with one argument.
+  bool isBinaryOp() const { return IsOperator && Args.size() == 2; } // Operator with two arguments.
+
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp()); // Only meaningful for operator prototypes.
+    return Name[Name.size() - 1]; // The operator is the last character of Name.
+  }
+
+  unsigned getBinaryPrecedence() const { return Precedence; }
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto; // Owned prototype of the function.
+  std::unique_ptr<ExprAST> Body;       // Owned body expression.
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+
+  Function *codegen();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - A one-token lookahead buffer.  CurTok holds the token
+/// the parser is currently examining; getNextToken pulls the next token from
+/// the lexer, stores it in CurTok and returns it.
+static int CurTok;
+static int getNextToken() {
+  CurTok = gettok();
+  return CurTok;
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.  Only strictly positive values denote a usable operator.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+/// Returns -1 when CurTok is not ASCII or is not a declared binary operator.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.  Use find() rather than operator[] so
+  // that merely probing a token does not insert a zero-precedence entry into
+  // the table.
+  auto It = BinopPrecedence.find(static_cast<char>(CurTok));
+  if (It == BinopPrecedence.end() || It->second <= 0)
+    return -1;
+  return It->second;
+}
+
+/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+///
+/// Builds a NumberExprAST from the current NumVal, then advances past the
+/// number token.
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  std::unique_ptr<ExprAST> Number = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // step over the number
+  return Number;
+}
+
+/// parenexpr ::= '(' expression ')'
+///
+/// Returns the inner expression itself; no dedicated AST node is created for
+/// the parentheses.
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // skip '('
+  std::unique_ptr<ExprAST> Inner = ParseExpression();
+  if (Inner == nullptr)
+    return nullptr;
+
+  if (CurTok == ')') {
+    getNextToken(); // skip ')'
+    return Inner;
+  }
+  return LogError("expected ')'");
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+///
+/// Produces a VariableExprAST for a bare identifier, or a CallExprAST when the
+/// identifier is followed by a parenthesized argument list.
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  const std::string IdName = IdentifierStr;
+  getNextToken(); // skip the identifier
+
+  // No '(' follows: this is a plain variable reference.
+  if (CurTok != '(')
+    return std::make_unique<VariableExprAST>(IdName);
+
+  // Otherwise it is a call; collect the comma-separated arguments.
+  getNextToken(); // skip '('
+  std::vector<std::unique_ptr<ExprAST>> Args;
+  bool ExpectArg = CurTok != ')';
+  while (ExpectArg) {
+    auto Arg = ParseExpression();
+    if (!Arg)
+      return nullptr;
+    Args.push_back(std::move(Arg));
+
+    if (CurTok == ')') {
+      ExpectArg = false;
+    } else if (CurTok != ',') {
+      return LogError("Expected ')' or ',' in argument list");
+    } else {
+      getNextToken(); // skip ','; another argument must follow
+    }
+  }
+
+  getNextToken(); // skip ')'
+
+  return std::make_unique<CallExprAST>(IdName, std::move(Args));
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+///
+/// All three sub-expressions are required; a missing 'then' or 'else' keyword
+/// is reported through LogError.
+static std::unique_ptr<ExprAST> ParseIfExpr() {
+  getNextToken(); // skip 'if'
+
+  std::unique_ptr<ExprAST> Condition = ParseExpression();
+  if (Condition == nullptr)
+    return nullptr;
+
+  if (CurTok != tok_then)
+    return LogError("expected then");
+  getNextToken(); // skip 'then'
+
+  std::unique_ptr<ExprAST> ThenExpr = ParseExpression();
+  if (ThenExpr == nullptr)
+    return nullptr;
+
+  if (CurTok != tok_else)
+    return LogError("expected else");
+  getNextToken(); // skip 'else'
+
+  std::unique_ptr<ExprAST> ElseExpr = ParseExpression();
+  if (ElseExpr == nullptr)
+    return nullptr;
+
+  return std::make_unique<IfExprAST>(std::move(Condition), std::move(ThenExpr),
+                                      std::move(ElseExpr));
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+///
+/// The third (step) expression is optional; when it is absent a null Step is
+/// stored in the resulting ForExprAST.
+static std::unique_ptr<ExprAST> ParseForExpr() {
+  getNextToken(); // skip 'for'
+
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after for");
+  const std::string LoopVar = IdentifierStr;
+  getNextToken(); // skip the identifier
+
+  if (CurTok != '=')
+    return LogError("expected '=' after for");
+  getNextToken(); // skip '='
+
+  std::unique_ptr<ExprAST> StartExpr = ParseExpression();
+  if (StartExpr == nullptr)
+    return nullptr;
+
+  if (CurTok != ',')
+    return LogError("expected ',' after for start value");
+  getNextToken(); // skip ','
+
+  std::unique_ptr<ExprAST> EndExpr = ParseExpression();
+  if (EndExpr == nullptr)
+    return nullptr;
+
+  // Optional step value, introduced by a second comma.
+  std::unique_ptr<ExprAST> StepExpr;
+  if (CurTok == ',') {
+    getNextToken(); // skip ','
+    StepExpr = ParseExpression();
+    if (StepExpr == nullptr)
+      return nullptr;
+  }
+
+  if (CurTok != tok_in)
+    return LogError("expected 'in' after for");
+  getNextToken(); // skip 'in'
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (BodyExpr == nullptr)
+    return nullptr;
+
+  return std::make_unique<ForExprAST>(LoopVar, std::move(StartExpr),
+                                       std::move(EndExpr), std::move(StepExpr),
+                                       std::move(BodyExpr));
+}
+
+/// varexpr ::= 'var' identifier ('=' expression)?
+///                    (',' identifier ('=' expression)?)* 'in' expression
+///
+/// Each variable is recorded with its initializer, which is left null when no
+/// '=' clause is given.
+static std::unique_ptr<ExprAST> ParseVarExpr() {
+  getNextToken(); // skip 'var'
+
+  // The list must start with a variable name.
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after var");
+
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> Bindings;
+  bool MoreVars = true;
+  while (MoreVars) {
+    std::string VarName = IdentifierStr;
+    getNextToken(); // skip the identifier
+
+    // Optional '= expression' initializer.
+    std::unique_ptr<ExprAST> Initializer;
+    if (CurTok == '=') {
+      getNextToken(); // skip '='
+      Initializer = ParseExpression();
+      if (Initializer == nullptr)
+        return nullptr;
+    }
+
+    Bindings.emplace_back(VarName, std::move(Initializer));
+
+    if (CurTok != ',') {
+      // No further names: the list is complete.
+      MoreVars = false;
+    } else {
+      getNextToken(); // skip ','
+      if (CurTok != tok_identifier)
+        return LogError("expected identifier list after var");
+    }
+  }
+
+  // The variable list must be followed by 'in'.
+  if (CurTok != tok_in)
+    return LogError("expected 'in' keyword after 'var'");
+  getNextToken(); // skip 'in'
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (BodyExpr == nullptr)
+    return nullptr;
+
+  return std::make_unique<VarExprAST>(std::move(Bindings), std::move(BodyExpr));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+///
+/// Dispatches on the current token to the matching sub-parser; any other
+/// token is reported as an error.
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  case '(':
+    return ParseParenExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_if:
+    return ParseIfExpr();
+  case tok_for:
+    return ParseForExpr();
+  case tok_var:
+    return ParseVarExpr();
+  default:
+    return LogError("unknown token when expecting an expression");
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+///
+/// Any ASCII token other than '(' and ',' is treated as a unary operator
+/// character applied to the unary expression that follows it.
+static std::unique_ptr<ExprAST> ParseUnary() {
+  const bool IsOperatorTok = isascii(CurTok) && CurTok != '(' && CurTok != ',';
+  if (!IsOperatorTok)
+    return ParsePrimary();
+
+  // Remember the operator character, then parse its operand recursively.
+  const int Opc = CurTok;
+  getNextToken();
+  std::unique_ptr<ExprAST> Operand = ParseUnary();
+  if (Operand == nullptr)
+    return nullptr;
+  return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+///
+/// Operator-precedence parsing: repeatedly folds "<op> unary" pairs onto LHS
+/// for as long as the pending operator's precedence is at least ExprPrec.
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  for (;;) {
+    const int OpPrec = GetTokPrecedence();
+
+    // Stop once the pending token is not a binop, or binds less tightly than
+    // the caller requires.
+    if (OpPrec < ExprPrec)
+      return LHS;
+
+    // The current token is a binary operator; remember and consume it.
+    const int Operator = CurTok;
+    getNextToken();
+
+    // The operand following the operator.
+    std::unique_ptr<ExprAST> RHS = ParseUnary();
+    if (RHS == nullptr)
+      return nullptr;
+
+    // When the operator after RHS binds more tightly than this one, it takes
+    // RHS as its own left-hand side first.
+    if (GetTokPrecedence() > OpPrec) {
+      RHS = ParseBinOpRHS(OpPrec + 1, std::move(RHS));
+      if (RHS == nullptr)
+        return nullptr;
+    }
+
+    // Combine both sides into the new left-hand side.
+    LHS = std::make_unique<BinaryExprAST>(Operator, std::move(LHS),
+                                          std::move(RHS));
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+/// Parses a leading unary expression and then any binary-operator tail,
+/// starting at the lowest precedence (0).
+static std::unique_ptr<ExprAST> ParseExpression() {
+  std::unique_ptr<ExprAST> First = ParseUnary();
+  return First ? ParseBinOpRHS(0, std::move(First)) : nullptr;
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+///
+/// Parses a function or operator prototype.  Operator prototypes are named
+/// "unary" or "binary" followed by the operator character.  Returns null
+/// (after logging through LogErrorP) on any syntax error.
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  // Precedence used for a binary operator when none is written explicitly.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  default:
+    return LogErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    // The operator itself is any single ASCII token.
+    if (!isascii(CurTok))
+      return LogErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    // The operator itself is any single ASCII token.
+    if (!isascii(CurTok))
+      return LogErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return LogErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+
+  if (CurTok != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Collect argument names.  The first getNextToken() call in the loop
+  // condition also consumes the '('; names are not separated by commas here.
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken(); // eat ')'.
+
+  // Verify right number of names for operator.  Kind doubles as the required
+  // operand count (1 for unary, 2 for binary).
+  if (Kind && ArgNames.size() != Kind)
+    return LogErrorP("Invalid number of operands for operator");
+
+  return std::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
+                                         BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+///
+/// Returns null when either the prototype or the body fails to parse.
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // skip 'def'
+
+  std::unique_ptr<PrototypeAST> Proto = ParsePrototype();
+  if (Proto == nullptr)
+    return nullptr;
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (BodyExpr == nullptr)
+    return nullptr;
+
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(BodyExpr));
+}
+
+/// toplevelexpr ::= expression
+///
+/// Wraps the parsed expression in a zero-argument function named
+/// "__anon_expr".
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  std::unique_ptr<ExprAST> Expr = ParseExpression();
+  if (Expr == nullptr)
+    return nullptr;
+
+  // Anonymous prototype with an empty argument list.
+  auto AnonProto = std::make_unique<PrototypeAST>("__anon_expr",
+                                                  std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(AnonProto), std::move(Expr));
+}
+
+/// external ::= 'extern' prototype
+///
+/// Skips the 'extern' keyword and parses the prototype that follows it.
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // skip 'extern'
+  std::unique_ptr<PrototypeAST> Proto = ParsePrototype();
+  return Proto;
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+// Global code-generation state.
+
+// Context, module and IR builder for the module currently being generated;
+// all three are re-created by InitializeModuleAndPassManager().
+static std::unique_ptr<LLVMContext> TheContext;
+static std::unique_ptr<Module> TheModule;
+static std::unique_ptr<IRBuilder<>> Builder;
+// Maps each variable name currently in scope to its stack slot (alloca).
+// Cleared at the start of every function body by FunctionAST::codegen().
+static std::map<std::string, AllocaInst *> NamedValues;
+// Per-function optimization pipeline, rebuilt together with the module.
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+// The JIT; created once in main().
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+// Prototype registered for each function name (by definitions and externs);
+// getFunction() uses it to re-emit a declaration into the current module.
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
+// Helper used to unwrap Error/Expected results returned by the JIT API.
+static ExitOnError ExitOnErr;
+
+Value *LogErrorV(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+// Resolves a function name to an llvm::Function in the current module.
+// Prefers a function the module already contains; otherwise emits a
+// declaration from the registered prototype, if any.  Returns null when the
+// name is unknown.
+Function *getFunction(std::string Name) {
+  Function *Existing = TheModule->getFunction(Name);
+  if (Existing)
+    return Existing;
+
+  // Not in this module yet: fall back to a previously registered prototype.
+  auto ProtoIt = FunctionProtos.find(Name);
+  return ProtoIt == FunctionProtos.end() ? nullptr
+                                         : ProtoIt->second->codegen();
+}
+
+/// CreateEntryBlockAlloca - Emits an alloca of type double, named VarName, at
+/// the very beginning of TheFunction's entry block.  Used to give mutable
+/// variables a stack slot.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          StringRef VarName) {
+  BasicBlock &Entry = TheFunction->getEntryBlock();
+  // A temporary builder, so the global Builder's insert point is untouched.
+  IRBuilder<> EntryBuilder(&Entry, Entry.begin());
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  return EntryBuilder.CreateAlloca(DoubleTy, nullptr, VarName);
+}
+
+// A numeric literal becomes a floating-point constant built from Val.
+Value *NumberExprAST::codegen() {
+  APFloat Literal(Val);
+  return ConstantFP::get(*TheContext, Literal);
+}
+
+// Emits a load of the named variable from its stack slot.  Reports
+// "Unknown variable name" and returns null when no slot is bound to Name.
+Value *VariableExprAST::codegen() {
+  // Look this variable up in the function.  find() is used instead of
+  // operator[] so that referencing an unknown name does not insert a null
+  // entry into the symbol table.
+  auto It = NamedValues.find(Name);
+  if (It == NamedValues.end() || !It->second)
+    return LogErrorV("Unknown variable name");
+  AllocaInst *A = It->second;
+
+  // Load the value.
+  return Builder->CreateLoad(A->getAllocatedType(), A, Name.c_str());
+}
+
+// A unary operator is lowered to a call to the function named
+// "unary<opcode>", passing the operand's value.
+Value *UnaryExprAST::codegen() {
+  Value *OperandV = Operand->codegen();
+  if (OperandV == nullptr)
+    return nullptr;
+
+  std::string OperatorFn("unary");
+  OperatorFn += Opcode;
+  if (Function *F = getFunction(OperatorFn))
+    return Builder->CreateCall(F, OperandV, "unop");
+
+  return LogErrorV("Unknown unary operator");
+}
+
+// Emits code for a binary expression.  '=' is handled as assignment to a
+// variable; '+', '-', '*' and '<' map to built-in floating-point
+// instructions; any other operator becomes a call to the function named
+// "binary<op>".
+Value *BinaryExprAST::codegen() {
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    // This assume we're building without RTTI because LLVM builds that way by
+    // default.  If you build LLVM with RTTI this can be changed to a
+    // dynamic_cast for automatic error checking.
+    // NOTE(review): static_cast never yields null for a non-null pointer, so
+    // the check below cannot reject a non-variable LHS (e.g. "1 = 2"); such
+    // input would be treated as a VariableExprAST.  Confirm and guard if
+    // needed.
+    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
+    if (!LHSE)
+      return LogErrorV("destination of '=' must be a variable");
+    // Codegen the RHS.
+    Value *Val = RHS->codegen();
+    if (!Val)
+      return nullptr;
+
+    // Look up the name.
+    Value *Variable = NamedValues[LHSE->getName()];
+    if (!Variable)
+      return LogErrorV("Unknown variable name");
+
+    // The assignment expression evaluates to the stored value.
+    Builder->CreateStore(Val, Variable);
+    return Val;
+  }
+
+  // Both operands are emitted before either result is checked.
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
+
+  switch (Op) {
+  case '+':
+    return Builder->CreateFAdd(L, R, "addtmp");
+  case '-':
+    return Builder->CreateFSub(L, R, "subtmp");
+  case '*':
+    return Builder->CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder->CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
+  default:
+    break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = getFunction(std::string("binary") + Op);
+  // NOTE(review): this is only checked by an assert; with assertions disabled
+  // a missing operator function would reach CreateCall as a null pointer.
+  assert(F && "binary operator not found!");
+
+  Value *Ops[] = {L, R};
+  return Builder->CreateCall(F, Ops, "binop");
+}
+
+// Emits a call to the named function after checking that it exists and that
+// the number of arguments matches its declaration.
+Value *CallExprAST::codegen() {
+  // Resolve the callee by name.
+  Function *CalleeF = getFunction(Callee);
+  if (CalleeF == nullptr)
+    return LogErrorV("Unknown function referenced");
+
+  // The call must supply exactly as many arguments as the callee declares.
+  if (Args.size() != CalleeF->arg_size())
+    return LogErrorV("Incorrect # arguments passed");
+
+  // Evaluate the arguments left to right, stopping at the first failure.
+  std::vector<Value *> ArgsV;
+  for (auto &ArgExpr : Args) {
+    Value *ArgV = ArgExpr->codegen();
+    if (ArgV == nullptr)
+      return nullptr;
+    ArgsV.push_back(ArgV);
+  }
+
+  return Builder->CreateCall(CalleeF, ArgsV, "calltmp");
+}
+
+// Emits an if/then/else expression as a conditional branch to "then" and
+// "else" blocks that both jump to "ifcont", where a PHI node selects the
+// value of whichever branch ran.  Returns null if any sub-expression fails.
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  CondV = Builder->CreateFCmpONE(
+      CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.  The 'else' and 'ifcont' blocks are created without
+  // a parent here and appended to the function further down.
+  BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
+  BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
+
+  Builder->CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder->SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->codegen();
+  // NOTE(review): on this early return (and the one for ElseV below) the
+  // blocks not yet appended to the function are not released here -- confirm
+  // whether that is acceptable.
+  if (!ThenV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder->GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder->SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder->GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder->SetInsertPoint(MergeBB);
+  PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
+
+  // One incoming value per predecessor: the block each branch ended in.
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+// Output for-loop as:
+//   var = alloca double
+//   ...
+//   start = startexpr
+//   store start -> var
+//   goto loop
+// loop:
+//   ...
+//   bodyexpr
+//   ...
+//   step = stepexpr
+//   endcond = endexpr
+//
+//   curvar = load var
+//   nextvar = curvar + step
+//   store nextvar -> var
+//   br endcond, loop, afterloop
+// afterloop:
+//
+// The body is emitted before the end condition is evaluated, so it always
+// runs at least once.  The expression itself always evaluates to 0.0.
+Value *ForExprAST::codegen() {
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create an alloca for the variable in the entry block.
+  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
+
+  // Store the value into the alloca.
+  Builder->CreateStore(StartVal, Alloca);
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder->CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder->SetInsertPoint(LoopBB);
+
+  // Within the loop, the variable is bound to the alloca created above.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  AllocaInst *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Alloca;
+
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  // NOTE(review): the early returns from here on leave the loop variable's
+  // binding in NamedValues without restoring OldVal.
+  if (!Body->codegen())
+    return nullptr;
+
+  // Emit the step value.
+  Value *StepVal = nullptr;
+  if (Step) {
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
+  }
+
+  // Compute the end condition.  This is evaluated before the increment below,
+  // so it sees the loop variable's value from the iteration that just ran.
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
+
+  // Reload, increment, and restore the alloca.  This handles the case where
+  // the body of the loop mutates the variable.
+  Value *CurVar =
+      Builder->CreateLoad(Alloca->getAllocatedType(), Alloca, VarName.c_str());
+  Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar");
+  Builder->CreateStore(NextVar, Alloca);
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  EndCond = Builder->CreateFCmpONE(
+      EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
+
+  // Create the "after loop" block and insert it.
+  BasicBlock *AfterBB =
+      BasicBlock::Create(*TheContext, "afterloop", TheFunction);
+
+  // Insert the conditional branch at the end of the current block (the block
+  // in which the loop body finished).
+  Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder->SetInsertPoint(AfterBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(*TheContext));
+}
+
+// Emits a 'var ... in ...' expression: each variable gets a stack slot
+// initialized from its initializer (0.0 when absent), the body is emitted
+// with those variables in scope, and the previous bindings are restored
+// afterwards.  Returns the body's value, or null on error.
+Value *VarExprAST::codegen() {
+  std::vector<AllocaInst *> OldBindings;
+  OldBindings.reserve(VarNames.size());
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+    const std::string &VarName = VarNames[i].first;
+    ExprAST *Init = VarNames[i].second.get();
+
+    // Emit the initializer before adding the variable to scope, this prevents
+    // the initializer from referencing the variable itself, and permits stuff
+    // like this:
+    //  var a = 1 in
+    //    var a = a in ...   # refers to outer 'a'.
+    Value *InitVal;
+    if (Init) {
+      InitVal = Init->codegen();
+      if (!InitVal)
+        return nullptr;
+    } else { // If not specified, use 0.0.
+      InitVal = ConstantFP::get(*TheContext, APFloat(0.0));
+    }
+
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+    Builder->CreateStore(InitVal, Alloca);
+
+    // Remember the old variable binding so that we can restore the binding when
+    // we unrecurse.
+    OldBindings.push_back(NamedValues[VarName]);
+
+    // Remember this binding.
+    NamedValues[VarName] = Alloca;
+  }
+
+  // Codegen the body, now that all vars are in scope.
+  Value *BodyVal = Body->codegen();
+  if (!BodyVal)
+    return nullptr;
+
+  // Pop all our variables from scope.  Bindings are undone in reverse order of
+  // registration: when the same name appears more than once in the list, the
+  // saved binding of a later entry is the alloca of an earlier one, so only a
+  // reverse walk ends with the binding that was in effect before this
+  // expression.  Names that had no previous binding are removed, matching how
+  // ForExprAST::codegen unshadows its loop variable.
+  for (size_t i = VarNames.size(); i-- > 0;) {
+    const std::string &VarName = VarNames[i].first;
+    if (OldBindings[i])
+      NamedValues[VarName] = OldBindings[i];
+    else
+      NamedValues.erase(VarName);
+  }
+
+  // Return the body computation.
+  return BodyVal;
+}
+
+// Declares the function in the current module with external linkage.  Every
+// parameter and the return value have type double; the parameters are named
+// after the prototype's argument names.
+Function *PrototypeAST::codegen() {
+  // Function type: double(double, ..., double), one parameter per argument.
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  std::vector<Type *> ParamTys(Args.size(), DoubleTy);
+  FunctionType *FT = FunctionType::get(DoubleTy, ParamTys, false);
+
+  Function *F =
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
+
+  // Name each IR parameter after the corresponding prototype argument.
+  auto NameIt = Args.begin();
+  for (auto &Param : F->args()) {
+    Param.setName(*NameIt);
+    ++NameIt;
+  }
+
+  return F;
+}
+
+// Emits the function's declaration and body into the current module, verifies
+// it and runs the per-function optimization passes.  Returns the generated
+// function, or null (after removing the partial function) when the body
+// fails to generate.
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
+
+  // If this is an operator, install it.  This happens before the body is
+  // generated, so the operator's precedence is already registered while the
+  // body is emitted.
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
+  Builder->SetInsertPoint(BB);
+
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args()) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
+
+    // Store the initial value into the alloca.
+    Builder->CreateStore(&Arg, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[std::string(Arg.getName())] = Alloca;
+  }
+
+  if (Value *RetVal = Body->codegen()) {
+    // Finish off the function.
+    Builder->CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    // NOTE(review): the result of verifyFunction is not inspected here.
+    verifyFunction(*TheFunction);
+
+    // Run the optimizer on the function.
+    TheFPM->run(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+
+  // Undo the operator registration made above.
+  // NOTE(review): the FunctionProtos entry stored at the top of this function
+  // is left in place on this path -- confirm that is intended.
+  if (P.isBinaryOp())
+    BinopPrecedence.erase(P.getOperatorName());
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+// Creates a fresh context, module, IR builder and per-function pass pipeline,
+// replacing whatever the corresponding globals held before.
+static void InitializeModuleAndPassManager() {
+  // Fresh context and module, using the JIT's data layout.
+  TheContext = std::make_unique<LLVMContext>();
+  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
+  TheModule->setDataLayout(TheJIT->getDataLayout());
+
+  // Builder that emits IR into the new context.
+  Builder = std::make_unique<IRBuilder<>>(*TheContext);
+
+  // Function pass manager bound to the new module; passes run in the order
+  // they are added.
+  TheFPM = std::make_unique<legacy::FunctionPassManager>(TheModule.get());
+  legacy::FunctionPassManager &FPM = *TheFPM;
+  FPM.add(createPromoteMemoryToRegisterPass()); // allocas -> registers
+  FPM.add(createInstructionCombiningPass());    // peephole / bit-twiddling
+  FPM.add(createReassociatePass());             // reassociate expressions
+  FPM.add(createGVNPass());                     // common subexpressions
+  FPM.add(createCFGSimplificationPass());       // simplify control flow
+
+  FPM.doInitialization();
+}
+
+// Handles a top-level 'def': parses and generates the function, prints its
+// IR, hands the finished module to the JIT and opens a new module.
+static void HandleDefinition() {
+  auto FnAST = ParseDefinition();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  Function *FnIR = FnAST->codegen();
+  if (!FnIR)
+    return;
+
+  fprintf(stderr, "Read function definition:");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // The JIT takes ownership of the module and context, so fresh ones are
+  // created right afterwards.
+  ExitOnErr(TheJIT->addModule(
+      ThreadSafeModule(std::move(TheModule), std::move(TheContext))));
+  InitializeModuleAndPassManager();
+}
+
+// Handles a top-level 'extern': parses the prototype, emits its declaration,
+// prints it and registers the prototype in FunctionProtos.
+static void HandleExtern() {
+  auto ProtoAST = ParseExtern();
+  if (!ProtoAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  Function *FnIR = ProtoAST->codegen();
+  if (!FnIR)
+    return;
+
+  fprintf(stderr, "Read extern: ");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // Keep the prototype so later modules can re-declare the function.
+  FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+}
+
+// Handles a top-level expression: wraps it in the anonymous function
+// "__anon_expr", JIT-compiles and runs it, prints the result, and removes the
+// module from the JIT again.
+static void HandleTopLevelExpression() {
+  auto FnAST = ParseTopLevelExpr();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  if (!FnAST->codegen())
+    return;
+
+  // A dedicated ResourceTracker lets the JIT'd memory of this one expression
+  // be released once it has been executed.
+  auto RT = TheJIT->getMainJITDylib().createResourceTracker();
+
+  auto TSM = ThreadSafeModule(std::move(TheModule), std::move(TheContext));
+  ExitOnErr(TheJIT->addModule(std::move(TSM), RT));
+  InitializeModuleAndPassManager();
+
+  // Find the anonymous function in the JIT.
+  auto ExprSymbol = ExitOnErr(TheJIT->lookup("__anon_expr"));
+
+  // Treat the symbol's address as a native function that takes no arguments
+  // and returns a double, then call it.
+  using ExprFn = double (*)();
+  ExprFn FP = (ExprFn)(intptr_t)ExprSymbol.getAddress();
+  fprintf(stderr, "Evaluated to %f\n", FP());
+
+  // Drop the anonymous expression's module from the JIT.
+  ExitOnErr(RT->remove());
+}
+
+/// top ::= definition | external | expression | ';'
+///
+/// Prints a prompt and dispatches on the current token until end of input.
+static void MainLoop() {
+  for (;;) {
+    fprintf(stderr, "ready> ");
+
+    if (CurTok == tok_eof)
+      return;
+
+    if (CurTok == ';') {
+      getNextToken(); // top-level semicolons are ignored
+      continue;
+    }
+
+    if (CurTok == tok_def)
+      HandleDefinition();
+    else if (CurTok == tok_extern)
+      HandleExtern();
+    else
+      HandleTopLevelExpression();
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+
+/// putchard - Writes X, converted to a char, to stderr and returns 0.
+extern "C" DLLEXPORT double putchard(double X) {
+  const char Ch = (char)X;
+  fputc(Ch, stderr);
+  return 0.0;
+}
+
+/// printd - Prints X to stderr using the format "%f\n" and returns 0.
+extern "C" DLLEXPORT double printd(double X) {
+  static const char Format[] = "%f\n";
+  fprintf(stderr, Format, X);
+  return 0.0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+// Program entry point: initializes the native target, installs the built-in
+// binary operators, creates the JIT and the first module, then runs the
+// interactive loop.
+int main() {
+  InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
+  InitializeNativeTargetAsmParser();
+
+  // Built-in binary operators; 1 is the lowest precedence.
+  static const struct {
+    char Op;
+    int Prec;
+  } BuiltinOps[] = {
+      {'=', 2}, {'<', 10}, {'+', 20}, {'-', 20}, {'*', 40}, // '*' is highest.
+  };
+  for (const auto &Entry : BuiltinOps)
+    BinopPrecedence[Entry.Op] = Entry.Prec;
+
+  // Show the first prompt and read the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  TheJIT = ExitOnErr(KaleidoscopeJIT::Create());
+
+  InitializeModuleAndPassManager();
+
+  // Hand control to the interpreter loop.
+  MainLoop();
+
+  return 0;
+}
+
+
+

Next: Compiling to Object Code

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl08.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl08.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl08.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl08.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,1607 @@ + + + + + + + + + 8. Kaleidoscope: Compiling to Object Code — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

8. Kaleidoscope: Compiling to Object Code

+ +
+

8.1. Chapter 8 Introduction

+

Welcome to Chapter 8 of the “Implementing a language with LLVM” tutorial. This chapter describes how to compile our +language down to object files.

+
+
+

8.2. Choosing a target

+

LLVM has native support for cross-compilation. You can compile to the +architecture of your current machine, or just as easily compile for +other architectures. In this tutorial, we’ll target the current +machine.

+

To specify the architecture that you want to target, we use a string +called a “target triple”. This takes the form +<arch><sub>-<vendor>-<sys>-<abi> (see the cross compilation docs).

+

As an example, we can see what clang thinks is our current target +triple:

+
$ clang --version | grep Target
+Target: x86_64-unknown-linux-gnu
+
+
+

Running this command may show something different on your machine, as +you might be using a different architecture or operating system than mine.

+

Fortunately, we don’t need to hard-code a target triple to target the +current machine. LLVM provides sys::getDefaultTargetTriple, which +returns the target triple of the current machine.

+
auto TargetTriple = sys::getDefaultTargetTriple();
+
+
+

LLVM doesn’t require us to link in all the target +functionality. For example, if we’re just using the JIT, we don’t need +the assembly printers. Similarly, if we’re only targeting certain +architectures, we can link in only the functionality for those +architectures.

+

For this example, we’ll initialize all the targets for emitting object +code.

+
InitializeAllTargetInfos();
+InitializeAllTargets();
+InitializeAllTargetMCs();
+InitializeAllAsmParsers();
+InitializeAllAsmPrinters();
+
+
+

We can now use our target triple to get a Target:

+
std::string Error;
+auto Target = TargetRegistry::lookupTarget(TargetTriple, Error);
+
+// Print an error and exit if we couldn't find the requested target.
+// This generally occurs if we've forgotten to initialise the
+// TargetRegistry or we have a bogus target triple.
+if (!Target) {
+  errs() << Error;
+  return 1;
+}
+
+
+
+
+

8.3. Target Machine

+

We will also need a TargetMachine. This class provides a complete +machine description of the machine we’re targeting. If we want to +target a specific feature (such as SSE) or a specific CPU (such as +Intel’s Sandy Bridge), we do so now.

+

To see which features and CPUs LLVM knows about, we can use +llc. For example, let’s look at x86:

+
$ llvm-as < /dev/null | llc -march=x86 -mattr=help
+Available CPUs for this target:
+
+  amdfam10      - Select the amdfam10 processor.
+  athlon        - Select the athlon processor.
+  athlon-4      - Select the athlon-4 processor.
+  ...
+
+Available features for this target:
+
+  16bit-mode            - 16-bit mode (i8086).
+  32bit-mode            - 32-bit mode (80386).
+  3dnow                 - Enable 3DNow! instructions.
+  3dnowa                - Enable 3DNow! Athlon instructions.
+  ...
+
+
+

For our example, we’ll use the generic CPU without any additional +features, options or relocation model.

+
auto CPU = "generic";
+auto Features = "";
+
+TargetOptions opt;
+auto RM = Optional<Reloc::Model>();
+auto TargetMachine = Target->createTargetMachine(TargetTriple, CPU, Features, opt, RM);
+
+
+
+
+

8.4. Configuring the Module

+

We’re now ready to configure our module, to specify the target and +data layout. This isn’t strictly necessary, but the frontend +performance guide recommends +this. Optimizations benefit from knowing about the target and data +layout.

+
TheModule->setDataLayout(TargetMachine->createDataLayout());
+TheModule->setTargetTriple(TargetTriple);
+
+
+
+
+

8.5. Emit Object Code

+

We’re ready to emit object code! Let’s define where we want to write +our file to:

+
auto Filename = "output.o";
+std::error_code EC;
+raw_fd_ostream dest(Filename, EC, sys::fs::OF_None);
+
+if (EC) {
+  errs() << "Could not open file: " << EC.message();
+  return 1;
+}
+
+
+

Finally, we define a pass that emits object code, then we run that +pass:

+
legacy::PassManager pass;
+auto FileType = CGFT_ObjectFile;
+
+if (TargetMachine->addPassesToEmitFile(pass, dest, nullptr, FileType)) {
+  errs() << "TargetMachine can't emit a file of this type";
+  return 1;
+}
+
+pass.run(*TheModule);
+dest.flush();
+
+
+
+
+

8.6. Putting It All Together

+

Does it work? Let’s give it a try. We need to compile our code, but +note that the arguments to llvm-config are different from those in the previous chapters.

+
$ clang++ -g -O3 toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs all` -o toy
+
+
+

Let’s run it, and define a simple average function. Press Ctrl-D +when you’re done.

+
$ ./toy
+ready> def average(x y) (x + y) * 0.5;
+^D
+Wrote output.o
+
+
+

We have an object file! To test it, let’s write a simple program and +link it with our output. Here’s the source code:

+
#include <iostream>
+
+extern "C" {
+    double average(double, double);
+}
+
+int main() {
+    std::cout << "average of 3.0 and 4.0: " << average(3.0, 4.0) << std::endl;
+}
+
+
+

We link our program to output.o and check the result is what we +expected:

+
$ clang++ main.cpp output.o -o main
+$ ./main
+average of 3.0 and 4.0: 3.5
+
+
+
+
+

8.7. Full Code Listing

+
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+using namespace llvm::sys;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+//
+// All named tokens are negative, so they cannot collide with the character
+// codes that gettok() returns for unrecognised characters.
+enum Token {
+  // end of input
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2,
+  tok_extern = -3,
+
+  // primary
+  tok_identifier = -4,
+  tok_number = -5,
+
+  // control
+  tok_if = -6,
+  tok_then = -7,
+  tok_else = -8,
+  tok_for = -9,
+  tok_in = -10,
+
+  // operators
+  tok_binary = -11,
+  tok_unary = -12,
+
+  // var definition
+  tok_var = -13
+};
+
+// Lexer outputs that accompany the token returned by gettok().  They are
+// overwritten by later tokens of the same kind, so callers copy them before
+// asking for the next token.
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+/// gettok - Lex and return the next token from standard input.
+///
+/// Keywords, identifiers and numbers produce one of the negative Token values
+/// (IdentifierStr / NumVal are filled in for identifiers and numbers); any
+/// other character is returned as its own character code.
+static int gettok() {
+  // One character of lookahead, kept across calls.
+  static int LastChar = ' ';
+
+  // Discard leading whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  // identifier or keyword: [a-zA-Z][a-zA-Z0-9]*
+  if (isalpha(LastChar)) {
+    IdentifierStr = LastChar;
+    for (LastChar = getchar(); isalnum(LastChar); LastChar = getchar())
+      IdentifierStr += LastChar;
+
+    // Reserved words and the token each one lexes to.
+    static const struct {
+      const char *Text;
+      int Tok;
+    } Keywords[] = {
+        {"def", tok_def},       {"extern", tok_extern}, {"if", tok_if},
+        {"then", tok_then},     {"else", tok_else},     {"for", tok_for},
+        {"in", tok_in},         {"binary", tok_binary}, {"unary", tok_unary},
+        {"var", tok_var},
+    };
+    for (const auto &KW : Keywords)
+      if (IdentifierStr == KW.Text)
+        return KW.Tok;
+
+    return tok_identifier;
+  }
+
+  // Number: [0-9.]+
+  if (isdigit(LastChar) || LastChar == '.') {
+    std::string Digits;
+    while (isdigit(LastChar) || LastChar == '.') {
+      Digits += LastChar;
+      LastChar = getchar();
+    }
+
+    NumVal = strtod(Digits.c_str(), nullptr);
+    return tok_number;
+  }
+
+  // '#' starts a comment that runs to the end of the line.
+  if (LastChar == '#') {
+    LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r')
+      LastChar = getchar();
+
+    // Lex whatever follows the comment, unless the input ended inside it.
+    if (LastChar == EOF)
+      return tok_eof;
+    return gettok();
+  }
+
+  // End of file: report it without consuming it, so it is seen again.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Anything else is handed back as its character code.
+  int Current = LastChar;
+  LastChar = getchar();
+  return Current;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// ExprAST - Abstract root of the expression node hierarchy.
+class ExprAST {
+public:
+  /// Emit IR for this expression; every concrete node implements this.
+  virtual Value *codegen() = 0;
+
+  virtual ~ExprAST() = default;
+};
+
+/// NumberExprAST - AST node for a numeric literal such as "1.0".
+class NumberExprAST : public ExprAST {
+  double Val; // The literal's value.
+
+public:
+  NumberExprAST(double Literal) : Val(Literal) {}
+
+  Value *codegen() override;
+};
+
+/// VariableExprAST - AST node for a reference to a named variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name; // Name of the referenced variable.
+
+public:
+  VariableExprAST(const std::string &VarName) : Name(VarName) {}
+
+  const std::string &getName() const { return Name; }
+  Value *codegen() override;
+};
+
+/// UnaryExprAST - AST node for a unary operator applied to one operand.
+class UnaryExprAST : public ExprAST {
+  char Opcode;                      // The operator character.
+  std::unique_ptr<ExprAST> Operand; // The expression it applies to.
+
+public:
+  UnaryExprAST(char Opc, std::unique_ptr<ExprAST> Sub)
+      : Opcode(Opc), Operand(std::move(Sub)) {}
+
+  Value *codegen() override;
+};
+
+/// BinaryExprAST - AST node for a binary operator and its two operands.
+class BinaryExprAST : public ExprAST {
+  char Op;                      // The operator character.
+  std::unique_ptr<ExprAST> LHS; // Left operand.
+  std::unique_ptr<ExprAST> RHS; // Right operand.
+
+public:
+  BinaryExprAST(char Opc, std::unique_ptr<ExprAST> Left,
+                std::unique_ptr<ExprAST> Right)
+      : Op(Opc), LHS(std::move(Left)), RHS(std::move(Right)) {}
+
+  Value *codegen() override;
+};
+
+/// CallExprAST - AST node for a call: the callee's name plus its arguments.
+class CallExprAST : public ExprAST {
+  std::string Callee;                         // Name of the called function.
+  std::vector<std::unique_ptr<ExprAST>> Args; // Argument expressions, in order.
+
+public:
+  CallExprAST(const std::string &CalleeName,
+              std::vector<std::unique_ptr<ExprAST>> CallArgs)
+      : Callee(CalleeName), Args(std::move(CallArgs)) {}
+
+  Value *codegen() override;
+};
+
+/// IfExprAST - AST node for an if/then/else expression.
+class IfExprAST : public ExprAST {
+  std::unique_ptr<ExprAST> Cond; // Condition expression.
+  std::unique_ptr<ExprAST> Then; // Expression after 'then'.
+  std::unique_ptr<ExprAST> Else; // Expression after 'else'.
+
+public:
+  IfExprAST(std::unique_ptr<ExprAST> CondExpr,
+            std::unique_ptr<ExprAST> ThenExpr,
+            std::unique_ptr<ExprAST> ElseExpr)
+      : Cond(std::move(CondExpr)), Then(std::move(ThenExpr)),
+        Else(std::move(ElseExpr)) {}
+
+  Value *codegen() override;
+};
+
+/// ForExprAST - AST node for a for/in loop expression.
+class ForExprAST : public ExprAST {
+  std::string VarName;            // Name of the loop variable.
+  std::unique_ptr<ExprAST> Start; // Initial value of the loop variable.
+  std::unique_ptr<ExprAST> End;   // Loop condition expression.
+  std::unique_ptr<ExprAST> Step;  // Optional increment; may be null.
+  std::unique_ptr<ExprAST> Body;  // Loop body.
+
+public:
+  ForExprAST(const std::string &LoopVar, std::unique_ptr<ExprAST> StartExpr,
+             std::unique_ptr<ExprAST> EndExpr,
+             std::unique_ptr<ExprAST> StepExpr,
+             std::unique_ptr<ExprAST> BodyExpr)
+      : VarName(LoopVar), Start(std::move(StartExpr)), End(std::move(EndExpr)),
+        Step(std::move(StepExpr)), Body(std::move(BodyExpr)) {}
+
+  Value *codegen() override;
+};
+
+/// VarExprAST - AST node for a var/in expression: a list of variables, each
+/// with an optional initializer, plus the body evaluated with them in scope.
+class VarExprAST : public ExprAST {
+  // A variable name paired with its initializer (null when none was given).
+  using Binding = std::pair<std::string, std::unique_ptr<ExprAST>>;
+
+  std::vector<Binding> VarNames;
+  std::unique_ptr<ExprAST> Body;
+
+public:
+  VarExprAST(std::vector<Binding> Bindings, std::unique_ptr<ExprAST> BodyExpr)
+      : VarNames(std::move(Bindings)), Body(std::move(BodyExpr)) {}
+
+  Value *codegen() override;
+};
+
+/// PrototypeAST - The "prototype" of a function: its name and the names of
+/// its arguments (and therefore how many it takes), plus whether it defines
+/// an operator and, for a binary operator, that operator's precedence.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool IsOperator;
+  unsigned Precedence; // Only meaningful for a binary operator.
+
+public:
+  PrototypeAST(const std::string &FnName, std::vector<std::string> ArgNames,
+               bool IsOperator = false, unsigned Prec = 0)
+      : Name(FnName), Args(std::move(ArgNames)), IsOperator(IsOperator),
+        Precedence(Prec) {}
+
+  Function *codegen();
+
+  const std::string &getName() const { return Name; }
+  unsigned getBinaryPrecedence() const { return Precedence; }
+
+  // An operator prototype is unary or binary according to its argument count.
+  bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
+
+  /// The operator character, i.e. the last character of the function name.
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name.back();
+  }
+};
+
+/// FunctionAST - A complete function definition: prototype plus body.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto; // Name and argument names.
+  std::unique_ptr<ExprAST> Body;       // The expression the function evaluates.
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> ProtoAST,
+              std::unique_ptr<ExprAST> BodyAST)
+      : Proto(std::move(ProtoAST)), Body(std::move(BodyAST)) {}
+
+  Function *codegen();
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - A one-token lookahead buffer.  CurTok is the token
+/// the parser is currently examining; getNextToken pulls the next token from
+/// the lexer, stores it in CurTok and also returns it.
+static int CurTok;
+static int getNextToken() {
+  CurTok = gettok();
+  return CurTok;
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+///
+/// Returns the precedence registered in BinopPrecedence for CurTok, or -1 when
+/// CurTok is not an ASCII character or has no positive precedence registered.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.  Use find() rather than operator[] so
+  // that merely probing a token never inserts a zero entry into the table.
+  auto It = BinopPrecedence.find(static_cast<char>(CurTok));
+  if (It == BinopPrecedence.end() || It->second <= 0)
+    return -1;
+  return It->second;
+}
+
+/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+///
+/// Builds a literal node from NumVal, then advances past the number token.
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  std::unique_ptr<ExprAST> Literal = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // step past the number
+  return Literal;
+}
+
+/// parenexpr ::= '(' expression ')'
+///
+/// Returns the inner expression itself; the parentheses only group and do
+/// not produce a node of their own.
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // skip '('
+
+  std::unique_ptr<ExprAST> Inner = ParseExpression();
+  if (Inner) {
+    if (CurTok != ')')
+      return LogError("expected ')'");
+    getNextToken(); // skip ')'
+  }
+  return Inner;
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+///
+/// A bare identifier is a variable reference; one followed by '(' is a call.
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  const std::string Ident = IdentifierStr;
+  getNextToken(); // skip the identifier
+
+  // No '(' follows: plain variable reference.
+  if (CurTok != '(')
+    return std::make_unique<VariableExprAST>(Ident);
+
+  // Otherwise this is a call; collect the comma-separated arguments.
+  getNextToken(); // skip '('
+  std::vector<std::unique_ptr<ExprAST>> CallArgs;
+  if (CurTok != ')') {
+    for (;;) {
+      auto Arg = ParseExpression();
+      if (!Arg)
+        return nullptr;
+      CallArgs.push_back(std::move(Arg));
+
+      if (CurTok == ')')
+        break;
+      if (CurTok != ',')
+        return LogError("Expected ')' or ',' in argument list");
+      getNextToken(); // skip ','
+    }
+  }
+
+  getNextToken(); // skip ')'
+
+  return std::make_unique<CallExprAST>(Ident, std::move(CallArgs));
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+///
+/// Both the 'then' and the 'else' parts are required.
+static std::unique_ptr<ExprAST> ParseIfExpr() {
+  getNextToken(); // skip 'if'
+
+  auto CondExpr = ParseExpression();
+  if (!CondExpr)
+    return nullptr;
+
+  if (CurTok != tok_then)
+    return LogError("expected then");
+  getNextToken(); // skip 'then'
+
+  auto ThenExpr = ParseExpression();
+  if (!ThenExpr)
+    return nullptr;
+
+  if (CurTok != tok_else)
+    return LogError("expected else");
+  getNextToken(); // skip 'else'
+
+  auto ElseExpr = ParseExpression();
+  if (!ElseExpr)
+    return nullptr;
+
+  return std::make_unique<IfExprAST>(std::move(CondExpr), std::move(ThenExpr),
+                                     std::move(ElseExpr));
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+///
+/// The step expression is optional; when it is absent the node's step is null.
+static std::unique_ptr<ExprAST> ParseForExpr() {
+  getNextToken(); // skip 'for'
+
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after for");
+  const std::string LoopVar = IdentifierStr;
+  getNextToken(); // skip the identifier
+
+  if (CurTok != '=')
+    return LogError("expected '=' after for");
+  getNextToken(); // skip '='
+
+  auto StartExpr = ParseExpression();
+  if (!StartExpr)
+    return nullptr;
+
+  if (CurTok != ',')
+    return LogError("expected ',' after for start value");
+  getNextToken(); // skip ','
+
+  auto EndExpr = ParseExpression();
+  if (!EndExpr)
+    return nullptr;
+
+  // Optional step value, introduced by a second ','.
+  std::unique_ptr<ExprAST> StepExpr;
+  if (CurTok == ',') {
+    getNextToken(); // skip ','
+    StepExpr = ParseExpression();
+    if (!StepExpr)
+      return nullptr;
+  }
+
+  if (CurTok != tok_in)
+    return LogError("expected 'in' after for");
+  getNextToken(); // skip 'in'
+
+  auto BodyExpr = ParseExpression();
+  if (!BodyExpr)
+    return nullptr;
+
+  return std::make_unique<ForExprAST>(LoopVar, std::move(StartExpr),
+                                      std::move(EndExpr), std::move(StepExpr),
+                                      std::move(BodyExpr));
+}
+
+/// varexpr ::= 'var' identifier ('=' expression)?
+///                    (',' identifier ('=' expression)?)* 'in' expression
+///
+/// A variable declared without an initializer is recorded with a null one.
+static std::unique_ptr<ExprAST> ParseVarExpr() {
+  getNextToken(); // skip 'var'
+
+  // There must be at least one variable name.
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after var");
+
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> Bindings;
+  for (;;) {
+    std::string Name = IdentifierStr;
+    getNextToken(); // skip the identifier
+
+    // Optional "= expression" initializer.
+    std::unique_ptr<ExprAST> Init;
+    if (CurTok == '=') {
+      getNextToken(); // skip '='
+
+      Init = ParseExpression();
+      if (!Init)
+        return nullptr;
+    }
+
+    Bindings.emplace_back(Name, std::move(Init));
+
+    // Anything other than ',' ends the variable list.
+    if (CurTok != ',')
+      break;
+    getNextToken(); // skip ','
+
+    if (CurTok != tok_identifier)
+      return LogError("expected identifier list after var");
+  }
+
+  // The list must be followed by 'in'.
+  if (CurTok != tok_in)
+    return LogError("expected 'in' keyword after 'var'");
+  getNextToken(); // skip 'in'
+
+  auto BodyExpr = ParseExpression();
+  if (!BodyExpr)
+    return nullptr;
+
+  return std::make_unique<VarExprAST>(std::move(Bindings),
+                                      std::move(BodyExpr));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+///
+/// Dispatches on the current token to the matching sub-parser.
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case tok_if:
+    return ParseIfExpr();
+  case tok_for:
+    return ParseForExpr();
+  case tok_var:
+    return ParseVarExpr();
+  case '(':
+    return ParseParenExpr();
+  default:
+    return LogError("unknown token when expecting an expression");
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+///
+/// Any ASCII token other than '(' and ',' is taken to be a unary operator
+/// character here; everything else is parsed as a primary expression.
+static std::unique_ptr<ExprAST> ParseUnary() {
+  const bool IsOperator = isascii(CurTok) && CurTok != '(' && CurTok != ',';
+  if (!IsOperator)
+    return ParsePrimary();
+
+  // Remember the operator, then parse its operand (which may itself be unary).
+  int Opc = CurTok;
+  getNextToken();
+
+  auto Operand = ParseUnary();
+  if (!Operand)
+    return nullptr;
+  return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+///
+/// Parses the sequence of (operator, unary) pairs that follows an already
+/// parsed left-hand side.
+///
+/// \param ExprPrec  minimal operator precedence this call is allowed to
+///                  consume; a weaker pending operator ends the call.
+/// \param LHS       the expression parsed so far.
+/// \returns the combined expression, or null if an operand failed to parse.
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  // If this is a binop, find its precedence.
+  while (true) {
+    // GetTokPrecedence() yields -1 for anything that is not a declared binop.
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken(); // eat binop
+
+    // Parse the unary expression after the binary operator.
+    auto RHS = ParseUnary();
+    if (!RHS)
+      return nullptr;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      // The recursive call consumes only operators that bind strictly tighter
+      // than BinOp (minimum precedence TokPrec + 1).
+      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
+      if (!RHS)
+        return nullptr;
+    }
+
+    // Merge LHS/RHS.
+    LHS =
+        std::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+/// Parses a leading unary expression, then any binary operators after it.
+static std::unique_ptr<ExprAST> ParseExpression() {
+  if (auto First = ParseUnary())
+    return ParseBinOpRHS(0, std::move(First));
+  return nullptr;
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+///
+/// Operator prototypes are named "unary<c>" / "binary<c>" after their
+/// operator character.  A binary operator's precedence defaults to 30 when
+/// none is written.
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  std::string FnName;
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  if (CurTok == tok_identifier) {
+    FnName = IdentifierStr;
+    getNextToken();
+  } else if (CurTok == tok_unary) {
+    getNextToken();
+    if (!isascii(CurTok))
+      return LogErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+  } else if (CurTok == tok_binary) {
+    getNextToken();
+    if (!isascii(CurTok))
+      return LogErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+
+    // An explicit precedence may follow the operator character.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return LogErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+  } else {
+    return LogErrorP("Expected function name in prototype");
+  }
+
+  if (CurTok != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Argument names are consecutive identifiers (no separators) up to ')'.
+  std::vector<std::string> ArgNames;
+  for (getNextToken(); CurTok == tok_identifier; getNextToken())
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  getNextToken(); // skip ')'
+
+  // An operator must take as many operands as its kind says (1 or 2).
+  const bool IsOperator = Kind != 0;
+  if (IsOperator && ArgNames.size() != Kind)
+    return LogErrorP("Invalid number of operands for operator");
+
+  return std::make_unique<PrototypeAST>(FnName, ArgNames, IsOperator,
+                                        BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+///
+/// Returns null if either the prototype or the body fails to parse.
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // skip 'def'
+
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
+
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(Body));
+}
+
+/// toplevelexpr ::= expression
+///
+/// Wraps the expression in a function named "__anon_expr" with no arguments.
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
+
+  // Anonymous, argument-less prototype for the wrapper function.
+  auto Proto = std::make_unique<PrototypeAST>("__anon_expr",
+                                              std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(Body));
+}
+
+/// external ::= 'extern' prototype
+///
+/// An extern declaration is just a prototype with no body.
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // skip 'extern'
+  auto Proto = ParsePrototype();
+  return Proto;
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static std::unique_ptr<LLVMContext> TheContext;
+static std::unique_ptr<Module> TheModule;
+static std::unique_ptr<IRBuilder<>> Builder;
+static std::map<std::string, AllocaInst *> NamedValues;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
+static ExitOnError ExitOnErr;
+
+Value *LogErrorV(const char *Str) {
+  LogError(Str);
+  return nullptr;
+}
+
+/// getFunction - Resolve a function by name: first in the current module,
+/// then by generating a declaration from a prototype recorded in
+/// FunctionProtos.  Returns null when the name is known to neither.
+Function *getFunction(std::string Name) {
+  // Already present in the current module?
+  Function *Existing = TheModule->getFunction(Name);
+  if (Existing)
+    return Existing;
+
+  // Otherwise codegen the declaration from a known prototype, if there is one.
+  auto ProtoIt = FunctionProtos.find(Name);
+  return ProtoIt == FunctionProtos.end() ? nullptr
+                                         : ProtoIt->second->codegen();
+}
+
+/// CreateEntryBlockAlloca - Emit an alloca of type double named VarName at
+/// the very start of TheFunction's entry block.  Used to give mutable
+/// variables a stack slot.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          StringRef VarName) {
+  BasicBlock &Entry = TheFunction->getEntryBlock();
+  // A temporary builder, so the main Builder's insertion point is untouched.
+  IRBuilder<> EntryBuilder(&Entry, Entry.begin());
+  return EntryBuilder.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr,
+                                   VarName);
+}
+
+/// Emit the literal as a floating-point constant.
+Value *NumberExprAST::codegen() {
+  APFloat Literal(Val);
+  return ConstantFP::get(*TheContext, Literal);
+}
+
+/// Emit a load of the named variable from its stack slot.
+///
+/// Returns null, after reporting "Unknown variable name", when the name has
+/// no binding in NamedValues.
+Value *VariableExprAST::codegen() {
+  // Look this variable up in the function.  find() is used instead of
+  // operator[] so that referencing an undefined name does not insert a null
+  // entry into the symbol table.
+  auto It = NamedValues.find(Name);
+  if (It == NamedValues.end() || !It->second)
+    return LogErrorV("Unknown variable name");
+  Value *V = It->second;
+
+  // Load the value.
+  return Builder->CreateLoad(Type::getDoubleTy(*TheContext), V, Name.c_str());
+}
+
+/// Emit a call to the user-defined function "unary<op>" on the operand.
+///
+/// Returns null if the operand fails to generate or no such function exists.
+Value *UnaryExprAST::codegen() {
+  Value *OperandV = Operand->codegen();
+  if (!OperandV)
+    return nullptr;
+
+  // Unary operators are ordinary functions named after their character.
+  if (Function *F = getFunction(std::string("unary") + Opcode))
+    return Builder->CreateCall(F, OperandV, "unop");
+
+  return LogErrorV("Unknown unary operator");
+}
+
+/// Emit IR for a binary expression.
+///
+/// '=' is handled as assignment to a variable, the builtin operators
+/// '+', '-', '*' and '<' are emitted inline, and any other operator becomes a
+/// call to the user-defined function "binary<op>".  Returns null on error.
+Value *BinaryExprAST::codegen() {
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    // This assumes we're building without RTTI because LLVM builds that way by
+    // default.  If you build LLVM with RTTI this can be changed to a
+    // dynamic_cast for automatic error checking.
+    // NOTE: a static_cast performs no type check, so the null test below only
+    // catches a null LHS, not an LHS that is some other kind of node.
+    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
+    if (!LHSE)
+      return LogErrorV("destination of '=' must be a variable");
+    // Codegen the RHS.
+    Value *Val = RHS->codegen();
+    if (!Val)
+      return nullptr;
+
+    // Look up the name.  find() avoids inserting a null entry for an unknown
+    // variable, which operator[] would do.
+    auto It = NamedValues.find(LHSE->getName());
+    if (It == NamedValues.end() || !It->second)
+      return LogErrorV("Unknown variable name");
+    Value *Variable = It->second;
+
+    Builder->CreateStore(Val, Variable);
+    return Val;
+  }
+
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
+
+  switch (Op) {
+  case '+':
+    return Builder->CreateFAdd(L, R, "addtmp");
+  case '-':
+    return Builder->CreateFSub(L, R, "subtmp");
+  case '*':
+    return Builder->CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder->CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
+  default:
+    break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.  Report a missing operator as an ordinary error (as the
+  // unary path does) instead of relying on an assert, which is compiled out
+  // under NDEBUG and would let a null callee reach CreateCall.
+  Function *F = getFunction(std::string("binary") + Op);
+  if (!F)
+    return LogErrorV("Unknown binary operator");
+
+  Value *Ops[] = {L, R};
+  return Builder->CreateCall(F, Ops, "binop");
+}
+
+/// Emit a call to the named function with the generated argument values.
+///
+/// Returns null if the callee is unknown, the argument count does not match,
+/// or any argument fails to generate.
+Value *CallExprAST::codegen() {
+  // Resolve the callee by name.
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
+    return LogErrorV("Unknown function referenced");
+
+  // The call must pass exactly as many arguments as the callee declares.
+  if (CalleeF->arg_size() != Args.size())
+    return LogErrorV("Incorrect # arguments passed");
+
+  // Generate each argument in order, giving up at the first failure.
+  std::vector<Value *> ArgsV;
+  for (const auto &Arg : Args) {
+    Value *ArgV = Arg->codegen();
+    if (!ArgV)
+      return nullptr;
+    ArgsV.push_back(ArgV);
+  }
+
+  return Builder->CreateCall(CalleeF, ArgsV, "calltmp");
+}
+
+/// Emit IR for if/then/else: a conditional branch into a 'then' and an 'else'
+/// block, both of which branch to a merge block where a PHI node selects the
+/// value of whichever side ran.  Returns the PHI, or null if the condition or
+/// either branch fails to generate.
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  CondV = Builder->CreateFCmpONE(
+      CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.  The 'else' and merge blocks are created detached
+  // and are only appended to the function further down.
+  BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
+  BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
+
+  Builder->CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder->SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->codegen();
+  // NOTE(review): on this early return ElseBB and MergeBB have not been
+  // attached to any function yet -- presumably they are leaked; confirm.
+  if (!ThenV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder->GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder->SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->codegen();
+  // NOTE(review): MergeBB is still detached on this early return as well.
+  if (!ElseV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder->GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder->SetInsertPoint(MergeBB);
+  // Two incoming values: one from each side of the conditional.
+  PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
+
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+// Output for-loop as:
+//   var = alloca double
+//   ...
+//   start = startexpr
+//   store start -> var
+//   goto loop
+// loop:
+//   ...
+//   bodyexpr
+//   ...
+//   step = stepexpr
+//   endcond = endexpr
+//
+//   curvar = load var
+//   nextvar = curvar + step
+//   store nextvar -> var
+//   br endcond, loop, afterloop
+// afterloop:
+//
+// The loop variable lives in an alloca and is bound in NamedValues only while
+// the body, step and end condition are generated.  The expression's own value
+// is always 0.0; null is returned if any sub-expression fails to generate.
+Value *ForExprAST::codegen() {
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create an alloca for the variable in the entry block.
+  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
+
+  // Store the value into the alloca.
+  Builder->CreateStore(StartVal, Alloca);
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder->CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder->SetInsertPoint(LoopBB);
+
+  // Within the loop, the variable refers to the alloca created above.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  // (operator[] yields null here when there was no previous binding.)
+  AllocaInst *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Alloca;
+
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  // NOTE(review): this and the following early returns skip the code at the
+  // bottom that restores the shadowed binding in NamedValues.
+  if (!Body->codegen())
+    return nullptr;
+
+  // Emit the step value.
+  Value *StepVal = nullptr;
+  if (Step) {
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
+  }
+
+  // Compute the end condition.  It is generated before the increment below,
+  // so it sees the loop variable's value from before this iteration's step.
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
+
+  // Reload, increment, and store back to the alloca.  This handles the case
+  // where the body of the loop mutates the variable.
+  Value *CurVar = Builder->CreateLoad(Type::getDoubleTy(*TheContext), Alloca,
+                                      VarName.c_str());
+  Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar");
+  Builder->CreateStore(NextVar, Alloca);
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  EndCond = Builder->CreateFCmpONE(
+      EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
+
+  // Create the "after loop" block and insert it.
+  BasicBlock *AfterBB =
+      BasicBlock::Create(*TheContext, "afterloop", TheFunction);
+
+  // Insert the conditional branch at the current insertion point, i.e. the
+  // end of whichever block the loop body finished in.
+  Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder->SetInsertPoint(AfterBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(*TheContext));
+}
+
+/// Generate code for a 'var ... in body' expression.
+///
+/// Each declared variable gets an entry-block alloca initialised from its
+/// initializer (0.0 when none is given) and is bound in NamedValues while the
+/// body is generated.  Returns the value of the body, or nullptr if any
+/// initializer or the body fails to generate.  In every case the bindings that
+/// were in place on entry are put back before returning.
+Value *VarExprAST::codegen() {
+  // OldBindings[i] is what NamedValues held for VarNames[i] immediately before
+  // this expression rebound it.
+  std::vector<AllocaInst *> OldBindings;
+
+  // Undo the bindings made so far, newest first.  The order matters when the
+  // same name appears twice in one 'var' list: the last value written back is
+  // then the binding that existed before this expression, not the alloca
+  // created for the first declaration.  A name that was previously unbound is
+  // reset to a null entry, exactly as before.
+  auto RestoreBindings = [&] {
+    for (unsigned i = OldBindings.size(); i != 0; --i)
+      NamedValues[VarNames[i - 1].first] = OldBindings[i - 1];
+  };
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+    const std::string &VarName = VarNames[i].first;
+    ExprAST *Init = VarNames[i].second.get();
+
+    // Emit the initializer before adding the variable to scope, this prevents
+    // the initializer from referencing the variable itself, and permits stuff
+    // like this:
+    //  var a = 1 in
+    //    var a = a in ...   # refers to outer 'a'.
+    Value *InitVal;
+    if (Init) {
+      InitVal = Init->codegen();
+      if (!InitVal) {
+        // Do not leave the variables bound so far in scope on failure.
+        RestoreBindings();
+        return nullptr;
+      }
+    } else { // If not specified, use 0.0.
+      InitVal = ConstantFP::get(*TheContext, APFloat(0.0));
+    }
+
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+    Builder->CreateStore(InitVal, Alloca);
+
+    // Remember the old variable binding so that we can restore the binding when
+    // we unrecurse.
+    OldBindings.push_back(NamedValues[VarName]);
+
+    // Remember this binding.
+    NamedValues[VarName] = Alloca;
+  }
+
+  // Codegen the body, now that all vars are in scope.
+  Value *BodyVal = Body->codegen();
+
+  // Pop all our variables from scope, whether or not the body succeeded.
+  RestoreBindings();
+
+  // Return the body computation (nullptr if the body failed).
+  return BodyVal;
+}
+
+/// Declare the function described by this prototype in TheModule and return
+/// it.  The function takes one double per declared argument and returns a
+/// double; it is created with external linkage.
+Function *PrototypeAST::codegen() {
+  // Build the signature double(double, ..., double).
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  std::vector<Type *> ParamTypes(Args.size(), DoubleTy);
+  FunctionType *Signature =
+      FunctionType::get(DoubleTy, ParamTypes, /*isVarArg=*/false);
+
+  Function *NewFn = Function::Create(Signature, Function::ExternalLinkage,
+                                     Name, TheModule.get());
+
+  // Name each IR argument after the corresponding prototype argument.
+  unsigned ArgNo = 0;
+  for (auto &Param : NewFn->args()) {
+    Param.setName(Args[ArgNo]);
+    ++ArgNo;
+  }
+
+  return NewFn;
+}
+
+/// Generate IR for a complete function definition.
+///
+/// Returns the finished function, or nullptr when the function cannot be
+/// looked up or its body fails to generate (in which case the partially built
+/// function is erased and any operator precedence it installed is removed).
+Function *FunctionAST::codegen() {
+  // FunctionProtos takes over ownership of the prototype; hold on to a
+  // reference so it can still be queried after the move.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
+
+  // A user-defined binary operator is registered with its precedence.
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
+
+  // All code for the body is inserted starting at a fresh "entry" block.
+  BasicBlock *EntryBB = BasicBlock::Create(*TheContext, "entry", TheFunction);
+  Builder->SetInsertPoint(EntryBB);
+
+  // Reset the symbol table, then give every argument its own alloca holding
+  // the incoming value and record it under the argument's name.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args()) {
+    AllocaInst *Slot = CreateEntryBlockAlloca(TheFunction, Arg.getName());
+    Builder->CreateStore(&Arg, Slot);
+    NamedValues[std::string(Arg.getName())] = Slot;
+  }
+
+  Value *RetVal = Body->codegen();
+  if (!RetVal) {
+    // Error reading body, remove function.
+    TheFunction->eraseFromParent();
+
+    if (P.isBinaryOp())
+      BinopPrecedence.erase(P.getOperatorName());
+    return nullptr;
+  }
+
+  // Finish off the function.
+  Builder->CreateRet(RetVal);
+
+  // Validate the generated code, checking for consistency.
+  verifyFunction(*TheFunction);
+
+  return TheFunction;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+/// Create the global LLVM state used by code generation: a fresh context, an
+/// IR builder and a module, both tied to that context.
+static void InitializeModuleAndPassManager() {
+  // Everything else is created against this context.
+  TheContext = std::make_unique<LLVMContext>();
+
+  // Builder used to emit instructions.
+  Builder = std::make_unique<IRBuilder<>>(*TheContext);
+
+  // Module that receives the generated functions.
+  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
+}
+
+/// Parse a function definition, generate its IR and print the result to
+/// stderr.  On a parse error one token is skipped so parsing can resume.
+static void HandleDefinition() {
+  auto FnAST = ParseDefinition();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  auto *FnIR = FnAST->codegen();
+  if (!FnIR)
+    return;
+
+  fprintf(stderr, "Read function definition:");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+}
+
+/// Parse an 'extern' declaration, emit its prototype, print it to stderr and
+/// record the prototype in FunctionProtos.  On a parse error one token is
+/// skipped so parsing can resume.
+static void HandleExtern() {
+  auto ProtoAST = ParseExtern();
+  if (!ProtoAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  auto *FnIR = ProtoAST->codegen();
+  if (!FnIR)
+    return;
+
+  fprintf(stderr, "Read extern: ");
+  FnIR->print(errs());
+  fprintf(stderr, "\n");
+
+  // Hand ownership of the prototype to the table, keyed by function name.
+  FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+}
+
+/// Parse a top-level expression and generate code for the anonymous function
+/// wrapping it.  On a parse error one token is skipped so parsing can resume.
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  auto FnAST = ParseTopLevelExpr();
+  if (!FnAST) {
+    // Skip token for error recovery.
+    getNextToken();
+    return;
+  }
+
+  // The result of code generation is not inspected here.
+  FnAST->codegen();
+}
+
+/// top ::= definition | external | expression | ';'
+///
+/// Dispatch on the current token until end of input is reached.
+static void MainLoop() {
+  for (;;) {
+    if (CurTok == tok_eof)
+      return;
+
+    if (CurTok == ';') {
+      // ignore top-level semicolons.
+      getNextToken();
+    } else if (CurTok == tok_def) {
+      HandleDefinition();
+    } else if (CurTok == tok_extern) {
+      HandleExtern();
+    } else {
+      HandleTopLevelExpression();
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+
+/// putchard - write the character whose code is X (truncated to char) to
+/// stderr; always returns 0.
+extern "C" DLLEXPORT double putchard(double X) {
+  const char Ch = static_cast<char>(X);
+  fputc(Ch, stderr);
+  return 0.0;
+}
+
+/// printd - print X to stderr using the format "%f\n"; always returns 0.
+extern "C" DLLEXPORT double printd(double X) {
+  static const char Format[] = "%f\n";
+  fprintf(stderr, Format, X);
+  return 0.0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+/// Driver: read Kaleidoscope source from standard input, generate IR for it
+/// and write an object file for the default target to "output.o".
+///
+/// Returns 0 on success and 1 if the target cannot be found, no target machine
+/// can be created, the output file cannot be opened, or the target cannot emit
+/// object files.
+int main() {
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40; // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  InitializeModuleAndPassManager();
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  // Initialize the target registry etc.
+  InitializeAllTargetInfos();
+  InitializeAllTargets();
+  InitializeAllTargetMCs();
+  InitializeAllAsmParsers();
+  InitializeAllAsmPrinters();
+
+  auto TargetTriple = sys::getDefaultTargetTriple();
+  TheModule->setTargetTriple(TargetTriple);
+
+  std::string Error;
+  auto Target = TargetRegistry::lookupTarget(TargetTriple, Error);
+
+  // Print an error and exit if we couldn't find the requested target.
+  // This generally occurs if we've forgotten to initialise the
+  // TargetRegistry or we have a bogus target triple.
+  if (!Target) {
+    errs() << Error;
+    return 1;
+  }
+
+  auto CPU = "generic";
+  auto Features = "";
+
+  TargetOptions opt;
+  auto RM = Optional<Reloc::Model>();
+  auto TheTargetMachine =
+      Target->createTargetMachine(TargetTriple, CPU, Features, opt, RM);
+
+  // createTargetMachine returns null when the target has no machine
+  // implementation for this configuration; bail out instead of dereferencing
+  // a null pointer below.
+  if (!TheTargetMachine) {
+    errs() << "Could not create a target machine for " << TargetTriple << "\n";
+    return 1;
+  }
+
+  TheModule->setDataLayout(TheTargetMachine->createDataLayout());
+
+  auto Filename = "output.o";
+  std::error_code EC;
+  raw_fd_ostream dest(Filename, EC, sys::fs::OF_None);
+
+  if (EC) {
+    errs() << "Could not open file: " << EC.message();
+    return 1;
+  }
+
+  legacy::PassManager pass;
+  auto FileType = CGFT_ObjectFile;
+
+  // addPassesToEmitFile returns true on failure.
+  if (TheTargetMachine->addPassesToEmitFile(pass, dest, nullptr, FileType)) {
+    errs() << "TheTargetMachine can't emit a file of this type";
+    return 1;
+  }
+
+  pass.run(*TheModule);
+  dest.flush();
+
+  outs() << "Wrote " << Filename << "\n";
+
+  return 0;
+}
+
+
+

Next: Adding Debug Information

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl09.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl09.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl09.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl09.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,2013 @@ + + + + + + + + + 9. Kaleidoscope: Adding Debug Information — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

9. Kaleidoscope: Adding Debug Information

+ +
+

9.1. Chapter 9 Introduction

+

Welcome to Chapter 9 of the “Implementing a language with +LLVM” tutorial. In chapters 1 through 8, we’ve built a +decent little programming language with functions and variables. +What happens if something goes wrong, though? How do you debug your +program?

+

Source level debugging uses formatted data that helps a debugger +translate from binary and the state of the machine back to the +source that the programmer wrote. In LLVM we generally use a format +called DWARF. DWARF is a compact encoding +that represents types, source locations, and variable locations.

+

The short summary of this chapter is that we’ll go through the +various things you have to add to a programming language to +support debug info, and how you translate that into DWARF.

+

Caveat: For now we can’t debug via the JIT, so we’ll need to compile +our program down to something small and standalone. As part of this +we’ll make a few modifications to the running of the language and +how programs are compiled. This means that we’ll have a source file +with a simple program written in Kaleidoscope rather than the +interactive JIT. It does involve a limitation that we can only +have one “top level” command at a time to reduce the number of +changes necessary.

+

Here’s the sample program we’ll be compiling:

+
def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2);
+
+fib(10)
+
+
+
+
+

9.2. Why is this a hard problem?

+

Debug information is a hard problem for a few different reasons - mostly +centered around optimized code. First, optimization makes keeping source +locations more difficult. In LLVM IR we keep the original source location +for each IR level instruction on the instruction. Optimization passes +should keep the source locations for newly created instructions, but merged +instructions only get to keep a single location - this can cause jumping +around when stepping through optimized programs. Secondly, optimization +can transform variables so that they are optimized out, share memory +with other variables, or become difficult to track. For the purposes of this +tutorial we’re going to avoid optimization (as you’ll see with one of the +next sets of patches).

+
+
+

9.3. Ahead-of-Time Compilation Mode

+

To highlight only the aspects of adding debug information to a source +language without needing to worry about the complexities of JIT debugging +we’re going to make a few changes to Kaleidoscope to support compiling +the IR emitted by the front end into a simple standalone program that +you can execute, debug, and see results.

+

First we make our anonymous function that contains our top level +statement be our “main”:

+
-    auto Proto = std::make_unique<PrototypeAST>("", std::vector<std::string>());
++    auto Proto = std::make_unique<PrototypeAST>("main", std::vector<std::string>());
+
+
+

just with the simple change of giving it a name.

+

Then we’re going to remove the command line code wherever it exists:

+
@@ -1129,7 +1129,6 @@ static void HandleTopLevelExpression() {
+ /// top ::= definition | external | expression | ';'
+ static void MainLoop() {
+   while (1) {
+-    fprintf(stderr, "ready> ");
+     switch (CurTok) {
+     case tok_eof:
+       return;
+@@ -1184,7 +1183,6 @@ int main() {
+   BinopPrecedence['*'] = 40; // highest.
+
+   // Prime the first token.
+-  fprintf(stderr, "ready> ");
+   getNextToken();
+
+
+

Lastly we’re going to disable all of the optimization passes and the JIT so +that the only thing that happens after we’re done parsing and generating +code is that the LLVM IR goes to standard error:

+
@@ -1108,17 +1108,8 @@ static void HandleExtern() {
+ static void HandleTopLevelExpression() {
+   // Evaluate a top-level expression into an anonymous function.
+   if (auto FnAST = ParseTopLevelExpr()) {
+-    if (auto *FnIR = FnAST->codegen()) {
+-      // We're just doing this to make sure it executes.
+-      TheExecutionEngine->finalizeObject();
+-      // JIT the function, returning a function pointer.
+-      void *FPtr = TheExecutionEngine->getPointerToFunction(FnIR);
+-
+-      // Cast it to the right type (takes no arguments, returns a double) so we
+-      // can call it as a native function.
+-      double (*FP)() = (double (*)())(intptr_t)FPtr;
+-      // Ignore the return value for this.
+-      (void)FP;
++    if (!F->codegen()) {
++      fprintf(stderr, "Error generating code for top level expr");
+     }
+   } else {
+     // Skip token for error recovery.
+@@ -1439,11 +1459,11 @@ int main() {
+   // target lays out data structures.
+   TheModule->setDataLayout(TheExecutionEngine->getDataLayout());
+   OurFPM.add(new DataLayoutPass());
++#if 0
+   OurFPM.add(createBasicAliasAnalysisPass());
+   // Promote allocas to registers.
+   OurFPM.add(createPromoteMemoryToRegisterPass());
+@@ -1218,7 +1210,7 @@ int main() {
+   OurFPM.add(createGVNPass());
+   // Simplify the control flow graph (deleting unreachable blocks, etc).
+   OurFPM.add(createCFGSimplificationPass());
+-
++  #endif
+   OurFPM.doInitialization();
+
+   // Set the global so the code gen can use this.
+
+
+

This relatively small set of changes gets us to the point that we can compile +our piece of Kaleidoscope language down to an executable program via this +command line:

+
Kaleidoscope-Ch9 < fib.ks |& clang -x ir -
+
+
+

which gives an a.out/a.exe in the current working directory.

+
+
+

9.4. Compile Unit

+

The top level container for a section of code in DWARF is a compile unit. +This contains the type and function data for an individual translation unit +(read: one file of source code). So the first thing we need to do is +construct one for our fib.ks file.

+
+
+

9.5. DWARF Emission Setup

+

Similar to the IRBuilder class we have a +DIBuilder class +that helps in constructing debug metadata for an LLVM IR file. It +corresponds 1:1 similarly to IRBuilder and LLVM IR, but with nicer names. +Using it does require that you be more familiar with DWARF terminology than +you needed to be with IRBuilder and Instruction names, but if you +read through the general documentation on the +Metadata Format it +should be a little more clear. We’ll be using this class to construct all +of our IR level descriptions. Construction for it takes a module so we +need to construct it shortly after we construct our module. We’ve left it +as a global static variable to make it a bit easier to use.

+

Next we’re going to create a small container to cache some of our frequent +data. The first will be our compile unit, but we’ll also write a bit of +code for our one type since we won’t have to worry about multiple typed +expressions:

+
// Builder for debug metadata; assigned in main once the module exists.
+static DIBuilder *DBuilder;
+
+// Frequently used debug-info objects; the single instance is KSDbgInfo.
+struct DebugInfo {
+  DICompileUnit *TheCU; // The compile unit.
+  DIType *DblTy;        // Cached "double" type, created on first request.
+
+  DIType *getDoubleTy();
+} KSDbgInfo;
+
+// Return the debug type for "double", creating it on the first call and
+// reusing the cached object afterwards.
+DIType *DebugInfo::getDoubleTy() {
+  if (!DblTy)
+    DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float);
+
+  return DblTy;
+}
+
+
+

And then later on in main when we’re constructing our module:

+
DBuilder = new DIBuilder(*TheModule);
+
+KSDbgInfo.TheCU = DBuilder->createCompileUnit(
+    dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."),
+    "Kaleidoscope Compiler", 0, "", 0);
+
+
+

There are a couple of things to note here. First, while we’re producing a +compile unit for a language called Kaleidoscope we used the language +constant for C. This is because a debugger wouldn’t necessarily understand +the calling conventions or default ABI for a language it doesn’t recognize +and we follow the C ABI in our LLVM code generation so it’s the closest +thing to accurate. This ensures we can actually call functions from the +debugger and have them execute. Secondly, you’ll see the “fib.ks” in the +call to createCompileUnit. This is a default hard coded value since +we’re using shell redirection to put our source into the Kaleidoscope +compiler. In a usual front end you’d have an input file name and it would +go there.

+

One last thing as part of emitting debug information via DIBuilder is that +we need to “finalize” the debug information. The reasons are part of the +underlying API for DIBuilder, but make sure you do this near the end of +main:

+
DBuilder->finalize();
+
+
+

before you dump out the module.

+
+
+

9.6. Functions

+

Now that we have our Compile Unit and our source locations, we can add +function definitions to the debug info. So in PrototypeAST::codegen() we +add a few lines of code to describe a context for our subprogram, in this +case the “File”, and the actual definition of the function itself.

+

So the context:

+
DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
+                                    KSDbgInfo.TheCU.getDirectory());
+
+
+

giving us a DIFile and asking the Compile Unit we created above for the +directory and filename where we are currently. Then, for now, we use some +source locations of 0 (since our AST doesn’t currently have source location +information) and construct our function definition:

+
DIScope *FContext = Unit;
+unsigned LineNo = 0;
+unsigned ScopeLine = 0;
+DISubprogram *SP = DBuilder->createFunction(
+    FContext, P.getName(), StringRef(), Unit, LineNo,
+    CreateFunctionType(TheFunction->arg_size(), Unit),
+    false /* internal linkage */, true /* definition */, ScopeLine,
+    DINode::FlagPrototyped, false);
+TheFunction->setSubprogram(SP);
+
+
+

and we now have a DISubprogram that contains a reference to all of our +metadata for the function.

+
+
+

9.7. Source Locations

+

The most important thing for debug information is accurate source location - +this makes it possible to map your source code back. We have a problem though, +Kaleidoscope really doesn’t have any source location information in the lexer +or parser so we’ll need to add it.

+
// A line/column position in the input.
+struct SourceLocation {
+  int Line;
+  int Col;
+};
+static SourceLocation CurLoc;
+static SourceLocation LexLoc = {1, 0};
+
+// Read one character from standard input and update LexLoc: '\n' or '\r'
+// starts a new line at column 0, anything else advances the column.
+static int advance() {
+  const int Ch = getchar();
+  const bool IsLineBreak = (Ch == '\n' || Ch == '\r');
+
+  if (IsLineBreak) {
+    ++LexLoc.Line;
+    LexLoc.Col = 0;
+  } else {
+    ++LexLoc.Col;
+  }
+
+  return Ch;
+}
+
+
+

In this set of code we’ve added some functionality to keep track of the +line and column of the “source file”. As we lex every token we set our +current “lexical location” to the associated line and column for the beginning +of the token. We do this by replacing all of the previous calls to +getchar() with our new advance(), which keeps track of the information, +and then we have added a source location to all of our AST classes:

+
class ExprAST {
+  SourceLocation Loc;
+
+  public:
+    ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
+    virtual ~ExprAST() {}
+    virtual Value* codegen() = 0;
+    int getLine() const { return Loc.Line; }
+    int getCol() const { return Loc.Col; }
+    virtual raw_ostream &dump(raw_ostream &out, int ind) {
+      return out << ':' << getLine() << ':' << getCol() << '\n';
+    }
+
+
+

that we pass down through when we create a new expression:

+
LHS = std::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS),
+                                       std::move(RHS));
+
+
+

giving us locations for each of our expressions and variables.

+

To make sure that every instruction gets proper source location information, +we have to tell Builder whenever we’re at a new source location. +We use a small helper function for this:

+
void DebugInfo::emitLocation(ExprAST *AST) {
+  DIScope *Scope;
+  if (LexicalBlocks.empty())
+    Scope = TheCU;
+  else
+    Scope = LexicalBlocks.back();
+  Builder.SetCurrentDebugLocation(
+      DILocation::get(Scope->getContext(), AST->getLine(), AST->getCol(), Scope));
+}
+
+
+

This tells the main IRBuilder both where we are and what scope +we’re in. The scope can either be the compile unit or the nearest +enclosing lexical block, such as the current function. +To represent this we create a stack of scopes:

+
std::vector<DIScope *> LexicalBlocks;
+
+
+

and push the scope (function) to the top of the stack when we start +generating the code for each function:

+
KSDbgInfo.LexicalBlocks.push_back(SP);
+
+
+

Also, we must not forget to pop the scope back off the scope stack at the +end of the code generation for the function:

+
// Pop off the lexical block for the function since we added it
+// unconditionally.
+KSDbgInfo.LexicalBlocks.pop_back();
+
+
+

Then we make sure to emit the location every time we start to generate code +for a new AST object:

+
KSDbgInfo.emitLocation(this);
+
+
+
+
+

9.8. Variables

+

Now that we have functions, we need to be able to print out the variables +we have in scope. Let’s get our function arguments set up so we can get +decent backtraces and see how our functions are being called. It isn’t +a lot of code, and we generally handle it when we’re creating the +argument allocas in FunctionAST::codegen.

+
// Record the function arguments in the NamedValues map.
+NamedValues.clear();
+unsigned ArgIdx = 0;
+for (auto &Arg : TheFunction->args()) {
+  // Create an alloca for this variable.
+  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
+
+  // Create a debug descriptor for the variable.
+  DILocalVariable *D = DBuilder->createParameterVariable(
+      SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(),
+      true);
+
+  DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
+                          DILocation::get(SP->getContext(), LineNo, 0, SP),
+                          Builder.GetInsertBlock());
+
+  // Store the initial value into the alloca.
+  Builder.CreateStore(&Arg, Alloca);
+
+  // Add arguments to variable symbol table.  getName() returns a StringRef,
+  // which has to be converted to std::string explicitly to be used as the key.
+  NamedValues[std::string(Arg.getName())] = Alloca;
+}
+
+
+

Here we’re first creating the variable, giving it the scope (SP), +the name, source location, type, and since it’s an argument, the argument +index. Next, we create an llvm.dbg.declare call to indicate at the IR +level that we’ve got a variable in an alloca (and it gives a starting +location for the variable), and set a source location for the +beginning of the scope on the declare.

+

One interesting thing to note at this point is that various debuggers have +assumptions based on how code and debug information was generated for them +in the past. In this case we need to do a little bit of a hack to avoid +generating line information for the function prologue so that the debugger +knows to skip over those instructions when setting a breakpoint. So in +FunctionAST::codegen we add some more lines:

+
// Unset the location for the prologue emission (leading instructions with no
+// location in a function are considered part of the prologue and the debugger
+// will run past them when breaking on a function)
+KSDbgInfo.emitLocation(nullptr);
+
+
+

and then emit a new location when we actually start generating code for the +body of the function:

+
KSDbgInfo.emitLocation(Body.get());
+
+
+

With this we have enough debug information to set breakpoints in functions, +print out argument variables, and call functions. Not too bad for just a +few simple lines of code!

+
+
+

9.9. Full Code Listing

+

Here is the complete code listing for our running example, enhanced with +debug information. To build this example, use:

+
# Compile
+clang++ -g toy.cpp `llvm-config --cxxflags --ldflags --system-libs --libs core orcjit native` -O3 -o toy
+# Run
+./toy
+
+
+

Here is the code:

+
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include <cctype>
+#include <cstdio>
+#include <map>
+#include <string>
+#include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
+using namespace llvm;
+using namespace llvm::orc;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+//
+// All named tokens are negative, so they can never be confused with a
+// character value returned for an unknown character.
+enum Token {
+  // end of input
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2,
+  tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, // text is left in IdentifierStr
+  tok_number = -5,     // value is left in NumVal
+
+  // control
+  tok_if = -6,
+  tok_then = -7,
+  tok_else = -8,
+  tok_for = -9,
+  tok_in = -10,
+
+  // operators
+  tok_binary = -11,
+  tok_unary = -12,
+
+  // var definition
+  tok_var = -13
+};
+
+/// Return a printable name for a token: the keyword-style name for each named
+/// Token value, or a one-character string holding the token's character value
+/// for anything else.
+std::string getTokName(int Tok) {
+  switch (Tok) {
+  case tok_eof:        return "eof";
+  case tok_def:        return "def";
+  case tok_extern:     return "extern";
+  case tok_identifier: return "identifier";
+  case tok_number:     return "number";
+  case tok_if:         return "if";
+  case tok_then:       return "then";
+  case tok_else:       return "else";
+  case tok_for:        return "for";
+  case tok_in:         return "in";
+  case tok_binary:     return "binary";
+  case tok_unary:      return "unary";
+  case tok_var:        return "var";
+  default:
+    break;
+  }
+
+  // Not a named token: name it by its character.
+  const char Ch = (char)Tok;
+  return std::string(1, Ch);
+}
+
+namespace {
+class PrototypeAST;
+class ExprAST;
+}
+
+// Debug-info state shared by the code generator; the single instance is the
+// global KSDbgInfo declared with the struct.
+struct DebugInfo {
+  DICompileUnit *TheCU; // Compile unit.
+  DIType *DblTy;        // Debug type handed out by getDoubleTy().
+  std::vector<DIScope *> LexicalBlocks; // Stack of scopes; back() is innermost.
+
+  // Set the builder's current debug location from the given AST node.
+  void emitLocation(ExprAST *AST);
+  // Return the debug type used for "double".
+  DIType *getDoubleTy();
+} KSDbgInfo;
+
+struct SourceLocation {
+  int Line;
+  int Col;
+};
+static SourceLocation CurLoc;
+static SourceLocation LexLoc = {1, 0};
+
+/// Read one character from standard input and keep LexLoc in step with it:
+/// '\n' or '\r' moves to the next line at column 0, any other character
+/// advances the column.  Returns the character read.
+static int advance() {
+  const int Ch = getchar();
+  const bool IsLineBreak = (Ch == '\n' || Ch == '\r');
+
+  if (IsLineBreak) {
+    ++LexLoc.Line;
+    LexLoc.Col = 0;
+  } else {
+    ++LexLoc.Col;
+  }
+
+  return Ch;
+}
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+///
+/// Returns a negative Token value for keywords, identifiers, numbers and end
+/// of input, or otherwise the character itself.  As side effects it fills in
+/// IdentifierStr (identifiers and keywords) or NumVal (numbers) and copies
+/// LexLoc into CurLoc once leading whitespace has been skipped.
+static int gettok() {
+  // One character of lookahead, kept across calls.
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = advance();
+
+  // Remember where this token starts for the AST nodes built from it.
+  CurLoc = LexLoc;
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = advance())))
+      IdentifierStr += LastChar;
+
+    // Keywords are lexed as identifiers and then recognised by their text.
+    if (IdentifierStr == "def")
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    if (IdentifierStr == "if")
+      return tok_if;
+    if (IdentifierStr == "then")
+      return tok_then;
+    if (IdentifierStr == "else")
+      return tok_else;
+    if (IdentifierStr == "for")
+      return tok_for;
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
+    if (IdentifierStr == "var")
+      return tok_var;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = advance();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    // NOTE: the loop accepts any run of digits and '.', so text with several
+    // dots is collected too; strtod converts only the leading valid prefix.
+    NumVal = strtod(NumStr.c_str(), nullptr);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do
+      LastChar = advance();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    // Lex whatever follows the comment; if the comment ran into end of file,
+    // fall through to the EOF check below instead.
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = advance();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+namespace {
+
+/// Write `size` spaces to O and return O so further output can be chained.
+raw_ostream &indent(raw_ostream &O, int size) {
+  const std::string Padding(size, ' ');
+  O << Padding;
+  return O;
+}
+
+/// ExprAST - Base class for all expression nodes.  Every node records the
+/// source location it was created with (the current lexer location unless one
+/// is passed explicitly).
+class ExprAST {
+  SourceLocation Loc;
+
+public:
+  ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
+  virtual ~ExprAST() = default;
+
+  /// Generate IR for this expression.
+  virtual Value *codegen() = 0;
+
+  int getLine() const { return Loc.Line; }
+  int getCol() const { return Loc.Col; }
+
+  /// Print ":line:col" followed by a newline; subclasses print their own
+  /// description first and then call this.
+  virtual raw_ostream &dump(raw_ostream &out, int ind) {
+    out << ':' << getLine() << ':' << getCol() << '\n';
+    return out;
+  }
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+
+public:
+  NumberExprAST(double Val) : Val(Val) {}
+
+  /// Print the literal's value followed by its source location.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    out << Val;
+    return ExprAST::dump(out, ind);
+  }
+
+  Value *codegen() override;
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+
+public:
+  VariableExprAST(SourceLocation Loc, const std::string &Name)
+      : ExprAST(Loc), Name(Name) {}
+
+  const std::string &getName() const { return Name; }
+
+  Value *codegen() override;
+
+  /// Print the variable's name followed by its source location.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    out << Name;
+    return ExprAST::dump(out, ind);
+  }
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  std::unique_ptr<ExprAST> Operand;
+
+public:
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+
+  Value *codegen() override;
+
+  /// Print "unary<op>" with this node's location, then the operand one level
+  /// deeper.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    out << "unary" << Opcode;
+    ExprAST::dump(out, ind);
+    Operand->dump(out, ind + 1);
+    return out;
+  }
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  std::unique_ptr<ExprAST> LHS, RHS;
+
+public:
+  BinaryExprAST(SourceLocation Loc, char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : ExprAST(Loc), Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+
+  Value *codegen() override;
+
+  /// Print "binary<op>" with this node's location, then both operands, each
+  /// labelled and dumped one level deeper.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    out << "binary" << Op;
+    ExprAST::dump(out, ind);
+
+    indent(out, ind) << "LHS:";
+    LHS->dump(out, ind + 1);
+
+    indent(out, ind) << "RHS:";
+    RHS->dump(out, ind + 1);
+
+    return out;
+  }
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<std::unique_ptr<ExprAST>> Args;
+
+public:
+  CallExprAST(SourceLocation Loc, const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : ExprAST(Loc), Callee(Callee), Args(std::move(Args)) {}
+
+  Value *codegen() override;
+
+  /// Print "call <callee>" with this node's location, then every argument
+  /// indented one level deeper.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    out << "call " << Callee;
+    ExprAST::dump(out, ind);
+
+    const int ChildInd = ind + 1;
+    for (const auto &Arg : Args) {
+      indent(out, ChildInd);
+      Arg->dump(out, ChildInd);
+    }
+    return out;
+  }
+};
+
+/// IfExprAST - AST node for an if/then/else expression.
+class IfExprAST : public ExprAST {
+  std::unique_ptr<ExprAST> Cond, Then, Else; // Condition and both branches.
+
+public:
+  IfExprAST(SourceLocation Loc, std::unique_ptr<ExprAST> Cond,
+            std::unique_ptr<ExprAST> Then, std::unique_ptr<ExprAST> Else)
+      : ExprAST(Loc), Cond(std::move(Cond)), Then(std::move(Then)),
+        Else(std::move(Else)) {}
+
+  /// Writes "if", forwards to ExprAST::dump, then dumps the three
+  /// sub-expressions behind "Cond:" / "Then:" / "Else:" labels.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    raw_ostream &Header = out << "if";
+    ExprAST::dump(Header, ind);
+
+    const int ChildInd = ind + 1;
+    raw_ostream &CondOut = indent(out, ind) << "Cond:";
+    Cond->dump(CondOut, ChildInd);
+    raw_ostream &ThenOut = indent(out, ind) << "Then:";
+    Then->dump(ThenOut, ChildInd);
+    raw_ostream &ElseOut = indent(out, ind) << "Else:";
+    Else->dump(ElseOut, ChildInd);
+    return out;
+  }
+
+  Value *codegen() override;
+};
+
+/// ForExprAST - Expression class for for/in.
+///
+/// Step is optional: ParseForExpr leaves it null when the source omits the
+/// step clause, so every use of Step has to tolerate a null pointer.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  std::unique_ptr<ExprAST> Start, End, Step, Body;
+
+public:
+  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+             std::unique_ptr<ExprAST> Body)
+      : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+        Step(std::move(Step)), Body(std::move(Body)) {}
+  Value *codegen() override;
+
+  /// Writes "for", forwards to ExprAST::dump, then dumps the start, end,
+  /// step and body expressions behind their labels.  Returns \p out.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    ExprAST::dump(out << "for", ind);
+    // The first sub-expression is the loop's start value, so label it as such
+    // (it used to be printed under the copy-pasted label "Cond:").
+    Start->dump(indent(out, ind) << "Start:", ind + 1);
+    End->dump(indent(out, ind) << "End:", ind + 1);
+    // The step clause is optional; print "null" (as FunctionAST::dump does for
+    // a missing body) instead of dereferencing an empty pointer.
+    raw_ostream &StepOut = indent(out, ind) << "Step:";
+    if (Step)
+      Step->dump(StepOut, ind + 1);
+    else
+      StepOut << "null\n";
+    Body->dump(indent(out, ind) << "Body:", ind + 1);
+    return out;
+  }
+};
+
+/// VarExprAST - Expression class for var/in.
+///
+/// Each entry of VarNames pairs a variable name with its initializer.  The
+/// initializer is optional: ParseVarExpr stores a null pointer when the
+/// source has no "= expression" for that variable.
+class VarExprAST : public ExprAST {
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+  std::unique_ptr<ExprAST> Body;
+
+public:
+  VarExprAST(
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
+      std::unique_ptr<ExprAST> Body)
+      : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
+  Value *codegen() override;
+
+  /// Writes "var", forwards to ExprAST::dump, then dumps every
+  /// "<name>:<initializer>" pair followed by the body.  Returns \p out.
+  raw_ostream &dump(raw_ostream &out, int ind) override {
+    ExprAST::dump(out << "var", ind);
+    for (const auto &NamedVar : VarNames) {
+      raw_ostream &VarOut = indent(out, ind) << NamedVar.first << ':';
+      // A variable declared without an initializer has a null expression;
+      // print "null" (as FunctionAST::dump does for a missing body) instead
+      // of dereferencing it.
+      if (NamedVar.second)
+        NamedVar.second->dump(VarOut, ind + 1);
+      else
+        VarOut << "null\n";
+    }
+    Body->dump(indent(out, ind) << "Body:", ind + 1);
+    return out;
+  }
+};
+
+/// PrototypeAST - The "prototype" of a function: its name and its argument
+/// names (and therefore its arity), plus whether it defines an operator and,
+/// for binary operators, the precedence to install.
+class PrototypeAST {
+  std::string Name;              // Function name ("unary<op>"/"binary<op>" for operators).
+  std::vector<std::string> Args; // Argument names, in order.
+  bool IsOperator;               // True for user-defined operators.
+  unsigned Precedence;           // Precedence if a binary op.
+  int Line;                      // Source line taken from the location.
+
+public:
+  PrototypeAST(SourceLocation Loc, const std::string &Name,
+               std::vector<std::string> Args, bool IsOperator = false,
+               unsigned Prec = 0)
+      : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
+        Precedence(Prec), Line(Loc.Line) {}
+
+  /// Creates the LLVM function declaration for this prototype.
+  Function *codegen();
+
+  const std::string &getName() const { return Name; }
+  int getLine() const { return Line; }
+  unsigned getBinaryPrecedence() const { return Precedence; }
+
+  /// An operator prototype with exactly one argument.
+  bool isUnaryOp() const {
+    if (!IsOperator)
+      return false;
+    return Args.size() == 1;
+  }
+
+  /// An operator prototype with exactly two arguments.
+  bool isBinaryOp() const {
+    if (!IsOperator)
+      return false;
+    return Args.size() == 2;
+  }
+
+  /// Returns the operator character, i.e. the last character of the name.
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    const std::string::size_type LastIdx = Name.size() - 1;
+    return Name[LastIdx];
+  }
+};
+
+/// FunctionAST - A complete function definition: prototype plus body.
+class FunctionAST {
+  std::unique_ptr<PrototypeAST> Proto; // Signature of the function.
+  std::unique_ptr<ExprAST> Body;       // Expression computing the result.
+
+public:
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+
+  /// Emits IR for the whole function; returns null on failure.
+  Function *codegen();
+
+  /// Writes a "FunctionAST" header followed by the body one level deeper;
+  /// a missing body is printed as "null".
+  raw_ostream &dump(raw_ostream &out, int ind) {
+    indent(out, ind) << "FunctionAST\n";
+    const int BodyInd = ind + 1;
+    indent(out, BodyInd) << "Body:";
+    if (!Body)
+      return out << "null\n";
+    return Body->dump(out, BodyInd);
+  }
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok is the token the parser is currently looking at.  getNextToken
+/// pulls the next token from the lexer (gettok), stores it in CurTok and
+/// returns it.
+static int CurTok;
+static int getNextToken() {
+  CurTok = gettok();
+  return CurTok;
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+///
+/// Returns -1 when CurTok is not an ASCII character, is not present in
+/// BinopPrecedence, or is registered with a non-positive precedence.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.  Use find() rather than operator[] so
+  // that merely probing a token never inserts a zero entry into the table.
+  auto It = BinopPrecedence.find(static_cast<char>(CurTok));
+  if (It == BinopPrecedence.end() || It->second <= 0)
+    return -1;
+  return It->second;
+}
+
+/// LogError* - These are little helper functions for error handling.
+std::unique_ptr<ExprAST> LogError(const char *Str) {
+  fprintf(stderr, "Error: %s\n", Str);
+  return nullptr;
+}
+
+/// Reports \p Str through LogError and returns a null prototype pointer.
+std::unique_ptr<PrototypeAST> LogErrorP(const char *Str) {
+  LogError(Str);
+  return std::unique_ptr<PrototypeAST>();
+}
+
+// Forward declaration: the primary-expression parsers below recurse into it.
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+///
+/// Wraps the lexer's current NumVal in a NumberExprAST and advances past the
+/// number token.
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  std::unique_ptr<ExprAST> Literal = std::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // consume the number
+  return Literal;
+}
+
+/// parenexpr ::= '(' expression ')'
+///
+/// Returns the inner expression, or null if it fails to parse or the closing
+/// parenthesis is missing.
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // eat (.
+
+  std::unique_ptr<ExprAST> Inner = ParseExpression();
+  if (!Inner)
+    return nullptr;
+
+  if (CurTok == ')') {
+    getNextToken(); // eat ).
+    return Inner;
+  }
+  return LogError("expected ')'");
+}
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+///
+/// Produces a VariableExprAST for a bare identifier, or a CallExprAST when
+/// the identifier is followed by a parenthesised argument list.
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
+  // Capture the name and location before the lexer moves on.
+  std::string IdName = IdentifierStr;
+  SourceLocation LitLoc = CurLoc;
+
+  getNextToken(); // eat identifier.
+
+  // Anything other than '(' means this is a plain variable reference.
+  const bool IsCall = (CurTok == '(');
+  if (!IsCall)
+    return std::make_unique<VariableExprAST>(LitLoc, IdName);
+
+  getNextToken(); // eat (
+  std::vector<std::unique_ptr<ExprAST>> Args;
+  if (CurTok != ')') {
+    for (;;) {
+      std::unique_ptr<ExprAST> Arg = ParseExpression();
+      if (!Arg)
+        return nullptr;
+      Args.push_back(std::move(Arg));
+
+      // ')' ends the list; ',' introduces another argument.
+      if (CurTok == ')')
+        break;
+      if (CurTok != ',')
+        return LogError("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  getNextToken(); // eat the ')'.
+
+  return std::make_unique<CallExprAST>(LitLoc, IdName, std::move(Args));
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+///
+/// Returns null as soon as any sub-expression fails to parse or a keyword is
+/// missing.
+static std::unique_ptr<ExprAST> ParseIfExpr() {
+  SourceLocation IfLoc = CurLoc;
+  getNextToken(); // eat the if.
+
+  // Condition, which must be followed by 'then'.
+  std::unique_ptr<ExprAST> CondExpr = ParseExpression();
+  if (!CondExpr)
+    return nullptr;
+  if (CurTok != tok_then)
+    return LogError("expected then");
+  getNextToken(); // eat the then
+
+  // 'then' branch, which must be followed by 'else'.
+  std::unique_ptr<ExprAST> ThenExpr = ParseExpression();
+  if (!ThenExpr)
+    return nullptr;
+  if (CurTok != tok_else)
+    return LogError("expected else");
+  getNextToken(); // eat the else
+
+  // 'else' branch.
+  std::unique_ptr<ExprAST> ElseExpr = ParseExpression();
+  if (!ElseExpr)
+    return nullptr;
+
+  return std::make_unique<IfExprAST>(IfLoc, std::move(CondExpr),
+                                     std::move(ThenExpr), std::move(ElseExpr));
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+///
+/// The step expression is optional; when it is absent the resulting
+/// ForExprAST carries a null Step.
+static std::unique_ptr<ExprAST> ParseForExpr() {
+  getNextToken(); // eat the for.
+
+  // Loop variable name.
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after for");
+  std::string IdName = IdentifierStr;
+  getNextToken(); // eat identifier.
+
+  // '=' start-value ','
+  if (CurTok != '=')
+    return LogError("expected '=' after for");
+  getNextToken(); // eat '='.
+
+  std::unique_ptr<ExprAST> StartExpr = ParseExpression();
+  if (!StartExpr)
+    return nullptr;
+  if (CurTok != ',')
+    return LogError("expected ',' after for start value");
+  getNextToken(); // eat ','.
+
+  // End condition.
+  std::unique_ptr<ExprAST> EndExpr = ParseExpression();
+  if (!EndExpr)
+    return nullptr;
+
+  // Optional ',' step-value.
+  std::unique_ptr<ExprAST> StepExpr;
+  if (CurTok == ',') {
+    getNextToken();
+    StepExpr = ParseExpression();
+    if (!StepExpr)
+      return nullptr;
+  }
+
+  // 'in' body.
+  if (CurTok != tok_in)
+    return LogError("expected 'in' after for");
+  getNextToken(); // eat 'in'.
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (!BodyExpr)
+    return nullptr;
+
+  return std::make_unique<ForExprAST>(IdName, std::move(StartExpr),
+                                      std::move(EndExpr), std::move(StepExpr),
+                                      std::move(BodyExpr));
+}
+
+/// varexpr ::= 'var' identifier ('=' expression)?
+///                   (',' identifier ('=' expression)?)* 'in' expression
+///
+/// A variable without an initializer is recorded with a null expression.
+static std::unique_ptr<ExprAST> ParseVarExpr() {
+  getNextToken(); // eat the var.
+
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+
+  // At least one variable name is required.
+  if (CurTok != tok_identifier)
+    return LogError("expected identifier after var");
+
+  for (;;) {
+    std::string Name = IdentifierStr;
+    getNextToken(); // eat identifier.
+
+    // Optional "= initializer".
+    std::unique_ptr<ExprAST> Init;
+    if (CurTok == '=') {
+      getNextToken(); // eat the '='.
+      Init = ParseExpression();
+      if (!Init)
+        return nullptr;
+    }
+
+    VarNames.emplace_back(Name, std::move(Init));
+
+    // No ',' means the variable list is finished.
+    if (CurTok != ',')
+      break;
+    getNextToken(); // eat the ','.
+
+    // A ',' must be followed by another variable name.
+    if (CurTok != tok_identifier)
+      return LogError("expected identifier list after var");
+  }
+
+  // The variable list must be followed by 'in'.
+  if (CurTok != tok_in)
+    return LogError("expected 'in' keyword after 'var'");
+  getNextToken(); // eat 'in'.
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (!BodyExpr)
+    return nullptr;
+
+  return std::make_unique<VarExprAST>(std::move(VarNames),
+                                      std::move(BodyExpr));
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+///
+/// Dispatches on the current token; any other token is reported as an error.
+static std::unique_ptr<ExprAST> ParsePrimary() {
+  switch (CurTok) {
+  case tok_identifier:
+    return ParseIdentifierExpr();
+  case tok_number:
+    return ParseNumberExpr();
+  case '(':
+    return ParseParenExpr();
+  case tok_if:
+    return ParseIfExpr();
+  case tok_for:
+    return ParseForExpr();
+  case tok_var:
+    return ParseVarExpr();
+  default:
+    return LogError("unknown token when expecting an expression");
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+///
+/// Any ASCII token other than '(' and ',' is treated as a unary operator
+/// applied to the unary expression that follows it.
+static std::unique_ptr<ExprAST> ParseUnary() {
+  // Non-ASCII tokens, '(' and ',' cannot be operators: parse a primary.
+  const bool IsOperatorTok = isascii(CurTok) && CurTok != '(' && CurTok != ',';
+  if (!IsOperatorTok)
+    return ParsePrimary();
+
+  // Remember the operator, then parse its operand recursively.
+  int Opc = CurTok;
+  getNextToken();
+  std::unique_ptr<ExprAST> Operand = ParseUnary();
+  if (!Operand)
+    return nullptr;
+  return std::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+///
+/// Operator-precedence parsing: keeps folding "<op> unary" pairs into \p LHS
+/// for as long as the pending operator's precedence is at least \p ExprPrec.
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
+  for (;;) {
+    // Stop as soon as the pending token binds less tightly than required
+    // (non-operators report -1).
+    const int TokPrec = GetTokPrecedence();
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // It is a binary operator: remember it and where it was seen.
+    int BinOp = CurTok;
+    SourceLocation BinLoc = CurLoc;
+    getNextToken(); // eat binop
+
+    // Operand to the right of the operator.
+    std::unique_ptr<ExprAST> RHS = ParseUnary();
+    if (!RHS)
+      return nullptr;
+
+    // If the operator after RHS binds tighter than BinOp, it takes RHS as
+    // its own left-hand side first.
+    const int NextPrec = GetTokPrecedence();
+    if (NextPrec > TokPrec) {
+      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
+      if (!RHS)
+        return nullptr;
+    }
+
+    // Fold LHS and RHS into a single node and keep going.
+    LHS = std::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS),
+                                          std::move(RHS));
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+/// Parses a leading unary expression and then any binary operators that
+/// follow it, starting at minimum precedence 0.
+static std::unique_ptr<ExprAST> ParseExpression() {
+  std::unique_ptr<ExprAST> First = ParseUnary();
+  if (First)
+    return ParseBinOpRHS(0, std::move(First));
+  return nullptr;
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+///
+/// Operator prototypes are named "unary<op>" / "binary<op>".  A binary
+/// operator may carry an explicit precedence in 1..100; the default is 30.
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
+  SourceLocation FnLoc = CurLoc;
+
+  std::string FnName;
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return LogErrorP("Expected unary operator");
+    FnName = std::string("unary") + static_cast<char>(CurTok);
+    Kind = 1;
+    getNextToken();
+    break;
+
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return LogErrorP("Expected binary operator");
+    FnName = std::string("binary") + static_cast<char>(CurTok);
+    Kind = 2;
+    getNextToken();
+
+    // An optional number right after the operator is its precedence.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return LogErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = static_cast<unsigned>(NumVal);
+      getNextToken();
+    }
+    break;
+
+  default:
+    return LogErrorP("Expected function name in prototype");
+  }
+
+  if (CurTok != '(')
+    return LogErrorP("Expected '(' in prototype");
+
+  // Collect argument names: consecutive identifier tokens up to the first
+  // token that is not an identifier.
+  std::vector<std::string> ArgNames;
+  for (int Tok = getNextToken(); Tok == tok_identifier; Tok = getNextToken())
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return LogErrorP("Expected ')' in prototype");
+
+  getNextToken(); // eat ')'.
+
+  // For operators, Kind doubles as the required operand count.
+  const bool IsOperator = (Kind != 0);
+  if (IsOperator && ArgNames.size() != Kind)
+    return LogErrorP("Invalid number of operands for operator");
+
+  return std::make_unique<PrototypeAST>(FnLoc, FnName, ArgNames, IsOperator,
+                                        BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+///
+/// Returns null if either the prototype or the body fails to parse.
+static std::unique_ptr<FunctionAST> ParseDefinition() {
+  getNextToken(); // eat def.
+
+  std::unique_ptr<PrototypeAST> Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (!BodyExpr)
+    return nullptr;
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(BodyExpr));
+}
+
+/// toplevelexpr ::= expression
+///
+/// Wraps the expression in a zero-argument function named "__anon_expr".
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  SourceLocation FnLoc = CurLoc;
+
+  std::unique_ptr<ExprAST> BodyExpr = ParseExpression();
+  if (!BodyExpr)
+    return nullptr;
+
+  // Make an anonymous proto.
+  auto Proto = std::make_unique<PrototypeAST>(FnLoc, "__anon_expr",
+                                              std::vector<std::string>());
+  return std::make_unique<FunctionAST>(std::move(Proto), std::move(BodyExpr));
+}
+
+/// external ::= 'extern' prototype
+///
+/// Skips the 'extern' keyword and parses the prototype that follows it.
+static std::unique_ptr<PrototypeAST> ParseExtern() {
+  getNextToken(); // eat extern.
+  std::unique_ptr<PrototypeAST> Proto = ParsePrototype();
+  return Proto;
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation Globals
+//===----------------------------------------------------------------------===//
+
+// Context, module and IR builder shared by all codegen() methods.  All three
+// are (re)created by InitializeModule.
+static std::unique_ptr<LLVMContext> TheContext;
+static std::unique_ptr<Module> TheModule;
+static std::unique_ptr<IRBuilder<>> Builder;
+static ExitOnError ExitOnErr;
+
+// Maps each variable name currently in scope to the alloca that stores it.
+static std::map<std::string, AllocaInst *> NamedValues;
+// InitializeModule takes the module's data layout from this JIT.
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+// Prototypes by function name; getFunction uses them to emit a declaration
+// when the current module does not contain the function yet.
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
+
+//===----------------------------------------------------------------------===//
+// Debug Info Support
+//===----------------------------------------------------------------------===//
+
+// Builder used to create all debug-info metadata.
+static std::unique_ptr<DIBuilder> DBuilder;
+
+/// Returns the debug-info type for "double", creating it through DBuilder on
+/// first use and caching it in DblTy.
+DIType *DebugInfo::getDoubleTy() {
+  if (!DblTy)
+    DblTy = DBuilder->createBasicType("double", 64, dwarf::DW_ATE_float);
+  return DblTy;
+}
+
+/// Sets the builder's current debug location from \p AST.  A null \p AST
+/// resets it to an empty DebugLoc.  The scope is the innermost lexical block,
+/// or the compile unit when no block is open.
+void DebugInfo::emitLocation(ExprAST *AST) {
+  if (AST == nullptr) {
+    Builder->SetCurrentDebugLocation(DebugLoc());
+    return;
+  }
+
+  DIScope *Scope = TheCU;
+  if (!LexicalBlocks.empty())
+    Scope = LexicalBlocks.back();
+
+  Builder->SetCurrentDebugLocation(DILocation::get(
+      Scope->getContext(), AST->getLine(), AST->getCol(), Scope));
+}
+
+/// Builds the debug-info signature of a function that returns double and
+/// takes \p NumArgs doubles.  \p Unit is currently unused.
+static DISubroutineType *CreateFunctionType(unsigned NumArgs, DIFile *Unit) {
+  DIType *DblTy = KSDbgInfo.getDoubleTy();
+
+  // Element 0 is the result type; one further entry follows per argument.
+  SmallVector<Metadata *, 8> EltTys;
+  EltTys.push_back(DblTy);
+  for (unsigned Remaining = NumArgs; Remaining != 0; --Remaining)
+    EltTys.push_back(DblTy);
+
+  return DBuilder->createSubroutineType(DBuilder->getOrCreateTypeArray(EltTys));
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+/// Reports \p Str through LogError and returns a null Value pointer.
+Value *LogErrorV(const char *Str) {
+  LogError(Str);
+  Value *NoValue = nullptr;
+  return NoValue;
+}
+
+/// Resolves \p Name to an LLVM function: the one already in the current
+/// module if present, otherwise a declaration freshly generated from the
+/// prototype recorded in FunctionProtos.  Returns null if neither exists.
+Function *getFunction(std::string Name) {
+  // Prefer a function that is already part of the current module.
+  Function *Existing = TheModule->getFunction(Name);
+  if (Existing)
+    return Existing;
+
+  // Otherwise fall back to a known prototype, if there is one.
+  auto FI = FunctionProtos.find(Name);
+  if (FI == FunctionProtos.end())
+    return nullptr;
+  return FI->second->codegen();
+}
+
+/// CreateEntryBlockAlloca - Create an alloca of type double named \p VarName
+/// at the beginning of the entry block of \p TheFunction.  This is used for
+/// mutable variables etc.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          StringRef VarName) {
+  // A temporary builder positioned at the very start of the entry block.
+  IRBuilder<> EntryBuilder(&TheFunction->getEntryBlock(),
+                           TheFunction->getEntryBlock().begin());
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  return EntryBuilder.CreateAlloca(DoubleTy, nullptr, VarName);
+}
+
+/// Emits the literal as a floating-point constant.
+Value *NumberExprAST::codegen() {
+  KSDbgInfo.emitLocation(this);
+  const APFloat Literal(Val);
+  return ConstantFP::get(*TheContext, Literal);
+}
+
+/// Emits a load of the variable's current value from its alloca.  Reports an
+/// error and returns null if the name is not in scope.
+Value *VariableExprAST::codegen() {
+  // Find the variable's stack slot.
+  Value *Slot = NamedValues[Name];
+  if (Slot == nullptr)
+    return LogErrorV("Unknown variable name");
+
+  KSDbgInfo.emitLocation(this);
+
+  // Load the value.
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  return Builder->CreateLoad(DoubleTy, Slot, Name.c_str());
+}
+
+/// Emits the operand and then a call to the user-defined function
+/// "unary<op>".  Returns null if the operand fails or no such function is
+/// known.
+Value *UnaryExprAST::codegen() {
+  Value *OperandV = Operand->codegen();
+  if (OperandV == nullptr)
+    return nullptr;
+
+  // Unary operators are ordinary functions named "unary" + operator char.
+  std::string OpFnName = std::string("unary") + Opcode;
+  Function *OpFn = getFunction(OpFnName);
+  if (OpFn == nullptr)
+    return LogErrorV("Unknown unary operator");
+
+  KSDbgInfo.emitLocation(this);
+  return Builder->CreateCall(OpFn, OperandV, "unop");
+}
+
+/// Emits IR for a binary expression.
+///
+/// '=' stores the RHS value into the variable named by the LHS and yields
+/// that value.  '+', '-', '*' and '<' are emitted as floating-point
+/// instructions ('<' is converted back to a double 0.0/1.0).  Any other
+/// operator becomes a call to the user-defined function "binary<op>".
+/// Returns null if an operand fails to generate or the assignment target is
+/// unknown.
+Value *BinaryExprAST::codegen() {
+  KSDbgInfo.emitLocation(this);
+
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    // This assume we're building without RTTI because LLVM builds that way by
+    // default.  If you build LLVM with RTTI this can be changed to a
+    // dynamic_cast for automatic error checking.
+    // NOTE(review): static_cast never yields null for a non-null LHS, so the
+    // check below cannot reject a non-variable LHS (e.g. "1 = 2"); in that
+    // case the getName() call further down operates on the wrong type.
+    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
+    if (!LHSE)
+      return LogErrorV("destination of '=' must be a variable");
+    // Codegen the RHS.
+    Value *Val = RHS->codegen();
+    if (!Val)
+      return nullptr;
+
+    // Look up the name.
+    Value *Variable = NamedValues[LHSE->getName()];
+    if (!Variable)
+      return LogErrorV("Unknown variable name");
+
+    Builder->CreateStore(Val, Variable);
+    return Val;
+  }
+
+  // Both operands are emitted (LHS first) before either result is checked.
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
+
+  switch (Op) {
+  case '+':
+    return Builder->CreateFAdd(L, R, "addtmp");
+  case '-':
+    return Builder->CreateFSub(L, R, "subtmp");
+  case '*':
+    return Builder->CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder->CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp");
+  default:
+    break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = getFunction(std::string("binary") + Op);
+  // NOTE(review): this is only an assert; with assertions disabled a missing
+  // operator function reaches CreateCall as a null pointer.
+  assert(F && "binary operator not found!");
+
+  Value *Ops[] = {L, R};
+  return Builder->CreateCall(F, Ops, "binop");
+}
+
+Value *CallExprAST::codegen() {
+  KSDbgInfo.emitLocation(this);
+
+  // Look up the name in the global module table.
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
+    return LogErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return LogErrorV("Incorrect # arguments passed");
+
+  std::vector<Value *> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->codegen());
+    if (!ArgsV.back())
+      return nullptr;
+  }
+
+  return Builder->CreateCall(CalleeF, ArgsV, "calltmp");
+}
+
+/// Emits IR for if/then/else.
+///
+/// The condition is compared against 0.0 and drives a conditional branch to
+/// a "then" or an "else" block; both branch to "ifcont", where a PHI node
+/// selects the value of the branch that ran.  Returns that PHI node, or null
+/// if any sub-expression fails to generate.
+Value *IfExprAST::codegen() {
+  KSDbgInfo.emitLocation(this);
+
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  CondV = Builder->CreateFCmpONE(
+      CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond");
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.  The 'else' and merge blocks are created detached
+  // and appended to the function further down.
+  BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else");
+  BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont");
+
+  Builder->CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder->SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->codegen();
+  if (!ThenV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder->GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder->SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
+
+  Builder->CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder->GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder->SetInsertPoint(MergeBB);
+  PHINode *PN = Builder->CreatePHI(Type::getDoubleTy(*TheContext), 2, "iftmp");
+
+  // One incoming value per predecessor: the block each branch ended in.
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+// Output for-loop as:
+//   var = alloca double
+//   ...
+//   start = startexpr
+//   store start -> var
+//   goto loop
+// loop:
+//   ...
+//   bodyexpr
+//   ...
+//   step = stepexpr
+//   endcond = endexpr
+//
+//   curvar = load var
+//   nextvar = curvar + step
+//   store nextvar -> var
+//   br endcond, loop, afterloop
+// afterloop:
+//
+// The loop variable shadows any existing variable of the same name while the
+// body, step and end expressions are emitted.  The expression itself always
+// evaluates to 0.0.  Returns null if any sub-expression fails to generate.
+Value *ForExprAST::codegen() {
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Create an alloca for the variable in the entry block.
+  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+  KSDbgInfo.emitLocation(this);
+
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
+
+  // Store the value into the alloca.
+  Builder->CreateStore(StartVal, Alloca);
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder->CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder->SetInsertPoint(LoopBB);
+
+  // Within the loop, the variable refers to the alloca created above.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  AllocaInst *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Alloca;
+
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (!Body->codegen())
+    return nullptr;
+
+  // Emit the step value.
+  Value *StepVal = nullptr;
+  if (Step) {
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(*TheContext, APFloat(1.0));
+  }
+
+  // Compute the end condition.
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
+
+  // Reload, increment, and restore the alloca.  This handles the case where
+  // the body of the loop mutates the variable.
+  Value *CurVar = Builder->CreateLoad(Type::getDoubleTy(*TheContext), Alloca,
+                                      VarName.c_str());
+  Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar");
+  Builder->CreateStore(NextVar, Alloca);
+
+  // Convert condition to a bool by comparing non-equal to 0.0.
+  EndCond = Builder->CreateFCmpONE(
+      EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond");
+
+  // Create the "after loop" block and insert it.
+  BasicBlock *AfterBB =
+      BasicBlock::Create(*TheContext, "afterloop", TheFunction);
+
+  // Insert the conditional branch at the current insertion point, i.e. the
+  // end of the block the loop body finished in.
+  Builder->CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder->SetInsertPoint(AfterBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(*TheContext));
+}
+
+/// Emits IR for var/in: gives each declared variable an alloca initialised
+/// from its initializer (0.0 when absent), emits the body with those names
+/// in scope, then puts the previous bindings back.  Returns the body's
+/// value, or null if an initializer or the body fails to generate.
+Value *VarExprAST::codegen() {
+  std::vector<AllocaInst *> OldBindings;
+
+  Function *TheFunction = Builder->GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (const auto &NamedVar : VarNames) {
+    const std::string &VarName = NamedVar.first;
+    ExprAST *Init = NamedVar.second.get();
+
+    // The initializer is emitted before the variable enters scope, so it
+    // cannot refer to the variable being declared.  That permits:
+    //  var a = 1 in
+    //    var a = a in ...   # refers to outer 'a'.
+    Value *InitVal = nullptr;
+    if (!Init) {
+      // If not specified, use 0.0.
+      InitVal = ConstantFP::get(*TheContext, APFloat(0.0));
+    } else {
+      InitVal = Init->codegen();
+      if (!InitVal)
+        return nullptr;
+    }
+
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+    Builder->CreateStore(InitVal, Alloca);
+
+    // Save whatever the name was bound to before (possibly null), then bind
+    // it to the new alloca.
+    OldBindings.push_back(NamedValues[VarName]);
+    NamedValues[VarName] = Alloca;
+  }
+
+  KSDbgInfo.emitLocation(this);
+
+  // Codegen the body, now that all vars are in scope.
+  Value *BodyVal = Body->codegen();
+  if (!BodyVal)
+    return nullptr;
+
+  // Pop all our variables from scope, in declaration order.
+  auto OldIt = OldBindings.begin();
+  for (const auto &NamedVar : VarNames)
+    NamedValues[NamedVar.first] = *OldIt++;
+
+  // Return the body computation.
+  return BodyVal;
+}
+
+/// Creates an externally linked function in the current module whose return
+/// type and every parameter are double, and names the parameters after Args.
+Function *PrototypeAST::codegen() {
+  // Make the function type:  double(double,double) etc.
+  Type *DoubleTy = Type::getDoubleTy(*TheContext);
+  std::vector<Type *> Doubles(Args.size(), DoubleTy);
+  FunctionType *FT = FunctionType::get(DoubleTy, Doubles, false);
+
+  Function *F =
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
+
+  // Name each parameter after the corresponding prototype argument.
+  auto NameIt = Args.begin();
+  for (auto &Arg : F->args())
+    Arg.setName(*NameIt++);
+
+  return F;
+}
+
+/// Emits IR and debug info for a complete function definition.
+///
+/// The prototype is handed over to FunctionProtos, the function is resolved
+/// through getFunction, a subprogram debug entry is attached, every argument
+/// gets an alloca plus a parameter-variable declaration, and finally the body
+/// is emitted and returned from the function.  For a binary operator its
+/// precedence is installed in BinopPrecedence.
+///
+/// Returns the finished function, or null on failure.  If the body fails to
+/// generate, the function is erased from the module and a binary operator's
+/// precedence is removed again.
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.  After the move, Proto itself is null, so
+  // everything below must go through P.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
+
+  // If this is an operator, install it.
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction);
+  Builder->SetInsertPoint(BB);
+
+  // Create a subprogram DIE for this function.
+  DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(),
+                                      KSDbgInfo.TheCU->getDirectory());
+  DIScope *FContext = Unit;
+  unsigned LineNo = P.getLine();
+  unsigned ScopeLine = LineNo;
+  DISubprogram *SP = DBuilder->createFunction(
+      FContext, P.getName(), StringRef(), Unit, LineNo,
+      CreateFunctionType(TheFunction->arg_size(), Unit), ScopeLine,
+      DINode::FlagPrototyped, DISubprogram::SPFlagDefinition);
+  TheFunction->setSubprogram(SP);
+
+  // Push the current scope.
+  KSDbgInfo.LexicalBlocks.push_back(SP);
+
+  // Unset the location for the prologue emission (leading instructions with no
+  // location in a function are considered part of the prologue and the debugger
+  // will run past them when breaking on a function)
+  KSDbgInfo.emitLocation(nullptr);
+
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  unsigned ArgIdx = 0;
+  for (auto &Arg : TheFunction->args()) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
+
+    // Create a debug descriptor for the variable.  Argument numbers passed
+    // to createParameterVariable start at 1, hence the pre-increment.
+    DILocalVariable *D = DBuilder->createParameterVariable(
+        SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(),
+        true);
+
+    DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
+                            DILocation::get(SP->getContext(), LineNo, 0, SP),
+                            Builder->GetInsertBlock());
+
+    // Store the initial value into the alloca.
+    Builder->CreateStore(&Arg, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[std::string(Arg.getName())] = Alloca;
+  }
+
+  KSDbgInfo.emitLocation(Body.get());
+
+  if (Value *RetVal = Body->codegen()) {
+    // Finish off the function.
+    Builder->CreateRet(RetVal);
+
+    // Pop off the lexical block for the function.
+    KSDbgInfo.LexicalBlocks.pop_back();
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+
+  // Undo the operator installation.  Proto was moved from at the top of this
+  // function, so the prototype must be reached through P; dereferencing Proto
+  // here would be a null-pointer access.
+  if (P.isBinaryOp())
+    BinopPrecedence.erase(P.getOperatorName());
+
+  // Pop off the lexical block for the function since we added it
+  // unconditionally.
+  KSDbgInfo.LexicalBlocks.pop_back();
+
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static void InitializeModule() {
+  // Open a new module.
+  TheContext = std::make_unique<LLVMContext>();
+  TheModule = std::make_unique<Module>("my cool jit", *TheContext);
+  TheModule->setDataLayout(TheJIT->getDataLayout());
+
+  Builder = std::make_unique<IRBuilder<>>(*TheContext);
+}
+
+static void HandleDefinition() {
+  if (auto FnAST = ParseDefinition()) {
+    if (!FnAST->codegen())
+      fprintf(stderr, "Error reading function definition:");
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (auto ProtoAST = ParseExtern()) {
+    if (!ProtoAST->codegen())
+      fprintf(stderr, "Error reading extern");
+    else
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (!FnAST->codegen()) {
+      fprintf(stderr, "Error generating code for top level expr");
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    switch (CurTok) {
+    case tok_eof:
+      return;
+    case ';': // ignore top-level semicolons.
+      getNextToken();
+      break;
+    case tok_def:
+      HandleDefinition();
+      break;
+    case tok_extern:
+      HandleExtern();
+      break;
+    default:
+      HandleTopLevelExpression();
+      break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+#ifdef _WIN32
+#define DLLEXPORT __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" DLLEXPORT double putchard(double X) {
+  fputc((char)X, stderr);
+  return 0;
+}
+
+/// printd - printf that takes a double and prints it as "%f\n", returning 0.
+extern "C" DLLEXPORT double printd(double X) {
+  fprintf(stderr, "%f\n", X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
+  InitializeNativeTargetAsmParser();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['='] = 2;
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40; // highest.
+
+  // Prime the first token.
+  getNextToken();
+
+  TheJIT = ExitOnErr(KaleidoscopeJIT::Create());
+
+  InitializeModule();
+
+  // Add the current debug info version into the module.
+  TheModule->addModuleFlag(Module::Warning, "Debug Info Version",
+                           DEBUG_METADATA_VERSION);
+
+  // Darwin only supports dwarf2.
+  if (Triple(sys::getProcessTriple()).isOSDarwin())
+    TheModule->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 2);
+
+  // Construct the DIBuilder, we do this here because we need the module.
+  DBuilder = std::make_unique<DIBuilder>(*TheModule);
+
+  // Create the compile unit for the module.
+  // Currently down as "fib.ks" as a filename since we're redirecting stdin
+  // but we'd like actual source locations.
+  KSDbgInfo.TheCU = DBuilder->createCompileUnit(
+      dwarf::DW_LANG_C, DBuilder->createFile("fib.ks", "."),
+      "Kaleidoscope Compiler", 0, "", 0);
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  // Finalize the debug info.
+  DBuilder->finalize();
+
+  // Print out all of the generated code.
+  TheModule->print(errs(), nullptr);
+
+  return 0;
+}
+
+
+

Next: Conclusion and other useful LLVM tidbits

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl10.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl10.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl10.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/tutorial/MyFirstLanguageFrontend/LangImpl10.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,387 @@ + + + + + + + + + 10. Kaleidoscope: Conclusion and other useful LLVM tidbits — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

10. Kaleidoscope: Conclusion and other useful LLVM tidbits

+ +
+

10.1. Tutorial Conclusion

+

Welcome to the final chapter of the “Implementing a language with +LLVM” tutorial. In the course of this tutorial, we have +grown our little Kaleidoscope language from being a useless toy, to +being a semi-interesting (but probably still useless) toy. :)

+

It is interesting to see how far we’ve come, and how little code it has +taken. We built the entire lexer, parser, AST, code generator, an +interactive run-loop (with a JIT!), and emitted debug information in +standalone executables - all in under 1000 lines of (non-comment/non-blank) +code.

+

Our little language supports a couple of interesting features: it +supports user defined binary and unary operators, it uses JIT +compilation for immediate evaluation, and it supports a few control flow +constructs with SSA construction.

+

Part of the idea of this tutorial was to show you how easy and fun it +can be to define, build, and play with languages. Building a compiler +need not be a scary or mystical process! Now that you’ve seen some of +the basics, I strongly encourage you to take the code and hack on it. +For example, try adding:

+
    +
  • global variables - While global variables have questionable value +in modern software engineering, they are often useful when putting +together quick little hacks like the Kaleidoscope compiler itself. +Fortunately, our current setup makes it very easy to add global +variables: just have value lookup check to see if an unresolved +variable is in the global variable symbol table before rejecting it. +To create a new global variable, make an instance of the LLVM +GlobalVariable class.

  • +
  • typed variables - Kaleidoscope currently only supports variables +of type double. This gives the language a very nice elegance, because +only supporting one type means that you never have to specify types. +Different languages have different ways of handling this. The easiest +way is to require the user to specify types for every variable +definition, and record the type of the variable in the symbol table +along with its Value*.

  • +
  • arrays, structs, vectors, etc - Once you add types, you can start +extending the type system in all sorts of interesting ways. Simple +arrays are very easy and are quite useful for many different +applications. Adding them is mostly an exercise in learning how the +LLVM getelementptr instruction +works: it is so nifty/unconventional, it has its own +FAQ!

  • +
  • standard runtime - Our current language allows the user to access +arbitrary external functions, and we use it for things like “printd” +and “putchard”. As you extend the language to add higher-level +constructs, often these constructs make the most sense if they are +lowered to calls into a language-supplied runtime. For example, if +you add hash tables to the language, it would probably make sense to +add the routines to a runtime, instead of inlining them all the way.

  • +
  • memory management - Currently we can only access the stack in +Kaleidoscope. It would also be useful to be able to allocate heap +memory, either with calls to the standard libc malloc/free interface +or with a garbage collector. If you would like to use garbage +collection, note that LLVM fully supports Accurate Garbage +Collection including algorithms that +move objects and need to scan/update the stack.

  • +
  • exception handling support - LLVM supports generation of zero +cost exceptions which interoperate with +code compiled in other languages. You could also generate code by +implicitly making every function return an error value and checking +it. You could also make explicit use of setjmp/longjmp. There are +many different ways to go here.

  • +
  • object orientation, generics, database access, complex numbers, +geometric programming, … - Really, there is no end of crazy +features that you can add to the language.

  • +
  • unusual domains - We’ve been talking about applying LLVM to a +domain that many people are interested in: building a compiler for a +specific language. However, there are many other domains that can use +compiler technology that are not typically considered. For example, +LLVM has been used to implement OpenGL graphics acceleration, +translate C++ code to ActionScript, and many other cute and clever +things. Maybe you will be the first to JIT compile a regular +expression interpreter into native code with LLVM?

  • +
+

Have fun - try doing something crazy and unusual. Building a language +like everyone else always has, is much less fun than trying something a +little crazy or off the wall and seeing how it turns out. If you get +stuck or want to talk about it, feel free to email the llvm-dev mailing +list: it has lots +of people who are interested in languages and are often willing to help +out.

+

Before we end this tutorial, I want to talk about some “tips and tricks” +for generating LLVM IR. These are some of the more subtle things that +may not be obvious, but are very useful if you want to take advantage of +LLVM’s capabilities.

+
+
+

10.2. Properties of the LLVM IR

+

We have a couple of common questions about code in the LLVM IR form - +let’s just get these out of the way right now, shall we?

+
+

10.2.1. Target Independence

+

Kaleidoscope is an example of a “portable language”: any program written +in Kaleidoscope will work the same way on any target that it runs on. +Many other languages have this property, e.g. lisp, java, haskell, +javascript, python, etc (note that while these languages are portable, +not all their libraries are).

+

One nice aspect of LLVM is that it is often capable of preserving target +independence in the IR: you can take the LLVM IR for a +Kaleidoscope-compiled program and run it on any target that LLVM +supports, even emitting C code and compiling that on targets that LLVM +doesn’t support natively. You can trivially tell that the Kaleidoscope +compiler generates target-independent code because it never queries for +any target-specific information when generating code.

+

The fact that LLVM provides a compact, target-independent, +representation for code gets a lot of people excited. Unfortunately, +these people are usually thinking about C or a language from the C +family when they are asking questions about language portability. I say +“unfortunately”, because there is really no way to make (fully general) +C code portable, other than shipping the source code around (and of +course, C source code is not actually portable in general either - ever +port a really old application from 32- to 64-bits?).

+

The problem with C (again, in its full generality) is that it is heavily +laden with target specific assumptions. As one simple example, the +preprocessor often destructively removes target-independence from the +code when it processes the input text:

+
#ifdef __i386__
+  int X = 1;
+#else
+  int X = 42;
+#endif
+
+
+

While it is possible to engineer more and more complex solutions to +problems like this, it cannot be solved in full generality in a way that +is better than shipping the actual source code.

+

That said, there are interesting subsets of C that can be made portable. +If you are willing to fix primitive types to a fixed size (say int = +32-bits, and long = 64-bits), don’t care about ABI compatibility with +existing binaries, and are willing to give up some other minor features, +you can have portable code. This can make sense for specialized domains +such as an in-kernel language.

+
+
+

10.2.2. Safety Guarantees

+

Many of the languages above are also “safe” languages: it is impossible +for a program written in Java to corrupt its address space and crash the +process (assuming the JVM has no bugs). Safety is an interesting +property that requires a combination of language design, runtime +support, and often operating system support.

+

It is certainly possible to implement a safe language in LLVM, but LLVM +IR does not itself guarantee safety. The LLVM IR allows unsafe pointer +casts, use after free bugs, buffer over-runs, and a variety of other +problems. Safety needs to be implemented as a layer on top of LLVM and, +conveniently, several groups have investigated this. Ask on the llvm-dev +mailing list if +you are interested in more details.

+
+
+

10.2.3. Language-Specific Optimizations

+

One thing about LLVM that turns off many people is that it does not +solve all the world’s problems in one system. One specific +complaint is that people perceive LLVM as being incapable of performing +high-level language-specific optimization: LLVM “loses too much +information”. Here are a few observations about this:

+

First, you’re right that LLVM does lose information. For example, as of +this writing, there is no way to distinguish in the LLVM IR whether an +SSA-value came from a C “int” or a C “long” on an ILP32 machine (other +than debug info). Both get compiled down to an ‘i32’ value and the +information about what it came from is lost. The more general issue +here, is that the LLVM type system uses “structural equivalence” instead +of “name equivalence”. Another place this surprises people is if you +have two types in a high-level language that have the same structure +(e.g. two different structs that have a single int field): these types +will compile down into a single LLVM type and it will be impossible to +tell what it came from.

+

Second, while LLVM does lose information, LLVM is not a fixed target: we +continue to enhance and improve it in many different ways. In addition +to adding new features (LLVM did not always support exceptions or debug +info), we also extend the IR to capture important information for +optimization (e.g. whether an argument is sign or zero extended, +information about pointers aliasing, etc). Many of the enhancements are +user-driven: people want LLVM to include some specific feature, so they +go ahead and extend it.

+

Third, it is possible and easy to add language-specific optimizations, +and you have a number of choices in how to do it. As one trivial +example, it is easy to add language-specific optimization passes that +“know” things about code compiled for a language. In the case of the C +family, there is an optimization pass that “knows” about the standard C +library functions. If you call “exit(0)” in main(), it knows that it is +safe to optimize that into “return 0;” because C specifies what the +‘exit’ function does.

+

In addition to simple library knowledge, it is possible to embed a +variety of other language-specific information into the LLVM IR. If you +have a specific need and run into a wall, please bring the topic up on +the llvm-dev list. At the very worst, you can always treat LLVM as if it +were a “dumb code generator” and implement the high-level optimizations +you desire in your front-end, on the language-specific AST.

+
+
+
+

10.3. Tips and Tricks

+

There is a variety of useful tips and tricks that you come to know after +working on/with LLVM that aren’t obvious at first glance. Instead of +letting everyone rediscover them, this section talks about some of these +issues.

+
+

10.3.1. Implementing portable offsetof/sizeof

+

One interesting thing that comes up, if you are trying to keep the code +generated by your compiler “target independent”, is that you often need +to know the size of some LLVM type or the offset of some field in an +LLVM structure. For example, you might need to pass the size of a type +into a function that allocates memory.

+

Unfortunately, this can vary widely across targets: for example the +width of a pointer is trivially target-specific. However, there is a +clever way to use the getelementptr +instruction +that allows you to compute this in a portable way.

+
+
+

10.3.2. Garbage Collected Stack Frames

+

Some languages want to explicitly manage their stack frames, often so +that they are garbage collected or to allow easy implementation of +closures. There are often better ways to implement these features than +explicit stack frames, but LLVM does support +them, +if you want. It requires your front-end to convert the code into +Continuation Passing +Style and +the use of tail calls (which LLVM also supports).

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TypeMetadata.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TypeMetadata.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/TypeMetadata.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/TypeMetadata.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,501 @@ + + + + + + + + + Type Metadata — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Type Metadata

+

Type metadata is a mechanism that allows IR modules to co-operatively build +pointer sets corresponding to addresses within a given set of globals. LLVM’s +control flow integrity implementation uses this metadata to efficiently +check (at each call site) that a given address corresponds to either a +valid vtable or function pointer for a given class or function type, and its +whole-program devirtualization pass uses the metadata to identify potential +callees for a given virtual call.

+

To use the mechanism, a client creates metadata nodes with two elements:

+
    +
  1. a byte offset into the global (generally zero for functions)

  2. +
  3. a metadata object representing an identifier for the type

  4. +
+

These metadata nodes are associated with globals by using global object +metadata attachments with the !type metadata kind.

+

Each type identifier must exclusively identify either global variables +or functions.

+
+

Limitation

+

The current implementation only supports attaching metadata to functions on +the x86-32 and x86-64 architectures.

+
+

An intrinsic, llvm.type.test, is used to test whether a +given pointer is associated with a type identifier.

+
+

Representing Type Information using Type Metadata

+

This section describes how Clang represents C++ type information associated with +virtual tables using type metadata.

+

Consider the following inheritance hierarchy:

+
struct A {
+  virtual void f();
+};
+
+struct B : A {
+  virtual void f();
+  virtual void g();
+};
+
+struct C {
+  virtual void h();
+};
+
+struct D : A, C {
+  virtual void f();
+  virtual void h();
+};
+
+
+

The virtual table objects for A, B, C and D look like this (under the Itanium ABI):

+ + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Virtual Table Layout for A, B, C, D

Class

0

1

2

3

4

5

6

A

A::offset-to-top

&A::rtti

&A::f

B

B::offset-to-top

&B::rtti

&B::f

&B::g

C

C::offset-to-top

&C::rtti

&C::h

D

D::offset-to-top

&D::rtti

&D::f

&D::h

D::offset-to-top

&D::rtti

thunk for &D::h

+

When an object of type A is constructed, the address of &A::f in A’s +virtual table object is stored in the object’s vtable pointer. In ABI parlance +this address is known as an address point. Similarly, when an object of type +B is constructed, the address of &B::f is stored in the vtable pointer. In +this way, the vtable in B’s virtual table object is compatible with A’s vtable.

+

D is a little more complicated, due to the use of multiple inheritance. Its +virtual table object contains two vtables, one compatible with A’s vtable and +the other compatible with C’s vtable. Objects of type D contain two virtual +pointers, one belonging to the A subobject and containing the address of +the vtable compatible with A’s vtable, and the other belonging to the C +subobject and containing the address of the vtable compatible with C’s vtable.

+

The full set of compatibility information for the above class hierarchy is +shown below. The following table shows the name of a class, the offset of an +address point within that class’s vtable and the name of one of the classes +with which that address point is compatible.

+ + +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Type Offsets for A, B, C, D

VTable for

Offset

Compatible Class

A

16

A

B

16

A

B

C

16

C

D

16

A

D

48

C

+

The next step is to encode this compatibility information into the IR. The way +this is done is to create type metadata named after each of the compatible +classes, with which we associate each of the compatible address points in +each vtable. For example, these type metadata entries encode the compatibility +information for the above hierarchy:

+
@_ZTV1A = constant [...], !type !0
+@_ZTV1B = constant [...], !type !0, !type !1
+@_ZTV1C = constant [...], !type !2
+@_ZTV1D = constant [...], !type !0, !type !3, !type !4
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
+!2 = !{i64 16, !"_ZTS1C"}
+!3 = !{i64 16, !"_ZTS1D"}
+!4 = !{i64 48, !"_ZTS1C"}
+
+
+

With this type metadata, we can now use the llvm.type.test intrinsic to +test whether a given pointer is compatible with a type identifier. Working +backwards, if llvm.type.test returns true for a particular pointer, +we can also statically determine the identities of the virtual functions +that a particular virtual call may call. For example, if a program assumes +a pointer to be a member of !"_ZTS1A", we know that the address can +only be one of _ZTV1A+16, _ZTV1B+16 or _ZTV1D+16 (i.e. the +address points of the vtables of A, B and D respectively). If we then load +an address from that pointer, we know that the address can only be one of +&A::f, &B::f or &D::f.

+
+
+

Testing Addresses For Type Membership

+

If a program tests an address using llvm.type.test, this will cause +a link-time optimization pass, LowerTypeTests, to replace calls to this +intrinsic with efficient code to perform type member tests. At a high level, +the pass will lay out referenced globals in a consecutive memory region in +the object file, construct bit vectors that map onto that memory region, +and generate code at each of the llvm.type.test call sites to test +pointers against those bit vectors. Because of the layout manipulation, the +globals’ definitions must be available at LTO time. For more information, +see the control flow integrity design document.

+

A type identifier that identifies functions is transformed into a jump table, +which is a block of code consisting of one branch instruction for each +of the functions associated with the type identifier that branches to the +target function. The pass will redirect any taken function addresses to the +corresponding jump table entry. In the object file’s symbol table, the jump +table entries take the identities of the original functions, so that addresses +taken outside the module will pass any verification done inside the module.

+

Jump tables may call external functions, so their definitions need not +be available at LTO time. Note that if an externally defined function is +associated with a type identifier, there is no guarantee that its identity +within the module will be the same as its identity outside of the module, +as the former will be the jump table entry if a jump table is necessary.

+

The GlobalLayoutBuilder class is responsible for laying out the globals +efficiently to minimize the sizes of the underlying bitsets.

+
+
Example
+

+
+
target datalayout = "e-p:32:32"
+
+@a = internal global i32 0, !type !0
+@b = internal global i32 0, !type !0, !type !1
+@c = internal global i32 0, !type !1
+@d = internal global [2 x i32] [i32 0, i32 0], !type !2
+
+define void @e() !type !3 {
+  ret void
+}
+
+define void @f() {
+  ret void
+}
+
+declare void @g() !type !3
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+!2 = !{i32 4, !"typeid2"}
+!3 = !{i32 0, !"typeid3"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %typeid) nounwind readnone
+
+define i1 @foo(i32* %p) {
+  %pi8 = bitcast i32* %p to i8*
+  %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid1")
+  ret i1 %x
+}
+
+define i1 @bar(i32* %p) {
+  %pi8 = bitcast i32* %p to i8*
+  %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid2")
+  ret i1 %x
+}
+
+define i1 @baz(void ()* %p) {
+  %pi8 = bitcast void ()* %p to i8*
+  %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid3")
+  ret i1 %x
+}
+
+define void @main() {
+  %a1 = call i1 @foo(i32* @a) ; returns 1
+  %b1 = call i1 @foo(i32* @b) ; returns 1
+  %c1 = call i1 @foo(i32* @c) ; returns 0
+  %a2 = call i1 @bar(i32* @a) ; returns 0
+  %b2 = call i1 @bar(i32* @b) ; returns 1
+  %c2 = call i1 @bar(i32* @c) ; returns 1
+  %d02 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 0)) ; returns 0
+  %d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
+  %e = call i1 @baz(void ()* @e) ; returns 1
+  %f = call i1 @baz(void ()* @f) ; returns 0
+  %g = call i1 @baz(void ()* @g) ; returns 1
+  ret void
+}
+
+
+
+
+

!vcall_visibility Metadata

+

In order to allow removing unused function pointers from vtables, we need to +know whether every virtual call which could use it is known to the compiler, or +whether another translation unit could introduce more calls through the vtable. +This is not the same as the linkage of the vtable, because call sites could be +using a pointer of a more widely-visible base class. For example, consider this +code:

+
__attribute__((visibility("default")))
+struct A {
+  virtual void f();
+};
+
+__attribute__((visibility("hidden")))
+struct B : A {
+  virtual void f();
+};
+
+
+

With LTO, we know that all code which can see the declaration of B is +visible to us. However, a pointer to a B could be cast to A* and passed +to another linkage unit, which could then call f on it. This call would +load from the vtable for B (using the object pointer), and then call +B::f. This means we can’t remove the function pointer from B’s vtable, +or the implementation of B::f. However, if we can see all code which knows +about any dynamic base class (which would be the case if B only inherited +from classes with hidden visibility), then this optimisation would be valid.

+

This concept is represented in IR by the !vcall_visibility metadata +attached to vtable objects, with the following values:

+ ++++ + + + + + + + + + + + + + + + + +

Value

Behavior

0 (or omitted)

+
Public

Virtual function calls using this vtable could be made from external +code.

+
+
+

1

+
Linkage Unit

All virtual function calls which might use this vtable are in the +current LTO unit, meaning they will be in the current module once +LTO linking has been performed.

+
+
+

2

+
Translation Unit

All virtual function calls which might use this vtable are in the +current module.

+
+
+
+

In addition, all function pointer loads from a vtable marked with the +!vcall_visibility metadata (with a non-zero value) must be done using the +llvm.type.checked.load intrinsic, so that virtual +call sites can be correlated with the vtables which they might load from. +Other parts of the vtable (RTTI, offset-to-top, …) can still be accessed with +normal loads.

+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/UserGuides.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/UserGuides.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/UserGuides.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/UserGuides.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,288 @@ + + + + + + + + + User Guides — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

User Guides

+

NOTE: If you are a user who is only interested in using an LLVM-based compiler, +you should look into Clang instead. The +documentation here is intended for users who have a need to work with the +intermediate LLVM representation.

+ +
+
+
+

Clang

+
+
How To Build On ARM

Notes on building and testing LLVM/Clang on ARM.

+
+
How To Build Clang and LLVM with Profile-Guided Optimizations

Notes on building LLVM/Clang with PGO.

+
+
How To Cross-Compile Clang/LLVM using Clang/LLVM

Notes on cross-building and testing LLVM/Clang.

+
+
How to build the C, C++, ObjC, and ObjC++ front end

Instructions for building the clang front-end from source.

+
+
LLVM Code Coverage Mapping Format

This describes the format and encoding used for LLVM’s code coverage mapping.

+
+
Control Flow Verification Tool Design Document

A description of the verification tool for Control Flow Integrity.

+
+
+
+
+

LLVM Builds and Distributions

+
+
Building a Distribution of LLVM

A best-practices guide for using LLVM’s CMake build system to package and +distribute LLVM-based tools.

+
+
Building LLVM with CMake

An addendum to the main Getting Started guide for those using the CMake +build system.

+
+
A guide to Dockerfiles for building LLVM

A reference for using Dockerfiles provided with LLVM.

+
+
Support Library

This document describes the LLVM Support Library (lib/Support) and +how to keep LLVM source code portable.

+
+
Advanced Build Configurations

This document describes more advanced build configurations.

+
+
+
+
+

Optimizations

+
+
Writing an LLVM Pass

Information on how to write LLVM transformations and analyses.

+
+
Writing an LLVM Pass

Information on how to write LLVM transformations under the new pass +manager.

+
+
LLVM’s Analysis and Transform Passes

A list of optimizations and analyses implemented in LLVM.

+
+
Stack Safety Analysis

This document describes the design of the stack safety analysis of local +variables.

+
+
MergeFunctions pass, how it works

Describes functions merging optimization.

+
+
LLVM Alias Analysis Infrastructure

Information on how to write a new alias analysis implementation or how to +use existing analyses.

+
+
MemorySSA

Information about the MemorySSA utility in LLVM, as well as how to use it.

+
+
LLVM Loop Terminology (and Canonical Forms)

A document describing Loops and associated terms as used in LLVM.

+
+
Auto-Vectorization in LLVM

This document describes the current status of vectorization in LLVM.

+
+
LLVM Link Time Optimization: Design and Implementation

This document describes the interface between the LLVM intermodular optimizer +and the linker, and its design.

+
+
The LLVM gold plugin

How to build your programs with link-time optimization on Linux.

+
+
Remarks

A reference on the implementation of remarks in LLVM.

+
+
Source Level Debugging with LLVM

This document describes the design and philosophy behind the LLVM +source-level debugger.

+
+
+
+
+

Code Generation

+
+
Writing an LLVM Backend

Information on how to write LLVM backends for machine targets.

+
+
The LLVM Target-Independent Code Generator

The design and implementation of the LLVM code generator.  Useful if you are +working on retargeting LLVM to a new architecture, designing a new codegen +pass, or enhancing existing components.

+
+
TableGen

Describes the TableGen tool, which is used heavily by the LLVM code +generator.

+
+
+
+

JIT

+
+
MCJIT Design and Implementation

Describes the inner workings of MCJIT execution engine.

+
+
ORC Design and Implementation

Describes the design and implementation of the ORC APIs, including some +usage examples, and a guide for users transitioning from ORCv1 to ORCv2.

+
+
JITLink and ORC’s ObjectLinkingLayer

Describes the design and APIs for the JITLink library, ORC’s new JIT +linker.

+
+
Debugging JIT-ed Code

How to debug JITed code with GDB.

+
+
+
+

How to debug JITed code with GDB.

+
+
+

Additional Topics

+
+
CommandLine 2.0 Library Manual

Provides information on using the command line parsing library.

+
+
Extending LLVM: Adding instructions, intrinsics, types, etc.

Look here to see how to add instructions and intrinsics to LLVM.

+
+
How To Add A Constrained Floating-Point Intrinsic

Gives the steps necessary when adding a new constrained math intrinsic +to LLVM.

+
+
How to build Windows Itanium applications.

Notes on assembling a Windows Itanium environment.

+
+
How to Cross Compile Compiler-rt Builtins For Arm

Notes on cross-building and testing the compiler-rt builtins for Arm.

+
+
Using ARM NEON instructions in big endian mode

LLVM’s support for generating NEON instructions on big endian ARM targets is +somewhat nonintuitive. This document explains the implementation and rationale.

+
+
Compiling CUDA with clang

LLVM support for CUDA.

+
+
User Guide for NVPTX Back-end

This document describes using the NVPTX backend to compile GPU kernels.

+
+
User Guide for AMDGPU Backend

This document describes using the AMDGPU backend to compile GPU kernels.

+
+
DWARF Extensions For Heterogeneous Debugging

This document describes DWARF extensions to support heterogeneous debugging +for targets such as the AMDGPU backend.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Vectorizers.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Vectorizers.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/Vectorizers.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/Vectorizers.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,569 @@ + + + + + + + + + Auto-Vectorization in LLVM — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Auto-Vectorization in LLVM

+ +

LLVM has two vectorizers: The Loop Vectorizer, +which operates on Loops, and the SLP Vectorizer. These vectorizers +focus on different optimization opportunities and use different techniques. +The SLP vectorizer merges multiple scalars that are found in the code into +vectors while the Loop Vectorizer widens instructions in loops +to operate on multiple consecutive iterations.

+

Both the Loop Vectorizer and the SLP Vectorizer are enabled by default.

+
+

The Loop Vectorizer

+
+

Usage

+

The Loop Vectorizer is enabled by default, but it can be disabled +through clang using the command line flag:

+
$ clang ... -fno-vectorize  file.c
+
+
+
+

Command line flags

+

The loop vectorizer uses a cost model to decide on the optimal vectorization factor +and unroll factor. However, users of the vectorizer can force the vectorizer to use +specific values. Both ‘clang’ and ‘opt’ support the flags below.

+

Users can control the vectorization SIMD width using the command line flag “-force-vector-width”.

+
$ clang  -mllvm -force-vector-width=8 ...
+$ opt -loop-vectorize -force-vector-width=8 ...
+
+
+

Users can control the unroll factor using the command line flag “-force-vector-interleave”.

+
$ clang  -mllvm -force-vector-interleave=2 ...
+$ opt -loop-vectorize -force-vector-interleave=2 ...
+
+
+
+
+

Pragma loop hint directives

+

The #pragma clang loop directive allows loop vectorization hints to be +specified for the subsequent for, while, do-while, or C++11 range-based for +loop. The directive allows vectorization and interleaving to be enabled or +disabled. Vector width as well as interleave count can also be manually +specified. The following example explicitly enables vectorization and +interleaving:

+
#pragma clang loop vectorize(enable) interleave(enable)
+while(...) {
+  ...
+}
+
+
+

The following example implicitly enables vectorization and interleaving by +specifying a vector width and interleaving count:

+
#pragma clang loop vectorize_width(2) interleave_count(2)
+for(...) {
+  ...
+}
+
+
+

See the Clang +language extensions +for details.

+
+
+
+

Diagnostics

+

Many loops cannot be vectorized including loops with complicated control flow, +unvectorizable types, and unvectorizable calls. The loop vectorizer generates +optimization remarks which can be queried using command line options to identify +and diagnose loops that are skipped by the loop-vectorizer.

+

Optimization remarks are enabled using:

+

-Rpass=loop-vectorize identifies loops that were successfully vectorized.

+

-Rpass-missed=loop-vectorize identifies loops that failed vectorization and +indicates if vectorization was specified.

+

-Rpass-analysis=loop-vectorize identifies the statements that caused +vectorization to fail. If in addition -fsave-optimization-record is +provided, multiple causes of vectorization failure may be listed (this behavior +might change in the future).

+

Consider the following loop:

+
#pragma clang loop vectorize(enable)
+for (int i = 0; i < Length; i++) {
+  switch(A[i]) {
+  case 0: A[i] = i*2; break;
+  case 1: A[i] = i;   break;
+  default: A[i] = 0;
+  }
+}
+
+
+

The command line -Rpass-missed=loop-vectorize prints the remark:

+
no_switch.cpp:4:5: remark: loop not vectorized: vectorization is explicitly enabled [-Rpass-missed=loop-vectorize]
+
+
+

And the command line -Rpass-analysis=loop-vectorize indicates that the +switch statement cannot be vectorized.

+
no_switch.cpp:4:5: remark: loop not vectorized: loop contains a switch statement [-Rpass-analysis=loop-vectorize]
+  switch(A[i]) {
+  ^
+
+
+

To ensure line and column numbers are produced, include the command line options +-gline-tables-only and -gcolumn-info. See the Clang user manual +for details.

+
+
+

Features

+

The LLVM Loop Vectorizer has a number of features that allow it to vectorize +complex loops.

+
+

Loops with unknown trip count

+

The Loop Vectorizer supports loops with an unknown trip count. +In the loop below, the iteration start and finish points are unknown, +and the Loop Vectorizer has a mechanism to vectorize loops that do not start +at zero. In this example, the trip count (end - start) may not be a multiple of the vector width, and +the vectorizer has to execute the last few iterations as scalar code. Keeping +a scalar copy of the loop increases the code size.

+
void bar(float *A, float* B, float K, int start, int end) {
+  for (int i = start; i < end; ++i)
+    A[i] *= B[i] + K;
+}
+
+
+
+
+

Runtime Checks of Pointers

+

In the example below, if the pointers A and B point to consecutive addresses, +then it is illegal to vectorize the code because some elements of A will be +written before they are read from array B.

+

Some programmers use the ‘restrict’ keyword to notify the compiler that the +pointers are disjoint, but in our example, the Loop Vectorizer has no way of +knowing that the pointers A and B are unique. The Loop Vectorizer handles this +loop by placing code that checks, at runtime, if the arrays A and B point to +disjoint memory locations. If arrays A and B overlap, then the scalar version +of the loop is executed.

+
void bar(float *A, float* B, float K, int n) {
+  for (int i = 0; i < n; ++i)
+    A[i] *= B[i] + K;
+}
+
+
+
+
+

Reductions

+

In this example the sum variable is used by consecutive iterations of +the loop. Normally, this would prevent vectorization, but the vectorizer can +detect that ‘sum’ is a reduction variable. The variable ‘sum’ becomes a vector +of integers, and at the end of the loop the elements of the vector are added +together to create the correct result. We support a number of different +reduction operations, such as addition, multiplication, XOR, AND and OR.

+
int foo(int *A, int n) {
+  unsigned sum = 0;
+  for (int i = 0; i < n; ++i)
+    sum += A[i] + 5;
+  return sum;
+}
+
+
+

We support floating point reduction operations when -ffast-math is used.

+
+
+

Inductions

+

In this example the value of the induction variable i is saved into an +array. The Loop Vectorizer knows to vectorize induction variables.

+
void bar(float *A, int n) {
+  for (int i = 0; i < n; ++i)
+    A[i] = i;
+}
+
+
+
+
+

If Conversion

+

The Loop Vectorizer is able to “flatten” the IF statement in the code and +generate a single stream of instructions. The Loop Vectorizer supports any +control flow in the innermost loop. The innermost loop may contain complex +nesting of IFs, ELSEs and even GOTOs.

+
int foo(int *A, int *B, int n) {
+  unsigned sum = 0;
+  for (int i = 0; i < n; ++i)
+    if (A[i] > B[i])
+      sum += A[i] + 5;
+  return sum;
+}
+
+
+
+
+

Pointer Induction Variables

+

This example uses the “accumulate” function of the standard C++ library. This +loop uses C++ iterators, which are pointers, and not integer indices. +The Loop Vectorizer detects pointer induction variables and can vectorize +this loop. This feature is important because many C++ programs use iterators.

+
int baz(int *A, int n) {
+  return std::accumulate(A, A + n, 0);
+}
+
+
+
+
+

Reverse Iterators

+

The Loop Vectorizer can vectorize loops that count backwards.

+
void foo(int *A, int n) {
+  for (int i = n; i > 0; --i)
+    A[i] +=1;
+}
+
+
+
+
+

Scatter / Gather

+

The Loop Vectorizer can vectorize code that becomes a sequence of scalar instructions +that scatter/gather memory.

+
void foo(int * A, int * B, int n) {
+  for (intptr_t i = 0; i < n; ++i)
+      A[i] += B[i * 4];
+}
+
+
+

In many situations the cost model will inform LLVM that this is not beneficial +and LLVM will only vectorize such code if forced with “-mllvm -force-vector-width=#”.

+
+
+

Vectorization of Mixed Types

+

The Loop Vectorizer can vectorize programs with mixed types. The Vectorizer +cost model can estimate the cost of the type conversion and decide if +vectorization is profitable.

+
void foo(int *A, char *B, int n) {
+  for (int i = 0; i < n; ++i)
+    A[i] += 4 * B[i];
+}
+
+
+
+
+

Global Structures Alias Analysis

+

Access to global structures can also be vectorized, with alias analysis being +used to make sure accesses don’t alias. Run-time checks can also be added on +pointer access to structure members.

+

Many variations are supported, but some that rely on undefined behaviour being +ignored (as other compilers do) are still being left un-vectorized.

+
struct { int A[100], K, B[100]; } Foo;
+
+void foo() {
+  for (int i = 0; i < 100; ++i)
+    Foo.A[i] = Foo.B[i] + 100;
+}
+
+
+
+
+

Vectorization of function calls

+

The Loop Vectorizer can vectorize intrinsic math functions. +See the table below for a list of these functions.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + +

pow

exp

exp2

sin

cos

sqrt

log

log2

log10

fabs

floor

ceil

fma

trunc

nearbyint

fmuladd

+

Note that the optimizer may not be able to vectorize math library functions +that correspond to these intrinsics if the library calls access external state +such as “errno”. To allow better optimization of C/C++ math library functions, +use “-fno-math-errno”.

+

The loop vectorizer knows about special instructions on the target and will +vectorize a loop containing a function call that maps to the instructions. For +example, the loop below will be vectorized on Intel x86 if the SSE4.1 roundps +instruction is available.

+
void foo(float *f) {
+  for (int i = 0; i != 1024; ++i)
+    f[i] = floorf(f[i]);
+}
+
+
+
+
+

Partial unrolling during vectorization

+

Modern processors feature multiple execution units, and only programs that contain a +high degree of parallelism can fully utilize the entire width of the machine. +The Loop Vectorizer increases the instruction level parallelism (ILP) by +performing partial-unrolling of loops.

+

In the example below the entire array is accumulated into the variable ‘sum’. +This is inefficient because only a single execution port can be used by the processor. +By unrolling the code the Loop Vectorizer allows two or more execution ports +to be used simultaneously.

+
int foo(int *A, int n) {
+  unsigned sum = 0;
+  for (int i = 0; i < n; ++i)
+      sum += A[i];
+  return sum;
+}
+
+
+

The Loop Vectorizer uses a cost model to decide when it is profitable to unroll loops. +The decision to unroll the loop depends on the register pressure and the generated code size.

+
+
+

Epilogue Vectorization

+

When vectorizing a loop, often a scalar remainder (epilogue) loop is necessary +to execute tail iterations of the loop if the loop trip count is unknown or it +does not evenly divide the vectorization and unroll factors. When the +vectorization and unroll factors are large, it’s possible for loops with smaller +trip counts to end up spending most of their time in the scalar (rather than +the vector) code. In order to address this issue, the inner loop vectorizer is +enhanced with a feature that allows it to vectorize epilogue loops with a +vectorization and unroll factor combination that makes it more likely for small +trip count loops to still execute in vectorized code. The diagram below shows +the CFG for a typical epilogue vectorized loop with runtime checks. As +illustrated the control flow is structured in a way that avoids duplicating the +runtime pointer checks and optimizes the path length for loops that have very +small trip counts.

+_images/epilogue-vectorization-cfg.png +
+
+
+

Performance

+

This section shows the execution time of Clang on a simple benchmark: +gcc-loops. +This benchmark is a collection of loops from the GCC autovectorization +page by Dorit Nuzman.

+

The chart below compares GCC-4.7, ICC-13, and Clang-SVN with and without loop vectorization at -O3, tuned for “corei7-avx”, running on a Sandybridge iMac. +The Y-axis shows the time in msec. Lower is better. The last column shows the geomean of all the kernels.

+_images/gcc-loops.png +

And Linpack-pc with the same configuration. Result is Mflops, higher is better.

+_images/linpack-pc.png +
+
+

Ongoing Development Directions

+
+
+
+
Vectorization Plan

Modeling the process and upgrading the infrastructure of LLVM’s Loop Vectorizer.

+
+
+
+
+
+

The SLP Vectorizer

+
+

Details

+

The goal of SLP vectorization (a.k.a. superword-level parallelism) is +to combine similar independent instructions +into vector instructions. Memory accesses, arithmetic operations, comparison +operations, and PHI-nodes can all be vectorized using this technique.

+

For example, the following function performs very similar operations on its +inputs (a1, b1) and (a2, b2). The basic-block vectorizer may combine these +into vector operations.

+
void foo(int a1, int a2, int b1, int b2, int *A) {
+  A[0] = a1*(a1 + b1);
+  A[1] = a2*(a2 + b2);
+  A[2] = a1*(a1 + b1);
+  A[3] = a2*(a2 + b2);
+}
+
+
+

The SLP-vectorizer processes the code bottom-up, across basic blocks, in search of scalars to combine.

+
+
+

Usage

+

The SLP Vectorizer is enabled by default, but it can be disabled +through clang using the command line flag:

+
$ clang -fno-slp-vectorize file.c
+
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/WritingAnLLVMBackend.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/WritingAnLLVMBackend.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/WritingAnLLVMBackend.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/WritingAnLLVMBackend.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,1905 @@ + + + + + + + + + Writing an LLVM Backend — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Writing an LLVM Backend

+
+
+ +
+

Introduction

+

This document describes techniques for writing compiler backends that convert +the LLVM Intermediate Representation (IR) to code for a specified machine or +other languages. Code intended for a specific machine can take the form of +either assembly code or binary code (usable for a JIT compiler).

+

The backend of LLVM features a target-independent code generator that may +create output for several types of target CPUs — including X86, PowerPC, +ARM, and SPARC. The backend may also be used to generate code targeted at SPUs +of the Cell processor or GPUs to support the execution of compute kernels.

+

The document focuses on existing examples found in subdirectories of +llvm/lib/Target in a downloaded LLVM release. In particular, this document +focuses on the example of creating a static compiler (one that emits text +assembly) for a SPARC target, because SPARC has fairly standard +characteristics, such as a RISC instruction set and straightforward calling +conventions.

+
+

Audience

+

The audience for this document is anyone who needs to write an LLVM backend to +generate code for a specific hardware or software target.

+
+
+

Prerequisite Reading

+

These essential documents must be read before reading this document:

+
    +
  • LLVM Language Reference Manual — a reference manual for +the LLVM assembly language.

  • +
  • The LLVM Target-Independent Code Generator — a guide to the components (classes and code +generation algorithms) for translating the LLVM internal representation into +machine code for a specified target. Pay particular attention to the +descriptions of code generation stages: Instruction Selection, Scheduling and +Formation, SSA-based Optimization, Register Allocation, Prolog/Epilog Code +Insertion, Late Machine Code Optimizations, and Code Emission.

  • +
  • TableGen Overview — a document that describes the TableGen +(tblgen) application that manages domain-specific information to support +LLVM code generation. TableGen processes input from a target description +file (.td suffix) and generates C++ code that can be used for code +generation.

  • +
  • Writing an LLVM Pass — The assembly printer is a FunctionPass, as +are several SelectionDAG processing steps.

  • +
+

To follow the SPARC examples in this document, have a copy of The SPARC +Architecture Manual, Version 8 for +reference. For details about the ARM instruction set, refer to the ARM +Architecture Reference Manual. For more about +the GNU Assembler format (GAS), see Using As, especially for the +assembly printer. “Using As” contains a list of target machine dependent +features.

+
+
+

Basic Steps

+

To write a compiler backend for LLVM that converts the LLVM IR to code for a +specified target (machine or other language), follow these steps:

+
    +
  • Create a subclass of the TargetMachine class that describes +characteristics of your target machine. Copy existing examples of specific +TargetMachine class and header files; for example, start with +SparcTargetMachine.cpp and SparcTargetMachine.h, but change the file +names for your target. Similarly, change code that references “Sparc” to +reference your target.

  • +
  • Describe the register set of the target. Use TableGen to generate code for +register definition, register aliases, and register classes from a +target-specific RegisterInfo.td input file. You should also write +additional code for a subclass of the TargetRegisterInfo class that +represents the class register file data used for register allocation and also +describes the interactions between registers.

  • +
  • Describe the instruction set of the target. Use TableGen to generate code +for target-specific instructions from target-specific versions of +TargetInstrFormats.td and TargetInstrInfo.td. You should write +additional code for a subclass of the TargetInstrInfo class to represent +machine instructions supported by the target machine.

  • +
  • Describe the selection and conversion of the LLVM IR from a Directed Acyclic +Graph (DAG) representation of instructions to native target-specific +instructions. Use TableGen to generate code that matches patterns and +selects instructions based on additional information in a target-specific +version of TargetInstrInfo.td. Write code for XXXISelDAGToDAG.cpp, +where XXX identifies the specific target, to perform pattern matching and +DAG-to-DAG instruction selection. Also write code in XXXISelLowering.cpp +to replace or remove operations and data types that are not supported +natively in a SelectionDAG.

  • +
  • Write code for an assembly printer that converts LLVM IR to a GAS format for +your target machine. You should add assembly strings to the instructions +defined in your target-specific version of TargetInstrInfo.td. You +should also write code for a subclass of AsmPrinter that performs the +LLVM-to-assembly conversion and a trivial subclass of TargetAsmInfo.

  • +
  • Optionally, add support for subtargets (i.e., variants with different +capabilities). You should also write code for a subclass of the +TargetSubtarget class, which allows you to use the -mcpu= and +-mattr= command-line options.

  • +
  • Optionally, add JIT support and create a machine code emitter (subclass of +TargetJITInfo) that is used to emit binary code directly into memory.

  • +
+

In the .cpp and .h files, initially stub up these methods and then +implement them later. Initially, you may not know which private members +the class will need and which components will need to be subclassed.

+
+
+

Preliminaries

+

To actually create your compiler backend, you need to create and modify a few +files. The absolute minimum is discussed here. But to actually use the LLVM +target-independent code generator, you must perform the steps described in the +LLVM Target-Independent Code Generator document.

+

First, you should create a subdirectory under lib/Target to hold all the +files related to your target. If your target is called “Dummy”, create the +directory lib/Target/Dummy.

+

In this new directory, create a CMakeLists.txt. It is easiest to copy a +CMakeLists.txt of another target and modify it. It should at least contain +the LLVM_TARGET_DEFINITIONS variable. The library can be named LLVMDummy +(for example, see the MIPS target). Alternatively, you can split the library +into LLVMDummyCodeGen and LLVMDummyAsmPrinter, the latter of which +should be implemented in a subdirectory below lib/Target/Dummy (for example, +see the PowerPC target).

+

Note that these two naming schemes are hardcoded into llvm-config. Using +any other naming scheme will confuse llvm-config and produce a lot of +(seemingly unrelated) linker errors when linking llc.

+

To make your target actually do something, you need to implement a subclass of +TargetMachine. This implementation should typically be in the file +lib/Target/DummyTargetMachine.cpp, but any file in the lib/Target +directory will be built and should work. To use LLVM’s target independent code +generator, you should do what all current machine backends do: create a +subclass of LLVMTargetMachine. (To create a target from scratch, create a +subclass of TargetMachine.)

+

To get LLVM to actually build and link your target, you need to run cmake +with -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=Dummy. This will build your +target without needing to add it to the list of all the targets.

+

Once your target is stable, you can add it to the LLVM_ALL_TARGETS variable +located in the main CMakeLists.txt.

+
+
+
+

Target Machine

+

LLVMTargetMachine is designed as a base class for targets implemented with +the LLVM target-independent code generator. The LLVMTargetMachine class +should be specialized by a concrete target class that implements the various +virtual methods. LLVMTargetMachine is defined as a subclass of +TargetMachine in include/llvm/Target/TargetMachine.h. The +TargetMachine class implementation (TargetMachine.cpp) also processes +numerous command-line options.

+

To create a concrete target-specific subclass of LLVMTargetMachine, start +by copying an existing TargetMachine class and header. You should name the +files that you create to reflect your specific target. For instance, for the +SPARC target, name the files SparcTargetMachine.h and +SparcTargetMachine.cpp.

+

For a target machine XXX, the implementation of XXXTargetMachine must +have access methods to obtain objects that represent target components. These +methods are named get*Info, and are intended to obtain the instruction set +(getInstrInfo), register set (getRegisterInfo), stack frame layout +(getFrameInfo), and similar information. XXXTargetMachine must also +implement the getDataLayout method to access an object with target-specific +data characteristics, such as data type size and alignment requirements.

+

For instance, for the SPARC target, the header file SparcTargetMachine.h +declares prototypes for several get*Info and getDataLayout methods that +simply return a class member.

+
namespace llvm {
+
+class Module;
+
+class SparcTargetMachine : public LLVMTargetMachine {
+  const DataLayout DataLayout;       // Calculates type size & alignment
+  SparcSubtarget Subtarget;
+  SparcInstrInfo InstrInfo;
+  TargetFrameInfo FrameInfo;
+
+protected:
+  virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+public:
+  SparcTargetMachine(const Module &M, const std::string &FS);
+
+  virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; }
+  virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; }
+  virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; }
+  virtual const TargetRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+  virtual const DataLayout *getDataLayout() const { return &DataLayout; }
+  static unsigned getModuleMatchQuality(const Module &M);
+
+  // Pass Pipeline Configuration
+  virtual bool addInstSelector(PassManagerBase &PM, bool Fast);
+  virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast);
+};
+
+} // end namespace llvm
+
+
+
    +
  • getInstrInfo()

  • +
  • getRegisterInfo()

  • +
  • getFrameInfo()

  • +
  • getDataLayout()

  • +
  • getSubtargetImpl()

  • +
+

For some targets, you also need to support the following methods:

+
    +
  • getTargetLowering()

  • +
  • getJITInfo()

  • +
+

Some architectures, such as GPUs, do not support jumping to an arbitrary +program location and implement branching using masked execution and loop using +special instructions around the loop body. In order to avoid CFG modifications +that introduce irreducible control flow not handled by such hardware, a target +must call setRequiresStructuredCFG(true) when being initialized.

+

In addition, the XXXTargetMachine constructor should specify a +TargetDescription string that determines the data layout for the target +machine, including characteristics such as pointer size, alignment, and +endianness. For example, the constructor for SparcTargetMachine contains +the following:

+
SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
+  : DataLayout("E-p:32:32-f128:128:128"),
+    Subtarget(M, FS), InstrInfo(Subtarget),
+    FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+}
+
+
+

Hyphens separate portions of the TargetDescription string.

+
    +
  • An upper-case “E” in the string indicates a big-endian target data model. +A lower-case “e” indicates little-endian.

  • +
  • “p:” is followed by pointer information: size, ABI alignment, and +preferred alignment. If only two figures follow “p:”, then the first +value is pointer size, and the second value is both ABI and preferred +alignment.

  • +
  • Then a letter for numeric type alignment: “i”, “f”, “v”, or +“a” (corresponding to integer, floating point, vector, or aggregate). +“i”, “v”, or “a” are followed by ABI alignment and preferred +alignment. “f” is followed by three values: the first indicates the size +of a long double, then ABI alignment, and then ABI preferred alignment.

  • +
+
+
+

Target Registration

+

You must also register your target with the TargetRegistry, which is what +other LLVM tools use to be able to look up and use your target at runtime. The +TargetRegistry can be used directly, but for most targets there are helper +templates which should take care of the work for you.

+

All targets should declare a global Target object which is used to +represent the target during registration. Then, in the target’s TargetInfo +library, the target should define that object and use the RegisterTarget +template to register the target. For example, the Sparc registration code +looks like this:

+
Target llvm::getTheSparcTarget();
+
+extern "C" void LLVMInitializeSparcTargetInfo() {
+  RegisterTarget<Triple::sparc, /*HasJIT=*/false>
+    X(getTheSparcTarget(), "sparc", "Sparc");
+}
+
+
+

This allows the TargetRegistry to look up the target by name or by target +triple. In addition, most targets will also register additional features which +are available in separate libraries. These registration steps are separate, +because some clients may wish to only link in some parts of the target — the +JIT code generator does not require the use of the assembler printer, for +example. Here is an example of registering the Sparc assembly printer:

+
extern "C" void LLVMInitializeSparcAsmPrinter() {
+  RegisterAsmPrinter<SparcAsmPrinter> X(getTheSparcTarget());
+}
+
+
+

For more information, see “llvm/Support/TargetRegistry.h”.

+
+
+

Register Set and Register Classes

+

You should describe a concrete target-specific class that represents the +register file of a target machine. This class is called XXXRegisterInfo +(where XXX identifies the target) and represents the class register file +data that is used for register allocation. It also describes the interactions +between registers.

+

You also need to define register classes to categorize related registers. A +register class should be added for groups of registers that are all treated the +same way for some instruction. Typical examples are register classes for +integer, floating-point, or vector registers. A register allocator allows an +instruction to use any register in a specified register class to perform the +instruction in a similar manner. Register classes allocate virtual registers +to instructions from these sets, and register classes let the +target-independent register allocator automatically choose the actual +registers.

+

Much of the code for registers, including register definition, register +aliases, and register classes, is generated by TableGen from +XXXRegisterInfo.td input files and placed in XXXGenRegisterInfo.h.inc +and XXXGenRegisterInfo.inc output files. Some of the code in the +implementation of XXXRegisterInfo requires hand-coding.

+
+

Defining a Register

+

The XXXRegisterInfo.td file typically starts with register definitions for +a target machine. The Register class (specified in Target.td) is used +to define an object for each register. The specified string n becomes the +Name of the register. The basic Register object does not have any +subregisters and does not specify any aliases.

+
class Register<string n> {
+  string Namespace = "";
+  string AsmName = n;
+  string Name = n;
+  int SpillSize = 0;
+  int SpillAlignment = 0;
+  list<Register> Aliases = [];
+  list<Register> SubRegs = [];
+  list<int> DwarfNumbers = [];
+}
+
+
+

For example, in the X86RegisterInfo.td file, there are register definitions +that utilize the Register class, such as:

+
def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>;
+
+
+

This defines the register AL and assigns it values (with DwarfRegNum) +that are used by gcc, gdb, or a debug information writer to identify a +register. For register AL, DwarfRegNum takes an array of 3 values +representing 3 different modes: the first element is for X86-64, the second for +exception handling (EH) on X86-32, and the third is generic. -1 is a special +Dwarf number that indicates the gcc number is undefined, and -2 indicates the +register number is invalid for this mode.

+

From the previously described line in the X86RegisterInfo.td file, TableGen +generates this code in the X86GenRegisterInfo.inc file:

+
static const unsigned GR8[] = { X86::AL, ... };
+
+const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 };
+
+const TargetRegisterDesc RegisterDescriptors[] = {
+  ...
+{ "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ...
+
+
+

From the register info file, TableGen generates a TargetRegisterDesc object +for each register. TargetRegisterDesc is defined in +include/llvm/Target/TargetRegisterInfo.h with the following fields:

+
struct TargetRegisterDesc {
+  const char     *AsmName;      // Assembly language name for the register
+  const char     *Name;         // Printable name for the reg (for debugging)
+  const unsigned *AliasSet;     // Register Alias Set
+  const unsigned *SubRegs;      // Sub-register set
+  const unsigned *ImmSubRegs;   // Immediate sub-register set
+  const unsigned *SuperRegs;    // Super-register set
+};
+
+
+

TableGen uses the entire target description file (.td) to determine text +names for the register (in the AsmName and Name fields of +TargetRegisterDesc) and the relationships of other registers to the defined +register (in the other TargetRegisterDesc fields). In this example, other +definitions establish the registers “AX”, “EAX”, and “RAX” as +aliases for one another, so TableGen generates a null-terminated array +(AL_AliasSet) for this register alias set.

+

The Register class is commonly used as a base class for more complex +classes. In Target.td, the Register class is the base for the +RegisterWithSubRegs class that is used to define registers that need to +specify subregisters in the SubRegs list, as shown here:

+
class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
+  let SubRegs = subregs;
+}
+
+
+

In SparcRegisterInfo.td, additional register classes are defined for SPARC: +a Register subclass, SparcReg, and further subclasses: Ri, Rf, +and Rd. SPARC registers are identified by 5-bit ID numbers, which is a +feature common to these subclasses. Note the use of “let” expressions to +override values that are initially defined in a superclass (such as the SubRegs +field in the Rd class).

+
class SparcReg<string n> : Register<n> {
+  field bits<5> Num;
+  let Namespace = "SP";
+}
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> :
+SparcReg<n> {
+  let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> :
+SparcReg<n> {
+  let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+  let Num = num;
+  let SubRegs = subregs;
+}
+
+
+

In the SparcRegisterInfo.td file, there are register definitions that +utilize these subclasses of Register, such as:

+
def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+...
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+...
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+
+
+

The last two registers shown above (D0 and D1) are double-precision +floating-point registers that are aliases for pairs of single-precision +floating-point sub-registers. In addition to aliases, the sub-register and +super-register relationships of the defined register are in fields of a +register’s TargetRegisterDesc.

+
+
+

Defining a Register Class

+

The RegisterClass class (specified in Target.td) is used to define an +object that represents a group of related registers and also defines the +default allocation order of the registers. A target description file +XXXRegisterInfo.td that uses Target.td can construct register classes +using the following class:

+
class RegisterClass<string namespace,
+list<ValueType> regTypes, int alignment, dag regList> {
+  string Namespace = namespace;
+  list<ValueType> RegTypes = regTypes;
+  int Size = 0;  // spill size, in bits; zero lets tblgen pick the size
+  int Alignment = alignment;
+
+  // CopyCost is the cost of copying a value between two registers
+  // default value 1 means a single instruction
+  // A negative value means copying is extremely expensive or impossible
+  int CopyCost = 1;
+  dag MemberList = regList;
+
+  // for register classes that are subregisters of this class
+  list<RegisterClass> SubRegClassList = [];
+
+  code MethodProtos = [{}];  // to insert arbitrary code
+  code MethodBodies = [{}];
+}
+
+
+

To define a RegisterClass, use the following 4 arguments:

+
    +
  • The first argument of the definition is the name of the namespace.

  • +
  • The second argument is a list of ValueType register type values that are +defined in include/llvm/CodeGen/ValueTypes.td. Defined values include +integer types (such as i16, i32, and i1 for Boolean), +floating-point types (f32, f64), and vector types (for example, +v8i16 for an 8 x i16 vector). All registers in a RegisterClass +must have the same ValueType, but some registers may store vector data in +different configurations. For example a register that can process a 128-bit +vector may be able to handle 16 8-bit integer elements, 8 16-bit integers, 4 +32-bit integers, and so on.

  • +
  • The third argument of the RegisterClass definition specifies the +alignment required of the registers when they are stored or loaded to +memory.

  • +
  • The final argument, regList, specifies which registers are in this class. +If an alternative allocation order method is not specified, then regList +also defines the order of allocation used by the register allocator. Besides +simply listing registers with (add R0, R1, ...), more advanced set +operators are available. See include/llvm/Target/Target.td for more +information.

  • +
+

In SparcRegisterInfo.td, three RegisterClass objects are defined: +FPRegs, DFPRegs, and IntRegs. For all three register classes, the +first argument defines the namespace with the string “SP”. FPRegs +defines a group of 32 single-precision floating-point registers (F0 to +F31); DFPRegs defines a group of 16 double-precision registers +(D0-D15).

+
// F0, F1, F2, ..., F31
+def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64,
+                            (add D0, D1, D2, D3, D4, D5, D6, D7, D8,
+                                 D9, D10, D11, D12, D13, D14, D15)>;
+
+def IntRegs : RegisterClass<"SP", [i32], 32,
+    (add L0, L1, L2, L3, L4, L5, L6, L7,
+         I0, I1, I2, I3, I4, I5,
+         O0, O1, O2, O3, O4, O5, O7,
+         G1,
+         // Non-allocatable regs:
+         G2, G3, G4,
+         O6,        // stack ptr
+         I6,        // frame ptr
+         I7,        // return address
+         G0,        // constant zero
+         G5, G6, G7 // reserved for kernel
+    )>;
+
+
+

Using SparcRegisterInfo.td with TableGen generates several output files +that are intended for inclusion in other source code that you write. +SparcRegisterInfo.td generates SparcGenRegisterInfo.h.inc, which should +be included in the header file for the SPARC register +implementation that you write (SparcRegisterInfo.h). In +SparcGenRegisterInfo.h.inc a new structure is defined called +SparcGenRegisterInfo that uses TargetRegisterInfo as its base. It also +specifies types, based upon the defined register classes: DFPRegsClass, +FPRegsClass, and IntRegsClass.

+

SparcRegisterInfo.td also generates SparcGenRegisterInfo.inc, which is +included at the bottom of SparcRegisterInfo.cpp, the SPARC register +implementation. The code below shows only the generated integer registers and +associated register classes. The order of registers in IntRegs reflects +the order in the definition of IntRegs in the target description file.

+
// IntRegs Register Class...
+static const unsigned IntRegs[] = {
+  SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5,
+  SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3,
+  SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3,
+  SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3,
+  SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5,
+  SP::G6, SP::G7,
+};
+
+// IntRegsVTs Register Class Value Types...
+static const MVT::ValueType IntRegsVTs[] = {
+  MVT::i32, MVT::Other
+};
+
+namespace SP {   // Register class instances
+  DFPRegsClass    DFPRegsRegClass;
+  FPRegsClass     FPRegsRegClass;
+  IntRegsClass    IntRegsRegClass;
+...
+  // IntRegs Sub-register Classes...
+  static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
+    NULL
+  };
+...
+  // IntRegs Super-register Classes..
+  static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
+    NULL
+  };
+...
+  // IntRegs Register Class sub-classes...
+  static const TargetRegisterClass* const IntRegsSubclasses [] = {
+    NULL
+  };
+...
+  // IntRegs Register Class super-classes...
+  static const TargetRegisterClass* const IntRegsSuperclasses [] = {
+    NULL
+  };
+
+  IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID,
+    IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses,
+    IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
+}
+
+
+

The register allocators will avoid using reserved registers, and callee saved +registers are not used until all the volatile registers have been used. That +is usually good enough, but in some cases it may be necessary to provide custom +allocation orders.

+
+
+

Implement a subclass of TargetRegisterInfo

+

The final step is to hand code portions of XXXRegisterInfo, which +implements the interface described in TargetRegisterInfo.h (see +The TargetRegisterInfo class). These functions return 0, NULL, or +false, unless overridden. Here is a list of functions that are overridden +for the SPARC implementation in SparcRegisterInfo.cpp:

+
    +
  • getCalleeSavedRegs — Returns a list of callee-saved registers in the +order of the desired callee-save stack frame offset.

  • +
  • getReservedRegs — Returns a bitset indexed by physical register +numbers, indicating if a particular register is unavailable.

  • +
  • hasFP — Return a Boolean indicating if a function should have a +dedicated frame pointer register.

  • +
  • eliminateCallFramePseudoInstr — If call frame setup or destroy pseudo +instructions are used, this can be called to eliminate them.

  • +
  • eliminateFrameIndex — Eliminate abstract frame indices from +instructions that may use them.

  • +
  • emitPrologue — Insert prologue code into the function.

  • +
  • emitEpilogue — Insert epilogue code into the function.

  • +
+
+
+
+

Instruction Set

+

During the early stages of code generation, the LLVM IR code is converted to a +SelectionDAG with nodes that are instances of the SDNode class +containing target instructions. An SDNode has an opcode, operands, type +requirements, and operation properties (for example, whether an operation is +commutative or whether it loads from memory). The various operation node +types are described in the include/llvm/CodeGen/SelectionDAGNodes.h file +(values of the NodeType enum in the ISD namespace).

+

TableGen uses the following target description (.td) input files to +generate much of the code for instruction definition:

+
    +
  • Target.td — Where the Instruction, Operand, InstrInfo, and +other fundamental classes are defined.

  • +
  • TargetSelectionDAG.td — Used by SelectionDAG instruction selection +generators, contains SDTC* classes (selection DAG type constraint), +definitions of SelectionDAG nodes (such as imm, cond, bb, +add, fadd, sub), and pattern support (Pattern, Pat, +PatFrag, PatLeaf, ComplexPattern).

  • +
  • XXXInstrFormats.td — Patterns for definitions of target-specific +instructions.

  • +
  • XXXInstrInfo.td — Target-specific definitions of instruction templates, +condition codes, and instructions of an instruction set. For architecture +modifications, a different file name may be used. For example, for Pentium +with SSE instruction, this file is X86InstrSSE.td, and for Pentium with +MMX, this file is X86InstrMMX.td.

  • +
+

There is also a target-specific XXX.td file, where XXX is the name of +the target. The XXX.td file includes the other .td input files, but +its contents are only directly important for subtargets.

+

You should describe a concrete target-specific class XXXInstrInfo that +represents machine instructions supported by a target machine. +XXXInstrInfo contains an array of XXXInstrDescriptor objects, each of +which describes one instruction. An instruction descriptor defines:

+
    +
  • Opcode mnemonic

  • +
  • Number of operands

  • +
  • List of implicit register definitions and uses

  • +
  • Target-independent properties (such as memory access, is commutable)

  • +
  • Target-specific flags

  • +
+

The Instruction class (defined in Target.td) is mostly used as a base for +more complex instruction classes.

+
class Instruction {
+  string Namespace = "";
+  dag OutOperandList;    // A dag containing the MI def operand list.
+  dag InOperandList;     // A dag containing the MI use operand list.
+  string AsmString = ""; // The .s format to print the instruction with.
+  list<dag> Pattern;     // Set to the DAG pattern for this instruction.
+  list<Register> Uses = [];
+  list<Register> Defs = [];
+  list<Predicate> Predicates = [];  // predicates turned into isel match code
+  ... remainder not shown for space ...
+}
+
+
+

A SelectionDAG node (SDNode) should contain an object representing a +target-specific instruction that is defined in XXXInstrInfo.td. The +instruction objects should represent instructions from the architecture manual +of the target machine (such as the SPARC Architecture Manual for the SPARC +target).

+

A single instruction from the architecture manual is often modeled as multiple +target instructions, depending upon its operands. For example, a manual might +describe an add instruction that takes a register or an immediate operand. An +LLVM target could model this with two instructions named ADDri and +ADDrr.

+

You should define a class for each instruction category and define each opcode +as a subclass of the category with appropriate parameters such as the fixed +binary encoding of opcodes and extended opcodes. You should map the register +bits to the bits of the instruction in which they are encoded (for the JIT). +Also you should specify how the instruction should be printed when the +automatic assembly printer is used.

+

As is described in the SPARC Architecture Manual, Version 8, there are three +major 32-bit formats for instructions. Format 1 is only for the CALL +instruction. Format 2 is for branch on condition codes and SETHI (set high +bits of a register) instructions. Format 3 is for other instructions.

+

Each of these formats has corresponding classes in SparcInstrFormat.td. +InstSP is a base class for other instruction classes. Additional base +classes are specified for more precise formats: for example in +SparcInstrFormat.td, F2_1 is for SETHI, and F2_2 is for +branches. There are three other base classes: F3_1 for register/register +operations, F3_2 for register/immediate operations, and F3_3 for +floating-point operations. SparcInstrInfo.td also adds the base class +Pseudo for synthetic SPARC instructions.

+

SparcInstrInfo.td largely consists of operand and instruction definitions +for the SPARC target. In SparcInstrInfo.td, the following target +description file entry, LDrr, defines the Load Integer instruction for a +Word (the LD SPARC opcode) from a memory address to a register. The first +parameter, the value 3 (binary 11), is the operation value for this +category of operation. The second parameter (binary 000000) is the +specific operation value for LD/Load Word. The third parameter is the +output destination, which is a register operand and defined in the Register +target description file (IntRegs).

+
def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
+                 "ld [$addr], $dst",
+                 [(set i32:$dst, (load ADDRrr:$addr))]>;
+
+
+

The fourth parameter is the input source, which uses the address operand +MEMrr that is defined earlier in SparcInstrInfo.td:

+
def MEMrr : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+
+

The fifth parameter is a string that is used by the assembly printer and can be +left as an empty string until the assembly printer interface is implemented. +The sixth and final parameter is the pattern used to match the instruction +during the SelectionDAG Select Phase described in The LLVM Target-Independent Code Generator. +This parameter is detailed in the next section, Instruction Selector.

+

Instruction class definitions are not overloaded for different operand types, +so separate versions of instructions are needed for register, memory, or +immediate value operands. For example, to perform a Load Integer instruction +for a Word from an immediate operand to a register, the following instruction +class is defined:

+
def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
+                 "ld [$addr], $dst",
+                 [(set i32:$dst, (load ADDRri:$addr))]>;
+
+
+

Writing these definitions for so many similar instructions can involve a lot of +cut and paste. In .td files, the multiclass directive enables the +creation of templates to define several instruction classes at once (using the +defm directive). For example in SparcInstrInfo.td, the multiclass +pattern F3_12 is defined to create 2 instruction classes each time +F3_12 is invoked:

+
multiclass F3_12 <string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+  def rr  : F3_1 <2, Op3Val,
+                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"),
+                 [(set i32:$dst, (OpNode i32:$b, i32:$c))]>;
+  def ri  : F3_2 <2, Op3Val,
+                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"),
+                 [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>;
+}
+
+
+

So when the defm directive is used for the XOR and ADD +instructions, as seen below, it creates four instruction objects: XORrr, +XORri, ADDrr, and ADDri.

+
defm XOR   : F3_12<"xor", 0b000011, xor>;
+defm ADD   : F3_12<"add", 0b000000, add>;
+
+
+

SparcInstrInfo.td also includes definitions for condition codes that are +referenced by branch instructions. The following definitions in +SparcInstrInfo.td indicate the bit location of the SPARC condition code. +For example, the 10th bit represents the “greater than” condition for +integers, and the 22nd bit represents the “greater than” condition for +floats.

+
def ICC_NE  : ICC_VAL< 9>;  // Not Equal
+def ICC_E   : ICC_VAL< 1>;  // Equal
+def ICC_G   : ICC_VAL<10>;  // Greater
+...
+def FCC_U   : FCC_VAL<23>;  // Unordered
+def FCC_G   : FCC_VAL<22>;  // Greater
+def FCC_UG  : FCC_VAL<21>;  // Unordered or Greater
+...
+
+
+

(Note that Sparc.h also defines enums that correspond to the same SPARC +condition codes. Care must be taken to ensure the values in Sparc.h +correspond to the values in SparcInstrInfo.td. I.e., SPCC::ICC_NE = 9, +SPCC::FCC_U = 23 and so on.)

+
+

Instruction Operand Mapping

+

The code generator backend maps instruction operands to fields in the +instruction. Operands are assigned to unbound fields in the instruction in the +order they are defined. Fields are bound when they are assigned a value. For +example, the Sparc target defines the XNORrr instruction as a F3_1 +format instruction having three operands.

+
def XNORrr  : F3_1<2, 0b000111,
+                   (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                   "xnor $b, $c, $dst",
+                   [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>;
+
+
+

The instruction templates in SparcInstrFormats.td show the base class for +F3_1 is InstSP.

+
class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+  field bits<32> Inst;
+  let Namespace = "SP";
+  bits<2> op;
+  let Inst{31-30} = op;
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString   = asmstr;
+  let Pattern = pattern;
+}
+
+
+

InstSP leaves the op field unbound.

+
class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstSP<outs, ins, asmstr, pattern> {
+  bits<5> rd;
+  bits<6> op3;
+  bits<5> rs1;
+  let op{1} = 1;   // Op = 2 or 3
+  let Inst{29-25} = rd;
+  let Inst{24-19} = op3;
+  let Inst{18-14} = rs1;
+}
+
+
+

F3 binds the op field and defines the rd, op3, and rs1 +fields. F3 format instructions will bind the operands to the rd, op3, and +rs1 fields.

+
class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+           string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bits<8> asi = 0; // asi not currently used
+  bits<5> rs2;
+  let op         = opVal;
+  let op3        = op3val;
+  let Inst{13}   = 0;     // i field = 0
+  let Inst{12-5} = asi;   // address space identifier
+  let Inst{4-0}  = rs2;
+}
+
+
+

F3_1 binds the op3 field and defines the rs2 field. F3_1 +format instructions will bind the operands to the rd, rs1, and rs2 +fields. This results in the XNORrr instruction binding $dst, $b, +and $c operands to the rd, rs1, and rs2 fields respectively.

+
+

Instruction Operand Name Mapping

+

TableGen will also generate a function called getNamedOperandIdx() which +can be used to look up an operand’s index in a MachineInstr based on its +TableGen name. Setting the UseNamedOperandTable bit in an instruction’s +TableGen definition will add all of its operands to an enumeration in the +llvm::XXX::OpName namespace and also add an entry for it into the OperandMap +table, which can be queried using getNamedOperandIdx():

+
int DstIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::dst); // => 0
+int BIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::b);     // => 1
+int CIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::c);     // => 2
+int DIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::d);     // => -1
+
+...
+
+
+

The entries in the OpName enum are taken verbatim from the TableGen definitions, +so operands with lowercase names will have lower case entries in the enum.

+

To include the getNamedOperandIdx() function in your backend, you will need +to define a few preprocessor macros in XXXInstrInfo.cpp and XXXInstrInfo.h. +For example:

+

XXXInstrInfo.cpp:

+
#define GET_INSTRINFO_NAMED_OPS // For getNamedOperandIdx() function
+#include "XXXGenInstrInfo.inc"
+
+
+

XXXInstrInfo.h:

+
#define GET_INSTRINFO_OPERAND_ENUM // For OpName enum
+#include "XXXGenInstrInfo.inc"
+
+namespace XXX {
+  int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
+} // End namespace XXX
+
+
+
+
+

Instruction Operand Types

+

TableGen will also generate an enumeration consisting of all named Operand +types defined in the backend, in the llvm::XXX::OpTypes namespace. +Some common immediate Operand types (for instance i8, i32, i64, f32, f64) +are defined for all targets in include/llvm/Target/Target.td, and are +available in each Target’s OpTypes enum. Also, only named Operand types appear +in the enumeration: anonymous types are ignored. +For example, the X86 backend defines brtarget and brtarget8, both +instances of the TableGen Operand class, which represent branch target +operands:

+
def brtarget : Operand<OtherVT>;
+def brtarget8 : Operand<OtherVT>;
+
+
+

This results in:

+
namespace X86 {
+namespace OpTypes {
+enum OperandType {
+  ...
+  brtarget,
+  brtarget8,
+  ...
+  i32imm,
+  i64imm,
+  ...
+  OPERAND_TYPE_LIST_END
+} // End namespace OpTypes
+} // End namespace X86
+
+
+

In typical TableGen fashion, to use the enum, you will need to define a +preprocessor macro:

+
#define GET_INSTRINFO_OPERAND_TYPES_ENUM // For OpTypes enum
+#include "XXXGenInstrInfo.inc"
+
+
+
+
+
+

Instruction Scheduling

+

Instruction itineraries can be queried using MCDesc::getSchedClass(). The +value can be named by an enumeration in the llvm::XXX::Sched namespace generated +by TableGen in XXXGenInstrInfo.inc. The names of the schedule classes are +the same as provided in XXXSchedule.td plus a default NoItinerary class.

+

The schedule models are generated by TableGen by the SubtargetEmitter, +using the CodeGenSchedModels class. This is distinct from the itinerary +method of specifying machine resource use. The tool utils/schedcover.py +can be used to determine which instructions have been covered by the +schedule model description and which haven’t. The first step is to use the +instructions below to create an output file. Then run schedcover.py on the +output file:

+
$ <src>/utils/schedcover.py <build>/lib/Target/AArch64/tblGenSubtarget.with
+instruction, default, CortexA53Model, CortexA57Model, CycloneModel, ExynosM3Model, FalkorModel, KryoModel, ThunderX2T99Model, ThunderXT8XModel
+ABSv16i8, WriteV, , , CyWriteV3, M3WriteNMISC1, FalkorWr_2VXVY_2cyc, KryoWrite_2cyc_XY_XY_150ln, ,
+ABSv1i64, WriteV, , , CyWriteV3, M3WriteNMISC1, FalkorWr_1VXVY_2cyc, KryoWrite_2cyc_XY_noRSV_67ln, ,
+...
+
+
+

To capture the debug output from generating a schedule model, change to the +appropriate target directory and use the following +command with the subtarget-emitter debug option:

+
$ <build>/bin/llvm-tblgen -debug-only=subtarget-emitter -gen-subtarget \
+  -I <src>/lib/Target/<target> -I <src>/include \
+  -I <src>/lib/Target <src>/lib/Target/<target>/<target>.td \
+  -o <build>/lib/Target/<target>/<target>GenSubtargetInfo.inc.tmp \
+  > tblGenSubtarget.dbg 2>&1
+
+
+

Where <build> is the build directory, <src> is the source directory, +and <target> is the name of the target. +To double check that the above command is what is needed, one can capture the +exact TableGen command from a build by using:

+
$ VERBOSE=1 make ...
+
+
+

and search for llvm-tblgen commands in the output.

+
+
+

Instruction Relation Mapping

+

This TableGen feature is used to relate instructions with each other. It is +particularly useful when you have multiple instruction formats and need to +switch between them after instruction selection. This entire feature is driven +by relation models which can be defined in XXXInstrInfo.td files +according to the target-specific instruction set. Relation models are defined +using InstrMapping class as a base. TableGen parses all the models +and generates instruction relation maps using the specified information. +Relation maps are emitted as tables in the XXXGenInstrInfo.inc file +along with the functions to query them. For the detailed information on how to +use this feature, please refer to How To Use Instruction Mappings.

+
+
+

Implement a subclass of TargetInstrInfo

+

The final step is to hand code portions of XXXInstrInfo, which implements +the interface described in TargetInstrInfo.h (see The TargetInstrInfo class). +These functions return 0 or a Boolean or they assert, unless overridden. +Here’s a list of functions that are overridden for the SPARC implementation in +SparcInstrInfo.cpp:

+
    +
  • isLoadFromStackSlot — If the specified machine instruction is a direct +load from a stack slot, return the register number of the destination and the +FrameIndex of the stack slot.

  • +
  • isStoreToStackSlot — If the specified machine instruction is a direct +store to a stack slot, return the register number of the source and the +FrameIndex of the stack slot.

  • +
  • copyPhysReg — Copy values between a pair of physical registers.

  • +
  • storeRegToStackSlot — Store a register value to a stack slot.

  • +
  • loadRegFromStackSlot — Load a register value from a stack slot.

  • +
  • storeRegToAddr — Store a register value to memory.

  • +
  • loadRegFromAddr — Load a register value from memory.

  • +
  • foldMemoryOperand — Attempt to combine instructions of any load or +store instruction for the specified operand(s).

  • +
+
+
+

Branch Folding and If Conversion

+

Performance can be improved by combining instructions or by eliminating +instructions that are never reached. The analyzeBranch method in +XXXInstrInfo may be implemented to examine conditional instructions and +remove unnecessary instructions. analyzeBranch looks at the end of a +machine basic block (MBB) for opportunities for improvement, such as branch +folding and if conversion. The BranchFolder and IfConverter machine +function passes (see the source files BranchFolding.cpp and +IfConversion.cpp in the lib/CodeGen directory) call analyzeBranch +to improve the control flow graph that represents the instructions.

+

Several implementations of analyzeBranch (for ARM, Alpha, and X86) can be +examined as models for your own analyzeBranch implementation. Since SPARC +does not implement a useful analyzeBranch, the ARM target implementation is +shown below.

+

analyzeBranch returns a Boolean value and takes four parameters:

+
    +
  • MachineBasicBlock &MBB — The incoming block to be examined.

  • +
  • MachineBasicBlock *&TBB — A destination block that is returned. For a +conditional branch that evaluates to true, TBB is the destination.

  • +
  • MachineBasicBlock *&FBB — For a conditional branch that evaluates to +false, FBB is returned as the destination.

  • +
  • std::vector<MachineOperand> &Cond — List of operands to evaluate a +condition for a conditional branch.

  • +
+

In the simplest case, if a block ends without a branch, then it falls through +to the successor block. No destination blocks are specified for either TBB +or FBB, so both parameters return NULL. The start of the +analyzeBranch (see code below for the ARM target) shows the function +parameters and the code for the simplest case.

+
bool ARMInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
+                                 MachineBasicBlock *&TBB,
+                                 MachineBasicBlock *&FBB,
+                                 std::vector<MachineOperand> &Cond) const
+{
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+    return false;
+
+
+

If a block ends with a single unconditional branch instruction, then +analyzeBranch (shown below) should return the destination of that branch in +the TBB parameter.

+
if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+  TBB = LastInst->getOperand(0).getMBB();
+  return false;
+}
+
+
+

If a block ends with two unconditional branches, then the second branch is +never reached. In that situation, as shown below, remove the last branch +instruction and return the penultimate branch in the TBB parameter.

+
if ((SecondLastOpc == ARM::B || SecondLastOpc == ARM::tB) &&
+    (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+  TBB = SecondLastInst->getOperand(0).getMBB();
+  I = LastInst;
+  I->eraseFromParent();
+  return false;
+}
+
+
+

A block may end with a single conditional branch instruction that falls through +to the successor block if the condition evaluates to false. In that case, +analyzeBranch (shown below) should return the destination of that +conditional branch in the TBB parameter and a list of operands in the +Cond parameter to evaluate the condition.

+
if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+  // Block ends with fall-through condbranch.
+  TBB = LastInst->getOperand(0).getMBB();
+  Cond.push_back(LastInst->getOperand(1));
+  Cond.push_back(LastInst->getOperand(2));
+  return false;
+}
+
+
+

If a block ends with both a conditional branch and an ensuing unconditional +branch, then analyzeBranch (shown below) should return the conditional +branch destination (assuming it corresponds to a conditional evaluation of +“true”) in the TBB parameter and the unconditional branch destination +in the FBB (corresponding to a conditional evaluation of “false”). A +list of operands to evaluate the condition should be returned in the Cond +parameter.

+
unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+    (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+  TBB =  SecondLastInst->getOperand(0).getMBB();
+  Cond.push_back(SecondLastInst->getOperand(1));
+  Cond.push_back(SecondLastInst->getOperand(2));
+  FBB = LastInst->getOperand(0).getMBB();
+  return false;
+}
+
+
+

For the last two cases (ending with a single conditional branch or ending with +one conditional and one unconditional branch), the operands returned in the +Cond parameter can be passed to methods of other instructions to create new +branches or perform other operations. An implementation of analyzeBranch +requires the helper methods removeBranch and insertBranch to manage +subsequent operations.

+

analyzeBranch should return false indicating success in most circumstances. +analyzeBranch should only return true when the method is stumped about what +to do, for example, if a block has three terminating branches. +analyzeBranch may return true if it encounters a terminator it cannot +handle, such as an indirect branch.

+
+
+
+

Instruction Selector

+

LLVM uses a SelectionDAG to represent LLVM IR instructions, and nodes of +the SelectionDAG ideally represent native target instructions. During code +generation, instruction selection passes are performed to convert non-native +DAG instructions into native target-specific instructions. The pass described +in XXXISelDAGToDAG.cpp is used to match patterns and perform DAG-to-DAG +instruction selection. Optionally, a pass may be defined (in +XXXBranchSelector.cpp) to perform similar DAG-to-DAG operations for branch +instructions. Later, the code in XXXISelLowering.cpp replaces or removes +operations and data types not supported natively (legalizes) in a +SelectionDAG.

+

TableGen generates code for instruction selection using the following target +description input files:

+
    +
  • XXXInstrInfo.td — Contains definitions of instructions in a +target-specific instruction set, generates XXXGenDAGISel.inc, which is +included in XXXISelDAGToDAG.cpp.

  • +
  • XXXCallingConv.td — Contains the calling and return value conventions +for the target architecture, and it generates XXXGenCallingConv.inc, +which is included in XXXISelLowering.cpp.

  • +
+

The implementation of an instruction selection pass must include a header that +declares the FunctionPass class or a subclass of FunctionPass. In +XXXTargetMachine.cpp, a Pass Manager (PM) should add each instruction +selection pass into the queue of passes to run.

+

The LLVM static compiler (llc) is an excellent tool for visualizing the +contents of DAGs. To display the SelectionDAG before or after specific +processing phases, use the command line options for llc, described at +SelectionDAG Instruction Selection Process.

+

To describe instruction selector behavior, you should add patterns for lowering +LLVM code into a SelectionDAG as the last parameter of the instruction +definitions in XXXInstrInfo.td. For example, in SparcInstrInfo.td, +this entry defines a register store operation, and the last parameter describes +a pattern with the store DAG operator.

+
def STrr  : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
+                 "st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>;
+
+
+

ADDRrr is a memory mode that is also defined in SparcInstrInfo.td:

+
def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+
+
+

The definition of ADDRrr refers to SelectADDRrr, which is a function +defined in an implementation of the Instruction Selector (such as +SparcISelDAGToDAG.cpp).

+

In lib/Target/TargetSelectionDAG.td, the DAG operator for store is defined +below:

+
def store : PatFrag<(ops node:$val, node:$ptr),
+                    (st node:$val, node:$ptr), [{
+  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+    return !ST->isTruncatingStore() &&
+           ST->getAddressingMode() == ISD::UNINDEXED;
+  return false;
+}]>;
+
+
+

XXXInstrInfo.td also generates (in XXXGenDAGISel.inc) the +SelectCode method that is used to call the appropriate processing method +for an instruction. In this example, SelectCode calls Select_ISD_STORE +for the ISD::STORE opcode.

+
SDNode *SelectCode(SDValue N) {
+  ...
+  MVT::ValueType NVT = N.getNode()->getValueType(0);
+  switch (N.getOpcode()) {
+  case ISD::STORE: {
+    switch (NVT) {
+    default:
+      return Select_ISD_STORE(N);
+      break;
+    }
+    break;
+  }
+  ...
+
+
+

The pattern for STrr is matched, so elsewhere in XXXGenDAGISel.inc, +code for STrr is created for Select_ISD_STORE. The Emit_22 method +is also generated in XXXGenDAGISel.inc to complete the processing of this +instruction.

+
SDNode *Select_ISD_STORE(const SDValue &N) {
+  SDValue Chain = N.getOperand(0);
+  if (Predicate_store(N.getNode())) {
+    SDValue N1 = N.getOperand(1);
+    SDValue N2 = N.getOperand(2);
+    SDValue CPTmp0;
+    SDValue CPTmp1;
+
+    // Pattern: (st:void i32:i32:$src,
+    //           ADDRrr:i32:$addr)<<P:Predicate_store>>
+    // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
+    // Pattern complexity = 13  cost = 1  size = 0
+    if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) &&
+        N1.getNode()->getValueType(0) == MVT::i32 &&
+        N2.getNode()->getValueType(0) == MVT::i32) {
+      return Emit_22(N, SP::STrr, CPTmp0, CPTmp1);
+    }
+...
+
+
+
+

The SelectionDAG Legalize Phase

+

The Legalize phase converts a DAG to use types and operations that are natively +supported by the target. For natively unsupported types and operations, you +need to add code to the target-specific XXXTargetLowering implementation to +convert unsupported types and operations to supported ones.

+

In the constructor for the XXXTargetLowering class, first use the +addRegisterClass method to specify which types are supported and which +register classes are associated with them. The code for the register classes +is generated by TableGen from XXXRegisterInfo.td and placed in +XXXGenRegisterInfo.h.inc. For example, the implementation of the +constructor for the SparcTargetLowering class (in SparcISelLowering.cpp) +starts with the following code:

+
addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+
+

You should examine the node types in the ISD namespace +(include/llvm/CodeGen/SelectionDAGNodes.h) and determine which operations +the target natively supports. For operations that do not have native +support, add a callback to the constructor for the XXXTargetLowering class, +so the instruction selection process knows what to do. The TargetLowering +class callback methods (declared in llvm/Target/TargetLowering.h) are:

+
    +
  • setOperationAction — General operation.

  • +
  • setLoadExtAction — Load with extension.

  • +
  • setTruncStoreAction — Truncating store.

  • +
  • setIndexedLoadAction — Indexed load.

  • +
  • setIndexedStoreAction — Indexed store.

  • +
  • setConvertAction — Type conversion.

  • +
  • setCondCodeAction — Support for a given condition code.

  • +
+

Note: on older releases, setLoadXAction is used instead of +setLoadExtAction. Also, on older releases, setCondCodeAction may not +be supported. Examine your release to see what methods are specifically +supported.

+

These callbacks are used to determine that an operation does or does not work +with a specified type (or types). And in all cases, the third parameter is a +LegalAction type enum value: Promote, Expand, Custom, or +Legal. SparcISelLowering.cpp contains examples of all four +LegalAction values.

+
+

Promote

+

For an operation without native support for a given type, the specified type +may be promoted to a larger type that is supported. For example, SPARC does +not support a sign-extending load for Boolean values (i1 type), so in +SparcISelLowering.cpp the third parameter below, Promote, changes +i1 type values to a larger type before loading.

+
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+
+
+
+

Expand

+

For a type without native support, a value may need to be broken down further, +rather than promoted. For an operation without native support, a combination +of other operations may be used to similar effect. In SPARC, the +floating-point sine and cosine trig operations are supported by expansion to +other operations, as indicated by the third parameter, Expand, to +setOperationAction:

+
setOperationAction(ISD::FSIN, MVT::f32, Expand);
+setOperationAction(ISD::FCOS, MVT::f32, Expand);
+
+
+
+
+

Custom

+

For some operations, simple type promotion or operation expansion may be +insufficient. In some cases, a special intrinsic function must be implemented.

+

For example, a constant value may require special treatment, or an operation +may require spilling and restoring registers in the stack and working with +register allocators.

+

As seen in SparcISelLowering.cpp code below, to perform a type conversion +from a floating point value to a signed integer, first the +setOperationAction should be called with Custom as the third parameter:

+
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+
+

In the LowerOperation method, for each Custom operation, a case +statement should be added to indicate what function to call. In the following +code, an FP_TO_SINT opcode will call the LowerFP_TO_SINT method:

+
SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  switch (Op.getOpcode()) {
+  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+  ...
+  }
+}
+
+
+

Finally, the LowerFP_TO_SINT method is implemented, using an FP register to +convert the floating-point value to an integer.

+
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::i32);
+  Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
+  return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
+}
+
+
+
+ +
+
+

Calling Conventions

+

To support target-specific calling conventions, XXXGenCallingConv.td uses +interfaces (such as CCIfType and CCAssignToReg) that are defined in +lib/Target/TargetCallingConv.td. TableGen can take the target descriptor +file XXXGenCallingConv.td and generate the header file +XXXGenCallingConv.inc, which is typically included in +XXXISelLowering.cpp. You can use the interfaces in +TargetCallingConv.td to specify:

+
    +
  • The order of parameter allocation.

  • +
  • Where parameters and return values are placed (that is, on the stack or in +registers).

  • +
  • Which registers may be used.

  • +
  • Whether the caller or callee unwinds the stack.

  • +
+

The following example demonstrates the use of the CCIfType and +CCAssignToReg interfaces. If the CCIfType predicate is true (that is, +if the current argument is of type f32 or f64), then the action is +performed. In this case, the CCAssignToReg action assigns the argument +value to the first available register: either R0 or R1.

+
CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>
+
+
+

SparcCallingConv.td contains definitions for a target-specific return-value +calling convention (RetCC_Sparc32) and a basic 32-bit C calling convention +(CC_Sparc32). The definition of RetCC_Sparc32 (shown below) indicates +which registers are used for specified scalar return types. A single-precision +float is returned to register F0, and a double-precision float goes to +register D0. A 32-bit integer is returned in register I0 or I1.

+
def RetCC_Sparc32 : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[I0, I1]>>,
+  CCIfType<[f32], CCAssignToReg<[F0]>>,
+  CCIfType<[f64], CCAssignToReg<[D0]>>
+]>;
+
+
+

The definition of CC_Sparc32 in SparcCallingConv.td introduces +CCAssignToStack, which assigns the value to a stack slot with the specified +size and alignment. In the example below, the first parameter, 4, indicates +the size of the slot, and the second parameter, also 4, indicates the stack +alignment along 4-byte units. (Special cases: if size is zero, then the ABI +size is used; if alignment is zero, then the ABI alignment is used.)

+
def CC_Sparc32 : CallingConv<[
+  // All arguments get passed in integer registers if there is space.
+  CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  CCAssignToStack<4, 4>
+]>;
+
+
+

CCDelegateTo is another commonly used interface, which tries to find a +specified sub-calling convention, and, if a match is found, it is invoked. In +the following example (in X86CallingConv.td), the definition of +RetCC_X86_32_C ends with CCDelegateTo. After the current value is +assigned to the register ST0 or ST1, the RetCC_X86Common is +invoked.

+
def RetCC_X86_32_C : CallingConv<[
+  CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>,
+  CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>,
+  CCDelegateTo<RetCC_X86Common>
+]>;
+
+
+

CCIfCC is an interface that attempts to match the given name to the current +calling convention. If the name identifies the current calling convention, +then a specified action is invoked. In the following example (in +X86CallingConv.td), if the Fast calling convention is in use, then +RetCC_X86_32_Fast is invoked. If the SSECall calling convention is in +use, then RetCC_X86_32_SSE is invoked.

+
def RetCC_X86_32 : CallingConv<[
+  CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+  CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo<RetCC_X86_32_SSE>>,
+  CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+
+

Other calling convention interfaces include:

+
    +
  • CCIf <predicate, action> — If the predicate matches, apply the action.

  • +
  • CCIfInReg <action> — If the argument is marked with the “inreg” +attribute, then apply the action.

  • +
  • CCIfNest <action> — If the argument is marked with the “nest” +attribute, then apply the action.

  • +
  • CCIfNotVarArg <action> — If the current function does not take a +variable number of arguments, apply the action.

  • +
  • CCAssignToRegWithShadow <registerList, shadowList> — similar to +CCAssignToReg, but with a shadow list of registers.

  • +
  • CCPassByVal <size, align> — Assign value to a stack slot with the +minimum specified size and alignment.

  • +
  • CCPromoteToType <type> — Promote the current value to the specified +type.

  • +
  • CallingConv <[actions]> — Define each calling convention that is +supported.

  • +
+
+
+
+

Assembly Printer

+

During the code emission stage, the code generator may utilize an LLVM pass to +produce assembly output. To do this, you want to implement the code for a +printer that converts LLVM IR to a GAS-format assembly language for your target +machine, using the following steps:

+
    +
  • Define all the assembly strings for your target, adding them to the +instructions defined in the XXXInstrInfo.td file. (See +Instruction Set.) TableGen will produce an output file +(XXXGenAsmWriter.inc) with an implementation of the printInstruction +method for the XXXAsmPrinter class.

  • +
  • Write XXXTargetAsmInfo.h, which contains the bare-bones declaration of +the XXXTargetAsmInfo class (a subclass of TargetAsmInfo).

  • +
  • Write XXXTargetAsmInfo.cpp, which contains target-specific values for +TargetAsmInfo properties and sometimes new implementations for methods.

  • +
  • Write XXXAsmPrinter.cpp, which implements the AsmPrinter class that +performs the LLVM-to-assembly conversion.

  • +
+

The code in XXXTargetAsmInfo.h is usually a trivial declaration of the +XXXTargetAsmInfo class for use in XXXTargetAsmInfo.cpp. Similarly, +XXXTargetAsmInfo.cpp usually has a few declarations of XXXTargetAsmInfo +replacement values that override the default values in TargetAsmInfo.cpp. +For example in SparcTargetAsmInfo.cpp:

+
SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) {
+  Data16bitsDirective = "\t.half\t";
+  Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = 0;  // .xword is only supported by V9.
+  ZeroDirective = "\t.skip\t";
+  CommentString = "!";
+  ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
+}
+
+
+

The X86 assembly printer implementation (X86TargetAsmInfo) is an example +where the target-specific TargetAsmInfo class uses an overridden method: +ExpandInlineAsm.

+

A target-specific implementation of AsmPrinter is written in +XXXAsmPrinter.cpp, which implements the AsmPrinter class that converts +the LLVM to printable assembly. The implementation must include the following +headers that have declarations for the AsmPrinter and +MachineFunctionPass classes. The MachineFunctionPass is a subclass of +FunctionPass.

+
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+
+

As a FunctionPass, AsmPrinter first calls doInitialization to set +up the AsmPrinter. In SparcAsmPrinter, a Mangler object is +instantiated to process variable names.

+

In XXXAsmPrinter.cpp, the runOnMachineFunction method (declared in +MachineFunctionPass) must be implemented for XXXAsmPrinter. In +MachineFunctionPass, the runOnFunction method invokes +runOnMachineFunction. Target-specific implementations of +runOnMachineFunction differ, but generally do the following to process each +machine function:

+
    +
  • Call SetupMachineFunction to perform initialization.

  • +
  • Call EmitConstantPool to print out (to the output stream) constants which +have been spilled to memory.

  • +
  • Call EmitJumpTableInfo to print out jump tables used by the current +function.

  • +
  • Print out the label for the current function.

  • +
  • Print out the code for the function, including basic block labels and the +assembly for the instruction (using printInstruction)

  • +
+

The XXXAsmPrinter implementation must also include the code generated by +TableGen that is output in the XXXGenAsmWriter.inc file. The code in +XXXGenAsmWriter.inc contains an implementation of the printInstruction +method that may call these methods:

+
    +
  • printOperand

  • +
  • printMemOperand

  • +
  • printCCOperand (for conditional statements)

  • +
  • printDataDirective

  • +
  • printDeclare

  • +
  • printImplicitDef

  • +
  • printInlineAsm

  • +
+

The implementations of printDeclare, printImplicitDef, +printInlineAsm, and printLabel in AsmPrinter.cpp are generally +adequate for printing assembly and do not need to be overridden.

+

The printOperand method is implemented with a long switch/case +statement for the type of operand: register, immediate, basic block, external +symbol, global address, constant pool index, or jump table index. For an +instruction with a memory address operand, the printMemOperand method +should be implemented to generate the proper output. Similarly, +printCCOperand should be used to print a conditional operand.

+

doFinalization should be overridden in XXXAsmPrinter, and it should be +called to shut down the assembly printer. During doFinalization, global +variables and constants are printed to output.

+
+
+

Subtarget Support

+

Subtarget support is used to inform the code generation process of instruction +set variations for a given chip set. For example, the LLVM SPARC +implementation provided covers three major versions of the SPARC microprocessor +architecture: Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a +64-bit architecture), and the UltraSPARC architecture. V8 has 16 +double-precision floating-point registers that are also usable as either 32 +single-precision or 8 quad-precision registers. V8 is also purely big-endian. +V9 has 32 double-precision floating-point registers that are also usable as 16 +quad-precision registers, but cannot be used as single-precision registers. +The UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set +extensions.

+

If subtarget support is needed, you should implement a target-specific +XXXSubtarget class for your architecture. This class should process the +command-line options -mcpu= and -mattr=.

+

TableGen uses definitions in the Target.td and Sparc.td files to +generate code in SparcGenSubtarget.inc. In Target.td, shown below, the +SubtargetFeature interface is defined. The first 4 string parameters of +the SubtargetFeature interface are a feature name, an attribute set by the +feature, the value of the attribute, and a description of the feature. (The +fifth parameter is a list of features whose presence is implied, and its +default value is an empty array.)

+
class SubtargetFeature<string n, string a, string v, string d,
+                       list<SubtargetFeature> i = []> {
+  string Name = n;
+  string Attribute = a;
+  string Value = v;
+  string Desc = d;
+  list<SubtargetFeature> Implies = i;
+}
+
+
+

In the Sparc.td file, the SubtargetFeature is used to define the +following features.

+
def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true",
+                     "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8",
+                     "V8DeprecatedInsts", "true",
+                     "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true",
+                     "Enable UltraSPARC Visual Instruction Set extensions">;
+
+
+

Elsewhere in Sparc.td, the Proc class is defined and then is used to +define particular SPARC processor subtypes that may have the previously +described features.

+
class Proc<string Name, list<SubtargetFeature> Features>
+  : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic",         []>;
+def : Proc<"v8",              []>;
+def : Proc<"supersparc",      []>;
+def : Proc<"sparclite",       []>;
+def : Proc<"f934",            []>;
+def : Proc<"hypersparc",      []>;
+def : Proc<"sparclite86x",    []>;
+def : Proc<"sparclet",        []>;
+def : Proc<"tsc701",          []>;
+def : Proc<"v9",              [FeatureV9]>;
+def : Proc<"ultrasparc",      [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3",     [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+

From Target.td and Sparc.td files, the resulting +SparcGenSubtarget.inc specifies enum values to identify the features, +arrays of constants to represent the CPU features and CPU subtypes, and the +ParseSubtargetFeatures method that parses the features string that sets +specified subtarget options. The generated SparcGenSubtarget.inc file +should be included in the SparcSubtarget.cpp. The target-specific +implementation of the XXXSubtarget method should follow this pseudocode:

+
XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) {
+  // Set the default features
+  // Determine default and user specified characteristics of the CPU
+  // Call ParseSubtargetFeatures(FS, CPU) to parse the features string
+  // Perform any additional operations
+}
+
+
+
+
+

JIT Support

+

The implementation of a target machine optionally includes a Just-In-Time (JIT) +code generator that emits machine code and auxiliary structures as binary +output that can be written directly to memory. To do this, implement JIT code +generation by performing the following steps:

+
    +
  • Write an XXXCodeEmitter.cpp file that contains a machine function pass +that transforms target-machine instructions into relocatable machine +code.

  • +
  • Write an XXXJITInfo.cpp file that implements the JIT interfaces for +target-specific code-generation activities, such as emitting machine code and +stubs.

  • +
  • Modify XXXTargetMachine so that it provides a TargetJITInfo object +through its getJITInfo method.

  • +
+

There are several different approaches to writing the JIT support code. For +instance, TableGen and target descriptor files may be used for creating a JIT +code generator, but are not mandatory. For the Alpha and PowerPC target +machines, TableGen is used to generate XXXGenCodeEmitter.inc, which +contains the binary coding of machine instructions and the +getBinaryCodeForInstr method to access those codes. Other JIT +implementations do not.

+

Both XXXJITInfo.cpp and XXXCodeEmitter.cpp must include the +llvm/CodeGen/MachineCodeEmitter.h header file that defines the +MachineCodeEmitter class containing code for several callback functions +that write data (in bytes, words, strings, etc.) to the output stream.

+
+

Machine Code Emitter

+

In XXXCodeEmitter.cpp, a target-specific version of the Emitter class is +implemented as a function pass (subclass of MachineFunctionPass). The +target-specific implementation of runOnMachineFunction (invoked by +runOnFunction in MachineFunctionPass) iterates through the +MachineBasicBlock and calls emitInstruction to process each instruction and +emit binary code. emitInstruction is largely implemented with case +statements on the instruction types defined in XXXInstrInfo.h. For +example, in X86CodeEmitter.cpp, the emitInstruction method is built +around the following switch/case statements:

+
switch (Desc->TSFlags & X86::FormMask) {
+case X86II::Pseudo:  // for not yet implemented instructions
+   ...               // or pseudo-instructions
+   break;
+case X86II::RawFrm:  // for instructions with a fixed opcode value
+   ...
+   break;
+case X86II::AddRegFrm: // for instructions that have one register operand
+   ...                 // added to their opcode
+   break;
+case X86II::MRMDestReg:// for instructions that use the Mod/RM byte
+   ...                 // to specify a destination (register)
+   break;
+case X86II::MRMDestMem:// for instructions that use the Mod/RM byte
+   ...                 // to specify a destination (memory)
+   break;
+case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte
+   ...                 // to specify a source (register)
+   break;
+case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte
+   ...                 // to specify a source (memory)
+   break;
+case X86II::MRM0r: case X86II::MRM1r:  // for instructions that operate on
+case X86II::MRM2r: case X86II::MRM3r:  // a REGISTER r/m operand and
+case X86II::MRM4r: case X86II::MRM5r:  // use the Mod/RM byte and a field
+case X86II::MRM6r: case X86II::MRM7r:  // to hold extended opcode data
+   ...
+   break;
+case X86II::MRM0m: case X86II::MRM1m:  // for instructions that operate on
+case X86II::MRM2m: case X86II::MRM3m:  // a MEMORY r/m operand and
+case X86II::MRM4m: case X86II::MRM5m:  // use the Mod/RM byte and a field
+case X86II::MRM6m: case X86II::MRM7m:  // to hold extended opcode data
+   ...
+   break;
+case X86II::MRMInitReg: // for instructions whose source and
+   ...                  // destination are the same register
+   break;
+}
+
+
+

The implementations of these case statements often first emit the opcode and +then get the operand(s). Then depending upon the operand, helper methods may +be called to process the operand(s). For example, in X86CodeEmitter.cpp, +for the X86II::AddRegFrm case, the first data emitted (by emitByte) is +the opcode added to the register operand. Then an object representing the +machine operand, MO1, is extracted. The helper methods such as +isImmediate, isGlobalAddress, isExternalSymbol, +isConstantPoolIndex, and isJumpTableIndex determine the operand type. +(X86CodeEmitter.cpp also has private methods such as emitConstant, +emitGlobalAddress, emitExternalSymbolAddress, emitConstPoolAddress, +and emitJumpTableAddress that emit the data into the output stream.)

+
case X86II::AddRegFrm:
+  MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+  if (CurOp != NumOps) {
+    const MachineOperand &MO1 = MI.getOperand(CurOp++);
+    unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+    if (MO1.isImmediate())
+      emitConstant(MO1.getImm(), Size);
+    else {
+      unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+        : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+      if (Opcode == X86::MOV64ri)
+        rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
+      if (MO1.isGlobalAddress()) {
+        bool NeedStub = isa<Function>(MO1.getGlobal());
+        bool isLazy = gvNeedsLazyPtr(MO1.getGlobal());
+        emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+                          NeedStub, isLazy);
+      } else if (MO1.isExternalSymbol())
+        emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+      else if (MO1.isConstantPoolIndex())
+        emitConstPoolAddress(MO1.getIndex(), rt);
+      else if (MO1.isJumpTableIndex())
+        emitJumpTableAddress(MO1.getIndex(), rt);
+    }
+  }
+  break;
+
+
+

In the previous example, XXXCodeEmitter.cpp uses the variable rt, which +is a RelocationType enum that may be used to relocate addresses (for +example, a global address with a PIC base offset). The RelocationType enum +for that target is defined in the short target-specific XXXRelocations.h +file. The RelocationType is used by the relocate method defined in +XXXJITInfo.cpp to rewrite addresses for referenced global symbols.

+

For example, X86Relocations.h specifies the following relocation types for +the X86 addresses. In all four cases, the relocated value is added to the +value already in memory. For reloc_pcrel_word and reloc_picrel_word, +there is an additional initial adjustment.

+
enum RelocationType {
+  reloc_pcrel_word = 0,    // add reloc value after adjusting for the PC loc
+  reloc_picrel_word = 1,   // add reloc value after adjusting for the PIC base
+  reloc_absolute_word = 2, // absolute relocation; no additional adjustment
+  reloc_absolute_dword = 3 // absolute relocation; no additional adjustment
+};
+
+
+
+
+

Target JIT Info

+

XXXJITInfo.cpp implements the JIT interfaces for target-specific +code-generation activities, such as emitting machine code and stubs. At +minimum, a target-specific version of XXXJITInfo implements the following:

+
    +
  • getLazyResolverFunction — Initializes the JIT, gives the target a +function that is used for compilation.

  • +
  • emitFunctionStub — Returns a native function with a specified address +for a callback function.

  • +
  • relocate — Changes the addresses of referenced globals, based on +relocation types.

  • +
  • Callback functions that are wrappers to a function stub that is used when the +real target is not initially known.

  • +
+

getLazyResolverFunction is generally trivial to implement. It stores the +incoming parameter as the global JITCompilerFunction and returns the +callback function that will be used as a function wrapper. For the Alpha target +(in AlphaJITInfo.cpp), the getLazyResolverFunction implementation is +simply:

+
TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction(
+                                            JITCompilerFn F) {
+  JITCompilerFunction = F;
+  return AlphaCompilationCallback;
+}
+
+
+

For the X86 target, the getLazyResolverFunction implementation is a little +more complicated, because it returns a different callback function for +processors with SSE instructions and XMM registers.

+

The callback function initially saves and later restores the callee register +values, incoming arguments, and frame and return address. The callback +function needs low-level access to the registers or stack, so it is typically +implemented with assembler.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/WritingAnLLVMNewPMPass.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/WritingAnLLVMNewPMPass.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/WritingAnLLVMNewPMPass.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/WritingAnLLVMNewPMPass.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,346 @@ + + + + + + + + + Writing an LLVM Pass — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Writing an LLVM Pass

+ +
+

Introduction — What is a pass?

+

The LLVM pass framework is an important part of the LLVM system, because LLVM +passes are where most of the interesting parts of the compiler exist. Passes +perform the transformations and optimizations that make up the compiler, they +build the analysis results that are used by these transformations, and they +are, above all, a structuring technique for compiler code.

+

Unlike passes under the legacy pass manager where the pass interface is +defined via inheritance, passes under the new pass manager rely on +concept-based polymorphism, meaning there is no explicit interface (see +comments in PassManager.h for more details). All LLVM passes inherit from +the CRTP mix-in PassInfoMixin<PassT>. The pass should have a run() +method which returns a PreservedAnalyses and takes in some unit of IR +along with an analysis manager. For example, a function pass would have a +PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); method.

+

We start by showing you how to construct a pass, from setting up the build, +creating the pass, to executing and testing it. Looking at existing passes is +always a great way to learn details.

+
+

Warning

+

This document deals with the new pass manager. LLVM uses the legacy pass +manager for the codegen pipeline. For more details, see +Writing an LLVM Pass and Using the New Pass Manager.

+
+
+
+

Quick Start — Writing hello world

+

Here we describe how to write the “hello world” of passes. The “HelloWorld” +pass is designed to simply print out the name of non-external functions that +exist in the program being compiled. It does not modify the program at all, +it just inspects it.

+

The code below already exists; feel free to create a pass with a different +name alongside the HelloWorld source files.

+
+

Setting up the build

+

First, configure and build LLVM as described in Getting Started with the LLVM System.

+

Next, we will reuse an existing directory (creating a new directory involves +messing around with more CMake files than we want). For this example, we’ll use +llvm/lib/Transforms/Utils/HelloWorld.cpp, which has already been created. +If you’d like to create your own pass, add a new source file into +llvm/lib/Transforms/Utils/CMakeLists.txt (assuming you want your pass in +the Transforms/Utils directory).

+

Now that we have the build set up for a new pass, we need to write the code +for the pass itself.

+
+
+

Basic code required

+

Now that the build is set up for a new pass, we just have to write it.

+

First we need to define the pass in a header file. We’ll create +llvm/include/llvm/Transforms/Utils/HelloWorld.h. The file should +contain the following boilerplate:

+
#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H
+
+
+

This creates the class for the pass with a declaration of the run() +method which actually runs the pass. Inheriting from PassInfoMixin<PassT> +sets up some more boilerplate so that we don’t have to write it ourselves.

+

Our class is in the llvm namespace so that we don’t pollute the global +namespace.

+

Next we’ll create llvm/lib/Transforms/Utils/HelloWorld.cpp, starting +with

+
#include "llvm/Transforms/Utils/HelloWorld.h"
+
+
+

… to include the header file we just created.

+
using namespace llvm;
+
+
+

… is required because the functions from the include files live in the llvm +namespace. This should only be done in non-header files.

+

Next we have the pass’s run() definition:

+
PreservedAnalyses HelloWorldPass::run(Function &F,
+                                      FunctionAnalysisManager &AM) {
+  errs() << F.getName() << "\n";
+  return PreservedAnalyses::all();
+}
+
+
+

… which simply prints out the name of the function to stderr. The pass +manager will ensure that the pass will be run on every function in a module. +The PreservedAnalyses return value says that all analyses (e.g. dominator +tree) are still valid after this pass since we didn’t modify any functions.

+

That’s it for the pass itself. Now in order to “register” the pass, we need +to add it to a couple places. Add the following to +llvm/lib/Passes/PassRegistry.def in the FUNCTION_PASS section

+
FUNCTION_PASS("helloworld", HelloWorldPass())
+
+
+

… which adds the pass under the name “helloworld”.

+

llvm/lib/Passes/PassRegistry.def is #include’d into +llvm/lib/Passes/PassBuilder.cpp multiple times for various reasons. Since +it constructs our pass, we need to also add the proper #include in +llvm/lib/Passes/PassBuilder.cpp:

+
#include "llvm/Transforms/Utils/HelloWorld.h"
+
+
+

This should be all the code necessary for our pass, now it’s time to compile +and run it.

+
+
+

Running a pass with opt

+

Now that you have a brand new shiny pass, we can build opt and use +it to run some LLVM IR through the pass.

+
$ ninja -C build/ opt
+# or whatever build system/build directory you are using
+
+$ cat /tmp/a.ll
+define i32 @foo() {
+  %a = add i32 2, 3
+  ret i32 %a
+}
+
+define void @bar() {
+  ret void
+}
+
+$ build/bin/opt -disable-output /tmp/a.ll -passes=helloworld
+foo
+bar
+
+
+

Our pass ran and printed the names of functions as expected!

+
+
+

Testing a pass

+

Testing our pass is important to prevent future regressions. We’ll add a lit +test at llvm/test/Transforms/Utils/helloworld.ll. See +LLVM Testing Infrastructure Guide for more information on testing.

+
$ cat llvm/test/Transforms/Utils/helloworld.ll
+; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s
+
+; CHECK: {{^}}foo{{$}}
+define i32 @foo() {
+  %a = add i32 2, 3
+  ret i32 %a
+}
+
+; CHECK-NEXT: {{^}}bar{{$}}
+define void @bar() {
+  ret void
+}
+
+$ ninja -C build check-llvm
+# runs our new test alongside all other llvm lit tests
+
+
+
+
+
+

FAQs

+
+

Required passes

+

A pass that defines a static isRequired() method that returns true is a required pass. For example:

+
class HelloWorldPass : public PassInfoMixin<HelloWorldPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
+};
+
+
+

A required pass is a pass that may not be skipped. An example of a required +pass is AlwaysInlinerPass, which must always be run to preserve +alwaysinline semantics. Pass managers are required since they may contain +other required passes.

+

An example of how a pass can be skipped is the optnone function +attribute, which specifies that optimizations should not be run on the +function. Required passes will still be run on optnone functions.

+

For more implementation details, see +PassInstrumentation::runBeforePass().

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/WritingAnLLVMPass.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/WritingAnLLVMPass.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/WritingAnLLVMPass.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/WritingAnLLVMPass.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,1397 @@ + + + + + + + + + Writing an LLVM Pass — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Writing an LLVM Pass

+
+ +
+
+

Introduction — What is a pass?

+

The LLVM Pass Framework is an important part of the LLVM system, because LLVM +passes are where most of the interesting parts of the compiler exist. Passes +perform the transformations and optimizations that make up the compiler, they +build the analysis results that are used by these transformations, and they +are, above all, a structuring technique for compiler code.

+

All LLVM passes are subclasses of the Pass class, which implement +functionality by overriding virtual methods inherited from Pass. Depending +on how your pass works, you should inherit from the ModulePass, CallGraphSCCPass, FunctionPass, LoopPass, or RegionPass class, which gives the system more +information about what your pass does, and how it can be combined with other +passes. One of the main features of the LLVM Pass Framework is that it +schedules passes to run in an efficient way based on the constraints that your +pass meets (which are indicated by which class it derives from).

+

We start by showing you how to construct a pass, everything from setting up the +code, to compiling, loading, and executing it. After the basics are down, more +advanced features are discussed.

+
+

Warning

+

This document deals with the legacy pass manager. LLVM uses the new pass +manager by default for the optimization pipeline (the codegen pipeline is +still using the legacy pass manager), which has its own way of defining +passes. For more details, see Writing an LLVM Pass and +Using the New Pass Manager. To use the legacy pass manager with opt, pass +the -enable-new-pm=0 flag to all opt invocations.

+
+
+
+

Quick Start — Writing hello world

+

Here we describe how to write the “hello world” of passes. The “Hello” pass is +designed to simply print out the name of non-external functions that exist in +the program being compiled. It does not modify the program at all, it just +inspects it. The source code and files for this pass are available in the LLVM +source tree in the lib/Transforms/Hello directory.

+
+

Setting up the build environment

+

First, configure and build LLVM. Next, you need to create a new directory +somewhere in the LLVM source base. For this example, we’ll assume that you +made lib/Transforms/Hello. Finally, you must set up a build script +that will compile the source code for the new pass. To do this, +copy the following into CMakeLists.txt:

+
add_llvm_library( LLVMHello MODULE
+  Hello.cpp
+
+  PLUGIN_TOOL
+  opt
+  )
+
+
+

and the following line into lib/Transforms/CMakeLists.txt:

+
add_subdirectory(Hello)
+
+
+

(Note that there is already a directory named Hello with a sample “Hello” +pass; you may play with it – in which case you don’t need to modify any +CMakeLists.txt files – or, if you want to create everything from scratch, +use another name.)

+

This build script specifies that the Hello.cpp file in the current directory +is to be compiled and linked into a shared object $(LEVEL)/lib/LLVMHello.so that +can be dynamically loaded by the opt tool via its -load +option. If your operating system uses a suffix other than .so (such as +Windows or macOS), the appropriate extension will be used.

+

Now that we have the build scripts set up, we just need to write the code for +the pass itself.

+
+
+

Basic code required

+

Now that we have a way to compile our new pass, we just have to write it. +Start out with:

+
#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+
+
+

Which are needed because we are writing a Pass, we are operating on +Functions, and we will +be doing some printing.

+

Next we have:

+
using namespace llvm;
+
+
+

… which is required because the functions from the include files live in the +llvm namespace.

+

Next we have:

+
namespace {
+
+
+

… which starts out an anonymous namespace. Anonymous namespaces are to C++ +what the “static” keyword is to C (at global scope). It makes the things +declared inside of the anonymous namespace visible only to the current file. +If you’re not familiar with them, consult a decent C++ book for more +information.

+

Next, we declare our pass itself:

+
struct Hello : public FunctionPass {
+
+
+

This declares a “Hello” class that is a subclass of FunctionPass. The different builtin pass subclasses +are described in detail later, but +for now, know that FunctionPass operates on a function at a time.

+
static char ID;
+Hello() : FunctionPass(ID) {}
+
+
+

This declares the pass identifier used by LLVM to identify the pass. This allows LLVM +to avoid using expensive C++ runtime information.

+
  bool runOnFunction(Function &F) override {
+    errs() << "Hello: ";
+    errs().write_escaped(F.getName()) << '\n';
+    return false;
+  }
+}; // end of struct Hello
+}  // end of anonymous namespace
+
+
+

We declare a runOnFunction method, +which overrides an abstract virtual method inherited from FunctionPass. This is where we are supposed to do our +thing, so we just print out our message with the name of each function.

+
char Hello::ID = 0;
+
+
+

We initialize the pass ID here. LLVM uses ID’s address to identify a pass, so +the initialization value is not important.

+
static RegisterPass<Hello> X("hello", "Hello World Pass",
+                             false /* Only looks at CFG */,
+                             false /* Analysis Pass */);
+
+
+

Lastly, we register our class +Hello, giving it a command line argument “hello”, and a name “Hello +World Pass”. The last two arguments describe its behavior: if a pass walks the CFG +without modifying it then the third argument is set to true; if a pass is +an analysis pass, for example the dominator tree pass, then true is supplied as +the fourth argument.

+

If we want to register the pass as a step of an existing pipeline, some extension +points are provided, e.g. PassManagerBuilder::EP_EarlyAsPossible to apply our +pass before any optimization, or PassManagerBuilder::EP_FullLinkTimeOptimizationLast +to apply it after Link Time Optimizations.

+
static llvm::RegisterStandardPasses Y(
+    llvm::PassManagerBuilder::EP_EarlyAsPossible,
+    [](const llvm::PassManagerBuilder &Builder,
+       llvm::legacy::PassManagerBase &PM) { PM.add(new Hello()); });
+
+
+

As a whole, the .cpp file looks like:

+
#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+
+using namespace llvm;
+
+namespace {
+struct Hello : public FunctionPass {
+  static char ID;
+  Hello() : FunctionPass(ID) {}
+
+  bool runOnFunction(Function &F) override {
+    errs() << "Hello: ";
+    errs().write_escaped(F.getName()) << '\n';
+    return false;
+  }
+}; // end of struct Hello
+}  // end of anonymous namespace
+
+char Hello::ID = 0;
+static RegisterPass<Hello> X("hello", "Hello World Pass",
+                             false /* Only looks at CFG */,
+                             false /* Analysis Pass */);
+
+static RegisterStandardPasses Y(
+    PassManagerBuilder::EP_EarlyAsPossible,
+    [](const PassManagerBuilder &Builder,
+       legacy::PassManagerBase &PM) { PM.add(new Hello()); });
+
+
+

Now that it’s all together, compile the file with a simple “gmake” command +from the top level of your build directory and you should get a new file +“lib/LLVMHello.so”. Note that everything in this file is +contained in an anonymous namespace — this reflects the fact that passes +are self contained units that do not need external interfaces (although they +can have them) to be useful.

+
+
+

Running a pass with opt

+

Now that you have a brand new shiny shared object file, we can use the +opt command to run an LLVM program through your pass. Because you +registered your pass with RegisterPass, you will be able to use the +opt tool to access it, once loaded.

+

To test it, follow the example at the end of the Getting Started with the LLVM System to +compile “Hello World” to LLVM. We can now run the bitcode file (hello.bc) for +the program through our transformation like this (of course, any bitcode file +will work):

+
$ opt -load lib/LLVMHello.so -hello < hello.bc > /dev/null
+Hello: __main
+Hello: puts
+Hello: main
+
+
+

The -load option specifies that opt should load your pass +as a shared object, which makes “-hello” a valid command line argument +(which is one reason you need to register your pass). Because the Hello pass does not modify +the program in any interesting way, we just throw away the result of +opt (sending it to /dev/null).

+

To see what happened to the other string you registered, try running +opt with the -help option:

+
$ opt -load lib/LLVMHello.so -help
+OVERVIEW: llvm .bc -> .bc modular optimizer and analysis printer
+
+USAGE: opt [subcommand] [options] <input bitcode file>
+
+OPTIONS:
+  Optimizations available:
+...
+    -guard-widening           - Widen guards
+    -gvn                      - Global Value Numbering
+    -gvn-hoist                - Early GVN Hoisting of Expressions
+    -hello                    - Hello World Pass
+    -indvars                  - Induction Variable Simplification
+    -inferattrs               - Infer set function attributes
+...
+
+
+

The pass name gets added as the information string for your pass, giving some +documentation to users of opt. Now that you have a working pass, +you would go ahead and make it do the cool transformations you want. Once you +get it all working and tested, it may become useful to find out how fast your +pass is. The PassManager provides a +nice command line option (-time-passes) that allows you to get +information about the execution time of your pass along with the other passes +you queue up. For example:

+
$ opt -load lib/LLVMHello.so -hello -time-passes < hello.bc > /dev/null
+Hello: __main
+Hello: puts
+Hello: main
+===-------------------------------------------------------------------------===
+                      ... Pass execution timing report ...
+===-------------------------------------------------------------------------===
+  Total Execution Time: 0.0007 seconds (0.0005 wall clock)
+
+   ---User Time---   --User+System--   ---Wall Time---  --- Name ---
+   0.0004 ( 55.3%)   0.0004 ( 55.3%)   0.0004 ( 75.7%)  Bitcode Writer
+   0.0003 ( 44.7%)   0.0003 ( 44.7%)   0.0001 ( 13.6%)  Hello World Pass
+   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0001 ( 10.7%)  Module Verifier
+   0.0007 (100.0%)   0.0007 (100.0%)   0.0005 (100.0%)  Total
+
+
+

As you can see, our implementation above is pretty fast. The additional +passes listed are automatically inserted by the opt tool to verify +that the LLVM emitted by your pass is still valid and well formed LLVM, which +hasn’t been broken somehow.

+

Now that you have seen the basics of the mechanics behind passes, we can talk +about some more details of how they work and how to use them.

+
+
+
+

Pass classes and requirements

+

One of the first things that you should do when designing a new pass is to +decide what class you should subclass for your pass. The Hello World example uses the FunctionPass class for its implementation, but we did +not discuss why or when this should occur. Here we talk about the classes +available, from the most general to the most specific.

+

When choosing a superclass for your Pass, you should choose the most +specific class possible, while still being able to meet the requirements +listed. This gives the LLVM Pass Infrastructure information necessary to +optimize how passes are run, so that the resultant compiler isn’t unnecessarily +slow.

+
+

The ImmutablePass class

+

The most plain and boring type of pass is the “ImmutablePass” class. This pass +type is used for passes that do not have to be run, do not change state, and +never need to be updated. This is not a normal type of transformation or +analysis, but can provide information about the current compiler configuration.

+

Although this pass class is very infrequently used, it is important for +providing information about the current target machine being compiled for, and +other static information that can affect the various transformations.

+

ImmutablePasses never invalidate other transformations, are never +invalidated, and are never “run”.

+
+
+

The ModulePass class

+

The ModulePass class +is the most general of all superclasses that you can use. Deriving from +ModulePass indicates that your pass uses the entire program as a unit, +referring to function bodies in no predictable order, or adding and removing +functions. Because nothing is known about the behavior of ModulePass +subclasses, no optimization can be done for their execution.

+

A module pass can use function level passes (e.g. dominators) using the +getAnalysis interface getAnalysis<DominatorTree>(llvm::Function *) to +provide the function to retrieve analysis result for, if the function pass does +not require any module or immutable passes. Note that this can only be done +for functions for which the analysis ran, e.g. in the case of dominators you +should only ask for the DominatorTree for function definitions, not +declarations.

+

To write a correct ModulePass subclass, derive from ModulePass and +override the runOnModule method with the following signature:

+
+

The runOnModule method

+
virtual bool runOnModule(Module &M) = 0;
+
+
+

The runOnModule method performs the interesting work of the pass. It +should return true if the module was modified by the transformation and +false otherwise.

+
+
+
+

The CallGraphSCCPass class

+

The CallGraphSCCPass is used by +passes that need to traverse the program bottom-up on the call graph (callees +before callers). Deriving from CallGraphSCCPass provides some mechanics +for building and traversing the CallGraph, but also allows the system to +optimize execution of CallGraphSCCPasses. If your pass meets the +requirements outlined below, and doesn’t meet the requirements of a +FunctionPass, you should derive from +CallGraphSCCPass.

+

TODO: explain briefly what SCC, Tarjan’s algo, and B-U mean.

+

To be explicit, CallGraphSCCPass subclasses are:

+
    +
  1. not allowed to inspect or modify any Functions other than those +in the current SCC and the direct callers and direct callees of the SCC.

  2. +
  3. required to preserve the current CallGraph object, updating it to +reflect any changes made to the program.

  4. +
  5. not allowed to add or remove SCC’s from the current Module, though +they may change the contents of an SCC.

  6. +
  7. allowed to add or remove global variables from the current Module.

  8. +
  9. allowed to maintain state across invocations of runOnSCC (including global data).

  10. +
+

Implementing a CallGraphSCCPass is slightly tricky in some cases because it +has to handle SCCs with more than one node in them. All of the virtual methods +described below should return true if they modified the program, or +false if they didn’t.

+
+

The doInitialization(CallGraph &) method

+
virtual bool doInitialization(CallGraph &CG);
+
+
+

The doInitialization method is allowed to do most of the things that +CallGraphSCCPasses are not allowed to do. They can add and remove +functions, get pointers to functions, etc. The doInitialization method is +designed to do simple initialization type of stuff that does not depend on the +SCCs being processed. The doInitialization method call is not scheduled to +overlap with any other pass executions (thus it should be very fast).

+
+
+

The runOnSCC method

+
virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
+
+
+

The runOnSCC method performs the interesting work of the pass, and should +return true if the module was modified by the transformation, false +otherwise.

+
+
+

The doFinalization(CallGraph &) method

+
virtual bool doFinalization(CallGraph &CG);
+
+
+

The doFinalization method is an infrequently used method that is called +when the pass framework has finished calling runOnSCC for every SCC in the program being compiled.

+
+
+
+

The FunctionPass class

+

In contrast to ModulePass subclasses, FunctionPass subclasses do have a +predictable, local behavior that can be expected by the system. All +FunctionPasses execute on each function in the program independent of all of +the other functions in the program. FunctionPasses do not require that +they are executed in a particular order, and FunctionPasses do not modify +external functions.

+

To be explicit, FunctionPass subclasses are not allowed to:

+
    +
  1. Inspect or modify a Function other than the one currently being processed.

  2. +
  3. Add or remove Functions from the current Module.

  4. +
  5. Add or remove global variables from the current Module.

  6. +
  7. Maintain state across invocations of runOnFunction (including global data).

  8. +
+

Implementing a FunctionPass is usually straightforward (see the Hello +World pass for example). +FunctionPasses may override three virtual methods to do their work. All +of these methods should return true if they modified the program, or +false if they didn’t.

+
+

The doInitialization(Module &) method

+
virtual bool doInitialization(Module &M);
+
+
+

The doInitialization method is allowed to do most of the things that +FunctionPasses are not allowed to do. They can add and remove functions, +get pointers to functions, etc. The doInitialization method is designed to +do simple initialization type of stuff that does not depend on the functions +being processed. The doInitialization method call is not scheduled to +overlap with any other pass executions (thus it should be very fast).

+

A good example of how this method should be used is the LowerAllocations pass. This pass +converts malloc and free instructions into platform dependent +malloc() and free() function calls. It uses the doInitialization +method to get a reference to the malloc and free functions that it +needs, adding prototypes to the module if necessary.

+
+
+

The runOnFunction method

+
virtual bool runOnFunction(Function &F) = 0;
+
+
+

The runOnFunction method must be implemented by your subclass to do the +transformation or analysis work of your pass. As usual, a true value +should be returned if the function is modified.

+
+
+

The doFinalization(Module &) method

+
virtual bool doFinalization(Module &M);
+
+
+

The doFinalization method is an infrequently used method that is called +when the pass framework has finished calling runOnFunction for every function in the program being +compiled.

+
+
+
+

The LoopPass class

+

All LoopPasses execute on each loop in the function +independent of all of the other loops in the function. LoopPass processes +loops in loop nest order such that the outermost loop is processed last.

+

LoopPass subclasses are allowed to update the loop nest using the LPPassManager +interface. Implementing a loop pass is usually straightforward. +LoopPasses may override three virtual methods to do their work. All +these methods should return true if they modified the program, or false +if they didn’t.

+

A LoopPass subclass which is intended to run as part of the main loop pass +pipeline needs to preserve all of the same function analyses that the other +loop passes in its pipeline require. To make that easier, +a getLoopAnalysisUsage function is provided by LoopUtils.h. It can be +called within the subclass’s getAnalysisUsage override to get consistent +and correct behavior. Analogously, INITIALIZE_PASS_DEPENDENCY(LoopPass) +will initialize this set of function analyses.

+
+

The doInitialization(Loop *, LPPassManager &) method

+
virtual bool doInitialization(Loop *, LPPassManager &LPM);
+
+
+

The doInitialization method is designed to do simple initialization type of +stuff that does not depend on the functions being processed. The +doInitialization method call is not scheduled to overlap with any other +pass executions (thus it should be very fast). LPPassManager interface +should be used to access Function or Module level analysis information.

+
+
+

The runOnLoop method

+
virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0;
+
+
+

The runOnLoop method must be implemented by your subclass to do the +transformation or analysis work of your pass. As usual, a true value +should be returned if the function is modified. LPPassManager interface +should be used to update loop nest.

+
+
+

The doFinalization() method

+
virtual bool doFinalization();
+
+
+

The doFinalization method is an infrequently used method that is called +when the pass framework has finished calling runOnLoop for every loop in the program being compiled.

+
+
+
+

The RegionPass class

+

RegionPass is similar to LoopPass, +but executes on each single entry single exit region in the function. +RegionPass processes regions in nested order such that the outer most +region is processed last.

+

RegionPass subclasses are allowed to update the region tree by using the +RGPassManager interface. You may override three virtual methods of +RegionPass to implement your own region pass. All these methods should +return true if they modified the program, or false if they did not.

+
+

The doInitialization(Region *, RGPassManager &) method

+
virtual bool doInitialization(Region *, RGPassManager &RGM);
+
+
+

The doInitialization method is designed to do simple initialization type of +stuff that does not depend on the functions being processed. The +doInitialization method call is not scheduled to overlap with any other +pass executions (thus it should be very fast). The RGPassManager interface +should be used to access Function or Module level analysis information.

+
+
+

The runOnRegion method

+
virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0;
+
+
+

The runOnRegion method must be implemented by your subclass to do the +transformation or analysis work of your pass. As usual, a true value should be +returned if the region is modified. RGPassManager interface should be used to +update region tree.

+
+
+

The doFinalization() method

+
virtual bool doFinalization();
+
+
+

The doFinalization method is an infrequently used method that is called +when the pass framework has finished calling runOnRegion for every region in the program being +compiled.

+
+
+
+

The MachineFunctionPass class

+

A MachineFunctionPass is a part of the LLVM code generator that executes on +the machine-dependent representation of each LLVM function in the program.

+

Code generator passes are registered and initialized specially by +TargetMachine::addPassesToEmitFile and similar routines, so they cannot +generally be run from the opt or bugpoint commands.

+

A MachineFunctionPass is also a FunctionPass, so all the restrictions +that apply to a FunctionPass also apply to it. MachineFunctionPasses +also have additional restrictions. In particular, MachineFunctionPasses +are not allowed to do any of the following:

+
    +
  1. Modify or create any LLVM IR Instructions, BasicBlocks, +Arguments, Functions, GlobalVariables, +GlobalAliases, or Modules.

  2. +
  3. Modify a MachineFunction other than the one currently being processed.

  4. +
  5. Maintain state across invocations of runOnMachineFunction (including global data).

  6. +
+
+

The runOnMachineFunction(MachineFunction &MF) method

+
virtual bool runOnMachineFunction(MachineFunction &MF) = 0;
+
+
+

runOnMachineFunction can be considered the main entry point of a +MachineFunctionPass; that is, you should override this method to do the +work of your MachineFunctionPass.

+

The runOnMachineFunction method is called on every MachineFunction in a +Module, so that the MachineFunctionPass may perform optimizations on +the machine-dependent representation of the function. If you want to get at +the LLVM Function for the MachineFunction you’re working on, use +MachineFunction’s getFunction() accessor method — but remember, you +may not modify the LLVM Function or its contents from a +MachineFunctionPass.

+
+
+
+

Pass registration

+

In the Hello World example pass we +illustrated how pass registration works, and discussed some of the reasons that +it is used and what it does. Here we discuss how and why passes are +registered.

+

As we saw above, passes are registered with the RegisterPass template. The +template parameter is the pass class itself. The first argument is the name of the pass that is to be used on the command +line to specify that the pass should be added to a program (for example, with +opt or bugpoint). The second argument is the descriptive name of the +pass, which is to be used for the -help output of programs, as well +as for debug output generated by the -debug-pass option.

+

If you want your pass to be easily dumpable, you should implement the virtual +print method:

+
+

The print method

+
virtual void print(llvm::raw_ostream &O, const Module *M) const;
+
+
+

The print method must be implemented by “analyses” in order to print a +human readable version of the analysis results. This is useful for debugging +an analysis itself, as well as for other people to figure out how an analysis +works. Use the opt -analyze argument to invoke this method.

+

The llvm::raw_ostream parameter specifies the stream to write the results +on, and the Module parameter gives a pointer to the top level module of the +program that has been analyzed. Note however that this pointer may be NULL +in certain circumstances (such as calling the Pass::dump() from a +debugger), so it should only be used to enhance debug output, it should not be +depended on.

+
+
+
+

Specifying interactions between passes

+

One of the main responsibilities of the PassManager is to make sure that +passes interact with each other correctly. Because PassManager tries to +optimize the execution of passes it +must know how the passes interact with each other and what dependencies exist +between the various passes. To track this, each pass can declare the set of +passes that are required to be executed before the current pass, and the passes +which are invalidated by the current pass.

+

Typically this functionality is used to require that analysis results are +computed before your pass is run. Running arbitrary transformation passes can +invalidate the computed analysis results, which is what the invalidation set +specifies. If a pass does not implement the getAnalysisUsage method, it defaults to not having any +prerequisite passes, and invalidating all other passes.

+
+

The getAnalysisUsage method

+
virtual void getAnalysisUsage(AnalysisUsage &Info) const;
+
+
+

By implementing the getAnalysisUsage method, the required and invalidated +sets may be specified for your transformation. The implementation should fill +in the AnalysisUsage object with +information about which passes are required and not invalidated. To do this, a +pass may call any of the following methods on the AnalysisUsage object:

+
+
+

The AnalysisUsage::addRequired<> and AnalysisUsage::addRequiredTransitive<> methods

+

If your pass requires a previous pass to be executed (an analysis for example), +it can use one of these methods to arrange for it to be run before your pass. +LLVM has many different types of analyses and passes that can be required, +spanning the range from DominatorSet to BreakCriticalEdges. Requiring +BreakCriticalEdges, for example, guarantees that there will be no critical +edges in the CFG when your pass has been run.

+

Some analyses chain to other analyses to do their job. For example, an +AliasAnalysis implementation is required to chain to other alias analysis passes. In cases where +analyses chain, the addRequiredTransitive method should be used instead of +the addRequired method. This informs the PassManager that the +transitively required pass should be alive as long as the requiring pass is.

+
+
+

The AnalysisUsage::addPreserved<> method

+

One of the jobs of the PassManager is to optimize how and when analyses are +run. In particular, it attempts to avoid recomputing data unless it needs to. +For this reason, passes are allowed to declare that they preserve (i.e., they +don’t invalidate) an existing analysis if it’s available. For example, a +simple constant folding pass would not modify the CFG, so it can’t possibly +affect the results of dominator analysis. By default, all passes are assumed +to invalidate all others.

+

The AnalysisUsage class provides several methods which are useful in +certain circumstances that are related to addPreserved. In particular, the +setPreservesAll method can be called to indicate that the pass does not +modify the LLVM program at all (which is true for analyses), and the +setPreservesCFG method can be used by transformations that change +instructions in the program but do not modify the CFG or terminator +instructions.

+

addPreserved is particularly useful for transformations like +BreakCriticalEdges. This pass knows how to update a small set of loop and +dominator related analyses if they exist, so it can preserve them, despite the +fact that it hacks on the CFG.

+
+
+

Example implementations of getAnalysisUsage

+
// This example modifies the program, but does not modify the CFG
+void LICM::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<LoopInfoWrapperPass>();
+}
+
+
+
+
+

The getAnalysis<> and getAnalysisIfAvailable<> methods

+

The Pass::getAnalysis<> method is automatically inherited by your class, +providing you with access to the passes that you declared that you required +with the getAnalysisUsage +method. It takes a single template argument that specifies which pass class +you want, and returns a reference to that pass. For example:

+
bool LICM::runOnFunction(Function &F) {
+  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  //...
+}
+
+
+

This method call returns a reference to the pass desired. You may get a +runtime assertion failure if you attempt to get an analysis that you did not +declare as required in your getAnalysisUsage implementation. This method can be +called by your run* method implementation, or by any other local method +invoked by your run* method.

+

A module level pass can use function level analysis info using this interface. +For example:

+
bool ModuleLevelPass::runOnModule(Module &M) {
+  //...
+  DominatorTree &DT = getAnalysis<DominatorTree>(Func);
+  //...
+}
+
+
+

In the above example, runOnFunction for DominatorTree is called by the pass +manager before returning a reference to the desired pass.

+

If your pass is capable of updating analyses if they exist (e.g., +BreakCriticalEdges, as described above), you can use the +getAnalysisIfAvailable method, which returns a pointer to the analysis if +it is active. For example:

+
if (DominatorSet *DS = getAnalysisIfAvailable<DominatorSet>()) {
+  // A DominatorSet is active.  This code will update it.
+}
+
+
+
+
+
+

Implementing Analysis Groups

+

Now that we understand the basics of how passes are defined, how they are used, +and how they are required from other passes, it’s time to get a little bit +fancier. All of the pass relationships that we have seen so far are very +simple: one pass depends on one other specific pass to be run before it can +run. For many applications, this is great, for others, more flexibility is +required.

+

In particular, some analyses are defined such that there is a single simple +interface to the analysis results, but multiple ways of calculating them. +Consider alias analysis for example. The most trivial alias analysis returns +“may alias” for any alias query. The most sophisticated analysis is a +flow-sensitive, context-sensitive interprocedural analysis that can take a +significant amount of time to execute (and obviously, there is a lot of room +between these two extremes for other implementations). To cleanly support +situations like this, the LLVM Pass Infrastructure supports the notion of +Analysis Groups.

+
+

Analysis Group Concepts

+

An Analysis Group is a single simple interface that may be implemented by +multiple different passes. Analysis Groups can be given human readable names +just like passes, but unlike passes, they need not derive from the Pass +class. An analysis group may have one or more implementations, one of which is +the “default” implementation.

+

Analysis groups are used by client passes just like other passes are: the +AnalysisUsage::addRequired() and Pass::getAnalysis() methods. In order +to resolve this requirement, the PassManager scans the available passes to see if any +implementations of the analysis group are available. If none is available, the +default implementation is created for the pass to use. All standard rules for +interaction between passes still +apply.

+

Although Pass Registration is +optional for normal passes, all analysis group implementations must be +registered, and must use the INITIALIZE_AG_PASS template to join the +implementation pool. Also, a default implementation of the interface must +be registered with RegisterAnalysisGroup.

+

As a concrete example of an Analysis Group in action, consider the +AliasAnalysis +analysis group. The default implementation of the alias analysis interface +(the basic-aa pass) +just does a few simple checks that don’t require significant analysis to +compute (such as: two different globals can never alias each other, etc). +Passes that use the AliasAnalysis interface (for +example the gvn pass), do not +care which implementation of alias analysis is actually provided, they just use +the designated interface.

+

From the user’s perspective, commands work just like normal. Issuing the +command opt -gvn ... will cause the basic-aa class to be instantiated +and added to the pass sequence. Issuing the command opt -somefancyaa -gvn +... will cause the gvn pass to use the somefancyaa alias analysis +(which doesn’t actually exist, it’s just a hypothetical example) instead.

+
+
+

Using RegisterAnalysisGroup

+

The RegisterAnalysisGroup template is used to register the analysis group +itself, while the INITIALIZE_AG_PASS is used to add pass implementations to +the analysis group. First, an analysis group should be registered, with a +human readable name provided for it. Unlike registration of passes, there is +no command line argument to be specified for the Analysis Group Interface +itself, because it is “abstract”:

+
static RegisterAnalysisGroup<AliasAnalysis> A("Alias Analysis");
+
+
+

Once the analysis is registered, passes can declare that they are valid +implementations of the interface by using the following code:

+
namespace {
+  // Declare that we implement the AliasAnalysis interface
+  INITIALIZE_AG_PASS(FancyAA, AliasAnalysis , "somefancyaa",
+      "A more complex alias analysis implementation",
+      false,  // Is CFG Only?
+      true,   // Is Analysis?
+      false); // Is default Analysis Group implementation?
+}
+
+
+

This just shows a class FancyAA that uses the INITIALIZE_AG_PASS macro +both to register and to “join” the AliasAnalysis analysis group. +Every implementation of an analysis group should join using this macro.

+
namespace {
+  // Declare that we implement the AliasAnalysis interface
+  INITIALIZE_AG_PASS(BasicAA, AliasAnalysis, "basic-aa",
+      "Basic Alias Analysis (default AA impl)",
+      false, // Is CFG Only?
+      true,  // Is Analysis?
+      true); // Is default Analysis Group implementation?
+}
+
+
+

Here we show how the default implementation is specified (using the final +argument to the INITIALIZE_AG_PASS template). There must be exactly one +default implementation available at all times for an Analysis Group to be used. +Only the default implementation can derive from ImmutablePass. Here we declare +that the BasicAliasAnalysis pass is the default +implementation for the interface.

+
+
+
+
+

Pass Statistics

+

The Statistic class is +designed to be an easy way to expose various success metrics from passes. +These statistics are printed at the end of a run, when the -stats +command line option is enabled on the command line. See the Statistics +section in the Programmer’s Manual for details.

+
+

What PassManager does

+

The PassManager class takes a list of +passes, ensures their prerequisites +are set up correctly, and then schedules passes to run efficiently. All of the +LLVM tools that run passes use the PassManager for execution of these passes.

+

The PassManager does two main things to try to reduce the execution time of a +series of passes:

+
    +
  1. Share analysis results. The PassManager attempts to avoid +recomputing analysis results as much as possible. This means keeping track +of which analyses are available already, which analyses get invalidated, and +which analyses are needed to be run for a pass. An important part of this work +is that the PassManager tracks the exact lifetime of all analysis +results, allowing it to free memory allocated to holding analysis results +as soon as they are no longer needed.

  2. +
  3. Pipeline the execution of passes on the program. The PassManager +attempts to get better cache and memory usage behavior out of a series of +passes by pipelining the passes together. This means that, given a series +of consecutive FunctionPass, it +will execute all of the FunctionPass on the first function, then all of the +FunctionPasses on the second +function, etc… until the entire program has been run through the passes.

    +

    This improves the cache behavior of the compiler, because it is only +touching the LLVM program representation for a single function at a time, +instead of traversing the entire program. It reduces the memory consumption +of the compiler, because, for example, only one DominatorSet needs to be +calculated at a time. This also makes it possible to implement some +interesting enhancements in the future.

    +
  4. +
+

The effectiveness of the PassManager is influenced directly by how much +information it has about the behaviors of the passes it is scheduling. For +example, the “preserved” set is intentionally conservative in the face of an +unimplemented getAnalysisUsage +method. Not implementing it when it should be implemented will have the effect of +not allowing any analysis results to live across the execution of your pass.

+

The PassManager class exposes a --debug-pass command line option that +is useful for debugging pass execution, seeing how things work, and diagnosing +when you should be preserving more analyses than you currently are. (To get +information about all of the variants of the --debug-pass option, just type +“opt -help-hidden”).

+

By using the --debug-pass=Structure option, for example, we can see how our +Hello World pass interacts with other +passes. Let’s try it out with the gvn and licm passes:

+
$ opt -load lib/LLVMHello.so -gvn -licm --debug-pass=Structure < hello.bc > /dev/null
+ModulePass Manager
+  FunctionPass Manager
+    Dominator Tree Construction
+    Basic Alias Analysis (stateless AA impl)
+    Function Alias Analysis Results
+    Memory Dependence Analysis
+    Global Value Numbering
+    Natural Loop Information
+    Canonicalize natural loops
+    Loop-Closed SSA Form Pass
+    Basic Alias Analysis (stateless AA impl)
+    Function Alias Analysis Results
+    Scalar Evolution Analysis
+    Loop Pass Manager
+      Loop Invariant Code Motion
+    Module Verifier
+  Bitcode Writer
+
+
+

This output shows us when passes are constructed. +Here we see that GVN uses dominator tree information to do its job. The LICM pass +uses natural loop information, which uses dominator tree as well.

+

After the LICM pass, the module verifier runs (which is automatically added by +the opt tool), which uses the dominator tree to check that the +resultant LLVM code is well formed. Note that the dominator tree is computed +once, and shared by three passes.

+

Let’s see how this changes when we run the Hello World pass in between the two passes:

+
$ opt -load lib/LLVMHello.so -gvn -hello -licm --debug-pass=Structure < hello.bc > /dev/null
+ModulePass Manager
+  FunctionPass Manager
+    Dominator Tree Construction
+    Basic Alias Analysis (stateless AA impl)
+    Function Alias Analysis Results
+    Memory Dependence Analysis
+    Global Value Numbering
+    Hello World Pass
+    Dominator Tree Construction
+    Natural Loop Information
+    Canonicalize natural loops
+    Loop-Closed SSA Form Pass
+    Basic Alias Analysis (stateless AA impl)
+    Function Alias Analysis Results
+    Scalar Evolution Analysis
+    Loop Pass Manager
+      Loop Invariant Code Motion
+    Module Verifier
+  Bitcode Writer
+Hello: __main
+Hello: puts
+Hello: main
+
+
+

Here we see that the Hello World pass +has killed the Dominator Tree pass, even though it doesn’t modify the code at +all! To fix this, we need to add the following getAnalysisUsage method to our pass:

+
// We don't modify the program, so we preserve all analyses
+void getAnalysisUsage(AnalysisUsage &AU) const override {
+  AU.setPreservesAll();
+}
+
+
+

Now when we run our pass, we get this output:

+
$ opt -load lib/LLVMHello.so -gvn -hello -licm --debug-pass=Structure < hello.bc > /dev/null
+Pass Arguments:  -gvn -hello -licm
+ModulePass Manager
+  FunctionPass Manager
+    Dominator Tree Construction
+    Basic Alias Analysis (stateless AA impl)
+    Function Alias Analysis Results
+    Memory Dependence Analysis
+    Global Value Numbering
+    Hello World Pass
+    Natural Loop Information
+    Canonicalize natural loops
+    Loop-Closed SSA Form Pass
+    Basic Alias Analysis (stateless AA impl)
+    Function Alias Analysis Results
+    Scalar Evolution Analysis
+    Loop Pass Manager
+      Loop Invariant Code Motion
+    Module Verifier
+  Bitcode Writer
+Hello: __main
+Hello: puts
+Hello: main
+
+
+

Which shows that we don’t accidentally invalidate dominator information +anymore, and therefore do not have to compute it twice.

+
+

The releaseMemory method

+
virtual void releaseMemory();
+
+
+

The PassManager automatically determines when to compute analysis results, +and how long to keep them around for. Because the lifetime of the pass object +itself is effectively the entire duration of the compilation process, we need +some way to free analysis results when they are no longer useful. The +releaseMemory virtual method is the way to do this.

+

If you are writing an analysis or any other pass that retains a significant +amount of state (for use by another pass which “requires” your pass and uses +the getAnalysis method) you should +implement releaseMemory to, well, release the memory allocated to maintain +this internal state. This method is called after the run* method for the +class, before the next call of run* in your pass.

+
+
+
+
+

Building pass plugins

+

As an alternative to using PLUGIN_TOOL, LLVM provides a mechanism to +automatically register pass plugins within clang, opt and bugpoint. +One first needs to create an independent project and add it to either tools/ +or, using the MonoRepo layout, at the root of the repo alongside other projects. +This project must contain the following minimal CMakeLists.txt:

+
add_llvm_pass_plugin(Name source0.cpp)
+
+
+

The pass must provide two entry points for the new pass manager, one for static +registration and one for dynamically loaded plugins:

+
    +
  • llvm::PassPluginLibraryInfo get##Name##PluginInfo();

  • +
  • extern "C" ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() LLVM_ATTRIBUTE_WEAK;

  • +
+

Pass plugins are compiled and linked dynamically by default, but it’s +possible to set the following variables to change this behavior:

+
    +
  • LLVM_${NAME}_LINK_INTO_TOOLS, when set to ON, turns the project into +a statically linked extension

  • +
+

When building a tool that uses the new pass manager, one can use the following snippet to +include statically linked pass plugins:

+
// fetch the declaration
+#define HANDLE_EXTENSION(Ext) llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
+#include "llvm/Support/Extension.def"
+
+[...]
+
+// use them, PB is an llvm::PassBuilder instance
+#define HANDLE_EXTENSION(Ext) get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB);
+#include "llvm/Support/Extension.def"
+
+
+
+
+

Registering dynamically loaded passes

+

Size matters when constructing production quality tools using LLVM, both for +the purposes of distribution, and for regulating the resident code size when +running on the target system. Therefore, it becomes desirable to selectively +use some passes, while omitting others and maintain the flexibility to change +configurations later on. You want to be able to do all this, and, provide +feedback to the user. This is where pass registration comes into play.

+

The fundamental mechanisms for pass registration are the +MachinePassRegistry class and subclasses of MachinePassRegistryNode.

+

An instance of MachinePassRegistry is used to maintain a list of +MachinePassRegistryNode objects. This instance maintains the list and +communicates additions and deletions to the command line interface.

+

An instance of MachinePassRegistryNode subclass is used to maintain +information provided about a particular pass. This information includes the +command line name, the command help string and the address of the function used +to create an instance of the pass. A global static constructor of one of these +instances registers with a corresponding MachinePassRegistry, the static +destructor unregisters. Thus a pass that is statically linked in the tool +will be registered at start up. A dynamically loaded pass will register on +load and unregister at unload.

+
+

Using existing registries

+

There are predefined registries to track instruction scheduling +(RegisterScheduler) and register allocation (RegisterRegAlloc) machine +passes. Here we will describe how to register a register allocator machine +pass.

+

Implement your register allocator machine pass. In your register allocator +.cpp file add the following include:

+
#include "llvm/CodeGen/RegAllocRegistry.h"
+
+
+

Also in your register allocator .cpp file, define a creator function in the +form:

+
FunctionPass *createMyRegisterAllocator() {
+  return new MyRegisterAllocator();
+}
+
+
+

Note that the signature of this function should match the type of +RegisterRegAlloc::FunctionPassCtor. In the same file add the “installing” +declaration, in the form:

+
static RegisterRegAlloc myRegAlloc("myregalloc",
+                                   "my register allocator help string",
+                                   createMyRegisterAllocator);
+
+
+

Note that the two spaces prior to the help string produce a tidy result on the +-help query.

+
$ llc -help
+  ...
+  -regalloc                    - Register allocator to use (default=linearscan)
+    =linearscan                -   linear scan register allocator
+    =local                     -   local register allocator
+    =simple                    -   simple register allocator
+    =myregalloc                -   my register allocator help string
+  ...
+
+
+

And that’s it. The user is now free to use -regalloc=myregalloc as an +option. Registering instruction schedulers is similar except use the +RegisterScheduler class. Note that the +RegisterScheduler::FunctionPassCtor is significantly different from +RegisterRegAlloc::FunctionPassCtor.

+

To force the load/linking of your register allocator into the +llc/lli tools, add your creator function’s global +declaration to Passes.h and add a “pseudo” call line to +llvm/Codegen/LinkAllCodegenComponents.h.

+
+
+

Creating new registries

+

The easiest way to get started is to clone one of the existing registries; we +recommend llvm/CodeGen/RegAllocRegistry.h. The key things to modify are +the class name and the FunctionPassCtor type.

+

Then you need to declare the registry. Example: if your pass registry is +RegisterMyPasses then define:

+
MachinePassRegistry RegisterMyPasses::Registry;
+
+
+

And finally, declare the command line option for your passes. Example:

+
cl::opt<RegisterMyPasses::FunctionPassCtor, false,
+        RegisterPassParser<RegisterMyPasses> >
+MyPassOpt("mypass",
+          cl::init(&createDefaultMyPass),
+          cl::desc("my pass option help"));
+
+
+

Here the command option is “mypass”, with createDefaultMyPass as the +default creator.

+
+
+

Using GDB with dynamically loaded passes

+

Unfortunately, using GDB with dynamically loaded passes is not as easy as it +should be. First of all, you can’t set a breakpoint in a shared object that +has not been loaded yet, and second of all there are problems with inlined +functions in shared objects. Here are some suggestions to debugging your pass +with GDB.

+

For the sake of discussion, I’m going to assume that you are debugging a +transformation invoked by opt, although nothing described here +depends on that.

+
+

Setting a breakpoint in your pass

+

First thing you do is start gdb on the opt process:

+
$ gdb opt
+GNU gdb 5.0
+Copyright 2000 Free Software Foundation, Inc.
+GDB is free software, covered by the GNU General Public License, and you are
+welcome to change it and/or distribute copies of it under certain conditions.
+Type "show copying" to see the conditions.
+There is absolutely no warranty for GDB.  Type "show warranty" for details.
+This GDB was configured as "sparc-sun-solaris2.6"...
+(gdb)
+
+
+

Note that opt has a lot of debugging information in it, so it takes +time to load. Be patient. Since we cannot set a breakpoint in our pass yet +(the shared object isn’t loaded until runtime), we must execute the process, +and have it stop before it invokes our pass, but after it has loaded the shared +object. The most foolproof way of doing this is to set a breakpoint in +PassManager::run and then run the process with the arguments you want:

+
$ (gdb) break llvm::PassManager::run
+Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70.
+(gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
+Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
+Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70
+70      bool PassManager::run(Module &M) { return PM->run(M); }
+(gdb)
+
+
+

Once opt stops in the PassManager::run method, you are now +free to set breakpoints in your pass so that you can trace through execution or +do other standard debugging stuff.

+
+
+

Miscellaneous Problems

+

Once you have the basics down, there are a couple of problems that GDB has, +some with solutions, some without.

+
    +
  • Inline functions have bogus stack information. In general, GDB does a pretty +good job getting stack traces and stepping through inline functions. When a +pass is dynamically loaded however, it somehow completely loses this +capability. The only solution I know of is to de-inline a function (move it +from the body of a class to a .cpp file).

  • +
  • Restarting the program breaks breakpoints. After following the information +above, you have succeeded in getting some breakpoints planted in your pass. +Next thing you know, you restart the program (i.e., you type “run” again), +and you start getting errors about breakpoints being unsettable. The only +way I have found to “fix” this problem is to delete the breakpoints that are +already set in your pass, run the program, and re-set the breakpoints once +execution stops in PassManager::run.

  • +
+

Hopefully these tips will help with common case debugging situations. If you’d +like to contribute some tips of your own, just contact Chris.

+
+
+
+

Future extensions planned

+

Although the LLVM Pass Infrastructure is very capable as it stands, and does +some nifty stuff, there are things we’d like to add in the future. Here is +where we are going:

+
+

Multithreaded LLVM

+

Multiple CPU machines are becoming more common and compilation can never be +fast enough: obviously we should allow for a multithreaded compiler. Because +of the semantics defined for passes above (specifically they cannot maintain +state across invocations of their run* methods), a nice clean way to +implement a multithreaded compiler would be for the PassManager class to +create multiple instances of each pass object, and allow the separate instances +to be hacking on different parts of the program at the same time.

+

This implementation would prevent each of the passes from having to implement +multithreaded constructs, requiring only the LLVM core to have locking in a few +places (for global resources). Although this is a simple extension, we simply +haven’t had time (or multiprocessor machines, thus a reason) to implement this. +Despite that, we have kept the LLVM passes SMP ready, and you should too.

+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/XRayExample.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/XRayExample.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/XRayExample.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/XRayExample.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,459 @@ + + + + + + + + + Debugging with XRay — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

Debugging with XRay

+

This document shows an example of how you would go about analyzing applications +built with XRay instrumentation. Here we will attempt to debug llc +compiling some sample LLVM IR generated by Clang.

+ +
+

Building with XRay

+

To debug an application with XRay instrumentation, we need to build it with a +Clang that supports the -fxray-instrument option. See XRay +for background information and more technical details on how XRay works.

+

In our example, we need to add -fxray-instrument to the list of flags +passed to Clang when building a binary. Note that we need to link with Clang as +well to get the XRay runtime linked in appropriately. For building llc with +XRay, we do something similar below for our LLVM build:

+
$ mkdir -p llvm-build && cd llvm-build
+# Assume that the LLVM sources are at ../llvm
+$ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_C_FLAGS_RELEASE="-fxray-instrument" -DCMAKE_CXX_FLAGS="-fxray-instrument" \
+# Once this finishes, we should build llc
+$ ninja llc
+
+
+

To verify that we have an XRay instrumented binary, we can use objdump to +look for the xray_instr_map section.

+
$ objdump -h -j xray_instr_map ./bin/llc
+./bin/llc:     file format elf64-x86-64
+
+Sections:
+Idx Name          Size      VMA               LMA               File off  Algn
+ 14 xray_instr_map 00002fc0  00000000041516c6  00000000041516c6  03d516c6  2**0
+                  CONTENTS, ALLOC, LOAD, READONLY, DATA
+
+
+
+
+

Getting Traces

+

By default, XRay does not write out the trace files or patch the application +before main starts. If we run llc it should work like a normally built +binary. If we want to get a full trace of the application’s operations (of the +functions we do end up instrumenting with XRay) then we need to enable XRay +at application start. To do this, XRay checks the XRAY_OPTIONS environment +variable.

+
# The following doesn't create an XRay trace by default.
+$ ./bin/llc input.ll
+
+# We need to set the XRAY_OPTIONS to enable some features.
+$ XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic verbosity=1" ./bin/llc input.ll
+==69819==XRay: Log file in 'xray-log.llc.m35qPB'
+
+
+

At this point we now have an XRay trace we can start analysing.

+
+
+

The llvm-xray Tool

+

Having a trace then allows us to do basic accounting of the functions that were +instrumented, and how much time we’re spending in parts of the code. To make +sense of this data, we use the llvm-xray tool which has a few subcommands +to help us understand our trace.

+

One of the things we can do is to get an accounting of the functions that have +been instrumented. We can see an example accounting with llvm-xray account:

+
$ llvm-xray account xray-log.llc.m35qPB --top=10 --sort=sum --sortorder=dsc --instr_map=./bin/llc
+Functions with latencies: 29
+   funcid      count [      min,       med,       90p,       99p,       max]       sum  function
+      187        360 [ 0.000000,  0.000001,  0.000014,  0.000032,  0.000075]  0.001596  LLLexer.cpp:446:0: llvm::LLLexer::LexIdentifier()
+       85        130 [ 0.000000,  0.000000,  0.000018,  0.000023,  0.000156]  0.000799  X86ISelDAGToDAG.cpp:1984:0: (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*)
+      138        130 [ 0.000000,  0.000000,  0.000017,  0.000155,  0.000155]  0.000774  SelectionDAGISel.cpp:2963:0: llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int)
+      188        103 [ 0.000000,  0.000000,  0.000003,  0.000123,  0.000214]  0.000737  LLParser.cpp:2692:0: llvm::LLParser::ParseValID(llvm::ValID&, llvm::LLParser::PerFunctionState*)
+       88          1 [ 0.000562,  0.000562,  0.000562,  0.000562,  0.000562]  0.000562  X86ISelLowering.cpp:83:0: llvm::X86TargetLowering::X86TargetLowering(llvm::X86TargetMachine const&, llvm::X86Subtarget const&)
+      125        102 [ 0.000001,  0.000003,  0.000010,  0.000017,  0.000049]  0.000471  Verifier.cpp:3714:0: (anonymous namespace)::Verifier::visitInstruction(llvm::Instruction&)
+       90          8 [ 0.000023,  0.000035,  0.000106,  0.000106,  0.000106]  0.000342  X86ISelLowering.cpp:3363:0: llvm::X86TargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const
+      124         32 [ 0.000003,  0.000007,  0.000016,  0.000041,  0.000041]  0.000310  Verifier.cpp:1967:0: (anonymous namespace)::Verifier::visitFunction(llvm::Function const&)
+      123          1 [ 0.000302,  0.000302,  0.000302,  0.000302,  0.000302]  0.000302  LLVMContextImpl.cpp:54:0: llvm::LLVMContextImpl::~LLVMContextImpl()
+      139         46 [ 0.000000,  0.000002,  0.000006,  0.000008,  0.000019]  0.000138  TargetLowering.cpp:506:0: llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::APInt&, llvm::APInt&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const
+
+
+

This shows us that for our input file, llc spent the most cumulative time +in the lexer (a total of 1 millisecond). If we wanted for example to work with +this data in a spreadsheet, we can output the results as CSV using the +-format=csv option to the command for further analysis.

+

If we want to get a textual representation of the raw trace we can use the +llvm-xray convert tool to get YAML output. The first few lines of that +output for an example trace would look like the following:

+
$ llvm-xray convert -f yaml --symbolize --instr_map=./bin/llc xray-log.llc.m35qPB
+---
+header:
+  version:         1
+  type:            0
+  constant-tsc:    true
+  nonstop-tsc:     true
+  cycle-frequency: 2601000000
+records:
+  - { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426023268520 }
+  - { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426023523052 }
+  - { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426029925386 }
+  - { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426030031128 }
+  - { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426046951388 }
+  - { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426047282020 }
+  - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426047857332 }
+  - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426047984152 }
+  - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048036584 }
+  - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048042292 }
+  - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048055056 }
+  - { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048067316 }
+
+
+
+
+

Controlling Fidelity

+

So far in our examples, we haven’t been getting full coverage of the functions +we have in the binary. To get that, we need to modify the compiler flags so +that we can instrument more (if not all) of the functions we have in the binary. +We have two options for doing that, and we explore both of these below.

+
+

Instruction Threshold

+

The first “blunt” way of doing this is by setting the minimum threshold for +function bodies to 1. We can do that with the +-fxray-instruction-threshold=N flag when building our binary. We rebuild +llc with this option and observe the results:

+
$ rm CMakeCache.txt
+$ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \
+    -DCMAKE_C_FLAGS_RELEASE="-fxray-instrument -fxray-instruction-threshold=1" \
+    -DCMAKE_CXX_FLAGS="-fxray-instrument -fxray-instruction-threshold=1"
+$ ninja llc
+$ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll
+==69819==XRay: Log file in 'xray-log.llc.5rqxkU'
+
+$ llvm-xray account xray-log.llc.5rqxkU --top=10 --sort=sum --sortorder=dsc --instr_map=./bin/llc
+Functions with latencies: 36652
+ funcid      count [      min,       med,       90p,       99p,       max]       sum  function
+     75          1 [ 0.672368,  0.672368,  0.672368,  0.672368,  0.672368]  0.672368  llc.cpp:271:0: main
+     78          1 [ 0.626455,  0.626455,  0.626455,  0.626455,  0.626455]  0.626455  llc.cpp:381:0: compileModule(char**, llvm::LLVMContext&)
+ 139617          1 [ 0.472618,  0.472618,  0.472618,  0.472618,  0.472618]  0.472618  LegacyPassManager.cpp:1723:0: llvm::legacy::PassManager::run(llvm::Module&)
+ 139610          1 [ 0.472618,  0.472618,  0.472618,  0.472618,  0.472618]  0.472618  LegacyPassManager.cpp:1681:0: llvm::legacy::PassManagerImpl::run(llvm::Module&)
+ 139612          1 [ 0.470948,  0.470948,  0.470948,  0.470948,  0.470948]  0.470948  LegacyPassManager.cpp:1564:0: (anonymous namespace)::MPPassManager::runOnModule(llvm::Module&)
+ 139607          2 [ 0.147345,  0.315994,  0.315994,  0.315994,  0.315994]  0.463340  LegacyPassManager.cpp:1530:0: llvm::FPPassManager::runOnModule(llvm::Module&)
+ 139605         21 [ 0.000002,  0.000002,  0.102593,  0.213336,  0.213336]  0.463331  LegacyPassManager.cpp:1491:0: llvm::FPPassManager::runOnFunction(llvm::Function&)
+ 139563      26096 [ 0.000002,  0.000002,  0.000037,  0.000063,  0.000215]  0.225708  LegacyPassManager.cpp:1083:0: llvm::PMDataManager::findAnalysisPass(void const*, bool)
+ 108055        188 [ 0.000002,  0.000120,  0.001375,  0.004523,  0.062624]  0.159279  MachineFunctionPass.cpp:38:0: llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
+  62635         22 [ 0.000041,  0.000046,  0.000050,  0.126744,  0.126744]  0.127715  X86TargetMachine.cpp:242:0: llvm::X86TargetMachine::getSubtargetImpl(llvm::Function const&) const
+
+
+
+
+

Instrumentation Attributes

+

The other way is to use configuration files for selecting which functions +should always be instrumented by the compiler. This gives us a way of ensuring +that certain functions are either always or never instrumented without having to +add the attribute to the source.

+

To use this feature, you can define one file for the functions to always +instrument, and another for functions to never instrument. The format of these +files is exactly the same as the SanitizerLists files that control similar +things for the sanitizer implementations. For example:

+
# xray-attr-list.txt
+# always instrument functions that match the following filters:
+[always]
+fun:main
+
+# never instrument functions that match the following filters:
+[never]
+fun:__cxx_*
+
+
+

Given the file above we can re-build by providing it to the +-fxray-attr-list= flag to clang. You can have multiple files, each defining +different sets of attributes, to be combined into a single list by clang.

+
+
+
+

The XRay stack tool

+

Given a trace, and optionally an instrumentation map, the llvm-xray stack +command can be used to analyze a call stack graph constructed from the function +call timeline.

+

The way to use the command is to output the top stacks by call count and time spent.

+
$ llvm-xray stack xray-log.llc.5rqxkU --instr_map=./bin/llc
+
+Unique Stacks: 3069
+Top 10 Stacks by leaf sum:
+
+Sum: 9633790
+lvl   function                                                            count              sum
+#0    main                                                                    1         58421550
+#1    compileModule(char**, llvm::LLVMContext&)                               1         51440360
+#2    llvm::legacy::PassManagerImpl::run(llvm::Module&)                       1         40535375
+#3    llvm::FPPassManager::runOnModule(llvm::Module&)                         2         39337525
+#4    llvm::FPPassManager::runOnFunction(llvm::Function&)                     6         39331465
+#5    llvm::PMDataManager::verifyPreservedAnalysis(llvm::Pass*)             399         16628590
+#6    llvm::PMTopLevelManager::findAnalysisPass(void const*)               4584         15155600
+#7    llvm::PMDataManager::findAnalysisPass(void const*, bool)            32088          9633790
+
+..etc..
+
+
+

In the default mode, identical stacks on different threads are independently +aggregated. In a multithreaded program, you may end up having identical call +stacks fill your list of top calls.

+

To address this, you may specify the --aggregate-threads or +--per-thread-stacks flags. --per-thread-stacks treats the thread id as an +implicit root in each call stack tree, while --aggregate-threads combines +identical stacks from all threads.

+
+
+

Flame Graph Generation

+

The llvm-xray stack tool may also be used to generate flamegraphs for +visualizing your instrumented invocations. The tool does not generate the graphs +themselves, but instead generates a format that can be used with Brendan Gregg’s +FlameGraph tool, currently available on github.

+

To generate output for a flamegraph, a few more options are necessary.

+
    +
  • --all-stacks - Emits all of the stacks.

  • +
  • --stack-format - Choose the flamegraph output format ‘flame’.

  • +
  • --aggregation-type - Choose the metric to graph.

  • +
+

You may pipe the command output directly to the flamegraph tool to obtain an +svg file.

+
$ llvm-xray stack xray-log.llc.5rqxkU --instr_map=./bin/llc --stack-format=flame --aggregation-type=time --all-stacks | \
+/path/to/FlameGraph/flamegraph.pl > flamegraph.svg
+
+
+

If you open the svg in a browser, mouse events allow exploring the call stacks.

+
+
+

Chrome Trace Viewer Visualization

+

We can also generate a trace which can be loaded by the Chrome Trace Viewer +from the same generated trace:

+
$ llvm-xray convert --symbolize --instr_map=./bin/llc \
+  --output-format=trace_event xray-log.llc.5rqxkU \
+    | gzip > llc-trace.txt.gz
+
+
+

From a Chrome browser, navigating to chrome://tracing allows us to load +the llc-trace.txt.gz file to visualize the execution trace.

+
+
+

Further Exploration

+

The llvm-xray tool has a few other subcommands that are in various stages +of being developed. One interesting subcommand that can highlight a few +interesting things is the graph subcommand. Given for example the following +toy program that we build with XRay instrumentation, we can see how the +generated graph may be a helpful indicator of where time is being spent for the +application.

+
// sample.cc
+#include <iostream>
+#include <thread>
+
+[[clang::xray_always_instrument]] void f() {
+  std::cerr << '.';
+}
+
+[[clang::xray_always_instrument]] void g() {
+  for (int i = 0; i < 1 << 10; ++i) {
+    std::cerr << '-';
+  }
+}
+
+int main(int argc, char* argv[]) {
+  std::thread t1([] {
+    for (int i = 0; i < 1 << 10; ++i)
+      f();
+  });
+  std::thread t2([] {
+    g();
+  });
+  t1.join();
+  t2.join();
+  std::cerr << '\n';
+}
+
+
+

We then build the above with XRay instrumentation:

+
$ clang++ -o sample -O3 sample.cc -std=c++11 -fxray-instrument -fxray-instruction-threshold=1
+$ XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic" ./sample
+
+
+

We can then explore the graph rendering of the trace generated by this sample +application. We assume you have the graphviz tools available in your system, +including both unflatten and dot. If you prefer rendering or exploring +the graph using another tool, then that should be feasible as well. llvm-xray +graph will create DOT format graphs which should be usable in most graph +rendering applications. One example invocation of the llvm-xray graph +command should yield some interesting insights to the workings of C++ +applications:

+
$ llvm-xray graph xray-log.sample.* -m sample --color-edges=sum --edge-label=sum \
+    | unflatten -f -l10 | dot -Tsvg -o sample.svg
+
+
+
+
+

Next Steps

+

If you have some interesting analyses you’d like to implement as part of the +llvm-xray tool, please feel free to propose them on the llvm-dev@ mailing list. +The following are some ideas to inspire you in getting involved and potentially +making things better.

+
+
    +
  • Implement a query/filtering library that allows for finding patterns in the +XRay traces.

  • +
  • Collecting function call stacks and how often they’re encountered in the +XRay trace.

  • +
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/XRayFDRFormat.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/XRayFDRFormat.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/XRayFDRFormat.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/XRayFDRFormat.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,708 @@ + + + + + + + + + XRay Flight Data Recorder Trace Format — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

XRay Flight Data Recorder Trace Format

+
+
Version
+

1 as of 2017-07-20

+
+
+ +
+

Introduction

+

When gathering XRay traces in Flight Data Recorder mode, each thread of an +application will claim buffers to fill with trace data, which at some point +is finalized and flushed.

+

A goal of the profiler is to minimize overhead, so the flushed data directly +corresponds to the buffer.

+

This document describes the format of a trace file.

+
+
+

General

+

Each trace file corresponds to a sequence of events in a particular thread.

+

The file has a header followed by a sequence of discriminated record types.

+

The endianness of byte fields matches the endianness of the platform which +produced the trace file.

+
+
+

Header Section

+

A trace file begins with a 32 byte header.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Field

Size (bytes)

Description

version

2

Anticipates versioned readers. This +document describes the format when +version == 1

type

2

An enumeration encoding the type of +trace. Flight Data Recorder mode +traces have type == 1

bitfield

4

Holds parameters that are not aligned +to bytes. Further described below.

cycle_frequency

8

The frequency in hertz of the CPU +oscillator used to measure duration of +events in ticks.

buffer_size

8

The size in bytes of the data portion +of the trace following the header.

reserved

8

Reserved for future use.

+

The bitfield parameter of the file header is composed of the following fields.

+ +++++ + + + + + + + + + + + + + + + + + + + + +

Field

Size (bits)

Description

constant_tsc

1

Whether the platform’s timestamp +counter used to record ticks between +events ticks at a constant frequency +despite CPU frequency changes. +0 == non-constant. 1 == constant.

nonstop_tsc

1

Whether the tsc continues to count +regardless of whether the CPU is in a low +power state. 0 == stop. 1 == non-stop.

reserved

30

Not meaningful.

+
+
+

Data Section

+

Following the header in a trace is a data section with size matching the +buffer_size field in the header.

+

The data section is a stream of elements of different types.

+

There are a few categories of data in the sequence.

+
    +
  • Function Records: Function Records contain the timing of entry into and +exit from function execution. Function Records have 8 bytes each.

  • +
  • Metadata Records: Metadata records serve many purposes. Mostly, they +capture information that may be too costly to record for each function, but +that is required to contextualize the fine-grained timings. They also are used +as markers for user-defined Event Data payloads. Metadata records have 16 +bytes each.

  • +
  • Event Data: Free form data may be associated with events that are traced +by the binary and encode data defined by a handler function. Event data is +always preceded with a marker record which indicates how large it is.

  • +
  • Function Arguments: The arguments to some functions are included in the +trace. These are either pointer addresses or primitives that are read and +logged independently of their types in a high level language. To the tracer, +they are all numbers. Function Records that have attached arguments will +indicate their presence on the function entry record. We only support logging +contiguous function argument sequences starting with argument zero, which will +be the “this” pointer for member function invocations. For example, we don’t +support logging the first and third argument.

  • +
+

A reader of the memory format must maintain a state machine. The format makes no +attempt to pad for alignment, and it is not seekable.

+
+

Function Records

+

Function Records have an 8 byte layout. This layout encodes information to +reconstruct a call stack of instrumented functions and their durations.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +

Field

Size (bits)

Description

discriminant

1

Indicates whether a reader should read a +Function or Metadata record. Set to 0 for +Function records.

action

3

Specifies whether the function is being +entered, exited, or is a non-standard entry +or exit produced by optimizations.

function_id

28

A numeric ID for the function. Resolved to a +name via the xray instrumentation map. The +instrumentation map is built by xray at +compile time into an object file and pairs +the function ids to addresses. It is used for +patching and as a lookup into the binary’s +symbols to obtain names.

tsc_delta

32

The number of ticks of the timestamp counter +since a previous record recorded a delta or +other TSC resetting event.

+

On little-endian machines, the bitfields are ordered from least significant bit +to most significant bit. A reader can read an 8 bit value and apply the mask +0x01 for the discriminant. Similarly, they can read 32 bits and unsigned +shift right by 0x04 to obtain the function_id field.

+

On big-endian machines, the bitfields are written in order from most significant +bit to least significant bit. A reader would read an 8 bit value and unsigned +shift right by 7 bits for the discriminant. The function_id field could be +obtained by reading a 32 bit value and applying the mask 0x0FFFFFFF.

+

Function action types are as follows.

+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + +

Type

Number

Description

Entry

0

Typical function entry.

Exit

1

Typical function exit.

Tail_Exit

2

An exit from a function due to tail call +optimization.

Entry_Args

3

A function entry that records arguments.

+

Entry_Args records do not contain the arguments themselves. Instead, metadata +records for each of the logged args follow the function record in the stream.

+
+
+

Metadata Records

+

Interspersed throughout the buffer are 16 byte Metadata records. For typically +instrumented binaries, they will be sparser than Function records, and they +provide a fuller picture of the binary execution state.

+

Metadata record layout is partially record dependent, but they share a common +structure.

+

The same bit field rules described for function records apply to the first byte +of MetadataRecords. Within this byte, little endian machines use lsb to msb +ordering and big endian machines use msb to lsb ordering.

+ +++++ + + + + + + + + + + + + + + + + + + + + +

Field

Size

Description

discriminant

1 bit

Indicates whether a reader should read a +Function or Metadata record. Set to 1 for +Metadata records.

record_kind

7 bits

The type of Metadata record.

data

15 bytes

A data field used differently for each record +type.

+

Here is a table of the enumerated record kinds.

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Number

Type

0

NewBuffer

1

EndOfBuffer

2

NewCPUId

3

TSCWrap

4

WallTimeMarker

5

CustomEventMarker

6

CallArgument

+
+
+

NewBuffer Records

+

Each buffer begins with a NewBuffer record immediately after the header. +It records the thread ID of the thread that the trace belongs to.

+

Its data segment is as follows.

+ +++++ + + + + + + + + + + + + + + + + +

Field

Size (bytes)

Description

thread_Id

2

Thread ID for buffer.

reserved

13

Unused.

+
+
+

WallClockTime Records

+

Following the NewBuffer record, each buffer records an absolute time as a frame +of reference for the durations recorded by timestamp counter deltas.

+

Its data segment is as follows.

+ +++++ + + + + + + + + + + + + + + + + + + + + +

Field

Size (bytes)

Description

seconds

8

Seconds on absolute timescale. The starting +point is unspecified and depends on the +implementation and platform configured by the +tracer.

microseconds

4

The microsecond component of the time.

reserved

3

Unused.

+
+
+

NewCpuId Records

+

Each function entry invokes a routine to determine what CPU is executing. +Typically, this is done with rdtscp, which reads the timestamp counter at the +same time.

+

If the tracing detects that the execution has switched CPUs or if this is the +first instrumented entry point, the tracer will output a NewCpuId record.

+

Its data segment is as follows.

+ +++++ + + + + + + + + + + + + + + + + + + + + +

Field

Size (bytes)

Description

cpu_id

2

CPU Id.

absolute_tsc

8

The absolute value of the timestamp counter.

reserved

5

Unused.

+
+
+

TSCWrap Records

+

Since each function record uses a 32 bit value to represent the number of ticks +of the timestamp counter since the last reference, it is possible for this value +to overflow, particularly for sparsely instrumented binaries.

+

When this delta would not fit into a 32 bit representation, a reference absolute +timestamp counter record is written in the form of a TSCWrap record.

+

Its data segment is as follows.

+ +++++ + + + + + + + + + + + + + + + + +

Field

Size (bytes)

Description

absolute_tsc

8

Timestamp counter value.

reserved

7

Unused.

+
+
+

CallArgument Records

+

Immediately following an Entry_Args type function record, there may be one or +more CallArgument records that contain the traced function’s parameter values.

+

The order of the CallArgument Record sequence corresponds one to one with the +order of the function parameters.

+

CallArgument data segment:

+ +++++ + + + + + + + + + + + + + + + + +

Field

Size (bytes)

Description

argument

8

Numeric argument (may be pointer address).

reserved

7

Unused.

+
+
+

CustomEventMarker Records

+

XRay provides the feature of logging custom events. This may be leveraged to +record tracing info for RPCs or similarly trace data that is application +specific.

+

Custom Events themselves are an unstructured (application defined) segment of +memory with arbitrary size within the buffer. They are preceded by +CustomEventMarkers to indicate their presence and size.

+

CustomEventMarker data segment:

+ +++++ + + + + + + + + + + + + + + + + + + + + +

Field

Size (bytes)

Description

event_size

4

Size of the event that follows this marker.

absolute_tsc

8

A timestamp counter of the event.

reserved

3

Unused.

+
+
+

EndOfBuffer Records

+

An EndOfBuffer record type indicates that there is no more trace data in this +buffer. The reader is expected to seek past the remaining buffer_size expressed +before the start of buffer and look for either another header or EOF.

+
+
+
+

Format Grammar and Invariants

+

Not all sequences of Metadata records and Function records are valid data. A +sequence should be parsed as a state machine. The expectations for a valid +format can be expressed as a context free grammar.

+

This is an attempt to explain the format with statements in EBNF format.

+
    +
  • Format := Header ThreadBuffer* EOF

  • +
  • ThreadBuffer := NewBuffer WallClockTime NewCPUId BodySequence* End

  • +
  • BodySequence := NewCPUId | TSCWrap | Function | CustomEvent

  • +
  • Function := (Function_Entry_Args CallArgument*) | Function_Other_Type

  • +
  • CustomEvent := CustomEventMarker CustomEventUnstructuredMemory

  • +
  • End := EndOfBuffer RemainingBufferSizeToSkip

  • +
+
+

Function Record Order

+

There are a few clarifications that may help understand what is expected of +Function records.

+
    +
  • Functions with an Exit are expected to have a corresponding Entry or +Entry_Args function record precede them in the trace.

  • +
  • Tail_Exit Function records record the Function ID of the function whose return +address the program counter will take. In other words, the final function that +would be popped off of the call stack if tail call optimization was not used.

  • +
  • Not all functions marked for instrumentation are necessarily in the trace. The +tracer uses heuristics to preserve the trace for non-trivial functions.

  • +
  • Not every entry must have a traced Exit or Tail Exit. The buffer may run out +of space or the program may request for the tracer to finalize to return the +buffer before an instrumented function exits.

  • +
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/XRay.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/XRay.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/XRay.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/XRay.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,511 @@ + + + + + + + + + XRay Instrumentation — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

XRay Instrumentation

+
+
Version
+

1 as of 2016-11-08

+
+
+ +
+

Introduction

+

XRay is a function call tracing system which combines compiler-inserted +instrumentation points and a runtime library that can dynamically enable and +disable the instrumentation.

+

More high level information about XRay can be found in the XRay whitepaper.

+

This document describes how to use XRay as implemented in LLVM.

+
+
+

XRay in LLVM

+

XRay consists of three main parts:

+
    +
  • Compiler-inserted instrumentation points.

  • +
  • A runtime library for enabling/disabling tracing at runtime.

  • +
  • A suite of tools for analysing the traces.

    +

    NOTE: As of July 25, 2018, XRay is only available for the following +architectures running Linux: x86_64, arm7 (no thumb), aarch64, powerpc64le, +mips, mipsel, mips64, mips64el, NetBSD: x86_64, FreeBSD: x86_64 and +OpenBSD: x86_64.

    +
  • +
+

The compiler-inserted instrumentation points come in the form of nop-sleds in +the final generated binary, and an ELF section named xray_instr_map which +contains entries pointing to these instrumentation points. The runtime library +relies on being able to access the entries of the xray_instr_map, and +overwrite the instrumentation points at runtime.

+
+
+

Using XRay

+

You can use XRay in a couple of ways:

+
    +
  • Instrumenting your C/C++/Objective-C/Objective-C++ application.

  • +
  • Generating LLVM IR with the correct function attributes.

  • +
+

The rest of this section covers these main ways and later on how to customize +what XRay does in an XRay-instrumented binary.

+
+

Instrumenting your C/C++/Objective-C Application

+

The easiest way of getting XRay instrumentation for your application is by +enabling the -fxray-instrument flag in your clang invocation.

+

For example:

+
clang -fxray-instrument ...
+
+
+

By default, functions that have at least 200 instructions (or contain a loop) will +get XRay instrumentation points. You can tweak that number through the +-fxray-instruction-threshold= flag:

+
clang -fxray-instrument -fxray-instruction-threshold=1 ...
+
+
+

The loop detection can be disabled with -fxray-ignore-loops to use only the +instruction threshold. You can also specifically mark functions in your +binary to always or never be instrumented using source-level attributes. +You can do it using the GCC-style attributes or C++11-style attributes.

+
[[clang::xray_always_instrument]] void always_instrumented();
+
+[[clang::xray_never_instrument]] void never_instrumented();
+
+void alt_always_instrumented() __attribute__((xray_always_instrument));
+
+void alt_never_instrumented() __attribute__((xray_never_instrument));
+
+
+

When linking a binary, you can either manually link in the XRay Runtime +Library or use clang to link it in automatically with the +-fxray-instrument flag. Alternatively, you can statically link-in the XRay +runtime library from compiler-rt – those archive files will take the name of +libclang_rt.xray-{arch} where {arch} is the mnemonic supported by clang +(x86_64, arm7, etc.).

+
+
+

LLVM Function Attribute

+

If you’re using LLVM IR directly, you can add the function-instrument +string attribute to your functions, to get the similar effect that the +C/C++/Objective-C source-level attributes would get:

+
define i32 @always_instrument() uwtable "function-instrument"="xray-always" {
+  ; ...
+}
+
+define i32 @never_instrument() uwtable "function-instrument"="xray-never" {
+  ; ...
+}
+
+
+

You can also set the xray-instruction-threshold attribute and provide a +numeric string value for how many instructions should be in the function before +it gets instrumented.

+
define i32 @maybe_instrument() uwtable "xray-instruction-threshold"="2" {
+  ; ...
+}
+
+
+
+
+

Special Case File

+

Attributes can be imbued through the use of special case files instead of +adding them to the original source files. You can use this to mark certain +functions and classes to be never, always, or instrumented with first-argument +logging from a file. The file’s format is described below:

+
# Comments are supported
+[always]
+fun:always_instrument
+fun:log_arg1=arg1 # Log the first argument for the function
+
+[never]
+fun:never_instrument
+
+
+

These files can be provided through the -fxray-attr-list= flag to clang. +You may have multiple files loaded through multiple instances of the flag.

+
+
+

XRay Runtime Library

+

The XRay Runtime Library is part of the compiler-rt project, which implements +the runtime components that perform the patching and unpatching of inserted +instrumentation points. When you use clang to link your binaries with the +-fxray-instrument flag, it will automatically link in the XRay runtime.

+

The default implementation of the XRay runtime will enable XRay instrumentation +before main starts, which works for applications that have a short +lifetime. This implementation also records all function entry and exit events +which may result in a lot of records in the resulting trace.

+

Also by default the filename of the XRay trace is xray-log.XXXXXX where the +XXXXXX part is randomly generated.

+

These options can be controlled through the XRAY_OPTIONS environment +variable; we list the options and their defaults below.

+ ++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Option

Type

Default

Description

patch_premain

bool

false

Whether to patch +instrumentation points +before main.

xray_mode

const char*

""

Default mode to +install and initialize +before main.

xray_logfile_base

const char*

xray-log.

Filename base for the +XRay logfile.

verbosity

int

0

Runtime verbosity +level.

+

If you choose to not use the default logging implementation that comes with the +XRay runtime and/or control when/how the XRay instrumentation runs, you may use +the XRay APIs directly for doing so. To do this, you’ll need to include the +xray_log_interface.h from the compiler-rt xray directory. The important API +functions we list below:

+
    +
  • __xray_log_register_mode(...): Register a logging implementation against +a string Mode identifier. The implementation is an instance of +XRayLogImpl defined in xray/xray_log_interface.h.

  • +
  • __xray_log_select_mode(...): Select the mode to install, associated with +a string Mode identifier. Only implementations registered with +__xray_log_register_mode(...) can be chosen with this function.

  • +
  • __xray_log_init_mode(...): This function allows for initializing and +re-initializing an installed logging implementation. See +xray/xray_log_interface.h for details, part of the XRay compiler-rt +installation.

  • +
+

Once a logging implementation has been initialized, it can be “stopped” by +finalizing the implementation through the __xray_log_finalize() function. +The finalization routine is the opposite of the initialization. When finalized, +an implementation’s data can be cleared out through the +__xray_log_flushLog() function. For implementations that support in-memory +processing, these should register an iterator function to provide access to the +data via the __xray_log_set_buffer_iterator(...) which allows code calling +the __xray_log_process_buffers(...) function to deal with the data in +memory.

+

All of this is better explained in the xray/xray_log_interface.h header.

+
+
+

Basic Mode

+

XRay supports a basic logging mode which will trace the application’s +execution, and periodically append to a single log. This mode can be +installed/enabled by setting xray_mode=xray-basic in the XRAY_OPTIONS +environment variable. Combined with patch_premain=true this can allow for +tracing applications from start to end.

+

Like all the other modes installed through __xray_log_select_mode(...), the +implementation can be configured through the __xray_log_init_mode(...) +function, providing the mode string and the flag options. Basic-mode specific +defaults can be provided in the XRAY_BASIC_OPTIONS environment variable.

+
+
+

Flight Data Recorder Mode

+

XRay supports a logging mode which allows the application to only capture a +fixed amount of memory’s worth of events. Flight Data Recorder (FDR) mode works +very much like a plane’s “black box” which keeps recording data to memory in a +fixed-size circular queue of buffers, and has the data available +programmatically until the buffers are finalized and flushed. To use FDR mode +on your application, you may set the xray_mode variable to xray-fdr in +the XRAY_OPTIONS environment variable. Additional options to the FDR mode +implementation can be provided in the XRAY_FDR_OPTIONS environment +variable. Programmatic configuration can be done by calling +__xray_log_init_mode("xray-fdr", <configuration string>) once it has been +selected/installed.

+

When the buffers are flushed to disk, the result is a binary trace format +described by XRay FDR format.

+

When FDR mode is on, it will keep writing and recycling memory buffers until +the logging implementation is finalized – at which point it can be flushed and +re-initialised later. To do this programmatically, we follow the workflow +provided below:

+
// Patch the sleds, if we haven't yet.
+auto patch_status = __xray_patch();
+
+// Maybe handle the patch_status errors.
+
+// When we want to flush the log, we need to finalize it first, to give
+// threads a chance to return buffers to the queue.
+auto finalize_status = __xray_log_finalize();
+if (finalize_status != XRAY_LOG_FINALIZED) {
+  // maybe retry, or bail out.
+}
+
+// At this point, we are sure that the log is finalized, so we may try
+// flushing the log.
+auto flush_status = __xray_log_flushLog();
+if (flush_status != XRAY_LOG_FLUSHED) {
+  // maybe retry, or bail out.
+}
+
+
+

The default settings for the FDR mode implementation will create logs named +similarly to the basic log implementation, but will have a different log +format. All the trace analysis tools (and the trace reading library) will +support all versions of the FDR mode format as we add more functionality and +record types in the future.

+
+

NOTE: We do not promise perpetual support for when we update the log +versions we support going forward. Deprecation of the formats will be +announced and discussed on the developers mailing list.

+
+
+
+

Trace Analysis Tools

+

We currently have the beginnings of a trace analysis tool in LLVM, which can be +found in the tools/llvm-xray directory. The llvm-xray tool currently +supports the following subcommands:

+
    +
  • extract: Extract the instrumentation map from a binary, and return it as +YAML.

  • +
  • account: Performs basic function call accounting statistics with various +options for sorting, and output formats (supports CSV, YAML, and +console-friendly TEXT).

  • +
  • convert: Converts an XRay log file from one format to another. We can +convert from binary XRay traces (both basic and FDR mode) to YAML, +flame-graph friendly text +formats, as well as Chrome Trace Viewer (catapult) +<https://github.com/catapult-project/catapult> formats.

  • +
  • graph: Generates a DOT graph of the function call relationships between +functions found in an XRay trace.

  • +
  • stack: Reconstructs function call stacks from a timeline of function +calls in an XRay trace.

  • +
+

These subcommands use various library components found as part of the XRay +libraries, distributed with the LLVM distribution. These are:

+
    +
  • llvm/XRay/Trace.h : A trace reading library for conveniently loading +an XRay trace of supported forms, into a convenient in-memory representation. +All the analysis tools that deal with traces use this implementation.

  • +
  • llvm/XRay/Graph.h : A semi-generic graph type used by the graph +subcommand to conveniently represent a function call graph with statistics +associated with edges and vertices.

  • +
  • llvm/XRay/InstrumentationMap.h: A convenient tool for analyzing the +instrumentation map in XRay-instrumented object files and binaries. The +extract and stack subcommands use this particular library.

  • +
+
+
+

Minimizing Binary Size

+

XRay supports several different instrumentation points including function-entry, +function-exit, custom, and typed points. These can be enabled individually +using the -fxray-instrumentation-bundle= flag. For example if you only wanted to +instrument function entry and custom points you could specify:

+
clang -fxray-instrument -fxray-instrumentation-bundle=function-entry,custom ...
+
+
+

This will omit the other sled types entirely, reducing the binary size. You can also +instrument just a sampled subset of functions using instrumentation groups. +For example, to instrument only a quarter of available functions invoke:

+
clang -fxray-instrument -fxray-function-groups=4
+
+
+

A subset will be chosen arbitrarily based on a hash of the function name. To sample a +different subset you can specify -fxray-selected-function-group= with a group number +in the range of 0 to xray-function-groups - 1. Together these options could be used +to produce multiple binaries with different instrumented subsets. If all you need is +runtime control over which functions are being traced at any given time it is better +to selectively patch and unpatch the individual functions you need using the XRay +Runtime Library’s __xray_patch_function() method.

+
+
+
+

Future Work

+

There are a number of ongoing efforts for expanding the toolset building around +the XRay instrumentation system.

+
+

Trace Analysis Tools

+
    +
  • Work is in progress to integrate with or develop tools to visualize findings +from an XRay trace. Particularly, the stack tool is being expanded to +output formats that allow graphing and exploring the duration of time in each +call stack.

  • +
  • With a large instrumented binary, the size of generated XRay traces can +quickly become unwieldy. We are working on integrating pruning techniques and +heuristics for the analysis tools to sift through the traces and surface only +relevant information.

  • +
+
+
+

More Platforms

+

We’re looking forward to contributions to port XRay to more architectures and +operating systems.

+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/yaml2obj.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/yaml2obj.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/yaml2obj.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/yaml2obj.html 2021-09-19 16:16:42.000000000 +0000 @@ -0,0 +1,352 @@ + + + + + + + + + yaml2obj — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

yaml2obj

+

yaml2obj takes a YAML description of an object file and converts it to a binary +file.

+
+

$ yaml2obj input-file

+
+

Outputs the binary to stdout.

+
+

COFF Syntax

+

Here’s a sample COFF file.

+
header:
+  Machine: IMAGE_FILE_MACHINE_I386 # (0x14C)
+
+sections:
+  - Name: .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE
+                     , IMAGE_SCN_ALIGN_16BYTES
+                     , IMAGE_SCN_MEM_EXECUTE
+                     , IMAGE_SCN_MEM_READ
+                     ] # 0x60500020
+    SectionData:
+      "\x83\xEC\x0C\xC7\x44\x24\x08\x00\x00\x00\x00\xC7\x04\x24\x00\x00\x00\x00\xE8\x00\x00\x00\x00\xE8\x00\x00\x00\x00\x8B\x44\x24\x08\x83\xC4\x0C\xC3" # |....D$.......$...............D$.....|
+
+symbols:
+  - Name: .text
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_STATIC # (3)
+    NumberOfAuxSymbols: 1
+    AuxiliaryData:
+      "\x24\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00" # |$.................|
+
+  - Name: _main
+    Value: 0
+    SectionNumber: 1
+    SimpleType: IMAGE_SYM_TYPE_NULL # (0)
+    ComplexType: IMAGE_SYM_DTYPE_NULL # (0)
+    StorageClass: IMAGE_SYM_CLASS_EXTERNAL # (2)
+
+
+

Here’s a simplified Kwalify schema with an extension to allow alternate types.

+
type: map
+  mapping:
+    header:
+      type: map
+      mapping:
+        Machine: [ {type: str, enum:
+                               [ IMAGE_FILE_MACHINE_UNKNOWN
+                               , IMAGE_FILE_MACHINE_AM33
+                               , IMAGE_FILE_MACHINE_AMD64
+                               , IMAGE_FILE_MACHINE_ARM
+                               , IMAGE_FILE_MACHINE_ARMNT
+                               , IMAGE_FILE_MACHINE_ARM64
+                               , IMAGE_FILE_MACHINE_EBC
+                               , IMAGE_FILE_MACHINE_I386
+                               , IMAGE_FILE_MACHINE_IA64
+                               , IMAGE_FILE_MACHINE_M32R
+                               , IMAGE_FILE_MACHINE_MIPS16
+                               , IMAGE_FILE_MACHINE_MIPSFPU
+                               , IMAGE_FILE_MACHINE_MIPSFPU16
+                               , IMAGE_FILE_MACHINE_POWERPC
+                               , IMAGE_FILE_MACHINE_POWERPCFP
+                               , IMAGE_FILE_MACHINE_R4000
+                               , IMAGE_FILE_MACHINE_SH3
+                               , IMAGE_FILE_MACHINE_SH3DSP
+                               , IMAGE_FILE_MACHINE_SH4
+                               , IMAGE_FILE_MACHINE_SH5
+                               , IMAGE_FILE_MACHINE_THUMB
+                               , IMAGE_FILE_MACHINE_WCEMIPSV2
+                               ]}
+                 , {type: int}
+                 ]
+        Characteristics:
+          - type: seq
+            sequence:
+              - type: str
+                enum: [ IMAGE_FILE_RELOCS_STRIPPED
+                      , IMAGE_FILE_EXECUTABLE_IMAGE
+                      , IMAGE_FILE_LINE_NUMS_STRIPPED
+                      , IMAGE_FILE_LOCAL_SYMS_STRIPPED
+                      , IMAGE_FILE_AGGRESSIVE_WS_TRIM
+                      , IMAGE_FILE_LARGE_ADDRESS_AWARE
+                      , IMAGE_FILE_BYTES_REVERSED_LO
+                      , IMAGE_FILE_32BIT_MACHINE
+                      , IMAGE_FILE_DEBUG_STRIPPED
+                      , IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP
+                      , IMAGE_FILE_NET_RUN_FROM_SWAP
+                      , IMAGE_FILE_SYSTEM
+                      , IMAGE_FILE_DLL
+                      , IMAGE_FILE_UP_SYSTEM_ONLY
+                      , IMAGE_FILE_BYTES_REVERSED_HI
+                      ]
+          - type: int
+    sections:
+      type: seq
+      sequence:
+        - type: map
+          mapping:
+            Name: {type: str}
+            Characteristics:
+              - type: seq
+                sequence:
+                  - type: str
+                    enum: [ IMAGE_SCN_TYPE_NO_PAD
+                          , IMAGE_SCN_CNT_CODE
+                          , IMAGE_SCN_CNT_INITIALIZED_DATA
+                          , IMAGE_SCN_CNT_UNINITIALIZED_DATA
+                          , IMAGE_SCN_LNK_OTHER
+                          , IMAGE_SCN_LNK_INFO
+                          , IMAGE_SCN_LNK_REMOVE
+                          , IMAGE_SCN_LNK_COMDAT
+                          , IMAGE_SCN_GPREL
+                          , IMAGE_SCN_MEM_PURGEABLE
+                          , IMAGE_SCN_MEM_16BIT
+                          , IMAGE_SCN_MEM_LOCKED
+                          , IMAGE_SCN_MEM_PRELOAD
+                          , IMAGE_SCN_ALIGN_1BYTES
+                          , IMAGE_SCN_ALIGN_2BYTES
+                          , IMAGE_SCN_ALIGN_4BYTES
+                          , IMAGE_SCN_ALIGN_8BYTES
+                          , IMAGE_SCN_ALIGN_16BYTES
+                          , IMAGE_SCN_ALIGN_32BYTES
+                          , IMAGE_SCN_ALIGN_64BYTES
+                          , IMAGE_SCN_ALIGN_128BYTES
+                          , IMAGE_SCN_ALIGN_256BYTES
+                          , IMAGE_SCN_ALIGN_512BYTES
+                          , IMAGE_SCN_ALIGN_1024BYTES
+                          , IMAGE_SCN_ALIGN_2048BYTES
+                          , IMAGE_SCN_ALIGN_4096BYTES
+                          , IMAGE_SCN_ALIGN_8192BYTES
+                          , IMAGE_SCN_LNK_NRELOC_OVFL
+                          , IMAGE_SCN_MEM_DISCARDABLE
+                          , IMAGE_SCN_MEM_NOT_CACHED
+                          , IMAGE_SCN_MEM_NOT_PAGED
+                          , IMAGE_SCN_MEM_SHARED
+                          , IMAGE_SCN_MEM_EXECUTE
+                          , IMAGE_SCN_MEM_READ
+                          , IMAGE_SCN_MEM_WRITE
+                          ]
+              - type: int
+            SectionData: {type: str}
+    symbols:
+      type: seq
+      sequence:
+        - type: map
+          mapping:
+            Name: {type: str}
+            Value: {type: int}
+            SectionNumber: {type: int}
+            SimpleType: [ {type: str, enum: [ IMAGE_SYM_TYPE_NULL
+                                            , IMAGE_SYM_TYPE_VOID
+                                            , IMAGE_SYM_TYPE_CHAR
+                                            , IMAGE_SYM_TYPE_SHORT
+                                            , IMAGE_SYM_TYPE_INT
+                                            , IMAGE_SYM_TYPE_LONG
+                                            , IMAGE_SYM_TYPE_FLOAT
+                                            , IMAGE_SYM_TYPE_DOUBLE
+                                            , IMAGE_SYM_TYPE_STRUCT
+                                            , IMAGE_SYM_TYPE_UNION
+                                            , IMAGE_SYM_TYPE_ENUM
+                                            , IMAGE_SYM_TYPE_MOE
+                                            , IMAGE_SYM_TYPE_BYTE
+                                            , IMAGE_SYM_TYPE_WORD
+                                            , IMAGE_SYM_TYPE_UINT
+                                            , IMAGE_SYM_TYPE_DWORD
+                                            ]}
+                        , {type: int}
+                        ]
+            ComplexType: [ {type: str, enum: [ IMAGE_SYM_DTYPE_NULL
+                                             , IMAGE_SYM_DTYPE_POINTER
+                                             , IMAGE_SYM_DTYPE_FUNCTION
+                                             , IMAGE_SYM_DTYPE_ARRAY
+                                             ]}
+                         , {type: int}
+                         ]
+            StorageClass: [ {type: str, enum:
+                                        [ IMAGE_SYM_CLASS_END_OF_FUNCTION
+                                        , IMAGE_SYM_CLASS_NULL
+                                        , IMAGE_SYM_CLASS_AUTOMATIC
+                                        , IMAGE_SYM_CLASS_EXTERNAL
+                                        , IMAGE_SYM_CLASS_STATIC
+                                        , IMAGE_SYM_CLASS_REGISTER
+                                        , IMAGE_SYM_CLASS_EXTERNAL_DEF
+                                        , IMAGE_SYM_CLASS_LABEL
+                                        , IMAGE_SYM_CLASS_UNDEFINED_LABEL
+                                        , IMAGE_SYM_CLASS_MEMBER_OF_STRUCT
+                                        , IMAGE_SYM_CLASS_ARGUMENT
+                                        , IMAGE_SYM_CLASS_STRUCT_TAG
+                                        , IMAGE_SYM_CLASS_MEMBER_OF_UNION
+                                        , IMAGE_SYM_CLASS_UNION_TAG
+                                        , IMAGE_SYM_CLASS_TYPE_DEFINITION
+                                        , IMAGE_SYM_CLASS_UNDEFINED_STATIC
+                                        , IMAGE_SYM_CLASS_ENUM_TAG
+                                        , IMAGE_SYM_CLASS_MEMBER_OF_ENUM
+                                        , IMAGE_SYM_CLASS_REGISTER_PARAM
+                                        , IMAGE_SYM_CLASS_BIT_FIELD
+                                        , IMAGE_SYM_CLASS_BLOCK
+                                        , IMAGE_SYM_CLASS_FUNCTION
+                                        , IMAGE_SYM_CLASS_END_OF_STRUCT
+                                        , IMAGE_SYM_CLASS_FILE
+                                        , IMAGE_SYM_CLASS_SECTION
+                                        , IMAGE_SYM_CLASS_WEAK_EXTERNAL
+                                        , IMAGE_SYM_CLASS_CLR_TOKEN
+                                        ]}
+                          , {type: int}
+                          ]
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/YamlIO.html llvm-toolchain-13-13.0.0/llvm/docs/_build/html/YamlIO.html --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/html/YamlIO.html 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/html/YamlIO.html 2021-09-19 16:16:41.000000000 +0000 @@ -0,0 +1,1077 @@ + + + + + + + + + YAML I/O — LLVM 13 documentation + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +
+

YAML I/O

+ +
+

Introduction to YAML

+

YAML is a human readable data serialization language. The full YAML language +spec can be read at yaml.org. The simplest form of +yaml is just “scalars”, “mappings”, and “sequences”. A scalar is any number +or string. The pound/hash symbol (#) begins a comment line. A mapping is +a set of key-value pairs where the key ends with a colon. For example:

+
# a mapping
+name:      Tom
+hat-size:  7
+
+
+

A sequence is a list of items where each item starts with a leading dash (‘-’). +For example:

+
# a sequence
+- x86
+- x86_64
+- PowerPC
+
+
+

You can combine mappings and sequences by indenting. For example a sequence +of mappings in which one of the mapping values is itself a sequence:

+
# a sequence of mappings with one key's value being a sequence
+- name:      Tom
+  cpus:
+   - x86
+   - x86_64
+- name:      Bob
+  cpus:
+   - x86
+- name:      Dan
+  cpus:
+   - PowerPC
+   - x86
+
+
+

Sometimes sequences are known to be short and the one entry per line is too +verbose, so YAML offers an alternate syntax for sequences called a “Flow +Sequence” in which you put comma separated sequence elements into square +brackets. The above example could then be simplified to:

+
# a sequence of mappings with one key's value being a flow sequence
+- name:      Tom
+  cpus:      [ x86, x86_64 ]
+- name:      Bob
+  cpus:      [ x86 ]
+- name:      Dan
+  cpus:      [ PowerPC, x86 ]
+
+
+
+
+

Introduction to YAML I/O

+

The use of indenting makes the YAML easy for a human to read and understand, +but having a program read and write YAML involves a lot of tedious details. +The YAML I/O library structures and simplifies reading and writing YAML +documents.

+

YAML I/O assumes you have some “native” data structures which you want to be +able to dump as YAML and recreate from YAML. The first step is to try +writing example YAML for your data structures. You may find after looking at +possible YAML representations that a direct mapping of your data structures +to YAML is not very readable. Often the fields are not in the order that +a human would find readable. Or the same information is replicated in multiple +locations, making it hard for a human to write such YAML correctly.

+

In relational database theory there is a design step called normalization in +which you reorganize fields and tables. The same considerations need to +go into the design of your YAML encoding. But, you may not want to change +your existing native data structures. Therefore, when writing out YAML +there may be a normalization step, and when reading YAML there would be a +corresponding denormalization step.

+

YAML I/O uses a non-invasive, traits based design. YAML I/O defines some +abstract base templates. You specialize those templates on your data types. +For instance, if you have an enumerated type FooBar you could specialize +ScalarEnumerationTraits on that type and define the enumeration() method:

+
using llvm::yaml::ScalarEnumerationTraits;
+using llvm::yaml::IO;
+
+template <>
+struct ScalarEnumerationTraits<FooBar> {
+  static void enumeration(IO &io, FooBar &value) {
+  ...
+  }
+};
+
+
+

As with all YAML I/O template specializations, the ScalarEnumerationTraits is used for +both reading and writing YAML. That is, the mapping between in-memory enum +values and the YAML string representation is only in one place. +This assures that the code for writing and parsing of YAML stays in sync.

+

To specify a YAML mapping, you define a specialization on +llvm::yaml::MappingTraits. +If your native data structure happens to be a struct that is already normalized, +then the specialization is simple. For example:

+
using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+template <>
+struct MappingTraits<Person> {
+  static void mapping(IO &io, Person &info) {
+    io.mapRequired("name",         info.name);
+    io.mapOptional("hat-size",     info.hatSize);
+  }
+};
+
+
+

A YAML sequence is automatically inferred if your data type has begin()/end() +iterators and a push_back() method. Therefore any of the STL containers +(such as std::vector<>) will automatically translate to YAML sequences.

+

Once you have defined specializations for your data types, you can +programmatically use YAML I/O to write a YAML document:

+
using llvm::yaml::Output;
+
+Person tom;
+tom.name = "Tom";
+tom.hatSize = 8;
+Person dan;
+dan.name = "Dan";
+dan.hatSize = 7;
+std::vector<Person> persons;
+persons.push_back(tom);
+persons.push_back(dan);
+
+Output yout(llvm::outs());
+yout << persons;
+
+
+

This would write the following:

+
- name:      Tom
+  hat-size:  8
+- name:      Dan
+  hat-size:  7
+
+
+

And you can also read such YAML documents with the following code:

+
using llvm::yaml::Input;
+
+typedef std::vector<Person> PersonList;
+std::vector<PersonList> docs;
+
+Input yin(document.getBuffer());
+yin >> docs;
+
+if ( yin.error() )
+  return;
+
+// Process read document
+for ( PersonList &pl : docs ) {
+  for ( Person &person : pl ) {
+    cout << "name=" << person.name;
+  }
+}
+
+
+

One other feature of YAML is the ability to define multiple documents in a +single file. That is why reading YAML produces a vector of your document type.

+
+
+

Error Handling

+

When parsing a YAML document, if the input does not match your schema (as +expressed in your XxxTraits<> specializations), YAML I/O +will print out an error message and your Input object’s error() method will +return true. For instance the following document:

+
- name:      Tom
+  shoe-size: 12
+- name:      Dan
+  hat-size:  7
+
+
+

Has a key (shoe-size) that is not defined in the schema. YAML I/O will +automatically generate this error:

+
YAML:2:2: error: unknown key 'shoe-size'
+  shoe-size:       12
+  ^~~~~~~~~
+
+
+

Similar errors are produced for other input not conforming to the schema.

+
+
+

Scalars

+

YAML scalars are just strings (i.e. not a sequence or mapping). The YAML I/O +library provides support for translating between YAML scalars and specific +C++ types.

+
+

Built-in types

+

The following types have built-in support in YAML I/O:

+
    +
  • bool

  • +
  • float

  • +
  • double

  • +
  • StringRef

  • +
  • std::string

  • +
  • int64_t

  • +
  • int32_t

  • +
  • int16_t

  • +
  • int8_t

  • +
  • uint64_t

  • +
  • uint32_t

  • +
  • uint16_t

  • +
  • uint8_t

  • +
+

That is, you can use those types in fields of MappingTraits or as element type +in sequence. When reading, YAML I/O will validate that the string found +is convertible to that type and error out if not.

+
+
+

Unique types

+

Given that YAML I/O is trait based, the selection of how to convert your data +to YAML is based on the type of your data. But in C++ type matching, typedefs +do not generate unique type names. That means if you have two typedefs of +unsigned int, to YAML I/O both types look exactly like unsigned int. To +facilitate making unique type names, YAML I/O provides a macro which is used +like a typedef on built-in types, but expands to create a class with conversion +operators to and from the base type. For example:

+
LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyFooFlags)
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyBarFlags)
+
+
+

This generates two classes MyFooFlags and MyBarFlags which you can use in your +native data structures instead of uint32_t. They are implicitly +converted to and from uint32_t. The point of creating these unique types +is that you can now specify traits on them to get different YAML conversions.

+
+
+

Hex types

+

An example use of a unique type is that YAML I/O provides fixed sized unsigned +integers that are written with YAML I/O as hexadecimal instead of the decimal +format used by the built-in integer types:

+
    +
  • Hex64

  • +
  • Hex32

  • +
  • Hex16

  • +
  • Hex8

  • +
+

You can use llvm::yaml::Hex32 instead of uint32_t and the only difference will +be that when YAML I/O writes out that type it will be formatted in hexadecimal.

+
+
+

ScalarEnumerationTraits

+

YAML I/O supports translating between in-memory enumerations and a set of string +values in YAML documents. This is done by specializing ScalarEnumerationTraits<> +on your enumeration type and defining an enumeration() method. +For instance, suppose you had an enumeration of CPUs and a struct with it as +a field:

+
enum CPUs {
+  cpu_x86_64  = 5,
+  cpu_x86     = 7,
+  cpu_PowerPC = 8
+};
+
+struct Info {
+  CPUs      cpu;
+  uint32_t  flags;
+};
+
+
+

To support reading and writing of this enumeration, you can define a +ScalarEnumerationTraits specialization on CPUs, which can then be used +as a field type:

+
using llvm::yaml::ScalarEnumerationTraits;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+template <>
+struct ScalarEnumerationTraits<CPUs> {
+  static void enumeration(IO &io, CPUs &value) {
+    io.enumCase(value, "x86_64",  cpu_x86_64);
+    io.enumCase(value, "x86",     cpu_x86);
+    io.enumCase(value, "PowerPC", cpu_PowerPC);
+  }
+};
+
+template <>
+struct MappingTraits<Info> {
+  static void mapping(IO &io, Info &info) {
+    io.mapRequired("cpu",       info.cpu);
+    io.mapOptional("flags",     info.flags, 0);
+  }
+};
+
+
+

When reading YAML, if the string found does not match any of the strings +specified by enumCase() methods, an error is automatically generated. +When writing YAML, if the value being written does not match any of the values +specified by the enumCase() methods, a runtime assertion is triggered.

+
+
+

BitValue

+

Another common data structure in C++ is a field where each bit has a unique +meaning. This is often used in a “flags” field. YAML I/O has support for +converting such fields to a flow sequence. For instance suppose you +had the following bit flags defined:

+
enum {
+  flagsPointy = 1,
+  flagsHollow = 2,
+  flagsFlat   = 4,
+  flagsRound  = 8
+};
+
+LLVM_YAML_STRONG_TYPEDEF(uint32_t, MyFlags)
+
+
+

To support reading and writing of MyFlags, you specialize ScalarBitSetTraits<> +on MyFlags and provide the bit values and their names.

+
using llvm::yaml::ScalarBitSetTraits;
+using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+template <>
+struct ScalarBitSetTraits<MyFlags> {
+  static void bitset(IO &io, MyFlags &value) {
+    io.bitSetCase(value, "hollow",  flagsHollow);
+    io.bitSetCase(value, "flat",    flagsFlat);
+    io.bitSetCase(value, "round",   flagsRound);
+    io.bitSetCase(value, "pointy",  flagsPointy);
+  }
+};
+
+struct Info {
+  StringRef   name;
+  MyFlags     flags;
+};
+
+template <>
+struct MappingTraits<Info> {
+  static void mapping(IO &io, Info& info) {
+    io.mapRequired("name",  info.name);
+    io.mapRequired("flags", info.flags);
+   }
+};
+
+
+

With the above, YAML I/O (when writing) will test the mask of each value in the +bitset trait against the flags field, and each that matches will +cause the corresponding string to be added to the flow sequence. The opposite +is done when reading, and any unknown string values will result in an error. With +the above schema, a sample valid YAML document is:

+
name:    Tom
+flags:   [ pointy, flat ]
+
+
+

Sometimes a “flags” field might contain an enumeration part +defined by a bit-mask.

+
enum {
+  flagsFeatureA = 1,
+  flagsFeatureB = 2,
+  flagsFeatureC = 4,
+
+  flagsCPUMask = 24,
+
+  flagsCPU1 = 8,
+  flagsCPU2 = 16
+};
+
+
+

To support reading and writing such fields, you need to use the maskedBitSetCase() +method and provide the bit values, their names and the enumeration mask.

+
template <>
+struct ScalarBitSetTraits<MyFlags> {
+  static void bitset(IO &io, MyFlags &value) {
+    io.bitSetCase(value, "featureA",  flagsFeatureA);
+    io.bitSetCase(value, "featureB",  flagsFeatureB);
+    io.bitSetCase(value, "featureC",  flagsFeatureC);
+    io.maskedBitSetCase(value, "CPU1",  flagsCPU1, flagsCPUMask);
+    io.maskedBitSetCase(value, "CPU2",  flagsCPU2, flagsCPUMask);
+  }
+};
+
+
+

YAML I/O (when writing) will apply the enumeration mask to the flags field, +and compare the result and values from the bitset. As in case of a regular +bitset, each that matches will cause the corresponding string to be added +to the flow sequence.

+
+
+

Custom Scalar

+

Sometimes for readability a scalar needs to be formatted in a custom way. For +instance your internal data structure may use an integer for time (seconds since +some epoch), but in YAML it would be much nicer to express that integer in +some time format (e.g. 4-May-2012 10:30pm). YAML I/O has a way to support +custom formatting and parsing of scalar types by specializing ScalarTraits<> on +your data type. When writing, YAML I/O will provide the native type and +your specialization must create a temporary llvm::StringRef. When reading, +YAML I/O will provide an llvm::StringRef of scalar and your specialization +must convert that to your native data type. An outline of a custom scalar type +looks like:

+
using llvm::yaml::ScalarTraits;
+using llvm::yaml::IO;
+
+template <>
+struct ScalarTraits<MyCustomType> {
+  static void output(const MyCustomType &value, void*,
+                     llvm::raw_ostream &out) {
+    out << value;  // do custom formatting here
+  }
+  static StringRef input(StringRef scalar, void*, MyCustomType &value) {
+    // do custom parsing here.  Return the empty string on success,
+    // or an error message on failure.
+    return StringRef();
+  }
+  // Determine if this scalar needs quotes.
+  static QuotingType mustQuote(StringRef) { return QuotingType::Single; }
+};
+
+
+
+
+

Block Scalars

+

YAML block scalars are string literals that are represented in YAML using the +literal block notation, just like the example shown below:

+
text: |
+  First line
+  Second line
+
+
+

The YAML I/O library provides support for translating between YAML block scalars +and specific C++ types by allowing you to specialize BlockScalarTraits<> on +your data type. The library doesn’t provide any built-in support for block +scalar I/O for types like std::string and llvm::StringRef as they are already +supported by YAML I/O and use the ordinary scalar notation by default.

+

BlockScalarTraits specializations are very similar to the +ScalarTraits specialization - YAML I/O will provide the native type and your +specialization must create a temporary llvm::StringRef when writing, and +it will also provide an llvm::StringRef that has the value of that block scalar +and your specialization must convert that to your native data type when reading. +An example of a custom type with an appropriate specialization of +BlockScalarTraits is shown below:

+
using llvm::yaml::BlockScalarTraits;
+using llvm::yaml::IO;
+
+struct MyStringType {
+  std::string Str;
+};
+
+template <>
+struct BlockScalarTraits<MyStringType> {
+  static void output(const MyStringType &Value, void *Ctxt,
+                     llvm::raw_ostream &OS) {
+    OS << Value.Str;
+  }
+
+  static StringRef input(StringRef Scalar, void *Ctxt,
+                         MyStringType &Value) {
+    Value.Str = Scalar.str();
+    return StringRef();
+  }
+};
+
+
+
+
+
+

Mappings

+

To be translated to or from a YAML mapping for your type T you must specialize +llvm::yaml::MappingTraits on T and implement the “void mapping(IO &io, T&)” +method. If your native data structures use pointers to a class everywhere, +you can specialize on the class pointer. Examples:

+
using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+// Example of struct Foo which is used by value
+template <>
+struct MappingTraits<Foo> {
+  static void mapping(IO &io, Foo &foo) {
+    io.mapOptional("size",      foo.size);
+  ...
+  }
+};
+
+// Example of struct Bar which is natively always a pointer
+template <>
+struct MappingTraits<Bar*> {
+  static void mapping(IO &io, Bar *&bar) {
+    io.mapOptional("size",    bar->size);
+  ...
+  }
+};
+
+
+
+

No Normalization

+

The mapping() method is responsible, if needed, for normalizing and +denormalizing. In a simple case where the native data structure requires no +normalization, the mapping method just uses mapOptional() or mapRequired() to +bind the struct’s fields to YAML key names. For example:

+
using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+template <>
+struct MappingTraits<Person> {
+  static void mapping(IO &io, Person &info) {
+    io.mapRequired("name",         info.name);
+    io.mapOptional("hat-size",     info.hatSize);
+  }
+};
+
+
+
+
+

Normalization

+

When [de]normalization is required, the mapping() method needs a way to access +normalized values as fields. To help with this, there is +a template MappingNormalization<> which you can then use to automatically +do the normalization and denormalization. The template is used to create +a local variable in your mapping() method which contains the normalized keys.

+

Suppose you have native data type +Polar which specifies a position in polar coordinates (distance, angle):

+
struct Polar {
+  float distance;
+  float angle;
+};
+
+
+

but you’ve decided the normalized YAML form should be in x,y coordinates. That +is, you want the YAML to look like:

+
x:   10.3
+y:   -4.7
+
+
+

You can support this by defining a MappingTraits that normalizes the polar +coordinates to x,y coordinates when writing YAML and denormalizes x,y +coordinates into polar when reading YAML.

+
using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+template <>
+struct MappingTraits<Polar> {
+
+  class NormalizedPolar {
+  public:
+    NormalizedPolar(IO &io)
+      : x(0.0), y(0.0) {
+    }
+    NormalizedPolar(IO &, Polar &polar)
+      : x(polar.distance * cos(polar.angle)),
+        y(polar.distance * sin(polar.angle)) {
+    }
+    Polar denormalize(IO &) {
+      return Polar(sqrt(x*x+y*y), arctan(x,y));
+    }
+
+    float        x;
+    float        y;
+  };
+
+  static void mapping(IO &io, Polar &polar) {
+    MappingNormalization<NormalizedPolar, Polar> keys(io, polar);
+
+    io.mapRequired("x",    keys->x);
+    io.mapRequired("y",    keys->y);
+  }
+};
+
+
+

When writing YAML, the local variable “keys” will be a stack allocated +instance of NormalizedPolar, constructed from the supplied polar object which +initializes its x and y fields. The mapRequired() methods then write out the x +and y values as key/value pairs.

+

When reading YAML, the local variable “keys” will be a stack allocated instance +of NormalizedPolar, constructed by the empty constructor. The mapRequired +methods will find the matching key in the YAML document and fill in the x and y +fields of the NormalizedPolar object keys. At the end of the mapping() method +when the local keys variable goes out of scope, the denormalize() method will +automatically be called to convert the read values back to polar coordinates, +and then assigned back to the second parameter to mapping().

+

In some cases, the normalized class may be a subclass of the native type and +could be returned by the denormalize() method, except that the temporary +normalized instance is stack allocated. In these cases, the utility template +MappingNormalizationHeap<> can be used instead. It is just like +MappingNormalization<> except that it heap allocates the normalized object +when reading YAML. It never destroys the normalized object. The denormalize() +method can thus return “this”.

+
+
+

Default values

+

Within a mapping() method, calls to io.mapRequired() mean that that key is +required to exist when parsing YAML documents, otherwise YAML I/O will issue an +error.

+

On the other hand, keys registered with io.mapOptional() are allowed to not +exist in the YAML document being read. So what value is put in the field +for those optional keys? +There are two steps to how those optional fields are filled in. First, the +second parameter to the mapping() method is a reference to a native class. That +native class must have a default constructor. Whatever value the default +constructor initially sets for an optional field will be that field’s value. +Second, the mapOptional() method has an optional third parameter. If provided +it is the value that mapOptional() should set that field to if the YAML document +does not have that key.

+

There is one important difference between those two ways (default constructor +and third parameter to mapOptional). When YAML I/O generates a YAML document, +if the mapOptional() third parameter is used and the actual value being written +is the same as (using ==) the default value, then that key/value is not written.

+
+
+

Order of Keys

+

When writing out a YAML document, the keys are written in the order that the +calls to mapRequired()/mapOptional() are made in the mapping() method. This +gives you a chance to write the fields in an order that a human reader of +the YAML document would find natural. This may be different from the order +of the fields in the native class.

+

When reading in a YAML document, the keys in the document can be in any order, +but they are processed in the order that the calls to mapRequired()/mapOptional() +are made in the mapping() method. That enables some interesting +functionality. For instance, if the first field bound is the cpu and the second +field bound is flags, and the flags are cpu specific, you can programmatically +switch how the flags are converted to and from YAML based on the cpu. +This works for both reading and writing. For example:

+
using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+struct Info {
+  CPUs        cpu;
+  uint32_t    flags;
+};
+
+template <>
+struct MappingTraits<Info> {
+  static void mapping(IO &io, Info &info) {
+    io.mapRequired("cpu",       info.cpu);
+    // flags must come after cpu for this to work when reading yaml
+    if ( info.cpu == cpu_x86_64 )
+      io.mapRequired("flags",  *(My86_64Flags*)info.flags);
+    else
+      io.mapRequired("flags",  *(My86Flags*)info.flags);
+ }
+};
+
+
+
+
+

Tags

+

The YAML syntax supports tags as a way to specify the type of a node before +it is parsed. This allows dynamic types of nodes. But the YAML I/O model uses +static typing, so there are limits to how you can use tags with the YAML I/O +model. Recently, we added support to YAML I/O for checking/setting the optional +tag on a map. Using this functionality it is even possible to support different +mappings, as long as they are convertible.

+

To check a tag, inside your mapping() method you can use io.mapTag() to specify +what the tag should be. This will also add that tag when writing yaml.

+
+
+

Validation

+

Sometimes in a YAML map, each key/value pair is valid, but the combination is +not. This is similar to something having no syntax errors, but still having +semantic errors. To support semantic level checking, YAML I/O allows +an optional validate() method in a MappingTraits template specialization.

+

When parsing YAML, the validate() method is called after all key/values in +the map have been processed. Any error message returned by the validate() +method during input will be printed just like a syntax error would be printed. +When writing YAML, the validate() method is called before the YAML +key/values are written. Any error during output will trigger an assert() +because it is a programming error to have invalid struct values.

+
using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+struct Stuff {
+  ...
+};
+
+template <>
+struct MappingTraits<Stuff> {
+  static void mapping(IO &io, Stuff &stuff) {
+  ...
+  }
+  static std::string validate(IO &io, Stuff &stuff) {
+    // Look at all fields in 'stuff' and if there
+    // are any bad values return a string describing
+    // the error.  Otherwise return an empty string.
+    return std::string{};
+  }
+};
+
+
+
+
+

Flow Mapping

+

A YAML “flow mapping” is a mapping that uses the inline notation +(e.g { x: 1, y: 0 } ) when written to YAML. To specify that a type should be +written in YAML using flow mapping, your MappingTraits specialization should +add “static const bool flow = true;”. For instance:

+
using llvm::yaml::MappingTraits;
+using llvm::yaml::IO;
+
+struct Stuff {
+  ...
+};
+
+template <>
+struct MappingTraits<Stuff> {
+  static void mapping(IO &io, Stuff &stuff) {
+    ...
+  }
+
+  static const bool flow = true;
+}
+
+
+

Flow mappings are subject to line wrapping according to the Output object +configuration.

+
+
+
+

Sequence

+

To be translated to or from a YAML sequence for your type T you must specialize +llvm::yaml::SequenceTraits on T and implement two methods: +size_t size(IO &io, T&) and +T::value_type& element(IO &io, T&, size_t indx). For example:

+
template <>
+struct SequenceTraits<MySeq> {
+  static size_t size(IO &io, MySeq &list) { ... }
+  static MySeqEl &element(IO &io, MySeq &list, size_t index) { ... }
+};
+
+
+

The size() method returns how many elements are currently in your sequence. +The element() method returns a reference to the i’th element in the sequence. +When parsing YAML, the element() method may be called with an index one bigger +than the current size. Your element() method should allocate space for one +more element (using default constructor if element is a C++ object) and return +a reference to that newly allocated space.

+
+

Flow Sequence

+

A YAML “flow sequence” is a sequence that, when written to YAML, uses the +inline notation (e.g. [ foo, bar ]). To specify that a sequence type should +be written in YAML as a flow sequence, your SequenceTraits specialization should +add “static const bool flow = true;”. For instance:

+
template <>
+struct SequenceTraits<MyList> {
+  static size_t size(IO &io, MyList &list) { ... }
+  static MyListEl &element(IO &io, MyList &list, size_t index) { ... }
+
+  // The existence of this member causes YAML I/O to use a flow sequence
+  static const bool flow = true;
+};
+
+
+

With the above, if you used MyList as the data type in your native data +structures, then when converted to YAML, a flow sequence of integers +will be used (e.g. [ 10, -3, 4 ]).

+

Flow sequences are subject to line wrapping according to the Output object +configuration.

+
+
+

Utility Macros

+

Since a common source of sequences is std::vector<>, YAML I/O provides macros: +LLVM_YAML_IS_SEQUENCE_VECTOR() and LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR() which +can be used to easily specify SequenceTraits<> on a std::vector type. YAML +I/O does not partial specialize SequenceTraits on std::vector<> because that +would force all vectors to be sequences. An example use of the macros:

+
std::vector<MyType1>;
+std::vector<MyType2>;
+LLVM_YAML_IS_SEQUENCE_VECTOR(MyType1)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(MyType2)
+
+
+
+
+
+

Document List

+

YAML allows you to define multiple “documents” in a single YAML file. Each +new document starts with a left aligned “---” token. The end of all documents +is denoted with a left aligned “...” token. Many users of YAML will never +have need for multiple documents. The top level node in their YAML schema +will be a mapping or sequence. For those cases, the following is not needed. +But for cases where you do want multiple documents, you can specify a +trait for your document list type. The trait has the same methods as +SequenceTraits but is named DocumentListTraits. For example:

+
template <>
+struct DocumentListTraits<MyDocList> {
+  static size_t size(IO &io, MyDocList &list) { ... }
+  static MyDocType element(IO &io, MyDocList &list, size_t index) { ... }
+};
+
+
+
+
+

User Context Data

+

When an llvm::yaml::Input or llvm::yaml::Output object is created their +constructors take an optional “context” parameter. This is a pointer to +whatever state information you might need.

+

For instance, in a previous example we showed how the conversion type for a +flags field could be determined at runtime based on the value of another field +in the mapping. But what if an inner mapping needs to know some field value +of an outer mapping? That is where the “context” parameter comes in. You +can set values in the context in the outer map’s mapping() method and +retrieve those values in the inner map’s mapping() method.

+

The context value is just a void*. All your traits which use the context +and operate on your native data types, need to agree what the context value +actually is. It could be a pointer to an object or struct which your various +traits use to share context-sensitive information.

+
+
+

Output

+

The llvm::yaml::Output class is used to generate a YAML document from your +in-memory data structures, using traits defined on your data types. +To instantiate an Output object you need an llvm::raw_ostream, an optional +context pointer and an optional wrapping column:

+
class Output : public IO {
+public:
+  Output(llvm::raw_ostream &, void *context = NULL, int WrapColumn = 70);
+
+
+

Once you have an Output object, you can use the C++ stream operator on it +to write your native data as YAML. One thing to recall is that a YAML file +can contain multiple “documents”. If the top level data structure you are +streaming as YAML is a mapping, scalar, or sequence, then Output assumes you +are generating one document and wraps the mapping output +with “---” and trailing “...”.

+

The WrapColumn parameter will cause the flow mappings and sequences to +line-wrap when they go over the supplied column. Pass 0 to completely +suppress the wrapping.

+
using llvm::yaml::Output;
+
+void dumpMyMapDoc(const MyMapType &info) {
+  Output yout(llvm::outs());
+  yout << info;
+}
+
+
+

The above could produce output like:

+
---
+name:      Tom
+hat-size:  7
+...
+
+
+

On the other hand, if the top level data structure you are streaming as YAML +has a DocumentListTraits specialization, then Output walks through each element +of your DocumentList and generates a “---” before the start of each element +and ends with a “...”.

+
using llvm::yaml::Output;
+
+void dumpMyMapDoc(const MyDocListType &docList) {
+  Output yout(llvm::outs());
+  yout << docList;
+}
+
+
+

The above could produce output like:

+
---
+name:      Tom
+hat-size:  7
+---
+name:      Tom
+shoe-size:  11
+...
+
+
+
+
+

Input

+

The llvm::yaml::Input class is used to parse YAML document(s) into your native +data structures. To instantiate an Input +object you need a StringRef to the entire YAML file, and optionally a context +pointer:

+
class Input : public IO {
+public:
+  Input(StringRef inputContent, void *context=NULL);
+
+
+

Once you have an Input object, you can use the C++ stream operator to read +the document(s). If you expect there might be multiple YAML documents in +one file, you’ll need to specialize DocumentListTraits on a list of your +document type and stream in that document list type. Otherwise you can +just stream in the document type. Also, you can check if there were +any syntax errors in the YAML by calling the error() method on the Input +object. For example:

+
// Reading a single document
+using llvm::yaml::Input;
+
+Input yin(mb.getBuffer());
+
+// Parse the YAML file
+MyDocType theDoc;
+yin >> theDoc;
+
+// Check for error
+if ( yin.error() )
+  return;
+
+
+
// Reading multiple documents in one file
+using llvm::yaml::Input;
+
+LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(MyDocType)
+
+Input yin(mb.getBuffer());
+
+// Parse the YAML file
+std::vector<MyDocType> theDocList;
+yin >> theDocList;
+
+// Check for error
+if ( yin.error() )
+  return;
+
+
+
+
+ + +
+
+
+
+
+
+ + + + \ No newline at end of file diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/bugpoint.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/bugpoint.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/bugpoint.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/bugpoint.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,308 @@ +.\" Man page generated from reStructuredText. +. +.TH "BUGPOINT" "1" "2021-09-18" "13" "LLVM" +.SH NAME +bugpoint \- automatic test case reduction tool +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBbugpoint\fP [\fIoptions\fP] [\fIinput LLVM ll/bc files\fP] [\fILLVM passes\fP] \fB–args\fP +\fIprogram arguments\fP +.SH DESCRIPTION +.sp +\fBbugpoint\fP narrows down the source of problems in LLVM tools and passes. It +can be used to debug three types of failures: optimizer crashes, miscompilations +by optimizers, or bad native code generation (including problems in the static +and JIT compilers). It aims to reduce large test cases to small, useful ones. +For more information on the design and inner workings of \fBbugpoint\fP, as well as +advice for using bugpoint, see /Bugpoint in the LLVM +distribution. +.SH OPTIONS +.sp +\fB–additional\-so\fP \fIlibrary\fP +.INDENT 0.0 +.INDENT 3.5 +Load the dynamic shared object \fIlibrary\fP into the test program whenever it is +run. 
This is useful if you are debugging programs which depend on non\-LLVM +libraries (such as the X or curses libraries) to run. +.UNINDENT +.UNINDENT +.sp +\fB–append\-exit\-code\fP=\fI{true,false}\fP +.INDENT 0.0 +.INDENT 3.5 +Append the test program’s exit code to the output file so that a change in exit +code is considered a test failure. Defaults to false. +.UNINDENT +.UNINDENT +.sp +\fB–args\fP \fIprogram args\fP +.INDENT 0.0 +.INDENT 3.5 +Pass all arguments specified after \fB–args\fP to the test program whenever it runs. +Note that if any of the \fIprogram args\fP start with a “\fB\-\fP”, you should use: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +bugpoint [bugpoint args] \-\-args \-\- [program args] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The “\fB\-\-\fP” right after the \fB–args\fP option tells \fBbugpoint\fP to consider +any options starting with “\fB\-\fP” to be part of the \fB–args\fP option, not as +options to \fBbugpoint\fP itself. +.UNINDENT +.UNINDENT +.sp +\fB–tool\-args\fP \fItool args\fP +.INDENT 0.0 +.INDENT 3.5 +Pass all arguments specified after \fB–tool\-args\fP to the LLVM tool under test +(\fBllc\fP, \fBlli\fP, etc.) whenever it runs. You should use this option in the +following way: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +bugpoint [bugpoint args] \-\-tool\-args \-\- [tool args] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The “\fB\-\-\fP” right after the \fB–tool\-args\fP option tells \fBbugpoint\fP to +consider any options starting with “\fB\-\fP” to be part of the \fB–tool\-args\fP +option, not as options to \fBbugpoint\fP itself. (See \fB–args\fP, above.) +.UNINDENT +.UNINDENT +.sp +\fB–safe\-tool\-args\fP \fItool args\fP +.INDENT 0.0 +.INDENT 3.5 +Pass all arguments specified after \fB–safe\-tool\-args\fP to the “safe” execution +tool. +.UNINDENT +.UNINDENT +.sp +\fB–gcc\-tool\-args\fP \fIgcc tool args\fP +.INDENT 0.0 +.INDENT 3.5 +Pass all arguments specified after \fB–gcc\-tool\-args\fP to the invocation of +\fBgcc\fP\&. 
+.UNINDENT +.UNINDENT +.sp +\fB–opt\-args\fP \fIopt args\fP +.INDENT 0.0 +.INDENT 3.5 +Pass all arguments specified after \fB–opt\-args\fP to the invocation of \fBopt\fP\&. +.UNINDENT +.UNINDENT +.sp +\fB–disable\-{dce,simplifycfg}\fP +.INDENT 0.0 +.INDENT 3.5 +Do not run the specified passes to clean up and reduce the size of the test +program. By default, \fBbugpoint\fP uses these passes internally when attempting to +reduce test programs. If you’re trying to find a bug in one of these passes, +\fBbugpoint\fP may crash. +.UNINDENT +.UNINDENT +.sp +\fB–enable\-valgrind\fP +.INDENT 0.0 +.INDENT 3.5 +Use valgrind to find faults in the optimization phase. This will allow +bugpoint to find otherwise asymptomatic problems caused by memory +mis\-management. +.UNINDENT +.UNINDENT +.sp +\fB\-find\-bugs\fP +.INDENT 0.0 +.INDENT 3.5 +Continually randomize the specified passes and run them on the test program +until a bug is found or the user kills \fBbugpoint\fP\&. +.UNINDENT +.UNINDENT +.sp +\fB\-help\fP +.INDENT 0.0 +.INDENT 3.5 +Print a summary of command line options. +.UNINDENT +.UNINDENT +.sp +\fB–input\fP \fIfilename\fP +.INDENT 0.0 +.INDENT 3.5 +Open \fIfilename\fP and redirect the standard input of the test program, whenever +it runs, to come from that file. +.UNINDENT +.UNINDENT +.sp +\fB–load\fP \fIplugin\fP +.INDENT 0.0 +.INDENT 3.5 +Load the dynamic object \fIplugin\fP into \fBbugpoint\fP itself. This object should +register new optimization passes. Once loaded, the object will add new command +line options to enable various optimizations. To see the new complete list of +optimizations, use the \fB\-help\fP and \fB–load\fP options together; for example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +bugpoint \-\-load myNewPass.so \-help +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.UNINDENT +.sp +\fB–mlimit\fP \fImegabytes\fP +.INDENT 0.0 +.INDENT 3.5 +Specifies an upper limit on memory usage of the optimization and codegen. Set +to zero to disable the limit. 
+.UNINDENT +.UNINDENT +.sp +\fB–output\fP \fIfilename\fP +.INDENT 0.0 +.INDENT 3.5 +Whenever the test program produces output on its standard output stream, it +should match the contents of \fIfilename\fP (the “reference output”). If you +do not use this option, \fBbugpoint\fP will attempt to generate a reference output +by compiling the program with the “safe” backend and running it. +.UNINDENT +.UNINDENT +.sp +\fB–run\-{int,jit,llc,custom}\fP +.INDENT 0.0 +.INDENT 3.5 +Whenever the test program is compiled, \fBbugpoint\fP should generate code for it +using the specified code generator. These options allow you to choose the +interpreter, the JIT compiler, the static native code compiler, or a +custom command (see \fB–exec\-command\fP) respectively. +.UNINDENT +.UNINDENT +.sp +\fB–safe\-{llc,custom}\fP +.INDENT 0.0 +.INDENT 3.5 +When debugging a code generator, \fBbugpoint\fP should use the specified code +generator as the “safe” code generator. This is a known\-good code generator +used to generate the “reference output” if it has not been provided, and to +compile portions of the program that are excluded from the testcase. +These options allow you to choose the +static native code compiler, or a custom command, (see \fB–exec\-command\fP) +respectively. The interpreter and the JIT backends cannot currently +be used as the “safe” backends. +.UNINDENT +.UNINDENT +.sp +\fB–exec\-command\fP \fIcommand\fP +.INDENT 0.0 +.INDENT 3.5 +This option defines the command to use with the \fB–run\-custom\fP and +\fB–safe\-custom\fP options to execute the bitcode testcase. This can +be useful for cross\-compilation. +.UNINDENT +.UNINDENT +.sp +\fB–compile\-command\fP \fIcommand\fP +.INDENT 0.0 +.INDENT 3.5 +This option defines the command to use with the \fB–compile\-custom\fP +option to compile the bitcode testcase. The command should exit with a +failure exit code if the file is “interesting” and should exit with a +success exit code (i.e. 
0) otherwise (this is the same as if it crashed on +“interesting” inputs). +.sp +This can be useful for +testing compiler output without running any link or execute stages. To +generate a reduced unit test, you may add CHECK directives to the +testcase and pass the name of an executable compile\-command script in this form: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +#!/bin/sh +llc "$@" +not FileCheck [bugpoint input file].ll < bugpoint\-test\-program.s +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +This script will “fail” as long as FileCheck passes. So the result +will be the minimum bitcode that passes FileCheck. +.UNINDENT +.UNINDENT +.sp +\fB–safe\-path\fP \fIpath\fP +.INDENT 0.0 +.INDENT 3.5 +This option defines the path to the command to execute with the +\fB–safe\-{int,jit,llc,custom}\fP +option. +.UNINDENT +.UNINDENT +.sp +\fB–verbose\-errors\fP=\fI{true,false}\fP +.INDENT 0.0 +.INDENT 3.5 +The default behavior of bugpoint is to print “<crash>” when it finds a reduced +test that crashes compilation. This flag prints the output of the crashing +program to stderr. This is useful to make sure it is the same error being +tracked down and not a different error that happens to crash the compiler as +well. Defaults to false. +.UNINDENT +.UNINDENT +.SH EXIT STATUS +.sp +If \fBbugpoint\fP succeeds in finding a problem, it will exit with 0. Otherwise, +if an error occurs, it will exit with a non\-zero value. +.SH SEE ALSO +.sp +\fBopt(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/clang-tblgen.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/clang-tblgen.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/clang-tblgen.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/clang-tblgen.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,51 @@ +.\" Man page generated from reStructuredText. +. +.TH "CLANG-TBLGEN" "1" "2021-09-18" "13" "LLVM" +.SH NAME +clang-tblgen \- Description to C++ Code for Clang +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBclang\-tblgen\fP [\fIoptions\fP] [\fIfilename\fP] +.SH DESCRIPTION +.sp +\fBclang\-tblgen\fP is a program that translates compiler\-related target +description (\fB\&.td\fP) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler. +.sp +Please see tblgen \- Description to C++ Code +for a description of the \fIfilename\fP argument and options, including the +options common to all \fB*\-tblgen\fP programs. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/dsymutil.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/dsymutil.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/dsymutil.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/dsymutil.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,228 @@ +.\" Man page generated from reStructuredText. +. +.TH "DSYMUTIL" "1" "2021-09-18" "13" "LLVM" +.SH NAME +dsymutil \- manipulate archived DWARF debug symbol files +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.nf +\fBdsymutil\fP [\fIoptions\fP] \fIexecutable\fP +.fi +.sp +.SH DESCRIPTION +.sp +\fBdsymutil\fP links the DWARF debug information found in the object files +for an executable \fIexecutable\fP by using debug symbols information contained in +its symbol table. By default, the linked debug information is placed in a +\fB\&.dSYM\fP bundle with the same name as the executable. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-\-accelerator= +Specify the desired type of accelerator table. Valid options are ‘Apple’, +‘Dwarf’ and ‘Default’. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-arch +Link DWARF debug information only for specified CPU architecture types. +Architectures may be specified by name. When using this option, an error will +be returned if any architectures can not be properly linked. 
This option can +be specified multiple times, once for each desired architecture. All CPU +architectures will be linked by default and any architectures that can’t be +properly linked will cause \fBdsymutil\fP to return an error. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dump\-debug\-map +Dump the \fIexecutable\fP’s debug\-map (the list of the object files containing the +debug information) in YAML format and exit. No DWARF link will take place. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-flat, \-f +Produce a flat dSYM file. A \fB\&.dwarf\fP extension will be appended to the +executable name unless the output file is specified using the \fB\-o\fP option. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-gen\-reproducer +Generate a reproducer consisting of the input object files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-help, \-h +Print this help output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-keep\-function\-for\-static +Make a static variable keep the enclosing function even if it would have been +omitted otherwise. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-minimize, \-z +When used when creating a dSYM file, this option will suppress the emission of +the .debug_inlines, .debug_pubnames, and .debug_pubtypes sections since +dsymutil currently has better equivalents: .apple_names and .apple_types. When +used in conjunction with \fB\-\-update\fP option, this option will cause redundant +accelerator tables to be removed. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-odr +Do not use ODR (One Definition Rule) for uniquing C++ types. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-output +Do the link in memory, but do not emit the result file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-swiftmodule\-timestamp +Don’t check the timestamp for swiftmodule files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-num\-threads , \-j +Specifies the maximum number (\fBn\fP) of simultaneous threads to use when +linking multiple architectures. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-\-object\-prefix\-map +Remap object file paths (but no source paths) before processing. Use +this for Clang objects where the module cache location was remapped using +\fB\-fdebug\-prefix\-map\fP; to help dsymutil find the Clang module cache. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-oso\-prepend\-path +Specifies a \fBpath\fP to prepend to all debug symbol object file paths. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-out , \-o +Specifies an alternate \fBpath\fP to place the dSYM bundle. The default dSYM +bundle path is created by appending \fB\&.dSYM\fP to the executable name. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-papertrail +When running dsymutil as part of your build system, it can be desirable for +warnings to be part of the end product, rather than just being emitted to the +output stream. When enabled warnings are embedded in the linked DWARF debug +information. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-remarks\-output\-format +Specify the format to be used when serializing the linked remarks. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-remarks\-prepend\-path +Specify a directory to prepend the paths of the external remark files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-statistics +Print statistics about the contribution of each object file to the linked +debug info. This prints a table after linking with the object file name, the +size of the debug info in the object file (in bytes) and the size contributed +(in bytes) to the linked dSYM. The table is sorted by the output size listing +the object files with the largest contribution first. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-symbol\-map +Update the existing dSYMs inplace using symbol map specified. +.UNINDENT +.INDENT 0.0 +.TP +.B \-s, \-\-symtab +Dumps the symbol table found in \fIexecutable\fP or object file(s) and exits. +.UNINDENT +.INDENT 0.0 +.TP +.B \-S +Output textual assembly instead of a binary dSYM companion file. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-\-toolchain +Embed the toolchain in the dSYM bundle’s property list. +.UNINDENT +.INDENT 0.0 +.TP +.B \-u, \-\-update +Update an existing dSYM file to contain the latest accelerator tables and +other DWARF optimizations. This option will rebuild the ‘.apple_names’ and +‘.apple_types’ hashed accelerator tables. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-use\-reproducer +Use the object files from the given reproducer path. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-verbose +Display verbose information when linking. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-verify +Run the DWARF verifier on the linked DWARF debug info. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-version +Display the version of the tool. +.UNINDENT +.INDENT 0.0 +.TP +.B \-y +Treat \fIexecutable\fP as a YAML debug\-map rather than an executable. +.UNINDENT +.SH EXIT STATUS +.sp +\fBdsymutil\fP returns 0 if the DWARF debug information was linked +successfully. Otherwise, it returns 1. +.SH SEE ALSO +.sp +\fBllvm\-dwarfdump(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/FileCheck.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/FileCheck.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/FileCheck.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/FileCheck.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,1219 @@ +.\" Man page generated from reStructuredText. +. +.TH "FILECHECK" "1" "2021-09-18" "13" "LLVM" +.SH NAME +FileCheck \- Flexible pattern matching file verifier +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. 
nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBFileCheck\fP \fImatch\-filename\fP [\fI–check\-prefix=XXX\fP] [\fI–strict\-whitespace\fP] +.SH DESCRIPTION +.sp +\fBFileCheck\fP reads two files (one from standard input, and one +specified on the command line) and uses one to verify the other. This +behavior is particularly useful for the testsuite, which wants to verify that +the output of some tool (e.g. \fBllc\fP) contains the expected information +(for example, a movsd from esp or whatever is interesting). This is similar to +using \fBgrep\fP, but it is optimized for matching multiple different +inputs in one file in a specific order. +.sp +The \fBmatch\-filename\fP file specifies the file that contains the patterns to +match. The file to verify is read from standard input unless the +\fI\%\-\-input\-file\fP option is used. +.SH OPTIONS +.sp +Options are parsed from the environment variable \fBFILECHECK_OPTS\fP +and from the command line. +.INDENT 0.0 +.TP +.B \-help +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-check\-prefix prefix +FileCheck searches the contents of \fBmatch\-filename\fP for patterns to +match. By default, these patterns are prefixed with “\fBCHECK:\fP”. +If you’d like to use a different prefix (e.g. because the same input +file is checking multiple different tool or options), the +\fI\%\-\-check\-prefix\fP argument allows you to specify (without the trailing +“\fB:\fP”) one or more prefixes to match. Multiple prefixes are useful for tests +which might change for different run options, but most lines remain the same. 
+.sp +FileCheck does not permit duplicate prefixes, even if one is a check prefix +and one is a comment prefix (see \fI\%\-\-comment\-prefixes\fP below). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-check\-prefixes prefix1,prefix2,... +An alias of \fI\%\-\-check\-prefix\fP that allows multiple prefixes to be +specified as a comma separated list. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-comment\-prefixes prefix1,prefix2,... +By default, FileCheck ignores any occurrence in \fBmatch\-filename\fP of any check +prefix if it is preceded on the same line by “\fBCOM:\fP” or “\fBRUN:\fP”. See the +section \fI\%The “COM:” directive\fP for usage details. +.sp +These default comment prefixes can be overridden by +\fI\%\-\-comment\-prefixes\fP if they are not appropriate for your testing +environment. However, doing so is not recommended in LLVM’s LIT\-based test +suites, which should be easier to maintain if they all follow a consistent +comment style. In that case, consider proposing a change to the default +comment prefixes instead. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-allow\-unused\-prefixes +This option controls the behavior when using more than one prefix as specified +by \fI\%\-\-check\-prefix\fP or \fI\%\-\-check\-prefixes\fP, and some of these +prefixes are missing in the test file. If true, this is allowed, if false, +FileCheck will report an error, listing the missing prefixes. +.sp +It is currently, temporarily, true by default, and will be subsequently +switched to false. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-input\-file filename +File to check (defaults to stdin). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-match\-full\-lines +By default, FileCheck allows matches of anywhere on a line. This +option will require all positive matches to cover an entire +line. Leading and trailing whitespace is ignored, unless +\fI\%\-\-strict\-whitespace\fP is also specified. (Note: negative +matches from \fBCHECK\-NOT\fP are not affected by this option!) 
+.sp +Passing this option is equivalent to inserting \fB{{^ *}}\fP or +\fB{{^}}\fP before, and \fB{{ *$}}\fP or \fB{{$}}\fP after every positive +check pattern. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strict\-whitespace +By default, FileCheck canonicalizes input horizontal whitespace (spaces and +tabs) which causes it to ignore these differences (a space will match a tab). +The \fI\%\-\-strict\-whitespace\fP argument disables this behavior. End\-of\-line +sequences are canonicalized to UNIX\-style \fB\en\fP in all modes. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-ignore\-case +By default, FileCheck uses case\-sensitive matching. This option causes +FileCheck to use case\-insensitive matching. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-implicit\-check\-not check\-pattern +Adds implicit negative checks for the specified patterns between positive +checks. The option allows writing stricter tests without stuffing them with +\fBCHECK\-NOT\fPs. +.sp +For example, “\fB\-\-implicit\-check\-not warning:\fP” can be useful when testing +diagnostic messages from tools that don’t have an option similar to \fBclang +\-verify\fP\&. With this option FileCheck will verify that input does not contain +warnings not covered by any \fBCHECK:\fP patterns. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dump\-input +Dump input to stderr, adding annotations representing currently enabled +diagnostics. When there are multiple occurrences of this option, the +\fB\fP that appears earliest in the list below has precedence. The +default is \fBfail\fP\&. +.INDENT 7.0 +.IP \(bu 2 +\fBhelp\fP \- Explain input dump and quit +.IP \(bu 2 +\fBalways\fP \- Always dump input +.IP \(bu 2 +\fBfail\fP \- Dump input on failure +.IP \(bu 2 +\fBnever\fP \- Never dump input +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dump\-input\-context +In the dump requested by \fB\-\-dump\-input\fP, print \fB\fP input lines before +and \fB\fP input lines after any lines specified by \fB\-\-dump\-input\-filter\fP\&. 
+When there are multiple occurrences of this option, the largest specified +\fB\fP has precedence. The default is 5. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dump\-input\-filter +In the dump requested by \fB\-\-dump\-input\fP, print only input lines of kind +\fB\fP plus any context specified by \fB\-\-dump\-input\-context\fP\&. When +there are multiple occurrences of this option, the \fB\fP that appears +earliest in the list below has precedence. The default is \fBerror\fP when +\fB\-\-dump\-input=fail\fP, and it’s \fBall\fP when \fB\-\-dump\-input=always\fP\&. +.INDENT 7.0 +.IP \(bu 2 +\fBall\fP \- All input lines +.IP \(bu 2 +\fBannotation\-full\fP \- Input lines with annotations +.IP \(bu 2 +\fBannotation\fP \- Input lines with starting points of annotations +.IP \(bu 2 +\fBerror\fP \- Input lines with starting points of error annotations +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-enable\-var\-scope +Enables scope for regex variables. +.sp +Variables with names that start with \fB$\fP are considered global and +remain set throughout the file. +.sp +All other variables get undefined after each encountered \fBCHECK\-LABEL\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-D +Sets a filecheck pattern variable \fBVAR\fP with value \fBVALUE\fP that can be +used in \fBCHECK:\fP lines. +.UNINDENT +.INDENT 0.0 +.TP +.B \-D#,= +Sets a filecheck numeric variable \fBNUMVAR\fP of matching format \fBFMT\fP to +the result of evaluating \fB\fP that can be used in +\fBCHECK:\fP lines. See section +\fBFileCheck Numeric Variables and Expressions\fP for details on supported +numeric expressions. +.UNINDENT +.INDENT 0.0 +.TP +.B \-version +Show the version number of this program. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v +Print good directive pattern matches. However, if \fB\-dump\-input=fail\fP or +\fB\-dump\-input=always\fP, add those matches as input annotations instead. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-vv +Print information helpful in diagnosing internal FileCheck issues, such as +discarded overlapping \fBCHECK\-DAG:\fP matches, implicit EOF pattern matches, +and \fBCHECK\-NOT:\fP patterns that do not have matches. Implies \fB\-v\fP\&. +However, if \fB\-dump\-input=fail\fP or \fB\-dump\-input=always\fP, just add that +information as input annotations instead. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-allow\-deprecated\-dag\-overlap +Enable overlapping among matches in a group of consecutive \fBCHECK\-DAG:\fP +directives. This option is deprecated and is only provided for convenience +as old tests are migrated to the new non\-overlapping \fBCHECK\-DAG:\fP +implementation. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-allow\-empty +Allow checking empty input. By default, empty input is rejected. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-color +Use colors in output (autodetected by default). +.UNINDENT +.SH EXIT STATUS +.sp +If \fBFileCheck\fP verifies that the file matches the expected contents, +it exits with 0. Otherwise, if not, or if an error occurs, it will exit with a +non\-zero value. +.SH TUTORIAL +.sp +FileCheck is typically used from LLVM regression tests, being invoked on the RUN +line of the test. A simple example of using FileCheck from a RUN line looks +like this: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; RUN: llvm\-as < %s | llc \-march=x86\-64 | FileCheck %s +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +This syntax says to pipe the current file (“\fB%s\fP”) into \fBllvm\-as\fP, pipe +that into \fBllc\fP, then pipe the output of \fBllc\fP into \fBFileCheck\fP\&. This +means that FileCheck will be verifying its standard input (the llc output) +against the filename argument specified (the original \fB\&.ll\fP file specified by +“\fB%s\fP”). 
To see how this works, let’s look at the rest of the \fB\&.ll\fP file +(after the RUN line): +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +define void @sub1(i32* %p, i32 %v) { +entry: +; CHECK: sub1: +; CHECK: subl + %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) + ret void +} + +define void @inc4(i64* %p) { +entry: +; CHECK: inc4: +; CHECK: incq + %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) + ret void +} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Here you can see some “\fBCHECK:\fP” lines specified in comments. Now you can +see how the file is piped into \fBllvm\-as\fP, then \fBllc\fP, and the machine code +output is what we are verifying. FileCheck checks the machine code output to +verify that it matches what the “\fBCHECK:\fP” lines specify. +.sp +The syntax of the “\fBCHECK:\fP” lines is very simple: they are fixed strings that +must occur in order. FileCheck defaults to ignoring horizontal whitespace +differences (e.g. a space is allowed to match a tab) but otherwise, the contents +of the “\fBCHECK:\fP” line is required to match some thing in the test file exactly. +.sp +One nice thing about FileCheck (compared to grep) is that it allows merging +test cases together into logical groups. For example, because the test above +is checking for the “\fBsub1:\fP” and “\fBinc4:\fP” labels, it will not match +unless there is a “\fBsubl\fP” in between those labels. If it existed somewhere +else in the file, that would not count: “\fBgrep subl\fP” matches if “\fBsubl\fP” +exists anywhere in the file. +.SS The FileCheck \-check\-prefix option +.sp +The FileCheck \fI\-check\-prefix\fP option allows multiple test +configurations to be driven from one \fI\&.ll\fP file. This is useful in many +circumstances, for example, testing different architectural variants with +\fBllc\fP\&. 
Here’s a simple example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; RUN: llvm\-as < %s | llc \-mtriple=i686\-apple\-darwin9 \-mattr=sse41 \e +; RUN: | FileCheck %s \-check\-prefix=X32 +; RUN: llvm\-as < %s | llc \-mtriple=x86_64\-apple\-darwin9 \-mattr=sse41 \e +; RUN: | FileCheck %s \-check\-prefix=X64 + +define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind { + %tmp1 = insertelement <4 x i32>; %tmp, i32 %s, i32 1 + ret <4 x i32> %tmp1 +; X32: pinsrd_1: +; X32: pinsrd $1, 4(%esp), %xmm0 + +; X64: pinsrd_1: +; X64: pinsrd $1, %edi, %xmm0 +} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +In this case, we’re testing that we get the expected code generation with +both 32\-bit and 64\-bit code generation. +.SS The “COM:” directive +.sp +Sometimes you want to disable a FileCheck directive without removing it +entirely, or you want to write comments that mention a directive by name. The +“\fBCOM:\fP” directive makes it easy to do this. For example, you might have: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; X32: pinsrd_1: +; X32: pinsrd $1, 4(%esp), %xmm0 + +; COM: FIXME: X64 isn\(aqt working correctly yet for this part of codegen, but +; COM: X64 will have something similar to X32: +; COM: +; COM: X64: pinsrd_1: +; COM: X64: pinsrd $1, %edi, %xmm0 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Without “\fBCOM:\fP”, you would need to use some combination of rewording and +directive syntax mangling to prevent FileCheck from recognizing the commented +occurrences of “\fBX32:\fP” and “\fBX64:\fP” above as directives. Moreover, +FileCheck diagnostics have been proposed that might complain about the above +occurrences of “\fBX64\fP” that don’t have the trailing “\fB:\fP” because they look +like directive typos. Dodging all these problems can be tedious for a test +author, and directive syntax mangling can make the purpose of test code unclear. +“\fBCOM:\fP” avoids all these problems. 
+.sp +A few important usage notes: +.INDENT 0.0 +.IP \(bu 2 +“\fBCOM:\fP” within another directive’s pattern does \fInot\fP comment out the +remainder of the pattern. For example: +.INDENT 2.0 +.INDENT 3.5 +.sp +.nf +.ft C +; X32: pinsrd $1, 4(%esp), %xmm0 COM: This is part of the X32 pattern! +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +If you need to temporarily comment out part of a directive’s pattern, move it +to another line. The reason is that FileCheck parses “\fBCOM:\fP” in the same +manner as any other directive: only the first directive on the line is +recognized as a directive. +.IP \(bu 2 +For the sake of LIT, FileCheck treats “\fBRUN:\fP” just like “\fBCOM:\fP”. If this +is not suitable for your test environment, see \fI\%\-\-comment\-prefixes\fP\&. +.IP \(bu 2 +FileCheck does not recognize “\fBCOM\fP”, “\fBRUN\fP”, or any user\-defined comment +prefix as a comment directive if it’s combined with one of the usual check +directive suffixes, such as “\fB\-NEXT:\fP” or “\fB\-NOT:\fP”, discussed below. +FileCheck treats such a combination as plain text instead. If it needs to act +as a comment directive for your test environment, define it as such with +\fI\%\-\-comment\-prefixes\fP\&. +.UNINDENT +.SS The “CHECK\-NEXT:” directive +.sp +Sometimes you want to match lines and would like to verify that matches +happen on exactly consecutive lines with no other lines in between them. In +this case, you can use “\fBCHECK:\fP” and “\fBCHECK\-NEXT:\fP” directives to specify +this. If you specified a custom check prefix, just use “\fB<PREFIX>\-NEXT:\fP”.
+For example, something like this works as you’d expect: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) { + %tmp3 = load <2 x double>* %A, align 16 + %tmp7 = insertelement <2 x double> undef, double %B, i32 0 + %tmp9 = shufflevector <2 x double> %tmp3, + <2 x double> %tmp7, + <2 x i32> < i32 0, i32 2 > + store <2 x double> %tmp9, <2 x double>* %r, align 16 + ret void + +; CHECK: t2: +; CHECK: movl 8(%esp), %eax +; CHECK\-NEXT: movapd (%eax), %xmm0 +; CHECK\-NEXT: movhpd 12(%esp), %xmm0 +; CHECK\-NEXT: movl 4(%esp), %eax +; CHECK\-NEXT: movapd %xmm0, (%eax) +; CHECK\-NEXT: ret +} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +“\fBCHECK\-NEXT:\fP” directives reject the input unless there is exactly one +newline between it and the previous directive. A “\fBCHECK\-NEXT:\fP” cannot be +the first directive in a file. +.SS The “CHECK\-SAME:” directive +.sp +Sometimes you want to match lines and would like to verify that matches happen +on the same line as the previous match. In this case, you can use “\fBCHECK:\fP” +and “\fBCHECK\-SAME:\fP” directives to specify this. If you specified a custom +check prefix, just use “\fB<PREFIX>\-SAME:\fP”. +.sp +“\fBCHECK\-SAME:\fP” is particularly powerful in conjunction with “\fBCHECK\-NOT:\fP” +(described below). +.sp +For example, the following works like you’d expect: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +!0 = !DILocation(line: 5, scope: !1, inlinedAt: !2) + +; CHECK: !DILocation(line: 5, +; CHECK\-NOT: column: +; CHECK\-SAME: scope: ![[SCOPE:[0\-9]+]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +“\fBCHECK\-SAME:\fP” directives reject the input if there are any newlines between +it and the previous directive. +.sp +“\fBCHECK\-SAME:\fP” is also useful to avoid writing matchers for irrelevant +fields. For example, suppose you’re writing a test which parses a tool that +generates output like this: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Name: foo +Field1: ... +Field2: ... +Field3: ...
+Value: 1 + +Name: bar +Field1: ... +Field2: ... +Field3: ... +Value: 2 + +Name: baz +Field1: ... +Field2: ... +Field3: ... +Value: 1 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +To write a test that verifies \fBfoo\fP has the value \fB1\fP, you might first +write this: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +CHECK: Name: foo +CHECK: Value: 1{{$}} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +However, this would be a bad test: if the value for \fBfoo\fP changes, the test +would still pass because the “\fBCHECK: Value: 1\fP” line would match the value +from \fBbaz\fP\&. To fix this, you could add \fBCHECK\-NEXT\fP matchers for every +\fBFieldN:\fP line, but that would be verbose, and need to be updated when +\fBField4\fP is added. A more succinct way to write the test using the +“\fBCHECK\-SAME:\fP” matcher would be as follows: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +CHECK: Name: foo +CHECK: Value: +CHECK\-SAME: {{ 1$}} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +This verifies that the \fInext\fP time “\fBValue:\fP” appears in the output, it has +the value \fB1\fP\&. +.sp +Note: a “\fBCHECK\-SAME:\fP” cannot be the first directive in a file. +.SS The “CHECK\-EMPTY:” directive +.sp +If you need to check that the next line has nothing on it, not even whitespace, +you can use the “\fBCHECK\-EMPTY:\fP” directive. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +declare void @foo() + +declare void @bar() +; CHECK: foo +; CHECK\-EMPTY: +; CHECK\-NEXT: bar +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Just like “\fBCHECK\-NEXT:\fP” the directive will fail if there is more than one +newline before it finds the next blank line, and it cannot be the first +directive in a file. +.SS The “CHECK\-NOT:” directive +.sp +The “\fBCHECK\-NOT:\fP” directive is used to verify that a string doesn’t occur +between two matches (or before the first match, or after the last match). 
For +example, to verify that a load is removed by a transformation, a test like this +can be used: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +define i8 @coerce_offset0(i32 %V, i32* %P) { + store i32 %V, i32* %P + + %P2 = bitcast i32* %P to i8* + %P3 = getelementptr i8* %P2, i32 2 + + %A = load i8* %P3 + ret i8 %A +; CHECK: @coerce_offset0 +; CHECK\-NOT: load +; CHECK: ret i8 +} +.ft P +.fi +.UNINDENT +.UNINDENT +.SS The “CHECK\-COUNT:” directive +.sp +If you need to match multiple lines with the same pattern over and over again +you can repeat a plain \fBCHECK:\fP as many times as needed. If that looks too +boring you can instead use a counted check “\fBCHECK\-COUNT\-<num>:\fP”, where +\fB<num>\fP is a positive decimal number. It will match the pattern exactly +\fB<num>\fP times, no more and no less. If you specified a custom check prefix, +just use “\fB<PREFIX>\-COUNT\-<num>:\fP” for the same effect. +Here is a simple example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Loop at depth 1 +Loop at depth 1 +Loop at depth 1 +Loop at depth 1 + Loop at depth 2 + Loop at depth 3 + +; CHECK\-COUNT\-6: Loop at depth {{[0\-9]+}} +; CHECK\-NOT: Loop at depth {{[0\-9]+}} +.ft P +.fi +.UNINDENT +.UNINDENT +.SS The “CHECK\-DAG:” directive +.sp +If it’s necessary to match strings that don’t occur in a strictly sequential +order, “\fBCHECK\-DAG:\fP” could be used to verify them between two matches (or +before the first match, or after the last match). For example, clang emits +vtable globals in reverse order.
Using \fBCHECK\-DAG:\fP, we can keep the checks +in the natural order: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +// RUN: %clang_cc1 %s \-emit\-llvm \-o \- | FileCheck %s + +struct Foo { virtual void method(); }; +Foo f; // emit vtable +// CHECK\-DAG: @_ZTV3Foo = + +struct Bar { virtual void method(); }; +Bar b; +// CHECK\-DAG: @_ZTV3Bar = +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +\fBCHECK\-NOT:\fP directives could be mixed with \fBCHECK\-DAG:\fP directives to +exclude strings between the surrounding \fBCHECK\-DAG:\fP directives. As a result, +the surrounding \fBCHECK\-DAG:\fP directives cannot be reordered, i.e. all +occurrences matching \fBCHECK\-DAG:\fP before \fBCHECK\-NOT:\fP must not fall behind +occurrences matching \fBCHECK\-DAG:\fP after \fBCHECK\-NOT:\fP\&. For example, +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK\-DAG: BEFORE +; CHECK\-NOT: NOT +; CHECK\-DAG: AFTER +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +This case will reject input strings where \fBBEFORE\fP occurs after \fBAFTER\fP\&. +.sp +With captured variables, \fBCHECK\-DAG:\fP is able to match valid topological +orderings of a DAG with edges from the definition of a variable to its use. +It’s useful, e.g., when your test cases need to match different output +sequences from the instruction scheduler. For example, +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK\-DAG: add [[REG1:r[0\-9]+]], r1, r2 +; CHECK\-DAG: add [[REG2:r[0\-9]+]], r3, r4 +; CHECK: mul r5, [[REG1]], [[REG2]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +In this case, any order of those two \fBadd\fP instructions will be allowed. +.sp +If you are defining \fIand\fP using variables in the same \fBCHECK\-DAG:\fP block, +be aware that the definition rule can match \fIafter\fP its use.
+.sp +So, for instance, the code below will pass: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK\-DAG: vmov.32 [[REG2:d[0\-9]+]][0] +; CHECK\-DAG: vmov.32 [[REG2]][1] +vmov.32 d0[1] +vmov.32 d0[0] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +While this other code, will not: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK\-DAG: vmov.32 [[REG2:d[0\-9]+]][0] +; CHECK\-DAG: vmov.32 [[REG2]][1] +vmov.32 d1[1] +vmov.32 d0[0] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +While this can be very useful, it’s also dangerous, because in the case of +register sequence, you must have a strong order (read before write, copy before +use, etc). If the definition your test is looking for doesn’t match (because +of a bug in the compiler), it may match further away from the use, and mask +real bugs away. +.sp +In those cases, to enforce the order, use a non\-DAG directive between DAG\-blocks. +.sp +A \fBCHECK\-DAG:\fP directive skips matches that overlap the matches of any +preceding \fBCHECK\-DAG:\fP directives in the same \fBCHECK\-DAG:\fP block. Not only +is this non\-overlapping behavior consistent with other directives, but it’s +also necessary to handle sets of non\-unique strings or patterns. For example, +the following directives look for unordered log entries for two tasks in a +parallel program, such as the OpenMP runtime: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +// CHECK\-DAG: [[THREAD_ID:[0\-9]+]]: task_begin +// CHECK\-DAG: [[THREAD_ID]]: task_end +// +// CHECK\-DAG: [[THREAD_ID:[0\-9]+]]: task_begin +// CHECK\-DAG: [[THREAD_ID]]: task_end +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The second pair of directives is guaranteed not to match the same log entries +as the first pair even though the patterns are identical and even if the text +of the log entries is identical because the thread ID manages to be reused. 
+.SS The “CHECK\-LABEL:” directive +.sp +Sometimes in a file containing multiple tests divided into logical blocks, one +or more \fBCHECK:\fP directives may inadvertently succeed by matching lines in a +later block. While an error will usually eventually be generated, the check +flagged as causing the error may not actually bear any relationship to the +actual source of the problem. +.sp +In order to produce better error messages in these cases, the “\fBCHECK\-LABEL:\fP” +directive can be used. It is treated identically to a normal \fBCHECK\fP +directive except that FileCheck makes an additional assumption that a line +matched by the directive cannot also be matched by any other check present in +\fBmatch\-filename\fP; this is intended to be used for lines containing labels or +other unique identifiers. Conceptually, the presence of \fBCHECK\-LABEL\fP divides +the input stream into separate blocks, each of which is processed independently, +preventing a \fBCHECK:\fP directive in one block matching a line in another block. +If \fB\-\-enable\-var\-scope\fP is in effect, all local variables are cleared at the +beginning of the block. 
+.sp +For example, +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +define %struct.C* @C_ctor_base(%struct.C* %this, i32 %x) { +entry: +; CHECK\-LABEL: C_ctor_base: +; CHECK: mov [[SAVETHIS:r[0\-9]+]], r0 +; CHECK: bl A_ctor_base +; CHECK: mov r0, [[SAVETHIS]] + %0 = bitcast %struct.C* %this to %struct.A* + %call = tail call %struct.A* @A_ctor_base(%struct.A* %0) + %1 = bitcast %struct.C* %this to %struct.B* + %call2 = tail call %struct.B* @B_ctor_base(%struct.B* %1, i32 %x) + ret %struct.C* %this +} + +define %struct.D* @D_ctor_base(%struct.D* %this, i32 %x) { +entry: +; CHECK\-LABEL: D_ctor_base: +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The use of \fBCHECK\-LABEL:\fP directives in this case ensures that the three +\fBCHECK:\fP directives only accept lines corresponding to the body of the +\fB@C_ctor_base\fP function, even if the patterns match lines found later in +the file. Furthermore, if one of these three \fBCHECK:\fP directives fail, +FileCheck will recover by continuing to the next block, allowing multiple test +failures to be detected in a single invocation. +.sp +There is no requirement that \fBCHECK\-LABEL:\fP directives contain strings that +correspond to actual syntactic labels in a source or output language: they must +simply uniquely match a single line in the file being verified. +.sp +\fBCHECK\-LABEL:\fP directives cannot contain variable definitions or uses. +.SS Directive modifiers +.sp +A directive modifier can be appended to a directive by following the directive +with \fB{<modifier>}\fP where the only supported value for \fB<modifier>\fP is +\fBLITERAL\fP\&. +.sp +The \fBLITERAL\fP directive modifier can be used to perform a literal match. The +modifier results in the directive not recognizing any syntax to perform regex +matching, variable capture or any substitutions. This is useful when the text +to match would require excessive escaping otherwise.
For example, the +following will perform literal matches rather than considering these as +regular expressions: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Input: [[[10, 20]], [[30, 40]]] +Output %r10: [[10, 20]] +Output %r10: [[30, 40]] + +; CHECK{LITERAL}: [[[10, 20]], [[30, 40]]] +; CHECK\-DAG{LITERAL}: [[30, 40]] +; CHECK\-DAG{LITERAL}: [[10, 20]] +.ft P +.fi +.UNINDENT +.UNINDENT +.SS FileCheck Regex Matching Syntax +.sp +All FileCheck directives take a pattern to match. +For most uses of FileCheck, fixed string matching is perfectly sufficient. For +some things, a more flexible form of matching is desired. To support this, +FileCheck allows you to specify regular expressions in matching strings, +surrounded by double braces: \fB{{yourregex}}\fP\&. FileCheck implements a POSIX +regular expression matcher; it supports Extended POSIX regular expressions +(ERE). Because we want to use fixed string matching for a majority of what we +do, FileCheck has been designed to support mixing and matching fixed string +matching with regular expressions. This allows you to write things like this: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK: movhpd {{[0\-9]+}}(%esp), {{%xmm[0\-7]}} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed. +.sp +Because regular expressions are enclosed with double braces, they are +visually distinct, and you don’t need to use escape characters within the double +braces like you would in C. In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +\fB{{[}][}]}}\fP as your pattern. Or if you are using the repetition count +syntax, for example \fB[[:xdigit:]]{8}\fP to match exactly 8 hex digits, you +would need to add parentheses like this \fB{{([[:xdigit:]]{8})}}\fP to avoid +confusion with FileCheck’s closing double\-brace. 
+.SS FileCheck String Substitution Blocks +.sp +It is often useful to match a pattern and then verify that it occurs again +later in the file. For codegen tests, this can be useful to allow any +register, but verify that that register is used consistently later. To do +this, \fBFileCheck\fP supports string substitution blocks that allow +string variables to be defined and substituted into patterns. Here is a simple +example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK: test5: +; CHECK: notw [[REGISTER:%[a\-z]+]] +; CHECK: andw {{.*}}[[REGISTER]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The first check line matches a regex \fB%[a\-z]+\fP and captures it into the +string variable \fBREGISTER\fP\&. The second line verifies that whatever is in +\fBREGISTER\fP occurs later in the file after an “\fBandw\fP”. \fBFileCheck\fP +string substitution blocks are always contained in \fB[[ ]]\fP pairs, and string +variable names can be formed with the regex \fB[a\-zA\-Z_][a\-zA\-Z0\-9_]*\fP\&. If a +colon follows the name, then it is a definition of the variable; otherwise, it +is a substitution. +.sp +\fBFileCheck\fP variables can be defined multiple times, and substitutions +always get the latest value. Variables can also be substituted later on the +same line they were defined on. For example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK: op [[REG:r[0\-9]+]], [[REG]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Can be useful if you want the operands of \fBop\fP to be the same register, +and don’t care exactly which register it is. +.sp +If \fB\-\-enable\-var\-scope\fP is in effect, variables with names that +start with \fB$\fP are considered to be global. All other variables are +local. All local variables get undefined at the beginning of each +CHECK\-LABEL block. Global variables are not affected by CHECK\-LABEL. +This makes it easier to ensure that individual tests are not affected +by variables set in preceding tests.
+.SS FileCheck Numeric Substitution Blocks +.sp +\fBFileCheck\fP also supports numeric substitution blocks that allow +defining numeric variables and checking for numeric values that satisfy a +numeric expression constraint based on those variables via a numeric +substitution. This allows \fBCHECK:\fP directives to verify a numeric relation +between two numbers, such as the need for consecutive registers to be used. +.sp +The syntax to capture a numeric value is +\fB[[#%<fmtspec>,<NUMVAR>:]]\fP where: +.INDENT 0.0 +.IP \(bu 2 +\fB%<fmtspec>,\fP is an optional format specifier to indicate what number +format to match and the minimum number of digits to expect. +.IP \(bu 2 +\fB<NUMVAR>:\fP is an optional definition of variable \fB<NUMVAR>\fP from the +captured value. +.UNINDENT +.sp +The syntax of \fB<fmtspec>\fP is: \fB#.<precision><conversion specifier>\fP where: +.INDENT 0.0 +.IP \(bu 2 +\fB#\fP is an optional flag available for hex values (see +\fB<conversion specifier>\fP below) which requires the value matched to be +prefixed by \fB0x\fP\&. +.IP \(bu 2 +\fB\&.<precision>\fP is an optional printf\-style precision specifier in which +\fB<precision>\fP indicates the minimum number of digits that the value matched +must have, expecting leading zeros if needed. +.IP \(bu 2 +\fB<conversion specifier>\fP is an optional scanf\-style conversion specifier +to indicate what number format to match (e.g. hex number). Currently +accepted format specifiers are \fB%u\fP, \fB%d\fP, \fB%x\fP and \fB%X\fP\&. If absent, +the format specifier defaults to \fB%u\fP\&. +.UNINDENT +.sp +For example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK: mov r[[#REG:]], 0x[[#%.8X,ADDR:]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +would match \fBmov r5, 0x0000FEFE\fP and set \fBREG\fP to the value \fB5\fP and +\fBADDR\fP to the value \fB0xFEFE\fP\&. Note that due to the precision it would fail +to match \fBmov r5, 0xFEFE\fP\&. +.sp +As a result of the numeric variable definition being optional, it is possible +to only check that a numeric value is present in a given format.
This can be +useful when the value itself is not useful, for instance: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK\-NOT: mov r0, r[[#]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +to check that a value is synthesized rather than moved around. +.sp +The syntax of a numeric substitution is +\fB[[#%<fmtspec>, <constraint> <expr>]]\fP where: +.INDENT 0.0 +.IP \(bu 2 +\fB<fmtspec>\fP is the same format specifier as for defining a variable but +in this context indicating how a numeric expression value should be matched +against. If absent, both components of the format specifier are inferred from +the matching format of the numeric variable(s) used by the expression +constraint if any, and defaults to \fB%u\fP if no numeric variable is used, +denoting that the value should be unsigned with no leading zeros. In case of +conflict between format specifiers of several numeric variables, the +conversion specifier becomes mandatory but the precision specifier remains +optional. +.IP \(bu 2 +\fB<constraint>\fP is the constraint describing how the value to match must +relate to the value of the numeric expression. The only currently accepted +constraint is \fB==\fP for an exact match and is the default if +\fB<constraint>\fP is not provided. No matching constraint must be specified +when the \fB<expr>\fP is empty. +.IP \(bu 2 +\fB<expr>\fP is an expression. An expression is in turn recursively defined +as: +.INDENT 2.0 +.IP \(bu 2 +a numeric operand, or +.IP \(bu 2 +an expression followed by an operator and a numeric operand. +.UNINDENT +.sp +A numeric operand is a previously defined numeric variable, an integer +literal, or a function. Spaces are accepted before, after and between any of +these elements. Numeric operands have 64\-bit precision. Overflow and underflow +are rejected. There is no support for operator precedence, but parentheses +can be used to change the evaluation order. +.UNINDENT +.sp +The supported operators are: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fB+\fP \- Returns the sum of its two operands.
+.IP \(bu 2 +\fB\-\fP \- Returns the difference of its two operands. +.UNINDENT +.UNINDENT +.UNINDENT +.sp +The syntax of a function call is \fB<name>(<arguments>)\fP where: +.INDENT 0.0 +.IP \(bu 2 +\fBname\fP is a predefined string literal. Accepted values are: +.INDENT 2.0 +.IP \(bu 2 +add \- Returns the sum of its two operands. +.IP \(bu 2 +div \- Returns the quotient of its two operands. +.IP \(bu 2 +max \- Returns the largest of its two operands. +.IP \(bu 2 +min \- Returns the smallest of its two operands. +.IP \(bu 2 +mul \- Returns the product of its two operands. +.IP \(bu 2 +sub \- Returns the difference of its two operands. +.UNINDENT +.IP \(bu 2 +\fB<arguments>\fP is a comma separated list of expressions. +.UNINDENT +.sp +For example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK: load r[[#REG:]], [r0] +; CHECK: load r[[#REG+1]], [r1] +; CHECK: Loading from 0x[[#%x,ADDR:]] +; CHECK\-SAME: to 0x[[#ADDR + 7]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The above example would match the text: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +load r5, [r0] +load r6, [r1] +Loading from 0xa0463440 to 0xa0463447 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +but would not match the text: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +load r5, [r0] +load r7, [r1] +Loading from 0xa0463440 to 0xa0463443 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Due to \fB7\fP being unequal to \fB5 + 1\fP and \fBa0463443\fP being unequal to +\fBa0463440 + 7\fP\&. +.sp +A numeric variable can also be defined to the result of a numeric expression, +in which case the numeric expression constraint is checked and if verified the +variable is assigned to the value. The unified syntax for both checking a +numeric expression and capturing its value into a numeric variable is thus +\fB[[#%<fmtspec>,<NUMVAR>: <constraint> <expr>]]\fP with each element as +described previously.
One can use this syntax to make a testcase more +self\-describing by using variables instead of values: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +; CHECK: mov r[[#REG_OFFSET:]], 0x[[#%X,FIELD_OFFSET:12]] +; CHECK\-NEXT: load r[[#]], [r[[#REG_BASE:]], r[[#REG_OFFSET]]] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +which would match: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +mov r4, 0xC +load r6, [r5, r4] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The \fB\-\-enable\-var\-scope\fP option has the same effect on numeric variables as +on string variables. +.sp +Important note: In its current implementation, an expression cannot use a +numeric variable defined earlier in the same CHECK directive. +.SS FileCheck Pseudo Numeric Variables +.sp +Sometimes there’s a need to verify output that contains line numbers of the +match file, e.g. when testing compiler diagnostics. This introduces a certain +fragility of the match file structure, as “\fBCHECK:\fP” lines contain absolute +line numbers in the same file, which have to be updated whenever line numbers +change due to text addition or deletion. +.sp +To support this case, FileCheck expressions understand the \fB@LINE\fP pseudo +numeric variable which evaluates to the line number of the CHECK pattern where +it is found. +.sp +This way match patterns can be put near the relevant test lines and include +relative line number references, for example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +// CHECK: test.cpp:[[# @LINE + 4]]:6: error: expected \(aq;\(aq after top level declarator +// CHECK\-NEXT: {{^int a}} +// CHECK\-NEXT: {{^ \e^}} +// CHECK\-NEXT: {{^ ;}} +int a +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +To support legacy uses of \fB@LINE\fP as a special string variable, +\fBFileCheck\fP also accepts the following uses of \fB@LINE\fP with string +substitution block syntax: \fB[[@LINE]]\fP, \fB[[@LINE+<offset>]]\fP and +\fB[[@LINE\-<offset>]]\fP without any spaces inside the brackets and where +\fBoffset\fP is an integer.
+.SS Matching Newline Characters +.sp +To match newline characters in regular expressions the character class +\fB[[:space:]]\fP can be used. For example, the following pattern: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +// CHECK: DW_AT_location [DW_FORM_sec_offset] ([[DLOC:0x[0\-9a\-f]+]]){{[[:space:]].*}}"intd" +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +matches output of the form (from llvm\-dwarfdump): +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +DW_AT_location [DW_FORM_sec_offset] (0x00000233) +DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000c9] = "intd") +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +letting us set the \fBFileCheck\fP variable \fBDLOC\fP to the desired value +\fB0x00000233\fP, extracted from the line immediately preceding “\fBintd\fP”. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/lit.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/lit.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/lit.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/lit.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,839 @@ +.\" Man page generated from reStructuredText. +. +.TH "LIT" "1" "2021-09-18" "13" "LLVM" +.SH NAME +lit \- LLVM Integrated Tester +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. 
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBlit\fP [\fIoptions\fP] [\fItests\fP] +.SH DESCRIPTION +.sp +\fBlit\fP is a portable tool for executing LLVM and Clang style test +suites, summarizing their results, and providing indication of failures. +\fBlit\fP is designed to be a lightweight testing tool with as simple a +user interface as possible. +.sp +\fBlit\fP should be run with one or more \fItests\fP to run specified on the +command line. Tests can be either individual test files or directories to +search for tests (see \fI\%TEST DISCOVERY\fP). +.sp +Each specified test will be executed (potentially concurrently) and once all +tests have been run \fBlit\fP will print summary information on the number +of tests which passed or failed (see \fI\%TEST STATUS RESULTS\fP). The +\fBlit\fP program will execute with a non\-zero exit code if any tests +fail. +.sp +By default \fBlit\fP will use a succinct progress display and will only +print summary information for test failures. See \fI\%OUTPUT OPTIONS\fP for +options controlling the \fBlit\fP progress display and output. +.sp +\fBlit\fP also includes a number of options for controlling how tests are +executed (specific features may depend on the particular test format). See +\fI\%EXECUTION OPTIONS\fP for more information. +.sp +Finally, \fBlit\fP also supports additional options for only running a +subset of the options specified on the command line, see +\fI\%SELECTION OPTIONS\fP for more information. +.sp +\fBlit\fP parses options from the environment variable \fBLIT_OPTS\fP after +parsing options from the command line. \fBLIT_OPTS\fP is primarily useful for +supplementing or overriding the command\-line options supplied to \fBlit\fP +by \fBcheck\fP targets defined by a project’s build system. 
+.sp +Users interested in the \fBlit\fP architecture or designing a +\fBlit\fP testing implementation should see \fI\%LIT INFRASTRUCTURE\fP\&. +.SH GENERAL OPTIONS +.INDENT 0.0 +.TP +.B \-h, \-\-help +Show the \fBlit\fP help message. +.UNINDENT +.INDENT 0.0 +.TP +.B \-j N, \-\-workers=N +Run \fBN\fP tests in parallel. By default, this is automatically chosen to +match the number of detected available CPUs. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-config\-prefix=NAME +Search for \fB\fINAME\fP\fP\fB\&.cfg\fP and \fB\fINAME\fP\fP\fB\&.site.cfg\fP when searching for +test suites, instead of \fBlit.cfg\fP and \fBlit.site.cfg\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-D NAME[=VALUE], \-\-param NAME[=VALUE] +Add a user defined parameter \fBNAME\fP with the given \fBVALUE\fP (or the empty +string if not given). The meaning and use of these parameters is test suite +dependent. +.UNINDENT +.SH OUTPUT OPTIONS +.INDENT 0.0 +.TP +.B \-q, \-\-quiet +Suppress any output except for test failures. +.UNINDENT +.INDENT 0.0 +.TP +.B \-s, \-\-succinct +Show less output, for example don’t show information on tests that pass. +Also show a progress bar, unless \fB\-\-no\-progress\-bar\fP is specified. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-verbose +Show more information on test failures, for example the entire test output +instead of just the test result. +.UNINDENT +.INDENT 0.0 +.TP +.B \-vv, \-\-echo\-all\-commands +Echo all commands to stdout, as they are being executed. +This can be valuable for debugging test failures, as the last echoed command +will be the one which has failed. +\fBlit\fP normally inserts a no\-op command (\fB:\fP in the case of bash) +with argument \fB\(aqRUN: at line N\(aq\fP before each command pipeline, and this +option also causes those no\-op commands to be echoed to stdout to help you +locate the source line of the failed command. +This option implies \fB\-\-verbose\fP\&. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-a, \-\-show\-all +Show more information about all tests, for example the entire test +commandline and output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-progress\-bar +Do not use curses based progress bar. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-show\-unsupported +Show the names of unsupported tests. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-show\-xfail +Show the names of tests that were expected to fail. +.UNINDENT +.SH EXECUTION OPTIONS +.INDENT 0.0 +.TP +.B \-\-path=PATH +Specify an additional \fBPATH\fP to use when searching for executables in tests. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-vg +Run individual tests under valgrind (using the memcheck tool). The +\fB\-\-error\-exitcode\fP argument for valgrind is used so that valgrind failures +will cause the program to exit with a non\-zero status. +.sp +When this option is enabled, \fBlit\fP will also automatically provide a +“\fBvalgrind\fP” feature that can be used to conditionally disable (or expect +failure in) certain tests. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-vg\-arg=ARG +When \fI\%\-\-vg\fP is used, specify an additional argument to pass to +\fBvalgrind\fP itself. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-vg\-leak +When \fI\%\-\-vg\fP is used, enable memory leak checks. When this option is +enabled, \fBlit\fP will also automatically provide a “\fBvg_leak\fP” +feature that can be used to conditionally disable (or expect failure in) +certain tests. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-time\-tests +Track the wall time individual tests take to execute and include the results +in the summary output. This is useful for determining which tests in a test +suite take the most time to execute. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-ignore\-fail +Exit with status zero even if some tests fail. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-indirectly\-run\-check +Do not error if a test would not be run if the user had specified the +containing directory instead of naming the test directly.
+.UNINDENT +.SH SELECTION OPTIONS +.sp +By default, \fIlit\fP will run failing tests first, then run tests in descending +execution time order to optimize concurrency. The execution order can be +changed using the \fI\%\-\-order\fP option. +.sp +The timing data is stored in the \fItest_exec_root\fP in a file named +\fI\&.lit_test_times.txt\fP\&. If this file does not exist, then \fIlit\fP checks the +\fItest_source_root\fP for the file to optionally accelerate clean builds. +.INDENT 0.0 +.TP +.B \-\-shuffle +Run the tests in a random order, not failing/slowest first. Deprecated, +use \fI\%\-\-order\fP instead. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-max\-failures N +Stop execution after the given number \fBN\fP of failures. +An integer argument should be passed on the command line +prior to execution. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-max\-tests=N +Run at most \fBN\fP tests and then terminate. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-max\-time=N +Spend at most \fBN\fP seconds (approximately) running tests and then terminate. +Note that this is not an alias for \fI\%\-\-timeout\fP; the two are +different kinds of maximums. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-num\-shards=M +Divide the set of selected tests into \fBM\fP equal\-sized subsets or +“shards”, and run only one of them. Must be used with the +\fB\-\-run\-shard=N\fP option, which selects the shard to run. The environment +variable \fBLIT_NUM_SHARDS\fP can also be used in place of this +option. These two options provide a coarse mechanism for partitioning large +testsuites, for parallel execution on separate machines (say in a large +testing farm). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-order={lexical,random,smart} +Define the order in which tests are run. The supported values are: +.INDENT 7.0 +.IP \(bu 2 +lexical \- tests will be run in lexical order according to the test file +path. This option is useful when predictable test order is desired. +.IP \(bu 2 +random \- tests will be run in random order. 
+.IP \(bu 2 +smart \- tests that failed previously will be run first, then the remaining +tests, all in descending execution time order. This is the default as it +optimizes concurrency. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-run\-shard=N +Select which shard to run, assuming the \fB\-\-num\-shards=M\fP option was +provided. The two options must be used together, and the value of \fBN\fP +must be in the range \fB1..M\fP\&. The environment variable +\fBLIT_RUN_SHARD\fP can also be used in place of this option. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-timeout=N +Spend at most \fBN\fP seconds (approximately) running each individual test. +\fB0\fP means no time limit, and \fB0\fP is the default. Note that this is not an +alias for \fI\%\-\-max\-time\fP; the two are different kinds of maximums. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-filter=REGEXP +Run only those tests whose name matches the regular expression specified in +\fBREGEXP\fP\&. The environment variable \fBLIT_FILTER\fP can also be used in place +of this option, which is especially useful in environments where the call +to \fBlit\fP is issued indirectly. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-filter\-out=REGEXP +Filter out those tests whose name matches the regular expression specified in +\fBREGEXP\fP\&. The environment variable \fBLIT_FILTER_OUT\fP can also be used in +place of this option, which is especially useful in environments where the +call to \fBlit\fP is issued indirectly. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-xfail=LIST +Treat those tests whose name is in the semicolon separated list \fBLIST\fP as +\fBXFAIL\fP\&. This can be helpful when one does not want to modify the test +suite. The environment variable \fBLIT_XFAIL\fP can also be used in place of +this option, which is especially useful in environments where the call to +\fBlit\fP is issued indirectly. +.sp +A test name can be specified as a file name relative to the test suite directory.
+For example: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +LIT_XFAIL="affinity/kmp\-hw\-subset.c;offloading/memory_manager.cpp" +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +In this case, all of the following tests are treated as \fBXFAIL\fP: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +libomp :: affinity/kmp\-hw\-subset.c +libomptarget :: nvptx64\-nvidia\-cuda :: offloading/memory_manager.cpp +libomptarget :: x86_64\-pc\-linux\-gnu :: offloading/memory_manager.cpp +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Alternatively, a test name can be specified as the full test name +reported in LIT output. For example, we can adjust the previous +example not to treat the \fBnvptx64\-nvidia\-cuda\fP version of +\fBoffloading/memory_manager.cpp\fP as XFAIL: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +LIT_XFAIL="affinity/kmp\-hw\-subset.c;libomptarget :: x86_64\-pc\-linux\-gnu :: offloading/memory_manager.cpp" +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-xfail\-not=LIST +Do not treat the specified tests as \fBXFAIL\fP\&. The environment variable +\fBLIT_XFAIL_NOT\fP can also be used in place of this option. The syntax is the +same as for \fI\%\-\-xfail\fP and \fBLIT_XFAIL\fP\&. \fI\%\-\-xfail\-not\fP and +\fBLIT_XFAIL_NOT\fP always override all other \fBXFAIL\fP specifications, +including an \fI\%\-\-xfail\fP appearing later on the command line. The +primary purpose is to suppress an \fBXPASS\fP result without modifying a test +case that uses the \fBXFAIL\fP directive. +.UNINDENT +.SH ADDITIONAL OPTIONS +.INDENT 0.0 +.TP +.B \-\-debug +Run \fBlit\fP in debug mode, for debugging configuration issues and +\fBlit\fP itself. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-show\-suites +List the discovered test suites and exit. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-show\-tests +List all of the discovered tests and exit. +.UNINDENT +.SH EXIT STATUS +.sp +\fBlit\fP will exit with an exit code of 1 if there are any FAIL or XPASS +results. Otherwise, it will exit with the status 0. 
Other exit codes are used +for non\-test related failures (for example a user error or an internal program +error). +.SH TEST DISCOVERY +.sp +The inputs passed to \fBlit\fP can be either individual tests, or entire +directories or hierarchies of tests to run. When \fBlit\fP starts up, the +first thing it does is convert the inputs into a complete list of tests to run +as part of \fItest discovery\fP\&. +.sp +In the \fBlit\fP model, every test must exist inside some \fItest suite\fP\&. +\fBlit\fP resolves the inputs specified on the command line to test suites +by searching upwards from the input path until it finds a \fBlit.cfg\fP or +\fBlit.site.cfg\fP file. These files serve as both a marker of test suites +and as configuration files which \fBlit\fP loads in order to understand +how to find and run the tests inside the test suite. +.sp +Once \fBlit\fP has mapped the inputs into test suites it traverses the +list of inputs adding tests for individual files and recursively searching for +tests in directories. +.sp +This behavior makes it easy to specify a subset of tests to run, while still +allowing the test suite configuration to control exactly how tests are +interpreted. In addition, \fBlit\fP always identifies tests by the test +suite they are in, and their relative path inside the test suite. For +appropriately configured projects, this allows \fBlit\fP to provide +convenient and flexible support for out\-of\-tree builds. +.SH TEST STATUS RESULTS +.sp +Each test ultimately produces one of the following eight results: +.sp +\fBPASS\fP +.INDENT 0.0 +.INDENT 3.5 +The test succeeded. +.UNINDENT +.UNINDENT +.sp +\fBFLAKYPASS\fP +.INDENT 0.0 +.INDENT 3.5 +The test succeeded after being re\-run more than once. This only applies to +tests containing an \fBALLOW_RETRIES:\fP annotation. +.UNINDENT +.UNINDENT +.sp +\fBXFAIL\fP +.INDENT 0.0 +.INDENT 3.5 +The test failed, but that is expected. 
This is used for test formats which allow +specifying that a test does not currently work, but wish to leave it in the test +suite. +.UNINDENT +.UNINDENT +.sp +\fBXPASS\fP +.INDENT 0.0 +.INDENT 3.5 +The test succeeded, but it was expected to fail. This is used for tests which +were specified as expected to fail, but are now succeeding (generally because +the feature they test was broken and has been fixed). +.UNINDENT +.UNINDENT +.sp +\fBFAIL\fP +.INDENT 0.0 +.INDENT 3.5 +The test failed. +.UNINDENT +.UNINDENT +.sp +\fBUNRESOLVED\fP +.INDENT 0.0 +.INDENT 3.5 +The test result could not be determined. For example, this occurs when the test +could not be run, the test itself is invalid, or the test was interrupted. +.UNINDENT +.UNINDENT +.sp +\fBUNSUPPORTED\fP +.INDENT 0.0 +.INDENT 3.5 +The test is not supported in this environment. This is used by test formats +which can report unsupported tests. +.UNINDENT +.UNINDENT +.sp +\fBTIMEOUT\fP +.INDENT 0.0 +.INDENT 3.5 +The test was run, but it timed out before it was able to complete. This is +considered a failure. +.UNINDENT +.UNINDENT +.sp +Depending on the test format tests may produce additional information about +their status (generally only for failures). See the \fI\%OUTPUT OPTIONS\fP +section for more information. +.SH LIT INFRASTRUCTURE +.sp +This section describes the \fBlit\fP testing architecture for users interested in +creating a new \fBlit\fP testing implementation, or extending an existing one. +.sp +\fBlit\fP proper is primarily an infrastructure for discovering and running +arbitrary tests, and to expose a single convenient interface to these +tests. \fBlit\fP itself doesn’t know how to run tests, rather this logic is +defined by \fItest suites\fP\&. +.SS TEST SUITES +.sp +As described in \fI\%TEST DISCOVERY\fP, tests are always located inside a \fItest +suite\fP\&. 
Test suites serve to define the format of the tests they contain, the +logic for finding those tests, and any additional information to run the tests. +.sp +\fBlit\fP identifies test suites as directories containing \fBlit.cfg\fP or +\fBlit.site.cfg\fP files (see also \fI\%\-\-config\-prefix\fP). Test suites are +initially discovered by recursively searching up the directory hierarchy for +all the input files passed on the command line. You can use +\fI\%\-\-show\-suites\fP to display the discovered test suites at startup. +.sp +Once a test suite is discovered, its config file is loaded. Config files +themselves are Python modules which will be executed. When the config file is +executed, two important global variables are predefined: +.sp +\fBlit_config\fP +.INDENT 0.0 +.INDENT 3.5 +The global \fBlit\fP configuration object (a \fILitConfig\fP instance), which defines +the builtin test formats, global configuration parameters, and other helper +routines for implementing test configurations. +.UNINDENT +.UNINDENT +.sp +\fBconfig\fP +.INDENT 0.0 +.INDENT 3.5 +This is the config object (a \fITestingConfig\fP instance) for the test suite, +which the config file is expected to populate. The following variables are also +available on the \fIconfig\fP object, some of which must be set by the config and +others are optional or predefined: +.sp +\fBname\fP \fI[required]\fP The name of the test suite, for use in reports and +diagnostics. +.sp +\fBtest_format\fP \fI[required]\fP The test format object which will be used to +discover and run tests in the test suite. Generally this will be a builtin test +format available from the \fIlit.formats\fP module. +.sp +\fBtest_source_root\fP The filesystem path to the test suite root. For out\-of\-dir +builds this is the directory that will be scanned for tests. +.sp +\fBtest_exec_root\fP For out\-of\-dir builds, the path to the test suite root inside +the object directory. 
This is where tests will be run and temporary output files +placed. +.sp +\fBenvironment\fP A dictionary representing the environment to use when executing +tests in the suite. +.sp +\fBstandalone_tests\fP When true, mark a directory with tests expected to be run +standalone. Test discovery is disabled for that directory and +\fI\-\-no\-indirectly\-run\-check\fP is in effect. \fIlit.suffixes\fP and \fIlit.excludes\fP +must be empty when this variable is true. +.sp +\fBsuffixes\fP For \fBlit\fP test formats which scan directories for tests, this +variable is a list of suffixes to identify test files. Used by: \fIShTest\fP\&. +.sp +\fBsubstitutions\fP For \fBlit\fP test formats which substitute variables into a test +script, the list of substitutions to perform. Used by: \fIShTest\fP\&. +.sp +\fBunsupported\fP Mark an unsupported directory, all tests within it will be +reported as unsupported. Used by: \fIShTest\fP\&. +.sp +\fBparent\fP The parent configuration, this is the config object for the directory +containing the test suite, or None. +.sp +\fBroot\fP The root configuration. This is the top\-most \fBlit\fP configuration in +the project. +.sp +\fBpipefail\fP Normally a test using a shell pipe fails if any of the commands +on the pipe fail. If this is not desired, setting this variable to false +makes the test fail only if the last command in the pipe fails. +.sp +\fBavailable_features\fP A set of features that can be used in \fIXFAIL\fP, +\fIREQUIRES\fP, and \fIUNSUPPORTED\fP directives. +.UNINDENT +.UNINDENT +.SS TEST DISCOVERY +.sp +Once test suites are located, \fBlit\fP recursively traverses the source +directory (following \fItest_source_root\fP) looking for tests. When \fBlit\fP +enters a sub\-directory, it first checks to see if a nested test suite is +defined in that directory. If so, it loads that test suite recursively, +otherwise it instantiates a local test config for the directory (see +\fI\%LOCAL CONFIGURATION FILES\fP).
+.sp +Tests are identified by the test suite they are contained within, and the +relative path inside that suite. Note that the relative path may not refer to +an actual file on disk; some test formats (such as \fIGoogleTest\fP) define +“virtual tests” which have a path that contains both the path to the actual +test file and a subpath to identify the virtual test. +.SS LOCAL CONFIGURATION FILES +.sp +When \fBlit\fP loads a subdirectory in a test suite, it instantiates a +local test configuration by cloning the configuration for the parent directory +— the root of this configuration chain will always be a test suite. Once the +test configuration is cloned \fBlit\fP checks for a \fIlit.local.cfg\fP file +in the subdirectory. If present, this file will be loaded and can be used to +specialize the configuration for each individual directory. This facility can +be used to define subdirectories of optional tests, or to change other +configuration parameters — for example, to change the test format, or the +suffixes which identify test files. +.SS SUBSTITUTIONS +.sp +\fBlit\fP allows patterns to be substituted inside RUN commands. It also +provides the following base set of substitutions, which are defined in +TestRunner.py: +.INDENT 0.0 +.INDENT 3.5 +.TS +center; +|l|l|. 
+_ +T{ +Macro +T} T{ +Substitution +T} +_ +T{ +%s +T} T{ +source path (path to the file currently being run) +T} +_ +T{ +%S +T} T{ +source dir (directory of the file currently being run) +T} +_ +T{ +%p +T} T{ +same as %S +T} +_ +T{ +%{pathsep} +T} T{ +path separator +T} +_ +T{ +%t +T} T{ +temporary file name unique to the test +T} +_ +T{ +%basename_t +T} T{ +The last path component of %t but without the \fB\&.tmp\fP extension +T} +_ +T{ +%T +T} T{ +parent directory of %t (not unique, deprecated, do not use) +T} +_ +T{ +%% +T} T{ +% +T} +_ +T{ +%/s +T} T{ +%s but \fB\e\fP is replaced by \fB/\fP +T} +_ +T{ +%/S +T} T{ +%S but \fB\e\fP is replaced by \fB/\fP +T} +_ +T{ +%/p +T} T{ +%p but \fB\e\fP is replaced by \fB/\fP +T} +_ +T{ +%/t +T} T{ +%t but \fB\e\fP is replaced by \fB/\fP +T} +_ +T{ +%/T +T} T{ +%T but \fB\e\fP is replaced by \fB/\fP +T} +_ +T{ +%{/s:regex_replacement} +T} T{ +%/s but escaped for use in the replacement of a \fBs@@@\fP command in sed +T} +_ +T{ +%{/S:regex_replacement} +T} T{ +%/S but escaped for use in the replacement of a \fBs@@@\fP command in sed +T} +_ +T{ +%{/p:regex_replacement} +T} T{ +%/p but escaped for use in the replacement of a \fBs@@@\fP command in sed +T} +_ +T{ +%{/t:regex_replacement} +T} T{ +%/t but escaped for use in the replacement of a \fBs@@@\fP command in sed +T} +_ +T{ +%{/T:regex_replacement} +T} T{ +%/T but escaped for use in the replacement of a \fBs@@@\fP command in sed +T} +_ +T{ +%:s +T} T{ +On Windows, %/s but a \fB:\fP is removed if it's the second character. +Otherwise, %s but with a single leading \fB/\fP removed. +T} +_ +T{ +%:S +T} T{ +On Windows, %/S but a \fB:\fP is removed if it's the second character. +Otherwise, %S but with a single leading \fB/\fP removed. +T} +_ +T{ +%:p +T} T{ +On Windows, %/p but a \fB:\fP is removed if it's the second character. +Otherwise, %p but with a single leading \fB/\fP removed. +T} +_ +T{ +%:t +T} T{ +On Windows, %/t but a \fB:\fP is removed if it's the second character.
+Otherwise, %t but with a single leading \fB/\fP removed. +T} +_ +T{ +%:T +T} T{ +On Windows, %/T but a \fB:\fP is removed if it's the second character. +Otherwise, %T but with a single leading \fB/\fP removed. +T} +_ +.TE +.UNINDENT +.UNINDENT +.sp +Other substitutions are provided that are variations on this base set and +further substitution patterns can be defined by each test module. See the +modules \fI\%LOCAL CONFIGURATION FILES\fP\&. +.sp +By default, substitutions are expanded exactly once, so that if e.g. a +substitution \fB%build\fP is defined on top of another substitution \fB%cxx\fP, +\fB%build\fP will expand to \fB%cxx\fP textually, not to what \fB%cxx\fP expands to. +However, if the \fBrecursiveExpansionLimit\fP property of the \fBTestingConfig\fP +is set to a non\-negative integer, substitutions will be expanded recursively +until that limit is reached. It is an error if the limit is reached and +expanding substitutions again would yield a different result. +.sp +More detailed information on substitutions can be found in the +\&../TestingGuide\&. +.SS TEST RUN OUTPUT FORMAT +.sp +The \fBlit\fP output for a test run conforms to the following schema, in +both short and verbose modes (although in short mode no PASS lines will be +shown). This schema has been chosen to be relatively easy to reliably parse by +a machine (for example in buildbot log scraping), and for other tools to +generate. +.sp +Each test result is expected to appear on a line that matches: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +<result code>: <test name> (<progress info>) +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +where \fB<result\-code>\fP is a standard test result such as PASS, FAIL, XFAIL, +XPASS, UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and +REGRESSED are also allowed. +.sp +The \fB<test\-name>\fP field can consist of an arbitrary string containing no +newline. +.sp +The \fB<progress\-info>\fP field can be used to report progress information such +as (1/300) or can be empty, but even when empty the parentheses are required.
+.sp +Each test result may include additional (multiline) log information in the +following format: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +<log delineator> TEST \(aq(<test\-name>)\(aq <trailing delineator> +\&... log message ... +<log delineator> +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +where \fB<test\-name>\fP should be the name of a preceding reported test, \fB<log delineator>\fP is a string of “*” characters \fIat least\fP four characters long +(the recommended length is 20), and \fB<trailing delineator>\fP is an arbitrary +(unparsed) string. +.sp +The following is an example of a test run output which consists of four tests A, +B, C, and D, and a log message for the failing test C: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +PASS: A (1 of 4) +PASS: B (2 of 4) +FAIL: C (3 of 4) +******************** TEST \(aqC\(aq FAILED ******************** +Test \(aqC\(aq failed as a result of exit code 1. +******************** +PASS: D (4 of 4) +.ft P +.fi +.UNINDENT +.UNINDENT +.SS LIT EXAMPLE TESTS +.sp +The \fBlit\fP distribution contains several example implementations of +test suites in the \fIExampleTests\fP directory. +.SH SEE ALSO +.sp +valgrind(1) +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llc.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llc.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llc.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llc.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,295 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLC" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llc \- LLVM static compiler +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. 
nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllc\fP [\fIoptions\fP] [\fIfilename\fP] +.SH DESCRIPTION +.sp +The \fBllc\fP command compiles LLVM source inputs into assembly language +for a specified architecture. The assembly language output can then be passed +through a native assembler and linker to generate a native executable. +.sp +The choice of architecture for the output assembly code is automatically +determined from the input file, unless the \fI\%\-march\fP option is used to +override the default. +.SH OPTIONS +.sp +If \fBfilename\fP is “\fB\-\fP” or omitted, \fBllc\fP reads from standard input. +Otherwise, it will read from \fBfilename\fP\&. Inputs can be in either the LLVM assembly +language format (\fB\&.ll\fP) or the LLVM bitcode format (\fB\&.bc\fP). +.sp +If the \fI\%\-o\fP option is omitted, then \fBllc\fP will send its output +to standard output if the input is from standard input. If the \fI\%\-o\fP +option specifies “\fB\-\fP”, then the output will also be sent to standard output. +.sp +If no \fI\%\-o\fP option is specified and an input file other than “\fB\-\fP” is +specified, then \fBllc\fP creates the output filename by taking the input +filename, removing any existing \fB\&.bc\fP extension, and adding a \fB\&.s\fP suffix. +.sp +Other \fBllc\fP options are described below. +.SS End\-user Options +.INDENT 0.0 +.TP +.B \-help +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o <filename> +Use \fB<filename>\fP as the output filename. See the summary above for more +details. +.UNINDENT +.INDENT 0.0 +.TP +.B \-O=uint +Generate code at different optimization levels.
These correspond to the +\fB\-O0\fP, \fB\-O1\fP, \fB\-O2\fP, and \fB\-O3\fP optimization levels used by +\fBclang\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-mtriple=<target triple> +Override the target triple specified in the input file with the specified +string. +.UNINDENT +.INDENT 0.0 +.TP +.B \-march=<arch> +Specify the architecture for which to generate assembly, overriding the target +encoded in the input file. See the output of \fBllc \-help\fP for a list of +valid architectures. By default this is inferred from the target triple or +autodetected to the current architecture. +.UNINDENT +.INDENT 0.0 +.TP +.B \-mcpu=<cpuname> +Specify a specific chip in the current architecture to generate code for. +By default this is inferred from the target triple and autodetected to +the current architecture. For a list of available CPUs, use: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +llvm\-as < /dev/null | llc \-march=xyz \-mcpu=help +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-filetype=<output file type> +Specify what kind of output \fBllc\fP should generate. Options are: \fBasm\fP +for textual assembly (\fB\(aq.s\(aq\fP), \fBobj\fP for native object files (\fB\(aq.o\(aq\fP) +and \fBnull\fP for not emitting anything (for performance testing). +.sp +Note that not all targets support all options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-mattr=a1,+a2,\-a3,... +Override or control specific attributes of the target, such as whether SIMD +operations are enabled or not. The default set of attributes is set by the +current CPU. For a list of available attributes, use: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +llvm\-as < /dev/null | llc \-march=xyz \-mattr=help +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-frame\-pointer +Specify effect of frame pointer elimination optimization (all,non\-leaf,none). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-disable\-excess\-fp\-precision +Disable optimizations that may produce excess precision for floating point.
+Note that this option can dramatically slow down code on some systems +(e.g. X86). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-enable\-no\-infs\-fp\-math +Enable optimizations that assume no Inf values. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-enable\-no\-nans\-fp\-math +Enable optimizations that assume no NAN values. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-enable\-no\-signed\-zeros\-fp\-math +Enable FP math optimizations that assume the sign of 0 is insignificant. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-enable\-no\-trapping\-fp\-math +Enable setting the FP exceptions build attribute not to use exceptions. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-enable\-unsafe\-fp\-math +Enable optimizations that make unsafe assumptions about IEEE math (e.g. that +addition is associative) or may not work for all input ranges. These +optimizations allow the code generator to make use of some instructions which +would otherwise not be usable (such as \fBfsin\fP on X86). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-stats +Print statistics recorded by code\-generation passes. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-time\-passes +Record the amount of time needed for each pass and print a report to standard +error. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-load=<dso_path> +Dynamically load \fBdso_path\fP (a path to a dynamically shared object) that +implements an LLVM target. This will permit the target name to be used with +the \fI\%\-march\fP option so that code can be generated for that target. +.UNINDENT +.INDENT 0.0 +.TP +.B \-meabi=[default|gnu|4|5] +Specify which EABI version the generated code should conform to. Valid EABI versions are \fIgnu\fP, +\fI4\fP and \fI5\fP\&. Default value (\fIdefault\fP) depends on the triple. +.UNINDENT +.INDENT 0.0 +.TP +.B \-stack\-size\-section +Emit the .stack_sizes section which contains stack size metadata. The section +contains an array of pairs of function symbol values (pointer size) and stack +sizes (unsigned LEB128). The stack size values only include the space allocated +in the function prologue.
Functions with dynamic stack allocations are not +included. +.UNINDENT +.INDENT 0.0 +.TP +.B \-remarks\-section +Emit the __remarks (MachO) section which contains metadata about remark +diagnostics. +.UNINDENT +.SS Tuning/Configuration Options +.INDENT 0.0 +.TP +.B \-\-print\-after\-isel +Print generated machine code after instruction selection (useful for debugging). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-regalloc= +Specify the register allocator to use. +Valid register allocators are: +.sp +\fIbasic\fP +.INDENT 7.0 +.INDENT 3.5 +Basic register allocator. +.UNINDENT +.UNINDENT +.sp +\fIfast\fP +.INDENT 7.0 +.INDENT 3.5 +Fast register allocator. It is the default for unoptimized code. +.UNINDENT +.UNINDENT +.sp +\fIgreedy\fP +.INDENT 7.0 +.INDENT 3.5 +Greedy register allocator. It is the default for optimized code. +.UNINDENT +.UNINDENT +.sp +\fIpbqp\fP +.INDENT 7.0 +.INDENT 3.5 +Register allocator based on ‘Partitioned Boolean Quadratic Programming’. +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-spiller= +Specify the spiller to use for register allocators that support it. Currently +this option is used only by the linear scan register allocator. The default +\fBspiller\fP is \fIlocal\fP\&. Valid spillers are: +.sp +\fIsimple\fP +.INDENT 7.0 +.INDENT 3.5 +Simple spiller +.UNINDENT +.UNINDENT +.sp +\fIlocal\fP +.INDENT 7.0 +.INDENT 3.5 +Local spiller +.UNINDENT +.UNINDENT +.UNINDENT +.SS Intel IA\-32\-specific Options +.INDENT 0.0 +.TP +.B \-\-x86\-asm\-syntax=[att|intel] +Specify whether to emit assembly code in AT&T syntax (the default) or Intel +syntax. +.UNINDENT +.SH EXIT STATUS +.sp +If \fBllc\fP succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non\-zero value. +.SH SEE ALSO +.sp +\fBlli(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/lldb-tblgen.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/lldb-tblgen.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/lldb-tblgen.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/lldb-tblgen.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,51 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLDB-TBLGEN" "1" "2021-09-18" "13" "LLVM" +.SH NAME +lldb-tblgen \- Description to C++ Code for LLDB +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBlldb\-tblgen\fP [\fIoptions\fP] [\fIfilename\fP] +.SH DESCRIPTION +.sp +\fBlldb\-tblgen\fP is a program that translates compiler\-related target +description (\fB\&.td\fP) files into C++ code and other output formats. Most +users of LLVM will not need to use this program. It is used only for writing +parts of the compiler. +.sp +Please see tblgen \- Description to C++ Code +for a description of the \fIfilename\fP argument and options, including the +options common to all \fB*\-tblgen\fP programs. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. 
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/lli.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/lli.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/lli.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/lli.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,298 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLI" "1" "2021-09-18" "13" "LLVM" +.SH NAME +lli \- directly execute programs from LLVM bitcode +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBlli\fP [\fIoptions\fP] [\fIfilename\fP] [\fIprogram args\fP] +.SH DESCRIPTION +.sp +\fBlli\fP directly executes programs in LLVM bitcode format. It takes a program +in LLVM bitcode format and executes it using a just\-in\-time compiler or an +interpreter. +.sp +\fBlli\fP is \fInot\fP an emulator. It will not execute IR of different architectures +and it can only interpret (or JIT\-compile) for the host architecture. +.sp +The JIT compiler takes the same arguments as other tools, like \fBllc\fP, +but they don’t necessarily work for the interpreter. +.sp +If \fIfilename\fP is not specified, then \fBlli\fP reads the LLVM bitcode for the +program from standard input. +.sp +The optional \fIargs\fP specified on the command line are passed to the program as +arguments. 
+.SH GENERAL OPTIONS +.INDENT 0.0 +.TP +.B \-fake\-argv0=executable +Override the \fBargv[0]\fP value passed into the executing program. +.UNINDENT +.INDENT 0.0 +.TP +.B \-force\-interpreter={false,true} +If set to true, use the interpreter even if a just\-in\-time compiler is available +for this architecture. Defaults to false. +.UNINDENT +.INDENT 0.0 +.TP +.B \-help +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-load=pluginfilename +Causes \fBlli\fP to load the plugin (shared object) named \fIpluginfilename\fP and use +it for optimization. +.UNINDENT +.INDENT 0.0 +.TP +.B \-stats +Print statistics from the code\-generation passes. This is only meaningful for +the just\-in\-time compiler, at present. +.UNINDENT +.INDENT 0.0 +.TP +.B \-time\-passes +Record the amount of time needed for each code\-generation pass and print it to +standard error. +.UNINDENT +.INDENT 0.0 +.TP +.B \-version +Print out the version of \fBlli\fP and exit without doing anything else. +.UNINDENT +.SH TARGET OPTIONS +.INDENT 0.0 +.TP +.B \-mtriple=target triple +Override the target triple specified in the input bitcode file with the +specified string. This may result in a crash if you pick an +architecture which is not compatible with the current system. +.UNINDENT +.INDENT 0.0 +.TP +.B \-march=arch +Specify the architecture for which to generate assembly, overriding the target +encoded in the bitcode file. See the output of \fBllc \-help\fP for a list of +valid architectures. By default this is inferred from the target triple or +autodetected to the current architecture. +.UNINDENT +.INDENT 0.0 +.TP +.B \-mcpu=cpuname +Specify a specific chip in the current architecture to generate code for. +By default this is inferred from the target triple and autodetected to +the current architecture. For a list of available CPUs, use: +\fBllvm\-as < /dev/null | llc \-march=xyz \-mcpu=help\fP +.UNINDENT +.INDENT 0.0 +.TP +.B \-mattr=a1,+a2,\-a3,... 
+Override or control specific attributes of the target, such as whether SIMD +operations are enabled or not. The default set of attributes is set by the +current CPU. For a list of available attributes, use: +\fBllvm\-as < /dev/null | llc \-march=xyz \-mattr=help\fP +.UNINDENT +.SH FLOATING POINT OPTIONS +.INDENT 0.0 +.TP +.B \-disable\-excess\-fp\-precision +Disable optimizations that may increase floating point precision. +.UNINDENT +.INDENT 0.0 +.TP +.B \-enable\-no\-infs\-fp\-math +Enable optimizations that assume no Inf values. +.UNINDENT +.INDENT 0.0 +.TP +.B \-enable\-no\-nans\-fp\-math +Enable optimizations that assume no NAN values. +.UNINDENT +.INDENT 0.0 +.TP +.B \-enable\-unsafe\-fp\-math +Causes \fBlli\fP to enable optimizations that may decrease floating point +precision. +.UNINDENT +.INDENT 0.0 +.TP +.B \-soft\-float +Causes \fBlli\fP to generate software floating point library calls instead of +equivalent hardware instructions. +.UNINDENT +.SH CODE GENERATION OPTIONS +.INDENT 0.0 +.TP +.B \-code\-model=model +Choose the code model from: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +default: Target default code model +tiny: Tiny code model +small: Small code model +kernel: Kernel code model +medium: Medium code model +large: Large code model +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-disable\-post\-RA\-scheduler +Disable scheduling after register allocation. +.UNINDENT +.INDENT 0.0 +.TP +.B \-disable\-spill\-fusing +Disable fusing of spill code into instructions. +.UNINDENT +.INDENT 0.0 +.TP +.B \-jit\-enable\-eh +Exception handling should be enabled in the just\-in\-time compiler. +.UNINDENT +.INDENT 0.0 +.TP +.B \-join\-liveintervals +Coalesce copies (default=true). +.UNINDENT +.INDENT 0.0 +.TP +.B \-nozero\-initialized\-in\-bss +Don’t place zero\-initialized symbols into the BSS section. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-pre\-RA\-sched=scheduler +Instruction schedulers available (before register allocation): +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +=default: Best scheduler for the target +=none: No scheduling: breadth first sequencing +=simple: Simple two pass scheduling: minimize critical path and maximize processor utilization +=simple\-noitin: Simple two pass scheduling: Same as simple except using generic latency +=list\-burr: Bottom\-up register reduction list scheduling +=list\-tdrr: Top\-down register reduction list scheduling +=list\-td: Top\-down list scheduler +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-regalloc=allocator +Register allocator to use (default=linearscan) +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +=bigblock: Big\-block register allocator +=linearscan: linear scan register allocator =local \- local register allocator +=simple: simple register allocator +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-relocation\-model=model +Choose relocation model from: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +=default: Target default relocation model +=static: Non\-relocatable code =pic \- Fully relocatable, position independent code +=dynamic\-no\-pic: Relocatable external references, non\-relocatable code +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-spiller +Spiller to use (default=local) +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +=simple: simple spiller +=local: local spiller +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-x86\-asm\-syntax=syntax +Choose style of code to emit from X86 backend: +.INDENT 7.0 +.INDENT 3.5 +.sp +.nf +.ft C +=att: Emit AT&T\-style assembly +=intel: Emit Intel\-style assembly +.ft P +.fi +.UNINDENT +.UNINDENT +.UNINDENT +.SH EXIT STATUS +.sp +If \fBlli\fP fails to load the program, it will exit with an exit code of 1. +Otherwise, it will return the exit code of the program it executes. 
+.SH SEE ALSO +.sp +\fBllc(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-addr2line.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-addr2line.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-addr2line.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-addr2line.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,72 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-ADDR2LINE" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-addr2line \- a drop-in replacement for addr2line +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-addr2line\fP [\fIoptions\fP] +.SH DESCRIPTION +.sp +\fBllvm\-addr2line\fP is an alias for the \fBllvm\-symbolizer(1)\fP +tool with different defaults. The goal is to make it a drop\-in replacement for +GNU’s \fBaddr2line\fP\&. +.sp +Here are some of those differences: +.INDENT 0.0 +.IP \(bu 2 +\fBllvm\-addr2line\fP interprets all addresses as hexadecimal and ignores an +optional \fB0x\fP prefix, whereas \fBllvm\-symbolizer\fP attempts to determine +the base from the literal’s prefix and defaults to decimal if there is no +prefix. 
+.IP \(bu 2 +\fBllvm\-addr2line\fP defaults not to print function names. Use \fI\%\-f\fP to enable +that. +.IP \(bu 2 +\fBllvm\-addr2line\fP defaults not to demangle function names. Use \fI\%\-C\fP to +switch the demangling on. +.IP \(bu 2 +\fBllvm\-addr2line\fP defaults not to print inlined frames. Use \fI\%\-i\fP to show +inlined frames for a source code location in an inlined function. +.IP \(bu 2 +\fBllvm\-addr2line\fP uses \fI\%–output\-style=GNU\fP by default. +.IP \(bu 2 +\fBllvm\-addr2line\fP parses options from the environment variable +\fBLLVM_ADDR2LINE_OPTS\fP instead of from \fBLLVM_SYMBOLIZER_OPTS\fP\&. +.UNINDENT +.SH SEE ALSO +.sp +\fBllvm\-symbolizer(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-ar.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-ar.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-ar.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-ar.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,429 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-AR" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-ar \- LLVM archiver +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. 
+.SH SYNOPSIS +.sp +\fBllvm\-ar\fP [\-]{dmpqrstx}[abcDilLNoOPsSTuUvV] [relpos] [count] archive [files…] +.SH DESCRIPTION +.sp +The \fBllvm\-ar\fP command is similar to the common Unix utility, +\fBar\fP\&. It archives several files, such as objects and LLVM bitcode +files into a single archive library that can be linked into a program. However, +the archive can contain any kind of file. By default, \fBllvm\-ar\fP +generates a symbol table that makes linking faster because only the symbol +table needs to be consulted, not each individual file member of the archive. +.sp +The \fBllvm\-ar\fP command can be used to \fIread\fP archive files in SVR4, +GNU, BSD and Darwin format, and \fIwrite\fP in the GNU, BSD, and Darwin style +archive files. If an SVR4 format archive is used with the \fI\%r\fP +(replace), \fI\%d\fP (delete), \fI\%m\fP (move) or \fI\%q\fP +(quick update) operations, the archive will be reconstructed in the format +defined by \fI\%\-\-format\fP\&. +.sp +Here’s where \fBllvm\-ar\fP departs from previous \fBar\fP +implementations: +.sp +\fIThe following option is not supported\fP +.INDENT 0.0 +.INDENT 3.5 +[f] \- truncate inserted filenames +.UNINDENT +.UNINDENT +.sp +\fIThe following options are ignored for compatibility\fP +.INDENT 0.0 +.INDENT 3.5 +\-\-plugin= \- load a plugin which adds support for other file formats +.sp +[l] \- ignored in \fBar\fP +.UNINDENT +.UNINDENT +.sp +\fISymbol Table\fP +.INDENT 0.0 +.INDENT 3.5 +Since \fBllvm\-ar\fP supports bitcode files, the symbol table it creates +includes both native and bitcode symbols. +.UNINDENT +.UNINDENT +.sp +\fIDeterministic Archives\fP +.INDENT 0.0 +.INDENT 3.5 +By default, \fBllvm\-ar\fP always uses zero for timestamps and UIDs/GIDs +to write archives in a deterministic mode. This is equivalent to the +\fI\%D\fP modifier being enabled by default. If you wish to maintain +compatibility with other \fBar\fP implementations, you can pass the +\fI\%U\fP modifier to write actual timestamps and UIDs/GIDs.
+.UNINDENT +.UNINDENT +.sp +\fIWindows Paths\fP +.INDENT 0.0 +.INDENT 3.5 +When on Windows \fBllvm\-ar\fP treats the names of archived \fIfiles\fP in the same +case sensitive manner as the operating system. When on a non\-Windows machine +\fBllvm\-ar\fP does not consider character case. +.UNINDENT +.UNINDENT +.SH OPTIONS +.sp +\fBllvm\-ar\fP operations are compatible with other \fBar\fP +implementations. However, there are a few modifiers (\fI\%L\fP) that are not +found in other \fBar\fP implementations. The options for +\fBllvm\-ar\fP specify a single basic Operation to perform on the archive, +a variety of Modifiers for that Operation, the name of the archive file, and an +optional list of file names. If the \fIfiles\fP option is not specified, it +generally means either “none” or “all” members, depending on the operation. The +Options, Operations and Modifiers are explained in the sections below. +.sp +The minimal set of options is at least one operator and the name of the +archive. +.SS Operations +.INDENT 0.0 +.TP +.B d [NT] +Delete files from the \fBarchive\fP\&. The \fI\%N\fP and \fI\%T\fP modifiers +apply to this operation. The \fIfiles\fP options specify which members should be +removed from the archive. It is not an error if a specified file does not +appear in the archive. If no \fIfiles\fP are specified, the archive is not +modified. +.UNINDENT +.INDENT 0.0 +.TP +.B m [abi] +Move files from one location in the \fBarchive\fP to another. The \fI\%a\fP, +\fI\%b\fP, and \fI\%i\fP modifiers apply to this operation. The \fIfiles\fP +will all be moved to the location given by the modifiers. If no modifiers are +used, the files will be moved to the end of the archive. If no \fIfiles\fP are +specified, the archive is not modified. +.UNINDENT +.INDENT 0.0 +.TP +.B p [v] +Print \fIfiles\fP to the standard output stream. If no \fIfiles\fP are specified, the +entire \fBarchive\fP is printed. 
With the \fI\%v\fP modifier, +\fBllvm\-ar\fP also prints out the name of the file being output. Printing +binary files is ill\-advised as they might confuse your terminal settings. The +\fI\%p\fP operation never modifies the archive. +.UNINDENT +.INDENT 0.0 +.TP +.B q [LT] +Quickly append files to the end of the \fBarchive\fP without removing +duplicates. If no \fIfiles\fP are specified, the archive is not modified. The +behavior when appending one archive to another depends upon whether the +\fI\%L\fP and \fI\%T\fP modifiers are used: +.INDENT 7.0 +.IP \(bu 2 +Appending a regular archive to a regular archive will append the archive +file. If the \fI\%L\fP modifier is specified the members will be appended +instead. +.IP \(bu 2 +Appending a regular archive to a thin archive requires the \fI\%T\fP +modifier and will append the archive file. The \fI\%L\fP modifier is not +supported. +.IP \(bu 2 +Appending a thin archive to a regular archive will append the archive file. +If the \fI\%L\fP modifier is specified the members will be appended +instead. +.IP \(bu 2 +Appending a thin archive to a thin archive will always quick append its +members. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B r [abTu] +Replace existing \fIfiles\fP or insert them at the end of the \fBarchive\fP if +they do not exist. The \fI\%a\fP, \fI\%b\fP, \fI\%T\fP and \fI\%u\fP +modifiers apply to this operation. If no \fIfiles\fP are specified, the archive +is not modified. +.UNINDENT +.sp +t[v] +\&.. option:: t [vO] +.INDENT 0.0 +.INDENT 3.5 +Print the table of contents. Without any modifiers, this operation just prints +the names of the members to the standard output stream. With the \fI\%v\fP +modifier, \fBllvm\-ar\fP also prints out the file type (B=bitcode, +S=symbol table, blank=regular file), the permission mode, the owner and +group, the size, and the date. +With the \fI\%O\fP modifier, display member offsets.
If +any \fIfiles\fP are specified, the listing is only for those files. If no \fIfiles\fP +are specified, the table of contents for the whole archive is printed. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B V +A synonym for the \fI\%\-\-version\fP option. +.UNINDENT +.INDENT 0.0 +.TP +.B x [oP] +Extract \fBarchive\fP members back to files. The \fI\%o\fP modifier applies +to this operation. This operation retrieves the indicated \fIfiles\fP from the +archive and writes them back to the operating system’s file system. If no +\fIfiles\fP are specified, the entire archive is extracted. +.UNINDENT +.SS Modifiers (operation specific) +.sp +The modifiers below are specific to certain operations. See the Operations +section to determine which modifiers are applicable to which operations. +.INDENT 0.0 +.TP +.B a +When inserting or moving member files, this option specifies the destination +of the new files as being after the \fIrelpos\fP member. If \fIrelpos\fP is not found, +the files are placed at the end of the \fBarchive\fP\&. \fIrelpos\fP cannot be +consumed without either \fI\%a\fP, \fI\%b\fP or \fI\%i\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B b +When inserting or moving member files, this option specifies the destination +of the new files as being before the \fIrelpos\fP member. If \fIrelpos\fP is not +found, the files are placed at the end of the \fBarchive\fP\&. \fIrelpos\fP cannot +be consumed without either \fI\%a\fP, \fI\%b\fP or \fI\%i\fP\&. This +modifier is identical to the \fI\%i\fP modifier. +.UNINDENT +.INDENT 0.0 +.TP +.B i +A synonym for the \fI\%b\fP option. +.UNINDENT +.INDENT 0.0 +.TP +.B L +When quick appending an \fBarchive\fP, instead quick append its members. This +is a feature for \fBllvm\-ar\fP that is not found in gnu\-ar. 
+.UNINDENT +.INDENT 0.0 +.TP +.B N +When extracting or deleting a member that shares its name with another member, +the \fIcount\fP parameter allows you to supply a positive whole number that +selects the instance of the given name, with “1” indicating the first +instance. If \fI\%N\fP is not specified the first member of that name will +be selected. If \fIcount\fP is not supplied, the operation fails.*count* cannot be +.UNINDENT +.INDENT 0.0 +.TP +.B o +When extracting files, use the modification times of any \fIfiles\fP as they +appear in the \fBarchive\fP\&. By default \fIfiles\fP extracted from the archive +use the time of extraction. +.UNINDENT +.INDENT 0.0 +.TP +.B O +Display member offsets inside the archive. +.UNINDENT +.INDENT 0.0 +.TP +.B T +When creating or modifying an archive, this option specifies that the +\fBarchive\fP will be thin. By default, archives are not created as thin +archives and when modifying a thin archive, it will be converted to a regular +archive. +.UNINDENT +.INDENT 0.0 +.TP +.B v +When printing \fIfiles\fP or the \fBarchive\fP table of contents, this modifier +instructs \fBllvm\-ar\fP to include additional information in the output. +.UNINDENT +.SS Modifiers (generic) +.sp +The modifiers below may be applied to any operation. +.INDENT 0.0 +.TP +.B c +For the \fI\%r\fP (replace) and \fI\%q\fP (quick update) operations, +\fBllvm\-ar\fP will always create the archive if it doesn’t exist. +Normally, \fBllvm\-ar\fP will print a warning message indicating that the +\fBarchive\fP is being created. Using this modifier turns off +that warning. +.UNINDENT +.INDENT 0.0 +.TP +.B D +Use zero for timestamps and UIDs/GIDs. This is set by default. +.UNINDENT +.INDENT 0.0 +.TP +.B P +Use full paths when matching member names rather than just the file name. +This can be useful when manipulating an \fBarchive\fP generated by another +archiver, as some allow paths as member names. This is the default behavior +for thin archives.
+.UNINDENT +.INDENT 0.0 +.TP +.B s +This modifier requests that an archive index (or symbol table) be added to the +\fBarchive\fP, as if using ranlib. The symbol table will contain all the +externally visible functions and global variables defined by all the bitcode +files in the archive. By default \fBllvm\-ar\fP generates symbol tables in +archives. This can also be used as an operation. +.UNINDENT +.INDENT 0.0 +.TP +.B S +This modifier is the opposite of the \fI\%s\fP modifier. It instructs +\fBllvm\-ar\fP to not build the symbol table. If both \fI\%s\fP and +\fI\%S\fP are used, the last modifier to occur in the options will prevail. +.UNINDENT +.INDENT 0.0 +.TP +.B u +Only update \fBarchive\fP members with \fIfiles\fP that have more recent +timestamps. +.UNINDENT +.INDENT 0.0 +.TP +.B U +Use actual timestamps and UIDs/GIDs. +.UNINDENT +.SS Other +.INDENT 0.0 +.TP +.B \-\-format= +This option allows for default, gnu, darwin or bsd \fB\fP to be selected. +When creating an \fBarchive\fP, \fB\fP will default to that of the host +machine. +.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-\-help +Print a summary of command\-line options and their meanings. +.UNINDENT +.INDENT 0.0 +.TP +.B \-M +This option allows for MRI scripts to be read through the standard input +stream. No other options are compatible with this option. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-rsp\-quoting= +.TP +.B This option selects the quoting style \(ga\(ga\(ga\(ga for response files, either +.TP +.B \(ga\(gaposix\(ga\(ga or \(ga\(gawindows\(ga\(ga. The default when on Windows is \(ga\(gawindows\(ga\(ga, otherwise the +.TP +.B default is \(ga\(gaposix\(ga\(ga. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-version +Display the version of the \fBllvm\-ar\fP executable. +.UNINDENT +.INDENT 0.0 +.TP +.B @ +Read command\-line options and commands from response file \fB\fP\&. 
+.UNINDENT +.SH MRI SCRIPTS +.sp +\fBllvm\-ar\fP understands a subset of the MRI scripting interface commonly +supported by archivers following in the ar tradition. An MRI script contains a +sequence of commands to be executed by the archiver. The \fI\%\-M\fP option +allows for an MRI script to be passed to \fBllvm\-ar\fP through the +standard input stream. +.sp +Note that \fBllvm\-ar\fP has known limitations regarding the use of MRI +scripts: +.INDENT 0.0 +.IP \(bu 2 +Each script can only create one archive. +.IP \(bu 2 +Existing archives can not be modified. +.UNINDENT +.SS MRI Script Commands +.sp +Each command begins with the command’s name and must appear on its own line. +Some commands have arguments, which must be separated from the name by +whitespace. An MRI script should begin with either a \fI\%CREATE\fP or +\fI\%CREATETHIN\fP command and will typically end with a \fI\%SAVE\fP +command. Any text after either ‘*’ or ‘;’ is treated as a comment. +.INDENT 0.0 +.TP +.B CREATE archive +Begin creation of a regular archive with the specified name. Subsequent +commands act upon this \fBarchive\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B CREATETHIN archive +Begin creation of a thin archive with the specified name. Subsequent +commands act upon this \fBarchive\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B ADDLIB archive +Append the contents of \fBarchive\fP to the current archive. +.UNINDENT +.INDENT 0.0 +.TP +.B ADDMOD +Append \fB\fP to the current archive. +.UNINDENT +.INDENT 0.0 +.TP +.B DELETE +Delete the member of the current archive whose file name, excluding directory +components, matches \fB\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B SAVE +Write the current archive to the path specified in the previous +\fI\%CREATE\fP/\fI\%CREATETHIN\fP command. +.UNINDENT +.INDENT 0.0 +.TP +.B END +Ends the MRI script (optional). +.UNINDENT +.SH EXIT STATUS +.sp +If \fBllvm\-ar\fP succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non\-zero value. 
+.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-as.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-as.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-as.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-as.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,86 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-AS" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-as \- LLVM assembler +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-as\fP [\fIoptions\fP] [\fIfilename\fP] +.SH DESCRIPTION +.sp +\fBllvm\-as\fP is the LLVM assembler. It reads a file containing human\-readable +LLVM assembly language, translates it to LLVM bitcode, and writes the result +into a file or to standard output. +.sp +If \fIfilename\fP is omitted or is \fB\-\fP, then \fBllvm\-as\fP reads its input from +standard input. +.sp +If an output file is not specified with the \fB\-o\fP option, then +\fBllvm\-as\fP sends its output to a file or standard output by following +these rules: +.INDENT 0.0 +.IP \(bu 2 +If the input is standard input, then the output is standard output. 
+.IP \(bu 2 +If the input is a file that ends with \fB\&.ll\fP, then the output file is of the +same name, except that the suffix is changed to \fB\&.bc\fP\&. +.IP \(bu 2 +If the input is a file that does not end with the \fB\&.ll\fP suffix, then the +output file has the same name as the input file, except that the \fB\&.bc\fP +suffix is appended. +.UNINDENT +.SH OPTIONS +.INDENT 0.0 +.TP +\fB\-f\fP +Enable binary output on terminals. Normally, \fBllvm\-as\fP will refuse to +write raw bitcode output if the output stream is a terminal. With this option, +\fBllvm\-as\fP will write raw bitcode regardless of the output device. +.TP +\fB\-help\fP +Print a summary of command line options. +.TP +\fB\-o\fP \fIfilename\fP +Specify the output file name. If \fIfilename\fP is \fB\-\fP, then \fBllvm\-as\fP +sends its output to standard output. +.UNINDENT +.SH EXIT STATUS +.sp +If \fBllvm\-as\fP succeeds, it will exit with 0. Otherwise, if an error occurs, it +will exit with a non\-zero value. +.SH SEE ALSO +.sp +\fBllvm\-dis(1)\fP, as(1) +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-bcanalyzer.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-bcanalyzer.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-bcanalyzer.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-bcanalyzer.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,475 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-BCANALYZER" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-bcanalyzer \- LLVM bitcode analyzer +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. 
+.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-bcanalyzer\fP [\fIoptions\fP] [\fIfilename\fP] +.SH DESCRIPTION +.sp +The \fBllvm\-bcanalyzer\fP command is a small utility for analyzing bitcode +files. The tool reads a bitcode file (such as generated with the +\fBllvm\-as\fP tool) and produces a statistical report on the contents of +the bitcode file. The tool can also dump a low level but human readable +version of the bitcode file. This tool is probably not of much interest or +utility except for those working directly with the bitcode file format. Most +LLVM users can just ignore this tool. +.sp +If \fIfilename\fP is omitted or is \fB\-\fP, then \fBllvm\-bcanalyzer\fP reads its +input from standard input. This is useful for combining the tool into a +pipeline. Output is written to the standard output. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-nodetails +Causes \fBllvm\-bcanalyzer\fP to abbreviate its output by writing out only +a module level summary. The details for individual functions are not +displayed. +.UNINDENT +.INDENT 0.0 +.TP +.B \-dump +Causes \fBllvm\-bcanalyzer\fP to dump the bitcode in a human readable +format. This format is significantly different from LLVM assembly and +provides details about the encoding of the bitcode file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-verify +Causes \fBllvm\-bcanalyzer\fP to verify the module produced by reading the +bitcode. This ensures that the statistics generated are based on a consistent +module. +.UNINDENT +.INDENT 0.0 +.TP +.B \-help +Print a summary of command line options. 
+.UNINDENT +.SH EXIT STATUS +.sp +If \fBllvm\-bcanalyzer\fP succeeds, it will exit with 0. Otherwise, if an +error occurs, it will exit with a non\-zero value, usually 1. +.SH SUMMARY OUTPUT DEFINITIONS +.sp +The following items are always printed by llvm\-bcanalyzer. They comprise the +summary output. +.sp +\fBBitcode Analysis Of Module\fP +.INDENT 0.0 +.INDENT 3.5 +This just provides the name of the module for which bitcode analysis is being +generated. +.UNINDENT +.UNINDENT +.sp +\fBBitcode Version Number\fP +.INDENT 0.0 +.INDENT 3.5 +The bitcode version (not LLVM version) of the file read by the analyzer. +.UNINDENT +.UNINDENT +.sp +\fBFile Size\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of the entire bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBModule Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of the module block. Percentage is relative to File Size. +.UNINDENT +.UNINDENT +.sp +\fBFunction Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of all the function blocks. Percentage is relative to File +Size. +.UNINDENT +.UNINDENT +.sp +\fBGlobal Types Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of the Global Types Pool. Percentage is relative to File +Size. This is the size of the definitions of all types in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBConstant Pool Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of the Constant Pool Blocks. Percentage is relative to File +Size. +.UNINDENT +.UNINDENT +.sp +\fBModule Globals Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of the Global Variable Definitions and their initializers. +Percentage is relative to File Size. +.UNINDENT +.UNINDENT +.sp +\fBInstruction List Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of all the instruction lists in all the functions. +Percentage is relative to File Size. Note that this value is also included in +the Function Bytes.
+.UNINDENT +.UNINDENT +.sp +\fBCompaction Table Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of all the compaction tables in all the functions. +Percentage is relative to File Size. Note that this value is also included in +the Function Bytes. +.UNINDENT +.UNINDENT +.sp +\fBSymbol Table Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of all the symbol tables in all the functions. Percentage is +relative to File Size. Note that this value is also included in the Function +Bytes. +.UNINDENT +.UNINDENT +.sp +\fBDependent Libraries Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The size, in bytes, of the list of dependent libraries in the module. Percentage +is relative to File Size. Note that this value is also included in the Module +Global Bytes. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Bitcode Blocks\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of blocks of any kind in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Functions\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of function definitions in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Types\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of types defined in the Global Types Pool. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Constants\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of constants (of any type) defined in the Constant Pool. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Basic Blocks\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of basic blocks defined in all functions in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Instructions\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of instructions defined in all functions in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Long Instructions\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of long instructions defined in all functions in the bitcode +file. Long instructions are those taking greater than 4 bytes. 
Typically long +instructions are GetElementPtr with several indices, PHI nodes, and calls to +functions with large numbers of arguments. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Operands\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of operands used in all instructions in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Compaction Tables\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of compaction tables in all functions in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Symbol Tables\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of symbol tables in all functions in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBNumber Of Dependent Libs\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of dependent libraries found in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBTotal Instruction Size\fP +.INDENT 0.0 +.INDENT 3.5 +The total size of the instructions in all functions in the bitcode file. +.UNINDENT +.UNINDENT +.sp +\fBAverage Instruction Size\fP +.INDENT 0.0 +.INDENT 3.5 +The average number of bytes per instruction across all functions in the bitcode +file. This value is computed by dividing Total Instruction Size by Number Of +Instructions. +.UNINDENT +.UNINDENT +.sp +\fBMaximum Type Slot Number\fP +.INDENT 0.0 +.INDENT 3.5 +The maximum value used for a type’s slot number. Larger slot number values take +more bytes to encode. +.UNINDENT +.UNINDENT +.sp +\fBMaximum Value Slot Number\fP +.INDENT 0.0 +.INDENT 3.5 +The maximum value used for a value’s slot number. Larger slot number values take +more bytes to encode. +.UNINDENT +.UNINDENT +.sp +\fBBytes Per Value\fP +.INDENT 0.0 +.INDENT 3.5 +The average size of a Value definition (of any type). This is computed by +dividing File Size by the total number of values of any type. +.UNINDENT +.UNINDENT +.sp +\fBBytes Per Global\fP +.INDENT 0.0 +.INDENT 3.5 +The average size of a global definition (constants and global variables). 
+.UNINDENT +.UNINDENT +.sp +\fBBytes Per Function\fP +.INDENT 0.0 +.INDENT 3.5 +The average number of bytes per function definition. This is computed by +dividing Function Bytes by Number Of Functions. +.UNINDENT +.UNINDENT +.sp +\fB# of VBR 32\-bit Integers\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of 32\-bit integers encoded using the Variable Bit Rate +encoding scheme. +.UNINDENT +.UNINDENT +.sp +\fB# of VBR 64\-bit Integers\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of 64\-bit integers encoded using the Variable Bit Rate encoding +scheme. +.UNINDENT +.UNINDENT +.sp +\fB# of VBR Compressed Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of bytes consumed by the 32\-bit and 64\-bit integers that use +the Variable Bit Rate encoding scheme. +.UNINDENT +.UNINDENT +.sp +\fB# of VBR Expanded Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of bytes that would have been consumed by the 32\-bit and 64\-bit +integers had they not been compressed with the Variable Bit Rate encoding +scheme. +.UNINDENT +.UNINDENT +.sp +\fBBytes Saved With VBR\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of bytes saved by using the Variable Bit Rate encoding scheme. +The percentage is relative to # of VBR Expanded Bytes. +.UNINDENT +.UNINDENT +.SH DETAILED OUTPUT DEFINITIONS +.sp +The following definitions occur only if the \-nodetails option was not given. +The detailed output provides additional information on a per\-function basis. +.sp +\fBType\fP +.INDENT 0.0 +.INDENT 3.5 +The type signature of the function. +.UNINDENT +.UNINDENT +.sp +\fBByte Size\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of bytes in the function’s block. +.UNINDENT +.UNINDENT +.sp +\fBBasic Blocks\fP +.INDENT 0.0 +.INDENT 3.5 +The number of basic blocks defined by the function. +.UNINDENT +.UNINDENT +.sp +\fBInstructions\fP +.INDENT 0.0 +.INDENT 3.5 +The number of instructions defined by the function.
+.UNINDENT +.UNINDENT +.sp +\fBLong Instructions\fP +.INDENT 0.0 +.INDENT 3.5 +The number of instructions using the long instruction format in the function. +.UNINDENT +.UNINDENT +.sp +\fBOperands\fP +.INDENT 0.0 +.INDENT 3.5 +The number of operands used by all instructions in the function. +.UNINDENT +.UNINDENT +.sp +\fBInstruction Size\fP +.INDENT 0.0 +.INDENT 3.5 +The number of bytes consumed by instructions in the function. +.UNINDENT +.UNINDENT +.sp +\fBAverage Instruction Size\fP +.INDENT 0.0 +.INDENT 3.5 +The average number of bytes consumed by the instructions in the function. +This value is computed by dividing Instruction Size by Instructions. +.UNINDENT +.UNINDENT +.sp +\fBBytes Per Instruction\fP +.INDENT 0.0 +.INDENT 3.5 +The average number of bytes used by the function per instruction. This value +is computed by dividing Byte Size by Instructions. Note that this is not the +same as Average Instruction Size. It computes a number relative to the total +function size not just the size of the instruction list. +.UNINDENT +.UNINDENT +.sp +\fBNumber of VBR 32\-bit Integers\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of 32\-bit integers found in this function (for any use). +.UNINDENT +.UNINDENT +.sp +\fBNumber of VBR 64\-bit Integers\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of 64\-bit integers found in this function (for any use). +.UNINDENT +.UNINDENT +.sp +\fBNumber of VBR Compressed Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of bytes in this function consumed by the 32\-bit and 64\-bit +integers that use the Variable Bit Rate encoding scheme. +.UNINDENT +.UNINDENT +.sp +\fBNumber of VBR Expanded Bytes\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of bytes in this function that would have been consumed by +the 32\-bit and 64\-bit integers had they not been compressed with the Variable +Bit Rate encoding scheme. 
+.UNINDENT +.UNINDENT +.sp +\fBBytes Saved With VBR\fP +.INDENT 0.0 +.INDENT 3.5 +The total number of bytes saved in this function by using the Variable Bit +Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes. +.UNINDENT +.UNINDENT +.SH SEE ALSO +.sp +\fBllvm\-dis(1)\fP, /BitCodeFormat +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-config.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-config.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-config.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-config.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,209 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-CONFIG" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-config \- Print LLVM compilation options +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-config\fP \fIoption\fP [\fIcomponents\fP…] +.SH DESCRIPTION +.sp +\fBllvm\-config\fP makes it easier to build applications that use LLVM. It can +print the compiler flags, linker flags and object libraries needed to link +against LLVM. 
+.SH EXAMPLES +.sp +To link against the JIT: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +g++ \(gallvm\-config \-\-cxxflags\(ga \-o HowToUseJIT.o \-c HowToUseJIT.cpp +g++ \(gallvm\-config \-\-ldflags\(ga \-o HowToUseJIT HowToUseJIT.o \e +    \(gallvm\-config \-\-libs engine bcreader scalaropts\(ga +.ft P +.fi +.UNINDENT +.UNINDENT +.SH OPTIONS +.sp +\fB\-\-version\fP +.INDENT 0.0 +.INDENT 3.5 +Print the version number of LLVM. +.UNINDENT +.UNINDENT +.sp +\fB\-help\fP +.INDENT 0.0 +.INDENT 3.5 +Print a summary of \fBllvm\-config\fP arguments. +.UNINDENT +.UNINDENT +.sp +\fB\-\-prefix\fP +.INDENT 0.0 +.INDENT 3.5 +Print the installation prefix for LLVM. +.UNINDENT +.UNINDENT +.sp +\fB\-\-src\-root\fP +.INDENT 0.0 +.INDENT 3.5 +Print the source root from which LLVM was built. +.UNINDENT +.UNINDENT +.sp +\fB\-\-obj\-root\fP +.INDENT 0.0 +.INDENT 3.5 +Print the object root used to build LLVM. +.UNINDENT +.UNINDENT +.sp +\fB\-\-bindir\fP +.INDENT 0.0 +.INDENT 3.5 +Print the installation directory for LLVM binaries. +.UNINDENT +.UNINDENT +.sp +\fB\-\-includedir\fP +.INDENT 0.0 +.INDENT 3.5 +Print the installation directory for LLVM headers. +.UNINDENT +.UNINDENT +.sp +\fB\-\-libdir\fP +.INDENT 0.0 +.INDENT 3.5 +Print the installation directory for LLVM libraries. +.UNINDENT +.UNINDENT +.sp +\fB\-\-cxxflags\fP +.INDENT 0.0 +.INDENT 3.5 +Print the C++ compiler flags needed to use LLVM headers. +.UNINDENT +.UNINDENT +.sp +\fB\-\-ldflags\fP +.INDENT 0.0 +.INDENT 3.5 +Print the flags needed to link against LLVM libraries. +.UNINDENT +.UNINDENT +.sp +\fB\-\-libs\fP +.INDENT 0.0 +.INDENT 3.5 +Print all the libraries needed to link against the specified LLVM +\fIcomponents\fP, including any dependencies. +.UNINDENT +.UNINDENT +.sp +\fB\-\-libnames\fP +.INDENT 0.0 +.INDENT 3.5 +Similar to \fB\-\-libs\fP, but prints the bare filenames of the libraries +without \fB\-l\fP or pathnames. Useful for linking against a not\-yet\-installed +copy of LLVM.
+.UNINDENT +.UNINDENT +.sp +\fB\-\-libfiles\fP +.INDENT 0.0 +.INDENT 3.5 +Similar to \fB\-\-libs\fP, but print the full path to each library file. This is +useful when creating makefile dependencies, to ensure that a tool is relinked if +any library it uses changes. +.UNINDENT +.UNINDENT +.sp +\fB\-\-components\fP +.INDENT 0.0 +.INDENT 3.5 +Print all valid component names. +.UNINDENT +.UNINDENT +.sp +\fB\-\-targets\-built\fP +.INDENT 0.0 +.INDENT 3.5 +Print the component names for all targets supported by this copy of LLVM. +.UNINDENT +.UNINDENT +.sp +\fB\-\-build\-mode\fP +.INDENT 0.0 +.INDENT 3.5 +Print the build mode used when LLVM was built (e.g. Debug or Release). +.UNINDENT +.UNINDENT +.SH COMPONENTS +.sp +To print a list of all available components, run \fBllvm\-config +\-\-components\fP\&. In most cases, components correspond directly to LLVM +libraries. Useful “virtual” components include: +.sp +\fBall\fP +.INDENT 0.0 +.INDENT 3.5 +Includes all LLVM libraries. The default if no components are specified. +.UNINDENT +.UNINDENT +.sp +\fBbackend\fP +.INDENT 0.0 +.INDENT 3.5 +Includes either a native backend or the C backend. +.UNINDENT +.UNINDENT +.sp +\fBengine\fP +.INDENT 0.0 +.INDENT 3.5 +Includes either a native JIT or the bitcode interpreter. +.UNINDENT +.UNINDENT +.SH EXIT STATUS +.sp +If \fBllvm\-config\fP succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non\-zero value. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-cov.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-cov.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-cov.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-cov.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,528 @@ +.\" Man page generated from reStructuredText. +.
+.TH "LLVM-COV" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-cov \- emit coverage information +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-cov\fP \fIcommand\fP [\fIargs…\fP] +.SH DESCRIPTION +.sp +The \fBllvm\-cov\fP tool shows code coverage information for +programs that are instrumented to emit profile data. It can be used to +work with \fBgcov\fP\-style coverage or with \fBclang\fP\(aqs instrumentation +based profiling. +.sp +If the program is invoked with a base name of \fBgcov\fP, it will behave as if +the \fBllvm\-cov gcov\fP command were called. Otherwise, a command should +be provided. +.SH COMMANDS +.INDENT 0.0 +.IP \(bu 2 +\fI\%gcov\fP +.IP \(bu 2 +\fI\%show\fP +.IP \(bu 2 +\fI\%report\fP +.IP \(bu 2 +\fI\%export\fP +.UNINDENT +.SH GCOV COMMAND +.SS SYNOPSIS +.sp +\fBllvm\-cov gcov\fP [\fIoptions\fP] \fISOURCEFILE\fP +.SS DESCRIPTION +.sp +The \fBllvm\-cov gcov\fP tool reads code coverage data files and displays +the coverage information for a specified source file. It is compatible with the +\fBgcov\fP tool from version 4.2 of \fBGCC\fP and may also be compatible with some +later versions of \fBgcov\fP\&. +.sp +To use \fBllvm\-cov gcov\fP, you must first build an instrumented version +of your application that collects coverage data as it runs. 
Compile with the +\fB\-fprofile\-arcs\fP and \fB\-ftest\-coverage\fP options to add the +instrumentation. (Alternatively, you can use the \fB\-\-coverage\fP option, which +includes both of those other options.) +.sp +At the time you compile the instrumented code, a \fB\&.gcno\fP data file will be +generated for each object file. These \fB\&.gcno\fP files contain half of the +coverage data. The other half of the data comes from \fB\&.gcda\fP files that are +generated when you run the instrumented program, with a separate \fB\&.gcda\fP +file for each object file. Each time you run the program, the execution counts +are summed into any existing \fB\&.gcda\fP files, so be sure to remove any old +files if you do not want their contents to be included. +.sp +By default, the \fB\&.gcda\fP files are written into the same directory as the +object files, but you can override that by setting the \fBGCOV_PREFIX\fP and +\fBGCOV_PREFIX_STRIP\fP environment variables. The \fBGCOV_PREFIX_STRIP\fP +variable specifies a number of directory components to be removed from the +start of the absolute path to the object file directory. After stripping those +directories, the prefix from the \fBGCOV_PREFIX\fP variable is added. These +environment variables allow you to run the instrumented program on a machine +where the original object file directories are not accessible, but you will +then need to copy the \fB\&.gcda\fP files back to the object file directories +where \fBllvm\-cov gcov\fP expects to find them. +.sp +Once you have generated the coverage data files, run \fBllvm\-cov gcov\fP +for each main source file where you want to examine the coverage results. This +should be run from the same directory where you previously ran the +compiler. The results for the specified source file are written to a file named +by appending a \fB\&.gcov\fP suffix. A separate output file is also created for +each file included by the main source file, also with a \fB\&.gcov\fP suffix added. 
+.sp +The basic content of an \fB\&.gcov\fP output file is a copy of the source file with +an execution count and line number prepended to every line. The execution +count is shown as \fB\-\fP if a line does not contain any executable code. If +a line contains code but that code was never executed, the count is displayed +as \fB#####\fP\&. +.SS OPTIONS +.INDENT 0.0 +.TP +.B \-a, \-\-all\-blocks +Display all basic blocks. If there are multiple blocks for a single line of +source code, this option causes llvm\-cov to show the count for each block +instead of just one count for the entire line. +.UNINDENT +.INDENT 0.0 +.TP +.B \-b, \-\-branch\-probabilities +Display conditional branch probabilities and a summary of branch information. +.UNINDENT +.INDENT 0.0 +.TP +.B \-c, \-\-branch\-counts +Display branch counts instead of probabilities (requires \-b). +.UNINDENT +.INDENT 0.0 +.TP +.B \-m, \-\-demangled\-names +Demangle function names. +.UNINDENT +.INDENT 0.0 +.TP +.B \-f, \-\-function\-summaries +Show a summary of coverage for each function instead of just one summary for +an entire source file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-help +Display available options (–help\-hidden for more). +.UNINDENT +.INDENT 0.0 +.TP +.B \-l, \-\-long\-file\-names +For coverage output of files included from the main source file, add the +main file name followed by \fB##\fP as a prefix to the output file names. This +can be combined with the –preserve\-paths option to use complete paths for +both the main file and the included file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-n, \-\-no\-output +Do not output any \fB\&.gcov\fP files. Summary information is still +displayed. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o=, \-\-object\-directory=, \-\-object\-file= +Find objects in DIR or based on FILE’s path. If you specify a particular +object file, the coverage data files are expected to have the same base name +with \fB\&.gcno\fP and \fB\&.gcda\fP extensions. 
If you specify a directory, the +files are expected in that directory with the same base name as the source +file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-p, \-\-preserve\-paths +Preserve path components when naming the coverage output files. In addition +to the source file name, include the directories from the path to that +file. The directories are separated by \fB#\fP characters, with \fB\&.\fP directories +removed and \fB\&..\fP directories replaced by \fB^\fP characters. When used with +the \-\-long\-file\-names option, this applies to both the main file name and the +included file name. +.UNINDENT +.INDENT 0.0 +.TP +.B \-r +Only dump files with relative paths or absolute paths with the prefix specified +by \fB\-s\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-s= +Source prefix to elide. +.UNINDENT +.INDENT 0.0 +.TP +.B \-t, \-\-stdout +Print to stdout instead of producing \fB\&.gcov\fP files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-u, \-\-unconditional\-branches +Include unconditional branches in the output for the \-\-branch\-probabilities +option. +.UNINDENT +.INDENT 0.0 +.TP +.B \-version +Display the version of llvm\-cov. +.UNINDENT +.INDENT 0.0 +.TP +.B \-x, \-\-hash\-filenames +Use md5 hash of file name when naming the coverage output files. The source +file name will be suffixed by \fB##\fP followed by MD5 hash calculated for it. +.UNINDENT +.SS EXIT STATUS +.sp +\fBllvm\-cov gcov\fP returns 1 if it cannot read input files. Otherwise, +it exits with zero. +.SH SHOW COMMAND +.SS SYNOPSIS +.sp +\fBllvm\-cov show\fP [\fIoptions\fP] \-instr\-profile \fIPROFILE\fP \fIBIN\fP [\fI\-object BIN,…\fP] [[\fI\-object BIN\fP]] [\fISOURCES\fP] +.SS DESCRIPTION +.sp +The \fBllvm\-cov show\fP command shows line by line coverage of the +binaries \fIBIN\fP,… using the profile data \fIPROFILE\fP\&. It can optionally be +filtered to only show the coverage for the files listed in \fISOURCES\fP\&.
+.sp +\fIBIN\fP may be an executable, object file, dynamic library, or archive (thin or +otherwise). +.sp +To use \fBllvm\-cov show\fP, you need a program that is compiled with +instrumentation to emit profile and coverage data. To build such a program with +\fBclang\fP use the \fB\-fprofile\-instr\-generate\fP and \fB\-fcoverage\-mapping\fP +flags. If linking with the \fBclang\fP driver, pass \fB\-fprofile\-instr\-generate\fP +to the link stage to make sure the necessary runtime libraries are linked in. +.sp +The coverage information is stored in the built executable or library itself, +and this is what you should pass to \fBllvm\-cov show\fP as a \fIBIN\fP +argument. The profile data is generated by running this instrumented program +normally. When the program exits it will write out a raw profile file, +typically called \fBdefault.profraw\fP, which can be converted to a format that +is suitable for the \fIPROFILE\fP argument using the \fBllvm\-profdata merge\fP +tool. +.SS OPTIONS +.INDENT 0.0 +.TP +.B \-show\-branches= +Show coverage for branch conditions in terms of either count or percentage. +The supported views are: “count”, “percent”. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-line\-counts +Show the execution counts for each line. Defaults to true, unless another +\fB\-show\fP option is used. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-expansions +Expand inclusions, such as preprocessor macros or textual inclusions, inline +in the display of the source file. Defaults to false. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-instantiations +For source regions that are instantiated multiple times, such as templates in +\fBC++\fP, show each instantiation separately as well as the combined summary. +Defaults to true. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-regions +Show the execution counts for each region by displaying a caret that points to +the character where the region starts. Defaults to false. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-line\-counts\-or\-regions +Show the execution counts for each line if there is only one region on the +line, but show the individual regions if there are multiple on the line. +Defaults to false. +.UNINDENT +.INDENT 0.0 +.TP +.B \-use\-color +Enable or disable color output. By default this is autodetected. +.UNINDENT +.INDENT 0.0 +.TP +.B \-arch=[*NAMES*] +Specify a list of architectures such that the Nth entry in the list +corresponds to the Nth specified binary. If the covered object is a universal +binary, this specifies the architecture to use. It is an error to specify an +architecture that is not included in the universal binary or to use an +architecture that does not match a non\-universal binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-name= +Show code coverage only for functions with the given name. +.UNINDENT +.INDENT 0.0 +.TP +.B \-name\-whitelist= +Show code coverage only for functions listed in the given file. Each line in +the file should start with \fIwhitelist_fun:\fP, immediately followed by the name +of the function to accept. This name can be a wildcard expression. +.UNINDENT +.INDENT 0.0 +.TP +.B \-name\-regex= +Show code coverage only for functions that match the given regular expression. +.UNINDENT +.INDENT 0.0 +.TP +.B \-ignore\-filename\-regex= +Skip source code files with file paths that match the given regular expression. +.UNINDENT +.INDENT 0.0 +.TP +.B \-format= +Use the specified output format. The supported formats are: “text”, “html”. +.UNINDENT +.INDENT 0.0 +.TP +.B \-tab\-size= +Replace tabs with spaces when preparing reports. Currently, this is +only supported for the html format. +.UNINDENT +.INDENT 0.0 +.TP +.B \-output\-dir=PATH +Specify a directory to write coverage reports into. If the directory does not +exist, it is created. When used in function view mode (i.e when \-name or +\-name\-regex are used to select specific functions), the report is written to +PATH/functions.EXTENSION. 
When used in file view mode, a report for each file +is written to PATH/REL_PATH_TO_FILE.EXTENSION. +.UNINDENT +.INDENT 0.0 +.TP +.B \-Xdemangler=| +Specify a symbol demangler. This can be used to make reports more +human\-readable. This option can be specified multiple times to supply +arguments to the demangler (e.g. \fI\-Xdemangler c++filt \-Xdemangler \-n\fP for C++). +The demangler is expected to read a newline\-separated list of symbols from +stdin and write a newline\-separated list of the same length to stdout. +.UNINDENT +.INDENT 0.0 +.TP +.B \-num\-threads=N, \-j=N +Use N threads to write file reports (only applicable when \-output\-dir is +specified). When N=0, llvm\-cov auto\-detects an appropriate number of threads to +use. This is the default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-compilation\-dir= +Directory used as a base for relative coverage mapping paths. Only applicable +when binaries have been compiled with one of \fI\-fcoverage\-prefix\-map\fP, +\fI\-fcoverage\-compilation\-dir\fP, or \fI\-ffile\-compilation\-dir\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-line\-coverage\-gt= +Show code coverage only for functions with line coverage greater than the +given threshold. +.UNINDENT +.INDENT 0.0 +.TP +.B \-line\-coverage\-lt= +Show code coverage only for functions with line coverage less than the given +threshold. +.UNINDENT +.INDENT 0.0 +.TP +.B \-region\-coverage\-gt= +Show code coverage only for functions with region coverage greater than the +given threshold. +.UNINDENT +.INDENT 0.0 +.TP +.B \-region\-coverage\-lt= +Show code coverage only for functions with region coverage less than the given +threshold. +.UNINDENT +.INDENT 0.0 +.TP +.B \-path\-equivalence=, +Map the paths in the coverage data to local source file paths. This allows you +to generate the coverage data on one machine, and then use llvm\-cov on a +different machine where you have the same files on a different path.
+.UNINDENT +.SH REPORT COMMAND +.SS SYNOPSIS +.sp +\fBllvm\-cov report\fP [\fIoptions\fP] \-instr\-profile \fIPROFILE\fP \fIBIN\fP [\fI\-object BIN,…\fP] [[\fI\-object BIN\fP]] [\fISOURCES\fP] +.SS DESCRIPTION +.sp +The \fBllvm\-cov report\fP command displays a summary of the coverage of +the binaries \fIBIN\fP,… using the profile data \fIPROFILE\fP\&. It can optionally be +filtered to only show the coverage for the files listed in \fISOURCES\fP\&. +.sp +\fIBIN\fP may be an executable, object file, dynamic library, or archive (thin or +otherwise). +.sp +If no source files are provided, a summary line is printed for each file in the +coverage data. If any files are provided, summaries can be shown for each +function in the listed files if the \fB\-show\-functions\fP option is enabled. +.sp +For information on compiling programs for coverage and generating profile data, +see \fI\%SHOW COMMAND\fP\&. +.SS OPTIONS +.INDENT 0.0 +.TP +.B \-use\-color[=VALUE] +Enable or disable color output. By default this is autodetected. +.UNINDENT +.INDENT 0.0 +.TP +.B \-arch= +If the covered binary is a universal binary, select the architecture to use. +It is an error to specify an architecture that is not included in the +universal binary or to use an architecture that does not match a +non\-universal binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-region\-summary +Show statistics for all regions. Defaults to true. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-branch\-summary +Show statistics for all branch conditions. Defaults to true. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-functions +Show coverage summaries for each function. Defaults to false. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-instantiation\-summary +Show statistics for all function instantiations. Defaults to false. +.UNINDENT +.INDENT 0.0 +.TP +.B \-ignore\-filename\-regex= +Skip source code files with file paths that match the given regular expression. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-compilation\-dir= +Directory used as a base for relative coverage mapping paths. Only applicable +when binaries have been compiled with one of \fI\-fcoverage\-prefix\-map\fP, +\fI\-fcoverage\-compilation\-dir\fP, or \fI\-ffile\-compilation\-dir\fP\&. +.UNINDENT +.SH EXPORT COMMAND +.SS SYNOPSIS +.sp +\fBllvm\-cov export\fP [\fIoptions\fP] \-instr\-profile \fIPROFILE\fP \fIBIN\fP [\fI\-object BIN,…\fP] [[\fI\-object BIN\fP]] [\fISOURCES\fP] +.SS DESCRIPTION +.sp +The \fBllvm\-cov export\fP command exports coverage data of the binaries +\fIBIN\fP,… using the profile data \fIPROFILE\fP in either JSON or lcov trace file +format. +.sp +When exporting JSON, the regions, functions, branches, expansions, and +summaries of the coverage data will be exported. When exporting an lcov trace +file, the line\-based coverage, branch coverage, and summaries will be exported. +.sp +The exported data can optionally be filtered to only export the coverage +for the files listed in \fISOURCES\fP\&. +.sp +For information on compiling programs for coverage and generating profile data, +see \fI\%SHOW COMMAND\fP\&. +.SS OPTIONS +.INDENT 0.0 +.TP +.B \-arch= +If the covered binary is a universal binary, select the architecture to use. +It is an error to specify an architecture that is not included in the +universal binary or to use an architecture that does not match a +non\-universal binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-format= +Use the specified output format. The supported formats are: “text” (JSON), +“lcov”. +.UNINDENT +.INDENT 0.0 +.TP +.B \-summary\-only +Export only summary information for each file in the coverage data. This mode +will not export coverage information for smaller units such as individual +functions or regions. The result will contain the same information as produced +by the \fBllvm\-cov report\fP command, but presented in JSON or lcov +format rather than text.
+.UNINDENT +.INDENT 0.0 +.TP +.B \-ignore\-filename\-regex= +Skip source code files with file paths that match the given regular expression. +.INDENT 7.0 +.TP +.B \-skip\-expansions +.UNINDENT +.sp +Skip exporting macro expansion coverage data. +.INDENT 7.0 +.TP +.B \-skip\-functions +.UNINDENT +.sp +Skip exporting per\-function coverage data. +.INDENT 7.0 +.TP +.B \-num\-threads=N, \-j=N +.UNINDENT +.sp +Use N threads to export coverage data. When N=0, llvm\-cov auto\-detects an +appropriate number of threads to use. This is the default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-compilation\-dir= +Directory used as a base for relative coverage mapping paths. Only applicable +when binaries have been compiled with one of \fI\-fcoverage\-prefix\-map\fP, +\fI\-fcoverage\-compilation\-dir\fP, or \fI\-ffile\-compilation\-dir\fP\&. +.UNINDENT +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-cxxfilt.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-cxxfilt.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-cxxfilt.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-cxxfilt.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,117 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-CXXFILT" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-cxxfilt \- LLVM symbol name demangler +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +.
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-cxxfilt\fP [\fIoptions\fP] [\fImangled names…\fP] +.SH DESCRIPTION +.sp +\fBllvm\-cxxfilt\fP is a symbol demangler that can be used as a replacement +for the GNU \fBc++filt\fP tool. It takes a series of symbol names and +prints their demangled form on the standard output stream. If a name cannot be +demangled, it is simply printed as is. +.sp +If no names are specified on the command\-line, names are read interactively from +the standard input stream. When reading names from standard input, each input +line is split on characters that are not part of valid Itanium name manglings, +i.e. characters that are not alphanumeric, ‘.’, ‘$’, or ‘_’. Separators between +names are copied to the output as is. +.SH EXAMPLE +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ llvm\-cxxfilt _Z3foov _Z3bari not_mangled +foo() +bar(int) +not_mangled +$ cat input.txt +| _Z3foov *** _Z3bari *** not_mangled | +$ llvm\-cxxfilt < input.txt +| foo() *** bar(int) *** not_mangled | +.ft P +.fi +.UNINDENT +.UNINDENT +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-\-format=, \-s +Mangling scheme to assume. Valid values are \fBauto\fP (default, auto\-detect the +style) and \fBgnu\fP (assume GNU/Itanium style). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-help, \-h +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-strip\-underscore, \-n +Do not strip a leading underscore. This is the default for all platforms +except Mach\-O based hosts. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-underscore, \-_ +Strip a single leading underscore, if present, from each input name before +demangling. On by default on Mach\-O based platforms. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-types, \-t +Attempt to demangle names as type names as well as function names. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-\-version +Display the version of the \fBllvm\-cxxfilt\fP executable. +.UNINDENT +.INDENT 0.0 +.TP +.B @ +Read command\-line options from response file \fI\fP\&. +.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-cxxfilt\fP returns 0 unless it encounters a usage error, in which +case a non\-zero exit code is returned. +.SH SEE ALSO +.sp +\fBllvm\-nm(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-cxxmap.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-cxxmap.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-cxxmap.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-cxxmap.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,154 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-CXXMAP" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-cxxmap \- Mangled name remapping tool +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-cxxmap\fP [\fIoptions\fP] \fIsymbol\-file\-1\fP \fIsymbol\-file\-2\fP +.SH DESCRIPTION +.sp +The \fBllvm\-cxxmap\fP tool performs fuzzy matching of C++ mangled names, +based on a file describing name components that should be considered equivalent. 
+.sp +The symbol files should contain a list of C++ mangled names (one per line). +Blank lines and lines starting with \fB#\fP are ignored. The output is a list +of pairs of equivalent symbols, one per line, of the form +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C + +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +where \fB\fP is a symbol from \fIsymbol\-file\-1\fP and \fB\fP is +a symbol from \fIsymbol\-file\-2\fP\&. Mappings for which the two symbols are identical +are omitted. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-remapping\-file=file, \-r=file +Specify a file containing a list of equivalence rules that should be used +to determine whether two symbols are equivalent. Required. +See \fI\%REMAPPING FILE\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-output=file, \-o=file +Specify a file to write the list of matched names to. If unspecified, the +list will be written to stdout. +.UNINDENT +.INDENT 0.0 +.TP +.B \-Wambiguous +Produce a warning if there are multiple equivalent (but distinct) symbols in +\fIsymbol\-file\-2\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-Wincomplete +Produce a warning if \fIsymbol\-file\-1\fP contains a symbol for which there is no +equivalent symbol in \fIsymbol\-file\-2\fP\&. +.UNINDENT +.SH REMAPPING FILE +.sp +The remapping file is a text file containing lines of the form +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +fragmentkind fragment1 fragment2 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +where \fBfragmentkind\fP is one of \fBname\fP, \fBtype\fP, or \fBencoding\fP, +indicating whether the following mangled name fragments are +<\fI\%name\fP>s, +<\fI\%type\fP>s, or +<\fI\%encoding\fP>s, +respectively. +Blank lines and lines starting with \fB#\fP are ignored. +.sp +Unmangled C names can be expressed as an \fBencoding\fP that is a (length\-prefixed) +<\fI\%source\-name\fP>: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# C function "void foo_bar()" is remapped to C++ function "void foo::bar()". 
+encoding 7foo_bar _Z3foo3barv +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +For convenience, built\-in s such as \fBSt\fP and \fBSs\fP +are accepted as s (even though they technically are not s). +.sp +For example, to specify that \fBabsl::string_view\fP and \fBstd::string_view\fP +should be treated as equivalent, the following remapping file could be used: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# absl::string_view is considered equivalent to std::string_view +type N4absl11string_viewE St17basic_string_viewIcSt11char_traitsIcEE + +# std:: might be std::__1:: in libc++ or std::__cxx11:: in libstdc++ +name St St3__1 +name St St7__cxx11 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +\fBNOTE:\fP +.INDENT 0.0 +.INDENT 3.5 +Symbol remapping is currently only supported for C++ mangled names +following the Itanium C++ ABI mangling scheme. This covers all C++ targets +supported by Clang other than Windows targets. +.UNINDENT +.UNINDENT +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-diff.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-diff.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-diff.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-diff.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,76 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-DIFF" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-diff \- LLVM structural 'diff' +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. 
+.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-diff\fP [\fIoptions\fP] \fImodule 1\fP \fImodule 2\fP [\fIglobal name …\fP] +.SH DESCRIPTION +.sp +\fBllvm\-diff\fP compares the structure of two LLVM modules, primarily +focusing on differences in function definitions. Insignificant +differences, such as changes in the ordering of globals or in the +names of local values, are ignored. +.sp +An input module will be interpreted as an assembly file if its name +ends in ‘.ll’; otherwise it will be read in as a bitcode file. +.sp +If a list of global names is given, just the values with those names +are compared; otherwise, all global values are compared, and +diagnostics are produced for globals which only appear in one module +or the other. +.sp +\fBllvm\-diff\fP compares two functions by comparing their basic blocks, +beginning with the entry blocks. If the terminators seem to match, +then the corresponding successors are compared; otherwise they are +ignored. This algorithm is very sensitive to changes in control flow, +which tend to stop any downstream changes from being detected. +.sp +\fBllvm\-diff\fP is intended as a debugging tool for writers of LLVM +passes and frontends. It does not have a stable output format. +.SH EXIT STATUS +.sp +If \fBllvm\-diff\fP finds no differences between the modules, it will exit +with 0 and produce no output. Otherwise it will exit with a non\-zero +value. +.SH BUGS +.sp +Many important differences, like changes in linkage or function +attributes, are not diagnosed. +.sp +Changes in memory behavior (for example, coalescing loads) can cause +massive detected differences in blocks. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). 
+.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-dis.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-dis.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-dis.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-dis.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,87 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-DIS" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-dis \- LLVM disassembler +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-dis\fP [\fIoptions\fP] [\fIfilename\fP] +.SH DESCRIPTION +.sp +The \fBllvm\-dis\fP command is the LLVM disassembler. It takes an LLVM +bitcode file and converts it into human\-readable LLVM assembly language. +.sp +If filename is omitted or specified as \fB\-\fP, \fBllvm\-dis\fP reads its +input from standard input. +.sp +If the input is being read from standard input, then \fBllvm\-dis\fP +will send its output to standard output by default. Otherwise, the +output will be written to a file named after the input file, with +a \fB\&.ll\fP suffix added (any existing \fB\&.bc\fP suffix will first be +removed). You can override the choice of output file using the +\fB\-o\fP option. 
+.SH OPTIONS +.sp +\fB\-f\fP +.INDENT 0.0 +.INDENT 3.5 +Enable binary output on terminals. Normally, \fBllvm\-dis\fP will refuse to +write raw bitcode output if the output stream is a terminal. With this option, +\fBllvm\-dis\fP will write raw bitcode regardless of the output device. +.UNINDENT +.UNINDENT +.sp +\fB\-help\fP +.INDENT 0.0 +.INDENT 3.5 +Print a summary of command line options. +.UNINDENT +.UNINDENT +.sp +\fB\-o\fP \fIfilename\fP +.INDENT 0.0 +.INDENT 3.5 +Specify the output file name. If \fIfilename\fP is \-, then the output is sent +to standard output. +.UNINDENT +.UNINDENT +.SH EXIT STATUS +.sp +If \fBllvm\-dis\fP succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non\-zero value. +.SH SEE ALSO +.sp +\fBllvm\-as(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-dwarfdump.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-dwarfdump.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-dwarfdump.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-dwarfdump.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,262 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-DWARFDUMP" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-dwarfdump \- dump and verify DWARF debug information +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. 
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-dwarfdump\fP [\fIoptions\fP] [\fIfilename …\fP] +.SH DESCRIPTION +.sp +\fBllvm\-dwarfdump\fP parses DWARF sections in object files, +archives, and \fI\&.dSYM\fP bundles and prints their contents in +human\-readable form. Only the .debug_info section is printed unless one of +the section\-specific options or \fI\%\-\-all\fP is specified. +.sp +If no input file is specified, \fIa.out\fP is used instead. If \fI\-\fP is used as the +input file, \fBllvm\-dwarfdump\fP reads the input from its standard input +stream. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-a, \-\-all +Dump all supported DWARF sections. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-arch= +Dump DWARF debug information for the specified CPU architecture. +Architectures may be specified by name or by number. This +option can be specified multiple times, once for each desired +architecture. All CPU architectures will be printed by +default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-c, \-\-show\-children +Show a debug info entry’s children when selectively printing with +the \fI=\fP argument of \fI\%\-\-debug\-info\fP, or options such +as \fI\%\-\-find\fP or \fI\%\-\-name\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-color +Use colors in output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-f , \-\-find= +Search for the exact text in the accelerator tables +and print the matching debug information entries. +When there is no accelerator tables or the name of the DIE +you are looking for is not found in the accelerator tables, +try using the slower but more complete \fI\%\-\-name\fP option. +.UNINDENT +.INDENT 0.0 +.TP +.B \-F, \-\-show\-form +Show DWARF form types after the DWARF attribute types. +.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-\-help +Show help and usage for this command. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-\-help\-list +Show help and usage for this command without grouping the options +into categories. +.UNINDENT +.INDENT 0.0 +.TP +.B \-i, \-\-ignore\-case +Ignore case distinctions when using \fI\%\-\-name\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-n , \-\-name= +Find and print all debug info entries whose name +(\fIDW_AT_name\fP attribute) is . +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-lookup=
+Look up <address>
in the debug information and print out the file, +function, block, and line table details. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o +Redirect output to a file specified by , where \fI\-\fP is the +standard output stream. +.UNINDENT +.INDENT 0.0 +.TP +.B \-p, \-\-show\-parents +Show a debug info entry’s parents when selectively printing with +the \fI=\fP argument of \fI\%\-\-debug\-info\fP, or options such +as \fI\%\-\-find\fP or \fI\%\-\-name\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-parent\-recurse\-depth= +When displaying debug info entry parents, only show them to a +maximum depth of . +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-quiet +Use with \fI\%\-\-verify\fP to not emit to \fISTDOUT\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-r , \-\-recurse\-depth= +When displaying debug info entries, only show children to a maximum +depth of . +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-show\-section\-sizes +Show the sizes of all debug sections, expressed in bytes. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-statistics +Collect debug info quality metrics and print the results +as machine\-readable single\-line JSON output. The output +format is described in the section below (\fI\%FORMAT OF STATISTICS OUTPUT\fP). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-summarize\-types +Abbreviate the description of type unit entries. +.UNINDENT +.INDENT 0.0 +.TP +.B \-x, \-\-regex +Treat any strings as regular expressions when searching +with \fI\%\-\-name\fP\&. If \fI\%\-\-ignore\-case\fP is also specified, +the regular expression becomes case\-insensitive. +.UNINDENT +.INDENT 0.0 +.TP +.B \-u, \-\-uuid +Show the UUID for each architecture. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-diff +Dump the output in a format that is more friendly for comparing +DWARF output from two different files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v, \-\-verbose +Display verbose information when dumping. This can help to debug +DWARF issues. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-\-verify +Verify the structure of the DWARF information by verifying the +compile unit chains, DIE relationships graph, address +ranges, and more. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-version +Display the version of the tool. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-debug\-abbrev, \-\-debug\-addr, \-\-debug\-aranges, \-\-debug\-cu\-index, \-\-debug\-frame[=], \-\-debug\-gnu\-pubnames, \-\-debug\-gnu\-pubtypes, \-\-debug\-info [=], \-\-debug\-line [=], \-\-debug\-line\-str, \-\-debug\-loc [=], \-\-debug\-loclists [=], \-\-debug\-macro, \-\-debug\-names, \-\-debug\-pubnames, \-\-debug\-pubtypes, \-\-debug\-ranges, \-\-debug\-rnglists, \-\-debug\-str, \-\-debug\-str\-offsets, \-\-debug\-tu\-index, \-\-debug\-types [=], \-\-eh\-frame [=], \-\-gdb\-index, \-\-apple\-names, \-\-apple\-types, \-\-apple\-namespaces, \-\-apple\-objc +Dump the specified DWARF section by name. Only the +\fI\&.debug_info\fP section is shown by default. Some entries +support adding an \fI=\fP as a way to provide an +optional offset of the exact entry to dump within the +respective section. When an offset is provided, only the +entry at that offset will be dumped, else the entire +section will be dumped. +.UNINDENT +.INDENT 0.0 +.TP +.B @ +Read command\-line options from \fI\fP\&. +.UNINDENT +.SH FORMAT OF STATISTICS OUTPUT +.sp +The :\fI\%\-\-statistics\fP option generates single\-line JSON output +representing quality metrics of the processed debug info. These metrics are +useful to compare changes between two compilers, particularly for judging +the effect that a change to the compiler has on the debug info quality. +.sp +The output is formatted as key\-value pairs. The first pair contains a version +number. 
The following naming scheme is used for the keys: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fIvariables\fP ==> local variables and parameters +.IP \(bu 2 +\fIlocal vars\fP ==> local variables +.IP \(bu 2 +\fIparams\fP ==> formal parameters +.UNINDENT +.UNINDENT +.UNINDENT +.sp +For aggregated values, the following keys are used: +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fIsum_of_all_variables(…)\fP ==> the sum applied to all variables +.IP \(bu 2 +\fI#bytes\fP ==> the number of bytes +.IP \(bu 2 +\fI#variables \- entry values …\fP ==> the number of variables excluding +the entry values etc. +.UNINDENT +.UNINDENT +.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-dwarfdump\fP returns 0 if the input files were parsed and dumped +successfully. Otherwise, it returns 1. +.SH SEE ALSO +.sp +\fBdsymutil(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-exegesis.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-exegesis.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-exegesis.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-exegesis.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,397 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-EXEGESIS" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-exegesis \- LLVM Machine Instruction Benchmark +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. 
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-exegesis\fP [\fIoptions\fP] +.SH DESCRIPTION +.sp +\fBllvm\-exegesis\fP is a benchmarking tool that uses information available +in LLVM to measure host machine instruction characteristics like latency, +throughput, or port decomposition. +.sp +Given an LLVM opcode name and a benchmarking mode, \fBllvm\-exegesis\fP +generates a code snippet that makes execution as serial (resp. as parallel) as +possible so that we can measure the latency (resp. inverse throughput/uop decomposition) +of the instruction. +The code snippet is jitted and executed on the host subtarget. The time taken +(resp. resource usage) is measured using hardware performance counters. The +result is printed out as YAML to the standard output. +.sp +The main goal of this tool is to automatically (in)validate the LLVM’s TableDef +scheduling models. To that end, we also provide analysis of the results. +.sp +\fBllvm\-exegesis\fP can also benchmark arbitrary user\-provided code +snippets. +.SH EXAMPLE 1: BENCHMARKING INSTRUCTIONS +.sp +Assume you have an X86\-64 machine. 
To measure the latency of a single +instruction, run: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ llvm\-exegesis \-mode=latency \-opcode\-name=ADD64rr +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Measuring the uop decomposition or inverse throughput of an instruction works similarly: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ llvm\-exegesis \-mode=uops \-opcode\-name=ADD64rr +$ llvm\-exegesis \-mode=inverse_throughput \-opcode\-name=ADD64rr +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The output is a YAML document (the default is to write to stdout, but you can +redirect the output to a file using \fI\-benchmarks\-file\fP): +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +\-\-\- +key: + opcode_name: ADD64rr + mode: latency + config: \(aq\(aq +cpu_name: haswell +llvm_triple: x86_64\-unknown\-linux\-gnu +num_repetitions: 10000 +measurements: + \- { key: latency, value: 1.0058, debug_string: \(aq\(aq } +error: \(aq\(aq +info: \(aqexplicit self cycles, selecting one aliasing configuration. +Snippet: +ADD64rr R8, R8, R10 +\(aq +\&... +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +To measure the latency of all instructions for the host architecture, run: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ llvm\-exegesis \-mode=latency \-opcode\-index=\-1 +.ft P +.fi +.UNINDENT +.UNINDENT +.SH EXAMPLE 2: BENCHMARKING A CUSTOM CODE SNIPPET +.sp +To measure the latency/uops of a custom piece of code, you can specify the +\fIsnippets\-file\fP option (\fI\-\fP reads from standard input). +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ echo "vzeroupper" | llvm\-exegesis \-mode=uops \-snippets\-file=\- +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Real\-life code snippets typically depend on registers or memory. +\fBllvm\-exegesis\fP checks the liveliness of registers (i.e. any register +use has a corresponding def or is a “live in”). If your code depends on the +value of some registers, you have two options: +.INDENT 0.0 +.IP \(bu 2 +Mark the register as requiring a definition. 
\fBllvm\-exegesis\fP will +automatically assign a value to the register. This can be done using the +directive \fILLVM\-EXEGESIS\-DEFREG <reg name> <hex_value>\fP, where \fI<hex_value>\fP +is a bit pattern used to fill \fI<reg name>\fP\&. If \fI<hex_value>\fP is smaller than +the register width, it will be sign\-extended. +.IP \(bu 2 +Mark the register as a “live in”. \fBllvm\-exegesis\fP will benchmark +using whatever value was in this register on entry. This can be done using +the directive \fILLVM\-EXEGESIS\-LIVEIN <reg name>\fP\&. +.UNINDENT +.sp +For example, the following code snippet depends on the values of XMM1 (which +will be set by the tool) and the memory buffer passed in RDI (live in). +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# LLVM\-EXEGESIS\-LIVEIN RDI +# LLVM\-EXEGESIS\-DEFREG XMM1 42 +vmulps (%rdi), %xmm1, %xmm2 +vhaddps %xmm2, %xmm2, %xmm3 +addq $0x10, %rdi +.ft P +.fi +.UNINDENT +.UNINDENT +.SH EXAMPLE 3: ANALYSIS +.sp +Assuming you have a set of benchmarked instructions (either latency or uops) as +YAML in file \fI/tmp/benchmarks.yaml\fP, you can analyze the results using the +following command: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C + $ llvm\-exegesis \-mode=analysis \e +\-benchmarks\-file=/tmp/benchmarks.yaml \e +\-analysis\-clusters\-output\-file=/tmp/clusters.csv \e +\-analysis\-inconsistencies\-output\-file=/tmp/inconsistencies.html +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +This will group the instructions into clusters with the same performance +characteristics. The clusters will be written out to \fI/tmp/clusters.csv\fP in the +following format: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +cluster_id,opcode_name,config,sched_class +\&... 
+2,ADD32ri8_DB,,WriteALU,1.00 +2,ADD32ri_DB,,WriteALU,1.01 +2,ADD32rr,,WriteALU,1.01 +2,ADD32rr_DB,,WriteALU,1.00 +2,ADD32rr_REV,,WriteALU,1.00 +2,ADD64i32,,WriteALU,1.01 +2,ADD64ri32,,WriteALU,1.01 +2,MOVSX64rr32,,BSWAP32r_BSWAP64r_MOVSX64rr32,1.00 +2,VPADDQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.02 +2,VPSUBQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.01 +2,ADD64ri8,,WriteALU,1.00 +2,SETBr,,WriteSETCC,1.01 +\&... +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +\fBllvm\-exegesis\fP will also analyze the clusters to point out +inconsistencies in the scheduling information. The output is an html file. For +example, \fI/tmp/inconsistencies.html\fP will contain messages like the following : +[image] +.sp +Note that the scheduling class names will be resolved only when +\fBllvm\-exegesis\fP is compiled in debug mode, else only the class id will +be shown. This does not invalidate any of the analysis results though. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-help +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-opcode\-index= +Specify the opcode to measure, by index. Specifying \fI\-1\fP will result +in measuring every existing opcode. See example 1 for details. +Either \fIopcode\-index\fP, \fIopcode\-name\fP or \fIsnippets\-file\fP must be set. +.UNINDENT +.INDENT 0.0 +.TP +.B \-opcode\-name=,,... +Specify the opcode to measure, by name. Several opcodes can be specified as +a comma\-separated list. See example 1 for details. +Either \fIopcode\-index\fP, \fIopcode\-name\fP or \fIsnippets\-file\fP must be set. +.UNINDENT +.INDENT 0.0 +.TP +.B \-snippets\-file= +Specify the custom code snippet to measure. See example 2 for details. +Either \fIopcode\-index\fP, \fIopcode\-name\fP or \fIsnippets\-file\fP must be set. +.UNINDENT +.INDENT 0.0 +.TP +.B \-mode=[latency|uops|inverse_throughput|analysis] +Specify the run mode. 
Note that some modes have additional requirements and options. +.sp +\fIlatency\fP mode can make use of either RDTSC or LBR. +\fIlatency[LBR]\fP is only available on X86 (at least \fISkylake\fP). +To run in \fIlatency\fP mode, a positive value must be specified +for \fIx86\-lbr\-sample\-period\fP and \fI\-\-repetition\-mode=loop\fP\&. +.sp +In \fIanalysis\fP mode, you also need to specify at least one of the +\fI\-analysis\-clusters\-output\-file=\fP and \fI\-analysis\-inconsistencies\-output\-file=\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-x86\-lbr\-sample\-period= +Specify the LBR sampling period \- how many branches before we take a sample. +When a positive value is specified for this option and when the mode is \fIlatency\fP, +we will use LBRs for measuring. +On choosing the “right” sampling period, a small value is preferred, but throttling +could occur if the sampling is too frequent. A prime number should be used to +avoid consistently skipping certain blocks. +.UNINDENT +.INDENT 0.0 +.TP +.B \-repetition\-mode=[duplicate|loop|min] +Specify the repetition mode. \fIduplicate\fP will create a large, straight line +basic block with \fInum\-repetitions\fP instructions (repeating the snippet +\fInum\-repetitions\fP/\fIsnippet size\fP times). \fIloop\fP will, optionally, duplicate the +snippet until the loop body contains at least \fIloop\-body\-size\fP instructions, +and then wrap the result in a loop which will execute \fInum\-repetitions\fP +instructions (thus, again, repeating the snippet +\fInum\-repetitions\fP/\fIsnippet size\fP times). The \fIloop\fP mode, especially with loop +unrolling tends to better hide the effects of the CPU frontend on architectures +that cache decoded instructions, but consumes a register for counting +iterations. If performing an analysis over many opcodes, it may be best to +instead use the \fImin\fP mode, which will run each other mode, +and produce the minimal measured result. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-num\-repetitions= +Specify the target number of executed instructions. Note that the actual +repetition count of the snippet will be \fInum\-repetitions\fP/\fIsnippet size\fP\&. +Higher values lead to more accurate measurements but lengthen the benchmark. +.UNINDENT +.INDENT 0.0 +.TP +.B \-loop\-body\-size= +Only effective for \fI\-repetition\-mode=[loop|min]\fP\&. +Instead of looping over the snippet directly, first duplicate it so that the +loop body contains at least this many instructions. This potentially results +in the loop body being cached in the CPU Op Cache / Loop Cache, +which may have higher throughput than the CPU decoders. +.UNINDENT +.INDENT 0.0 +.TP +.B \-max\-configs\-per\-opcode= +Specify the maximum configurations that can be generated for each opcode. +By default this is \fI1\fP, meaning that we assume that a single measurement is +enough to characterize an opcode. This might not be true of all instructions: +for example, the performance characteristics of the LEA instruction on X86 +depend on the value of assigned registers and immediates. Setting a value of +\fI\-max\-configs\-per\-opcode\fP larger than \fI1\fP allows \fIllvm\-exegesis\fP to explore +more configurations to discover if some register or immediate assignments +lead to different performance characteristics. +.UNINDENT +.INDENT 0.0 +.TP +.B \-benchmarks\-file= +File to read (\fIanalysis\fP mode) or write (\fIlatency\fP/\fIuops\fP/\fIinverse_throughput\fP +modes) benchmark results. “\-” uses stdin/stdout. +.UNINDENT +.INDENT 0.0 +.TP +.B \-analysis\-clusters\-output\-file= +If provided, write the analysis clusters as CSV to this file. “\-” prints to +stdout. By default, this analysis is not run. +.UNINDENT +.INDENT 0.0 +.TP +.B \-analysis\-inconsistencies\-output\-file= +If non\-empty, write inconsistencies found during analysis to this file. \fI\-\fP +prints to stdout. By default, this analysis is not run. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-analysis\-clustering=[dbscan,naive] +Specify the clustering algorithm to use. By default DBSCAN will be used. +The naive clustering algorithm is better for doing further work on the +\fI\-analysis\-inconsistencies\-output\-file=\fP output; it will create one cluster +per opcode, and check that the cluster is stable (all points are neighbours). +.UNINDENT +.INDENT 0.0 +.TP +.B \-analysis\-numpoints= +Specify the numPoints parameter to be used for DBSCAN clustering +(\fIanalysis\fP mode, DBSCAN only). +.UNINDENT +.INDENT 0.0 +.TP +.B \-analysis\-clustering\-epsilon= +Specify the epsilon parameter used for clustering of benchmark points +(\fIanalysis\fP mode). +.UNINDENT +.INDENT 0.0 +.TP +.B \-analysis\-inconsistency\-epsilon= +Specify the epsilon parameter used for detection of when the cluster +is different from the LLVM schedule profile values (\fIanalysis\fP mode). +.UNINDENT +.INDENT 0.0 +.TP +.B \-analysis\-display\-unstable\-clusters +If there is more than one benchmark for an opcode, said benchmarks may end up +not being clustered into the same cluster if the measured performance +characteristics are different. By default all such opcodes are filtered out. +This flag will instead show only such unstable opcodes. +.UNINDENT +.INDENT 0.0 +.TP +.B \-ignore\-invalid\-sched\-class=false +If set, ignore instructions that do not have a sched class (class idx = 0). +.UNINDENT +.INDENT 0.0 +.TP +.B \-mcpu= +If set, measure the cpu characteristics using the counters for this CPU. This +is useful when creating new sched models (the host CPU is unknown to LLVM). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dump\-object\-to\-disk=true +By default, llvm\-exegesis will dump the generated code to a temporary file to +enable code inspection. You may disable it to speed up the execution and save +disk space. +.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-exegesis\fP returns 0 on success. 
Otherwise, an error message is +printed to standard error, and the tool returns a non 0 value. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-extract.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-extract.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-extract.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-extract.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,176 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-EXTRACT" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-extract \- extract a function from an LLVM module +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-extract\fP [\fIoptions\fP] \fB–func\fP \fIfunction\-name\fP [\fIfilename\fP] +.SH DESCRIPTION +.sp +The \fBllvm\-extract\fP command takes the name of a function and extracts +it from the specified LLVM bitcode file. It is primarily used as a debugging +tool to reduce test cases from larger programs that are triggering a bug. +.sp +In addition to extracting the bitcode of the specified function, +\fBllvm\-extract\fP will also remove unreachable global variables, +prototypes, and unused types. 
+.sp +The \fBllvm\-extract\fP command reads its input from standard input if +filename is omitted or if filename is \fB\-\fP\&. The output is always written to +standard output, unless the \fB\-o\fP option is specified (see below). +.SH OPTIONS +.sp +\fB\-\-alias\fP \fIalias\-name\fP +.INDENT 0.0 +.INDENT 3.5 +Extract the alias named \fIalias\-name\fP from the LLVM bitcode. May be +specified multiple times to extract multiple aliases at once. +.UNINDENT +.UNINDENT +.sp +\fB\-\-ralias\fP \fIalias\-regular\-expr\fP +.INDENT 0.0 +.INDENT 3.5 +Extract the alias(es) matching \fIalias\-regular\-expr\fP from the LLVM bitcode. +All aliases matching the regular expression will be extracted. May be +specified multiple times. +.UNINDENT +.UNINDENT +.sp +\fB\-\-bb\fP \fIbasic\-block\-specifier\fP +.INDENT 0.0 +.INDENT 3.5 +Extract basic block(s) specified in \fIbasic\-block\-specifier\fP\&. May be +specified multiple times. Each specifier pair will create +a function. If multiple basic blocks are specified in one pair, the first +block in the sequence should dominate the rest. +.UNINDENT +.UNINDENT +.sp +\fB\-\-delete\fP +.INDENT 0.0 +.INDENT 3.5 +Delete specified Globals from Module. +.UNINDENT +.UNINDENT +.sp +\fB\-f\fP +.INDENT 0.0 +.INDENT 3.5 +Enable binary output on terminals. Normally, \fBllvm\-extract\fP will +refuse to write raw bitcode output if the output stream is a terminal. With +this option, \fBllvm\-extract\fP will write raw bitcode regardless of the +output device. +.UNINDENT +.UNINDENT +.sp +\fB\-\-func\fP \fIfunction\-name\fP +.INDENT 0.0 +.INDENT 3.5 +Extract the function named \fIfunction\-name\fP from the LLVM bitcode. May be +specified multiple times to extract multiple functions at once. +.UNINDENT +.UNINDENT +.sp +\fB\-\-rfunc\fP \fIfunction\-regular\-expr\fP +.INDENT 0.0 +.INDENT 3.5 +Extract the function(s) matching \fIfunction\-regular\-expr\fP from the LLVM bitcode. +All functions matching the regular expression will be extracted. May be +specified multiple times.
+.UNINDENT +.UNINDENT +.sp +\fB\-\-glob\fP \fIglobal\-name\fP +.INDENT 0.0 +.INDENT 3.5 +Extract the global variable named \fIglobal\-name\fP from the LLVM bitcode. May be +specified multiple times to extract multiple global variables at once. +.UNINDENT +.UNINDENT +.sp +\fB\-\-rglob\fP \fIglobal\-regular\-expr\fP +.INDENT 0.0 +.INDENT 3.5 +Extract the global variable(s) matching \fIglobal\-regular\-expr\fP from the LLVM +bitcode. All global variables matching the regular expression will be +extracted. May be specified multiple times. +.UNINDENT +.UNINDENT +.sp +\fB\-\-keep\-const\-init\fP +.INDENT 0.0 +.INDENT 3.5 +Preserve the values of constant globals. +.UNINDENT +.UNINDENT +.sp +\fB\-\-recursive\fP +.INDENT 0.0 +.INDENT 3.5 +Recursively extract all called functions. +.UNINDENT +.UNINDENT +.sp +\fB\-help\fP +.INDENT 0.0 +.INDENT 3.5 +Print a summary of command line options. +.UNINDENT +.UNINDENT +.sp +\fB\-o\fP \fIfilename\fP +.INDENT 0.0 +.INDENT 3.5 +Specify the output filename. If filename is “\-” (the default), then +\fBllvm\-extract\fP sends its output to standard output. +.UNINDENT +.UNINDENT +.sp +\fB\-S\fP +.INDENT 0.0 +.INDENT 3.5 +Write output in LLVM intermediate language (instead of bitcode). +.UNINDENT +.UNINDENT +.SH EXIT STATUS +.sp +If \fBllvm\-extract\fP succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non\-zero value. +.SH SEE ALSO +.sp +\fBbugpoint(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +.
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-install-name-tool.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-install-name-tool.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-install-name-tool.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-install-name-tool.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,115 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-INSTALL-NAME-TOOL" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-install-name-tool \- LLVM tool for manipulating install-names and rpaths +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-install\-name\-tool\fP [\fIoptions\fP] \fIinput\fP +.SH DESCRIPTION +.sp +\fBllvm\-install\-name\-tool\fP is a tool to manipulate dynamic shared library +install names and rpaths listed in a Mach\-O binary. +.sp +For most scenarios, it works as a drop\-in replacement for Apple’s +\fBinstall_name_tool\fP\&. +.SH OPTIONS +.sp +At least one of the following options is required, and some options can be +combined with other options. Options \fI\%\-add_rpath\fP, \fI\%\-delete_rpath\fP, +and \fI\%\-rpath\fP can be combined in an invocation only if they do not share +the same \fI<rpath>\fP value. +.INDENT 0.0 +.TP +.B \-add_rpath <rpath> +Add an rpath named \fB<rpath>\fP to the specified binary.
Can be specified multiple +times to add multiple rpaths. Throws an error if \fB<rpath>\fP is already listed in +the binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-change <old_install_name> <new_install_name> +Change an install name \fB<old_install_name>\fP to \fB<new_install_name>\fP in the +specified binary. Can be specified multiple times to change multiple dependent shared +library install names. Option is ignored if \fB<old_install_name>\fP is not listed +in the specified binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-delete_rpath <rpath> +Delete an rpath named \fB<rpath>\fP from the specified binary. Can be specified multiple +times to delete multiple rpaths. Throws an error if \fB<rpath>\fP is not listed in +the binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-delete_all_rpaths +Deletes all rpaths from the binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-help, \-h +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-id <name> +Change shared library’s identification name under LC_ID_DYLIB to \fB<name>\fP in the +specified binary. If specified multiple times, only the last \fI\%\-id\fP option is +selected. Option is ignored if the specified Mach\-O binary is not a dynamic shared library. +.UNINDENT +.INDENT 0.0 +.TP +.B \-rpath <old_rpath> <new_rpath> +Change an rpath named \fB<old_rpath>\fP to \fB<new_rpath>\fP in the specified binary. Can be specified +multiple times to change multiple rpaths. Throws an error if \fB<old_rpath>\fP is not listed +in the binary or \fB<new_rpath>\fP is already listed in the binary. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-version, \-V +Display the version of the \fBllvm\-install\-name\-tool\fP executable. +.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-install\-name\-tool\fP exits with a non\-zero exit code if there is an error. +Otherwise, it exits with code 0. +.SH BUGS +.sp +To report bugs, please visit <\fI\%https://bugs.llvm.org/\fP>. +.SH SEE ALSO +.sp +\fBllvm\-objcopy(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +.
diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-lib.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-lib.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-lib.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-lib.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,61 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-LIB" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-lib \- LLVM lib.exe compatible library tool +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-lib\fP [/libpath:<path>] [/out:<file>] [/llvmlibthin] +[/ignore] [/machine] [/nologo] [files…] +.SH DESCRIPTION +.sp +The \fBllvm\-lib\fP command is intended to be a \fBlib.exe\fP compatible +tool. See \fI\%https://msdn.microsoft.com/en\-us/library/7ykb2k5f\fP for the +general description. +.sp +\fBllvm\-lib\fP has the following extensions: +.INDENT 0.0 +.IP \(bu 2 +Bitcode files in symbol tables. +\fBllvm\-lib\fP includes symbols from both bitcode files and regular +object files in the symbol table. +.IP \(bu 2 +Creating thin archives. +The /llvmlibthin option causes \fBllvm\-lib\fP to create thin archives +that contain only the symbol table and the header for the various +members. These files are much smaller, but are not compatible with +link.exe (lld can handle them).
+.UNINDENT +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-libtool-darwin.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-libtool-darwin.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-libtool-darwin.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-libtool-darwin.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,142 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-LIBTOOL-DARWIN" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-libtool-darwin \- LLVM tool for creating libraries for Darwin +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-libtool\-darwin\fP [\fIoptions\fP] \fI\fP +.SH DESCRIPTION +.sp +\fBllvm\-libtool\-darwin\fP is a tool for creating static and dynamic +libraries for Darwin. +.sp +For most scenarios, it works as a drop\-in replacement for cctools’ +\fBlibtool\fP\&. +.SH OPTIONS +.sp +\fBllvm\-libtool\-darwin\fP supports the following options: +.INDENT 0.0 +.TP +.B \-arch_only +Build a static library only for the specified \fI\fP and ignore all +other architectures in the files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-color +Use colors in output. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-D +Use zero for timestamps and UIDs/GIDs. This is set by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-filelist +Read input file names from \fI\fP\&. File names are specified in \fI\fP +one per line, separated only by newlines. Whitespace on a line is assumed +to be part of the filename. If the directory name, \fIdirname\fP, is also +specified then it is prepended to each file name in the \fI\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-h, \-help +Show help and usage for this command. +.UNINDENT +.INDENT 0.0 +.TP +.B \-help\-list +Show help and usage for this command without grouping the options +into categories. +.UNINDENT +.INDENT 0.0 +.TP +.B \-l +Searches for the library libx.a in the library search path. If the string \fI\fP +ends with ‘.o’, then the library ‘x’ is searched for without prepending ‘lib’ +or appending ‘.a’. If the library is found, it is added to the list of input +files. Otherwise, an error is raised. +.UNINDENT +.INDENT 0.0 +.TP +.B \-L +Adds \fI\fP to the list of directories in which to search for libraries. The +directories are searched in the order in which they are specified with +\fI\%\-L\fP and before the default search path. The default search path +includes directories \fI/lib\fP, \fI/usr/lib\fP and \fI/usr/local/lib\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-no_warning_for_no_symbols +Do not warn about files that have no symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o +Specify the output file name. Must be specified exactly once. +.UNINDENT +.INDENT 0.0 +.TP +.B \-static +Produces a static library from the input files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-U +Use actual timestamps and UIDs/GIDs. +.UNINDENT +.INDENT 0.0 +.TP +.B \-V +Display the version of this program and perform any operation specified. +.UNINDENT +.INDENT 0.0 +.TP +.B \-version +Display the version of this program and exit immediately. 
+.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-libtool\-darwin\fP exits with a non\-zero exit code if there is an error. +Otherwise, it exits with code 0. +.SH BUGS +.sp +To report bugs, please visit <\fI\%https://bugs.llvm.org/\fP>. +.SH SEE ALSO +.sp +\fBllvm\-ar(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-link.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-link.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-link.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-link.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,88 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-LINK" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-link \- LLVM bitcode linker +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-link\fP [\fIoptions\fP] \fIfilename …\fP +.SH DESCRIPTION +.sp +\fBllvm\-link\fP takes several LLVM bitcode files and links them together +into a single LLVM bitcode file. It writes the output file to standard output, +unless the \fI\%\-o\fP option is used to specify a filename. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-f +Enable binary output on terminals. 
Normally, \fBllvm\-link\fP will refuse +to write raw bitcode output if the output stream is a terminal. With this +option, \fBllvm\-link\fP will write raw bitcode regardless of the output +device. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o filename +Specify the output file name. If \fBfilename\fP is “\fB\-\fP“, then +\fBllvm\-link\fP will write its output to standard output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-S +Write output in LLVM intermediate language (instead of bitcode). +.UNINDENT +.INDENT 0.0 +.TP +.B \-d +If specified, \fBllvm\-link\fP prints a human\-readable version of the +output bitcode file to standard error. +.UNINDENT +.INDENT 0.0 +.TP +.B \-help +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-v +Verbose mode. Print information about what \fBllvm\-link\fP is doing. +This typically includes a message for each bitcode file linked in and for each +library found. +.UNINDENT +.SH EXIT STATUS +.sp +If \fBllvm\-link\fP succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non\-zero value. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-lipo.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-lipo.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-lipo.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-lipo.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,110 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-LIPO" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-lipo \- LLVM tool for manipulating universal binaries +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. 
+.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-lipo\fP [\fIfilenames…\fP] [\fIoptions\fP] +.SH DESCRIPTION +.sp +\fBllvm\-lipo\fP can create universal binaries from Mach\-O files, extract regular object files from universal binaries, and display architecture information about both universal and regular files. +.SH COMMANDS +.sp +\fBllvm\-lipo\fP supports the following mutually exclusive commands: +.INDENT 0.0 +.TP +.B \-help, \-h +Display usage information and exit. +.UNINDENT +.INDENT 0.0 +.TP +.B \-version +Display the version of this program. +.UNINDENT +.INDENT 0.0 +.TP +.B \-verify_arch [ ...] +Take a single input file and verify the specified architectures are present in the file. +If so then exit with a status of 0 else exit with a status of 1. +.UNINDENT +.INDENT 0.0 +.TP +.B \-archs +Take a single input file and display the architectures present in the file. +Each architecture is separated by a single whitespace. +Unknown architectures are displayed as unknown(CPUtype,CPUsubtype). +.UNINDENT +.INDENT 0.0 +.TP +.B \-info +Take at least one input file and display the descriptions of each file. +The descriptions include the filename and architecture types separated by whitespace. +Universal binaries are grouped together first, followed by thin files. +Architectures in the fat file: are: +Non\-fat file: is architecture: +.UNINDENT +.INDENT 0.0 +.TP +.B \-thin +Take a single universal binary input file and the thin flag followed by an architecture type. +Require the output flag to be specified, and output a thin binary of the specified architecture. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-create +Take at least one input file and require the output flag to be specified. +Output a universal binary combining the input files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-replace +Take a single universal binary input file and require the output flag to be specified. +The replace flag is followed by an architecture type, and a thin input file. +Output a universal binary with the specified architecture slice in the +universal binary input replaced with the contents of the thin input file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-segalign +Additional flag that can be specified with create and replace. +The segalign flag is followed by an architecture type, and an alignment. +The alignment is a hexadecimal number that is a power of 2. +Output a file in which the slice with the specified architecture has the specified alignment. +.UNINDENT +.SH BUGS +.sp +To report bugs, please visit <\fI\%https://bugs.llvm.org/\fP>. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-locstats.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-locstats.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-locstats.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-locstats.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,155 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-LOCSTATS" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-locstats \- calculate statistics on DWARF debug location +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. 
nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-locstats\fP [\fIoptions\fP] [\fIfilename\fP] +.SH DESCRIPTION +.sp +\fBllvm\-locstats\fP works like a wrapper around \fBllvm\-dwarfdump\fP\&. +It parses \fBllvm\-dwarfdump\fP statistics regarding debug location by +pretty printing it in a more human readable way. +.sp +The line 0% shows the number and the percentage of DIEs with no location +information, but the line 100% shows the information for DIEs where there is +location information in all code section bytes (where the variable or parameter +is in the scope). The line [50%,60%) shows the number and the percentage of DIEs +where the location information is between 50 and 60 percent of its scope +covered. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-\-only\-variables +calculate the location statistics only for local variables +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-only\-formal\-parameters +calculate the location statistics only for formal parameters +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-ignore\-debug\-entry\-values +ignore the location statistics on locations containing the +debug entry values DWARF operation +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-draw\-plot +make histogram of location buckets generated (requires +matplotlib) +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-compare +compare the debug location coverage on two files provided, and draw +a plot showing the difference (requires matplotlib) +.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-locstats\fP returns 0 if the input files were parsed +successfully. Otherwise, it returns 1. +.SH EXAMPLE 1 +.sp +Pretty print the location coverage on the standard output.
+.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +llvm\-locstats a.out + + ================================================= + Debug Location Statistics + ================================================= + cov% samples percentage(~) + \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + 0% 1 16% + (0%,10%) 0 0% + [10%,20%) 0 0% + [20%,30%) 0 0% + [30%,40%) 0 0% + [40%,50%) 0 0% + [50%,60%) 1 16% + [60%,70%) 0 0% + [70%,80%) 0 0% + [80%,90%) 1 16% + [90%,100%) 0 0% + 100% 3 50% + ================================================= + \-the number of debug variables processed: 6 + \-PC ranges covered: 81% + \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- + \-total availability: 83% + ================================================= +.ft P +.fi +.UNINDENT +.UNINDENT +.SH EXAMPLE 2 +.sp +Generate a plot as an image file. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +llvm\-locstats \-\-draw\-plot file1.out +.ft P +.fi +.UNINDENT +.UNINDENT +[image] +.SH EXAMPLE 3 +.sp +Generate a plot as an image file showing the difference in the debug location +coverage. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +llvm\-locstats \-\-compare file1.out file1.withentryvals.out +.ft P +.fi +.UNINDENT +.UNINDENT +[image] +.SH SEE ALSO +.sp +\fBllvm\-dwarfdump(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-mca.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-mca.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-mca.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-mca.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,1213 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-MCA" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-mca \- LLVM Machine Code Analyzer +. 
+.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-mca\fP [\fIoptions\fP] [input] +.SH DESCRIPTION +.sp +\fBllvm\-mca\fP is a performance analysis tool that uses information +available in LLVM (e.g. scheduling models) to statically measure the performance +of machine code in a specific CPU. +.sp +Performance is measured in terms of throughput as well as processor resource +consumption. The tool currently works for processors with a backend for which +there is a scheduling model available in LLVM. +.sp +The main goal of this tool is not just to predict the performance of the code +when run on the target, but also help with diagnosing potential performance +issues. +.sp +Given an assembly code sequence, \fBllvm\-mca\fP estimates the Instructions +Per Cycle (IPC), as well as hardware resource pressure. The analysis and +reporting style were inspired by the IACA tool from Intel. 
+.sp +For example, you can compile code with clang, output assembly, and pipe it +directly into \fBllvm\-mca\fP for analysis: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ clang foo.c \-O2 \-target x86_64\-unknown\-unknown \-S \-o \- | llvm\-mca \-mcpu=btver2 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Or for Intel syntax: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ clang foo.c \-O2 \-target x86_64\-unknown\-unknown \-mllvm \-x86\-asm\-syntax=intel \-S \-o \- | llvm\-mca \-mcpu=btver2 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +(\fBllvm\-mca\fP detects Intel syntax by the presence of an \fI\&.intel_syntax\fP +directive at the beginning of the input. By default its output syntax matches +that of its input.) +.sp +Scheduling models are not just used to compute instruction latencies and +throughput, but also to understand what processor resources are available +and how to simulate them. +.sp +By design, the quality of the analysis conducted by \fBllvm\-mca\fP is +inevitably affected by the quality of the scheduling models in LLVM. +.sp +If you see that the performance report is not accurate for a processor, +please \fI\%file a bug\fP +against the appropriate backend. +.SH OPTIONS +.sp +If \fBinput\fP is “\fB\-\fP” or omitted, \fBllvm\-mca\fP reads from standard +input. Otherwise, it will read from the specified filename. +.sp +If the \fI\%\-o\fP option is omitted, then \fBllvm\-mca\fP will send its output +to standard output if the input is from standard input. If the \fI\%\-o\fP +option specifies “\fB\-\fP“, then the output will also be sent to standard output. +.INDENT 0.0 +.TP +.B \-help +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-o +Use \fB\fP as the output filename. See the summary above for more +details. +.UNINDENT +.INDENT 0.0 +.TP +.B \-mtriple= +Specify a target triple string. +.UNINDENT +.INDENT 0.0 +.TP +.B \-march= +Specify the architecture for which to analyze the code. It defaults to the +host default target. 
+.UNINDENT +.INDENT 0.0 +.TP +.B \-mcpu= +Specify the processor for which to analyze the code. By default, the cpu name +is autodetected from the host. +.UNINDENT +.INDENT 0.0 +.TP +.B \-output\-asm\-variant= +Specify the output assembly variant for the report generated by the tool. +On x86, possible values are [0, 1]. A value of 0 (vic. 1) for this flag enables +the AT&T (vic. Intel) assembly format for the code printed out by the tool in +the analysis report. +.UNINDENT +.INDENT 0.0 +.TP +.B \-print\-imm\-hex +Prefer hex format for numeric literals in the output assembly printed as part +of the report. +.UNINDENT +.INDENT 0.0 +.TP +.B \-dispatch= +Specify a different dispatch width for the processor. The dispatch width +defaults to field ‘IssueWidth’ in the processor scheduling model. If width is +zero, then the default dispatch width is used. +.UNINDENT +.INDENT 0.0 +.TP +.B \-register\-file\-size= +Specify the size of the register file. When specified, this flag limits how +many physical registers are available for register renaming purposes. A value +of zero for this flag means “unlimited number of physical registers”. +.UNINDENT +.INDENT 0.0 +.TP +.B \-iterations= +Specify the number of iterations to run. If this flag is set to 0, then the +tool sets the number of iterations to a default value (i.e. 100). +.UNINDENT +.INDENT 0.0 +.TP +.B \-noalias= +If set, the tool assumes that loads and stores don’t alias. This is the +default behavior. +.UNINDENT +.INDENT 0.0 +.TP +.B \-lqueue= +Specify the size of the load queue in the load/store unit emulated by the tool. +By default, the tool assumes an unbound number of entries in the load queue. +A value of zero for this flag is ignored, and the default load queue size is +used instead. +.UNINDENT +.INDENT 0.0 +.TP +.B \-squeue= +Specify the size of the store queue in the load/store unit emulated by the +tool. By default, the tool assumes an unbound number of entries in the store +queue. 
A value of zero for this flag is ignored, and the default store queue +size is used instead. +.UNINDENT +.INDENT 0.0 +.TP +.B \-timeline +Enable the timeline view. +.UNINDENT +.INDENT 0.0 +.TP +.B \-timeline\-max\-iterations=<iterations> +Limit the number of iterations to print in the timeline view. By default, the +timeline view prints information for up to 10 iterations. +.UNINDENT +.INDENT 0.0 +.TP +.B \-timeline\-max\-cycles=<cycles> +Limit the number of cycles in the timeline view, or use 0 for no limit. By +default, the number of cycles is set to 80. +.UNINDENT +.INDENT 0.0 +.TP +.B \-resource\-pressure +Enable the resource pressure view. This is enabled by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-register\-file\-stats +Enable register file usage statistics. +.UNINDENT +.INDENT 0.0 +.TP +.B \-dispatch\-stats +Enable extra dispatch statistics. This view collects and analyzes instruction +dispatch events, as well as static/dynamic dispatch stall events. This view +is disabled by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-scheduler\-stats +Enable extra scheduler statistics. This view collects and analyzes instruction +issue events. This view is disabled by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-retire\-stats +Enable extra retire control unit statistics. This view is disabled by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-instruction\-info +Enable the instruction info view. This is enabled by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-show\-encoding +Enable the printing of instruction encodings within the instruction info view. +.UNINDENT +.INDENT 0.0 +.TP +.B \-all\-stats +Print all hardware statistics. This enables extra statistics related to the +dispatch logic, the hardware schedulers, the register file(s), and the retire +control unit. This option is disabled by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-all\-views +Enable all the views.
+.UNINDENT +.INDENT 0.0 +.TP +.B \-instruction\-tables +Prints resource pressure information based on the static information +available from the processor model. This differs from the resource pressure +view because it doesn’t require that the code is simulated. It instead prints +the theoretical uniform distribution of resource pressure for every +instruction in sequence. +.UNINDENT +.INDENT 0.0 +.TP +.B \-bottleneck\-analysis +Print information about bottlenecks that affect the throughput. This analysis +can be expensive, and it is disabled by default. Bottlenecks are highlighted +in the summary view. Bottleneck analysis is currently not supported for +processors with an in\-order backend. +.UNINDENT +.INDENT 0.0 +.TP +.B \-json +Print the requested views in valid JSON format. The instructions and the +processor resources are printed as members of special top level JSON objects. +The individual views refer to them by index. However, not all views are +currently supported. For example, the report from the bottleneck analysis is +not printed out in JSON. All the default views are currently supported. +.UNINDENT +.INDENT 0.0 +.TP +.B \-disable\-cb +Force usage of the generic CustomBehaviour and InstrPostProcess classes rather +than using the target specific implementation. The generic classes never +detect any custom hazards or make any post processing modifications to +instructions. +.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-mca\fP returns 0 on success. Otherwise, an error message is printed +to standard error, and the tool returns 1. +.SH USING MARKERS TO ANALYZE SPECIFIC CODE BLOCKS +.sp +\fBllvm\-mca\fP allows for the optional usage of special code comments to +mark regions of the assembly code to be analyzed. A comment starting with +substring \fBLLVM\-MCA\-BEGIN\fP marks the beginning of a code region. A comment +starting with substring \fBLLVM\-MCA\-END\fP marks the end of a code region. 
For +example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# LLVM\-MCA\-BEGIN + ... +# LLVM\-MCA\-END +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +If no user\-defined region is specified, then \fBllvm\-mca\fP assumes a +default region which contains every instruction in the input file. Every region +is analyzed in isolation, and the final performance report is the union of all +the reports generated for every code region. +.sp +Code regions can have names. For example: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# LLVM\-MCA\-BEGIN A simple example + add %eax, %eax +# LLVM\-MCA\-END +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The code from the example above defines a region named “A simple example” with a +single instruction in it. Note how the region name doesn’t have to be repeated +in the \fBLLVM\-MCA\-END\fP directive. In the absence of overlapping regions, +an anonymous \fBLLVM\-MCA\-END\fP directive always ends the currently active user +defined region. +.sp +Example of nesting regions: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# LLVM\-MCA\-BEGIN foo + add %eax, %edx +# LLVM\-MCA\-BEGIN bar + sub %eax, %edx +# LLVM\-MCA\-END bar +# LLVM\-MCA\-END foo +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Example of overlapping regions: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +# LLVM\-MCA\-BEGIN foo + add %eax, %edx +# LLVM\-MCA\-BEGIN bar + sub %eax, %edx +# LLVM\-MCA\-END foo + add %eax, %edx +# LLVM\-MCA\-END bar +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +Note that multiple anonymous regions cannot overlap. Also, overlapping regions +cannot have the same name. +.sp +There is no support for marking regions from high\-level source code, like C or +C++. 
As a workaround, inline assembly directives may be used: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +int foo(int a, int b) { + __asm volatile("# LLVM\-MCA\-BEGIN foo"); + a += 42; + __asm volatile("# LLVM\-MCA\-END"); + a *= b; + return a; +} +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +However, this interferes with optimizations like loop vectorization and may have +an impact on the code generated. This is because the \fB__asm\fP statements are +seen as real code having important side effects, which limits how the code +around them can be transformed. If users want to make use of inline assembly +to emit markers, then the recommendation is to always verify that the output +assembly is equivalent to the assembly generated in the absence of markers. +The \fI\%Clang options to emit optimization reports\fP +can also help in detecting missed optimizations. +.SH HOW LLVM-MCA WORKS +.sp +\fBllvm\-mca\fP takes assembly code as input. The assembly code is parsed +into a sequence of MCInst with the help of the existing LLVM target assembly +parsers. The parsed sequence of MCInst is then analyzed by a \fBPipeline\fP module +to generate a performance report. +.sp +The Pipeline module simulates the execution of the machine code sequence in a +loop of iterations (default is 100). During this process, the pipeline collects +a number of execution related statistics. At the end of this process, the +pipeline generates and prints a report from the collected statistics. +.sp +Here is an example of a performance report generated by the tool for a +dot\-product of two packed float vectors of four elements. The analysis is +conducted for target x86, cpu btver2. 
The following result can be produced via +the following command using the example located at +\fBtest/tools/llvm\-mca/X86/BtVer2/dot\-product.s\fP: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ llvm\-mca \-mtriple=x86_64\-unknown\-unknown \-mcpu=btver2 \-iterations=300 dot\-product.s +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Iterations: 300 +Instructions: 900 +Total Cycles: 610 +Total uOps: 900 + +Dispatch Width: 2 +uOps Per Cycle: 1.48 +IPC: 1.48 +Block RThroughput: 2.0 + + +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) + +[1] [2] [3] [4] [5] [6] Instructions: + 1 2 1.00 vmulps %xmm0, %xmm1, %xmm2 + 1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3 + 1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4 + + +Resources: +[0] \- JALU0 +[1] \- JALU1 +[2] \- JDiv +[3] \- JFPA +[4] \- JFPM +[5] \- JFPU0 +[6] \- JFPU1 +[7] \- JLAGU +[8] \- JMul +[9] \- JSAGU +[10] \- JSTC +[11] \- JVALU0 +[12] \- JVALU1 +[13] \- JVIMUL + + +Resource pressure per iteration: +[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] + \- \- \- 2.00 1.00 2.00 1.00 \- \- \- \- \- \- \- + +Resource pressure by instruction: +[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: + \- \- \- \- 1.00 \- 1.00 \- \- \- \- \- \- \- vmulps %xmm0, %xmm1, %xmm2 + \- \- \- 1.00 \- 1.00 \- \- \- \- \- \- \- \- vhaddps %xmm2, %xmm2, %xmm3 + \- \- \- 1.00 \- 1.00 \- \- \- \- \- \- \- \- vhaddps %xmm3, %xmm3, %xmm4 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +According to this report, the dot\-product kernel has been executed 300 times, +for a total of 900 simulated instructions. The total number of simulated micro +opcodes (uOps) is also 900. +.sp +The report is structured in three main sections. The first section collects a +few performance numbers; the goal of this section is to give a very quick +overview of the performance throughput. 
Important performance indicators are +\fBIPC\fP, \fBuOps Per Cycle\fP, and \fBBlock RThroughput\fP (Block Reciprocal +Throughput). +.sp +Field \fIDispatchWidth\fP is the maximum number of micro opcodes that are dispatched +to the out\-of\-order backend every simulated cycle. For processors with an +in\-order backend, \fIDispatchWidth\fP is the maximum number of micro opcodes issued +to the backend every simulated cycle. +.sp +IPC is computed by dividing the total number of simulated instructions by the total +number of cycles. +.sp +Field \fIBlock RThroughput\fP is the reciprocal of the block throughput. Block +throughput is a theoretical quantity computed as the maximum number of blocks +(i.e. iterations) that can be executed per simulated clock cycle in the absence +of loop\-carried dependencies. Block throughput is bounded from above by the +dispatch rate, and the availability of hardware resources. +.sp +In the absence of loop\-carried data dependencies, the observed IPC tends to a +theoretical maximum which can be computed by dividing the number of instructions +of a single iteration by the \fIBlock RThroughput\fP\&. +.sp +Field ‘uOps Per Cycle’ is computed by dividing the total number of simulated micro +opcodes by the total number of cycles. A delta between Dispatch Width and this +field is an indicator of a performance issue. In the absence of loop\-carried +data dependencies, the observed ‘uOps Per Cycle’ should tend to a theoretical +maximum throughput which can be computed by dividing the number of uOps of a +single iteration by the \fIBlock RThroughput\fP\&. +.sp +Field \fIuOps Per Cycle\fP is bounded from above by the dispatch width. That is +because the dispatch width limits the maximum size of a dispatch group. Both IPC +and ‘uOps Per Cycle’ are limited by the amount of hardware parallelism.
The +availability of hardware resources affects the resource pressure distribution, +and it limits the number of instructions that can be executed in parallel every +cycle. A delta between Dispatch Width and the theoretical maximum uOps per +Cycle (computed by dividing the number of uOps of a single iteration by the +\fIBlock RThroughput\fP) is an indicator of a performance bottleneck caused by the +lack of hardware resources. +In general, the lower the Block RThroughput, the better. +.sp +In this example, \fBuOps per iteration/Block RThroughput\fP is 1.50. Since there +are no loop\-carried dependencies, the observed \fIuOps Per Cycle\fP is expected to +approach 1.50 when the number of iterations tends to infinity. The delta between +the Dispatch Width (2.00), and the theoretical maximum uOp throughput (1.50) is +an indicator of a performance bottleneck caused by the lack of hardware +resources, and the \fIResource pressure view\fP can help to identify the problematic +resource usage. +.sp +The second section of the report is the \fIinstruction info view\fP\&. It shows the +latency and reciprocal throughput of every instruction in the sequence. It also +reports extra information related to the number of micro opcodes, and opcode +properties (i.e., ‘MayLoad’, ‘MayStore’, and ‘HasSideEffects’). +.sp +Field \fIRThroughput\fP is the reciprocal of the instruction throughput. Throughput +is computed as the maximum number of instructions of a same type that can be +executed per clock cycle in the absence of operand dependencies. In this +example, the reciprocal throughput of a vector float multiply is 1 +cycles/instruction. That is because the FP multiplier JFPM is only available +from pipeline JFPU1. +.sp +Instruction encodings are displayed within the instruction info view when flag +\fI\-show\-encoding\fP is specified. 
+.sp +Below is an example of \fI\-show\-encoding\fP output for the dot\-product kernel: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Instruction Info: +[1]: #uOps +[2]: Latency +[3]: RThroughput +[4]: MayLoad +[5]: MayStore +[6]: HasSideEffects (U) +[7]: Encoding Size + +[1] [2] [3] [4] [5] [6] [7] Encodings: Instructions: + 1 2 1.00 4 c5 f0 59 d0 vmulps %xmm0, %xmm1, %xmm2 + 1 4 1.00 4 c5 eb 7c da vhaddps %xmm2, %xmm2, %xmm3 + 1 4 1.00 4 c5 e3 7c e3 vhaddps %xmm3, %xmm3, %xmm4 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The \fIEncoding Size\fP column shows the size in bytes of instructions. The +\fIEncodings\fP column shows the actual instruction encodings (byte sequences in +hex). +.sp +The third section is the \fIResource pressure view\fP\&. This view reports +the average number of resource cycles consumed every iteration by instructions +for every processor resource unit available on the target. Information is +structured in two tables. The first table reports the number of resource cycles +spent on average every iteration. The second table correlates the resource +cycles to the machine instruction in the sequence. For example, every iteration +of the instruction vmulps always executes on resource unit [6] +(JFPU1 \- floating point pipeline #1), consuming an average of 1 resource cycle +per iteration. Note that on AMD Jaguar, vector floating\-point multiply can +only be issued to pipeline JFPU1, while horizontal floating\-point additions can +only be issued to pipeline JFPU0. +.sp +The resource pressure view helps with identifying bottlenecks caused by high +usage of specific hardware resources. Situations with resource pressure mainly +concentrated on a few resources should, in general, be avoided. Ideally, +pressure should be uniformly distributed between multiple resources. +.SS Timeline View +.sp +The timeline view produces a detailed report of each instruction’s state +transitions through an instruction pipeline. 
This view is enabled by the +command line option \fB\-timeline\fP\&. As instructions transition through the +various stages of the pipeline, their states are depicted in the view report. +These states are represented by the following characters: +.INDENT 0.0 +.IP \(bu 2 +D : Instruction dispatched. +.IP \(bu 2 +e : Instruction executing. +.IP \(bu 2 +E : Instruction executed. +.IP \(bu 2 +R : Instruction retired. +.IP \(bu 2 += : Instruction already dispatched, waiting to be executed. +.IP \(bu 2 +\- : Instruction executed, waiting to be retired. +.UNINDENT +.sp +Below is the timeline view for a subset of the dot\-product example located in +\fBtest/tools/llvm\-mca/X86/BtVer2/dot\-product.s\fP and processed by +\fBllvm\-mca\fP using the following command: +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +$ llvm\-mca \-mtriple=x86_64\-unknown\-unknown \-mcpu=btver2 \-iterations=3 \-timeline dot\-product.s +.ft P +.fi +.UNINDENT +.UNINDENT +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Timeline view: + 012345 +Index 0123456789 + +[0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2 +[0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3 +[0,2] .D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 +[1,0] .DeeE\-\-\-\-\-R . vmulps %xmm0, %xmm1, %xmm2 +[1,1] . D=eeeE\-\-\-R . vhaddps %xmm2, %xmm2, %xmm3 +[1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4 +[2,0] . DeeE\-\-\-\-\-R . vmulps %xmm0, %xmm1, %xmm2 +[2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3 +[2,2] . D======eeeER vhaddps %xmm3, %xmm3, %xmm4 + + +Average Wait times (based on the timeline view): +[0]: Executions +[1]: Average time spent waiting in a scheduler\(aqs queue +[2]: Average time spent waiting in a scheduler\(aqs queue while ready +[3]: Average time elapsed from WB until retire stage + + [0] [1] [2] [3] +0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2 +1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3 +2. 
3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 + 3 3.3 0.5 1.4 <total> +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +The timeline view is interesting because it shows instruction state changes +during execution. It also gives an idea of how the tool processes instructions +executed on the target, and how their timing information might be calculated. +.sp +The timeline view is structured in two tables. The first table shows +instructions changing state over time (measured in cycles); the second table +(named \fIAverage Wait times\fP) reports useful timing statistics, which should +help diagnose performance bottlenecks caused by long data dependencies and +sub\-optimal usage of hardware resources. +.sp +An instruction in the timeline view is identified by a pair of indices, where +the first index identifies an iteration, and the second index is the +instruction index (i.e., where it appears in the code sequence). Since this +example was generated using 3 iterations: \fB\-iterations=3\fP, the iteration +indices range from 0 to 2 inclusive. +.sp +Excluding the first and last column, the remaining columns are in cycles. +Cycles are numbered sequentially starting from 0. +.sp +From the example output above, we know the following: +.INDENT 0.0 +.IP \(bu 2 +Instruction [1,0] was dispatched at cycle 1. +.IP \(bu 2 +Instruction [1,0] started executing at cycle 2. +.IP \(bu 2 +Instruction [1,0] reached the write\-back stage at cycle 4. +.IP \(bu 2 +Instruction [1,0] was retired at cycle 10. +.UNINDENT +.sp +Instruction [1,0] (i.e., vmulps from iteration #1) does not have to wait in the +scheduler’s queue for the operands to become available. By the time vmulps is +dispatched, operands are already available, and pipeline JFPU1 is ready to +serve another instruction. So the instruction can be immediately issued on the +JFPU1 pipeline. That is demonstrated by the fact that the instruction only +spent 1cy in the scheduler’s queue.
+.sp +There is a gap of 5 cycles between the write\-back stage and the retire event. +That is because instructions must retire in program order, so [1,0] has to wait +for [0,2] to be retired first (i.e., it has to wait until cycle 10). +.sp +In the example, all instructions are in a RAW (Read After Write) dependency +chain. Register %xmm2 written by vmulps is immediately used by the first +vhaddps, and register %xmm3 written by the first vhaddps is used by the second +vhaddps. Long data dependencies negatively impact the ILP (Instruction Level +Parallelism). +.sp +In the dot\-product example, there are anti\-dependencies introduced by +instructions from different iterations. However, those dependencies can be +removed at the register renaming stage (at the cost of allocating register aliases, +and therefore consuming physical registers). +.sp +Table \fIAverage Wait times\fP helps diagnose performance issues that are caused by +the presence of long latency instructions and potentially long data dependencies +which may limit the ILP. The last row, \fB<total>\fP, shows a global average over all +instructions measured. Note that \fBllvm\-mca\fP, by default, assumes at +least 1cy between the dispatch event and the issue event. +.sp +When the performance is limited by data dependencies and/or long latency +instructions, the number of cycles spent while in the \fIready\fP state is expected +to be very small when compared with the total number of cycles spent in the +scheduler’s queue. The difference between the two counters is a good indicator +of how large of an impact data dependencies had on the execution of the +instructions. When performance is mostly limited by the lack of hardware +resources, the delta between the two counters is small. However, the number of +cycles spent in the queue tends to be larger (i.e., more than 1\-3cy), +especially when compared to other low latency instructions.
+.SS Bottleneck Analysis +.sp +The \fB\-bottleneck\-analysis\fP command line option enables the analysis of +performance bottlenecks. +.sp +This analysis is potentially expensive. It attempts to correlate increases in +backend pressure (caused by pipeline resource pressure and data dependencies) to +dynamic dispatch stalls. +.sp +Below is an example of \fB\-bottleneck\-analysis\fP output generated by +\fBllvm\-mca\fP for 500 iterations of the dot\-product example on btver2. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Cycles with backend pressure increase [ 48.07% ] +Throughput Bottlenecks: + Resource Pressure [ 47.77% ] + \- JFPA [ 47.77% ] + \- JFPU0 [ 47.77% ] + Data Dependencies: [ 0.30% ] + \- Register Dependencies [ 0.30% ] + \- Memory Dependencies [ 0.00% ] + +Critical sequence based on the simulation: + + Instruction Dependency Information + +\-\-\-\-< 2. vhaddps %xmm3, %xmm3, %xmm4 + | + | < loop carried > + | + | 0. vmulps %xmm0, %xmm1, %xmm2 + +\-\-\-\-> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 74% ] + +\-\-\-\-> 2. vhaddps %xmm3, %xmm3, %xmm4 ## REGISTER dependency: %xmm3 + | + | < loop carried > + | + +\-\-\-\-> 1. vhaddps %xmm2, %xmm2, %xmm3 ## RESOURCE interference: JFPA [ probability: 74% ] +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +According to the analysis, throughput is limited by resource pressure and not by +data dependencies. The analysis observed increases in backend pressure during +48.07% of the simulated run. Almost all those pressure increase events were +caused by contention on processor resources JFPA/JFPU0. +.sp +The \fIcritical sequence\fP is the most expensive sequence of instructions according +to the simulation. It is annotated to provide extra information about critical +register dependencies and resource interferences between instructions. +.sp +Instructions from the critical sequence are expected to significantly impact +performance. 
By construction, the accuracy of this analysis is strongly +dependent on the simulation and (as always) on the quality of the processor +model in LLVM. +.sp +Bottleneck analysis is currently not supported for processors with an in\-order +backend. +.SS Extra Statistics to Further Diagnose Performance Issues +.sp +The \fB\-all\-stats\fP command line option enables extra statistics and performance +counters for the dispatch logic, the reorder buffer, the retire control unit, +and the register file. +.sp +Below is an example of \fB\-all\-stats\fP output generated by \fBllvm\-mca\fP +for 300 iterations of the dot\-product example discussed in the previous +sections. +.INDENT 0.0 +.INDENT 3.5 +.sp +.nf +.ft C +Dynamic Dispatch Stall Cycles: +RAT \- Register unavailable: 0 +RCU \- Retire tokens unavailable: 0 +SCHEDQ \- Scheduler full: 272 (44.6%) +LQ \- Load queue full: 0 +SQ \- Store queue full: 0 +GROUP \- Static restrictions on the dispatch group: 0 + + +Dispatch Logic \- number of cycles where we saw N micro opcodes dispatched: +[# dispatched], [# cycles] + 0, 24 (3.9%) + 1, 272 (44.6%) + 2, 314 (51.5%) + + +Schedulers \- number of cycles where we saw N micro opcodes issued: +[# issued], [# cycles] + 0, 7 (1.1%) + 1, 306 (50.2%) + 2, 297 (48.7%) + +Scheduler\(aqs queue usage: +[1] Resource name. +[2] Average number of used buffer entries. +[3] Maximum number of used buffer entries. +[4] Total number of buffer entries.
+ + [1] [2] [3] [4] +JALU01 0 0 20 +JFPU01 17 18 18 +JLSAGU 0 0 12 + + +Retire Control Unit \- number of cycles where we saw N instructions retired: +[# retired], [# cycles] + 0, 109 (17.9%) + 1, 102 (16.7%) + 2, 399 (65.4%) + +Total ROB Entries: 64 +Max Used ROB Entries: 35 ( 54.7% ) +Average Used ROB Entries per cy: 32 ( 50.0% ) + + +Register File statistics: +Total number of mappings created: 900 +Max number of mappings used: 35 + +* Register File #1 \-\- JFpuPRF: + Number of physical registers: 72 + Total number of mappings created: 900 + Max number of mappings used: 35 + +* Register File #2 \-\- JIntegerPRF: + Number of physical registers: 64 + Total number of mappings created: 0 + Max number of mappings used: 0 +.ft P +.fi +.UNINDENT +.UNINDENT +.sp +If we look at the \fIDynamic Dispatch Stall Cycles\fP table, we see the counter for +SCHEDQ reports 272 cycles. This counter is incremented every time the dispatch +logic is unable to dispatch a full group because the scheduler’s queue is full. +.sp +Looking at the \fIDispatch Logic\fP table, we see that the pipeline was only able to +dispatch two micro opcodes 51.5% of the time. The dispatch group was limited to +one micro opcode 44.6% of the cycles, which corresponds to 272 cycles. The +dispatch statistics are displayed by either using the command option +\fB\-all\-stats\fP or \fB\-dispatch\-stats\fP\&. +.sp +The next table, \fISchedulers\fP, presents a histogram displaying a count, +representing the number of micro opcodes issued on some number of cycles. In +this case, of the 610 simulated cycles, single opcodes were issued 306 times +(50.2%) and there were 7 cycles where no opcodes were issued. +.sp +The \fIScheduler’s queue usage\fP table shows that the average and maximum number of +buffer entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01 +reached its maximum (18 of 18 queue entries). 
Note that AMD Jaguar implements +three schedulers: +.INDENT 0.0 +.IP \(bu 2 +JALU01 \- A scheduler for ALU instructions. +.IP \(bu 2 +JFPU01 \- A scheduler for floating point operations. +.IP \(bu 2 +JLSAGU \- A scheduler for address generation. +.UNINDENT +.sp +The dot\-product is a kernel of three floating point instructions (a vector +multiply followed by two horizontal adds). That explains why only the floating +point scheduler appears to be used. +.sp +A full scheduler queue is either caused by data dependency chains or by a +sub\-optimal usage of hardware resources. Sometimes, resource pressure can be +mitigated by rewriting the kernel using different instructions that consume +different scheduler resources. Schedulers with a small queue are less resilient +to bottlenecks caused by the presence of long data dependencies. The scheduler +statistics are displayed by using the command option \fB\-all\-stats\fP or +\fB\-scheduler\-stats\fP\&. +.sp +The next table, \fIRetire Control Unit\fP, presents a histogram displaying a count, +representing the number of instructions retired on some number of cycles. In +this case, of the 610 simulated cycles, two instructions were retired during the +same cycle 399 times (65.4%) and there were 109 cycles where no instructions +were retired. The retire statistics are displayed by using the command option +\fB\-all\-stats\fP or \fB\-retire\-stats\fP\&. +.sp +The last table presented is \fIRegister File statistics\fP\&. Each physical register +file (PRF) used by the pipeline is presented in this table. In the case of AMD +Jaguar, there are two register files, one for floating\-point registers (JFpuPRF) +and one for integer registers (JIntegerPRF). The table shows that of the 900 +instructions processed, there were 900 mappings created. Since this dot\-product +example utilized only floating point registers, the JFpuPRF was responsible for +creating the 900 mappings.
However, we see that the pipeline only used a +maximum of 35 of 72 available register slots at any given time. We can conclude +that the floating point PRF was the only register file used for the example, and +that it was never resource constrained. The register file statistics are +displayed by using the command option \fB\-all\-stats\fP or +\fB\-register\-file\-stats\fP\&. +.sp +In this example, we can conclude that the IPC is mostly limited by data +dependencies, and not by resource pressure. +.SS Instruction Flow +.sp +This section describes the instruction flow through the default pipeline of +\fBllvm\-mca\fP, as well as the functional units involved in the process. +.sp +The default pipeline implements the following sequence of stages used to +process instructions. +.INDENT 0.0 +.IP \(bu 2 +Dispatch (Instruction is dispatched to the schedulers). +.IP \(bu 2 +Issue (Instruction is issued to the processor pipelines). +.IP \(bu 2 +Write Back (Instruction is executed, and results are written back). +.IP \(bu 2 +Retire (Instruction is retired; writes are architecturally committed). +.UNINDENT +.sp +The in\-order pipeline implements the following sequence of stages: +* InOrderIssue (Instruction is issued to the processor pipelines). +* Retire (Instruction is retired; writes are architecturally committed). +.sp +\fBllvm\-mca\fP assumes that instructions have all been decoded and placed +into a queue before the simulation starts. Therefore, the instruction fetch and +decode stages are not modeled. Performance bottlenecks in the frontend are not +diagnosed. Also, \fBllvm\-mca\fP does not model branch prediction. +.SS Instruction Dispatch +.sp +During the dispatch stage, instructions are picked in program order from a +queue of already decoded instructions, and dispatched in groups to the +simulated hardware schedulers. +.sp +The size of a dispatch group depends on the availability of the simulated +hardware resources.
The processor dispatch width defaults to the value +of the \fBIssueWidth\fP in LLVM’s scheduling model. +.sp +An instruction can be dispatched if: +.INDENT 0.0 +.IP \(bu 2 +The size of the dispatch group is smaller than processor’s dispatch width. +.IP \(bu 2 +There are enough entries in the reorder buffer. +.IP \(bu 2 +There are enough physical registers to do register renaming. +.IP \(bu 2 +The schedulers are not full. +.UNINDENT +.sp +Scheduling models can optionally specify which register files are available on +the processor. \fBllvm\-mca\fP uses that information to initialize register +file descriptors. Users can limit the number of physical registers that are +globally available for register renaming by using the command option +\fB\-register\-file\-size\fP\&. A value of zero for this option means \fIunbounded\fP\&. By +knowing how many registers are available for renaming, the tool can predict +dispatch stalls caused by the lack of physical registers. +.sp +The number of reorder buffer entries consumed by an instruction depends on the +number of micro\-opcodes specified for that instruction by the target scheduling +model. The reorder buffer is responsible for tracking the progress of +instructions that are “in\-flight”, and retiring them in program order. The +number of entries in the reorder buffer defaults to the value specified by field +\fIMicroOpBufferSize\fP in the target scheduling model. +.sp +Instructions that are dispatched to the schedulers consume scheduler buffer +entries. \fBllvm\-mca\fP queries the scheduling model to determine the set +of buffered resources consumed by an instruction. Buffered resources are +treated like scheduler resources. +.SS Instruction Issue +.sp +Each processor scheduler implements a buffer of instructions. An instruction +has to wait in the scheduler’s buffer until input register operands become +available. 
Only at that point does the instruction become eligible for +execution and may be issued (potentially out\-of\-order) for execution. +Instruction latencies are computed by \fBllvm\-mca\fP with the help of the +scheduling model. +.sp +\fBllvm\-mca\fP’s scheduler is designed to simulate multiple processor +schedulers. The scheduler is responsible for tracking data dependencies, and +dynamically selecting which processor resources are consumed by instructions. +It delegates the management of processor resource units and resource groups to a +resource manager. The resource manager is responsible for selecting resource +units that are consumed by instructions. For example, if an instruction +consumes 1cy of a resource group, the resource manager selects one of the +available units from the group; by default, the resource manager uses a +round\-robin selector to guarantee that resource usage is uniformly distributed +between all units of a group. +.sp +\fBllvm\-mca\fP’s scheduler internally groups instructions into three sets: +.INDENT 0.0 +.IP \(bu 2 +WaitSet: a set of instructions whose operands are not ready. +.IP \(bu 2 +ReadySet: a set of instructions ready to execute. +.IP \(bu 2 +IssuedSet: a set of instructions executing. +.UNINDENT +.sp +Depending on operand availability, instructions that are dispatched to the +scheduler are either placed into the WaitSet or into the ReadySet. +.sp +Every cycle, the scheduler checks if instructions can be moved from the WaitSet +to the ReadySet, and if instructions from the ReadySet can be issued to the +underlying pipelines. The algorithm prioritizes older instructions over younger +instructions. +.SS Write\-Back and Retire Stage +.sp +Issued instructions are moved from the ReadySet to the IssuedSet. There, +instructions wait until they reach the write\-back stage. At that point, they +get removed from the queue and the retire control unit is notified.
+.sp +When instructions are executed, the retire control unit flags the instruction as +“ready to retire.” +.sp +Instructions are retired in program order. The register file is notified of the +retirement so that it can free the physical registers that were allocated for +the instruction during the register renaming stage. +.SS Load/Store Unit and Memory Consistency Model +.sp +To simulate an out\-of\-order execution of memory operations, \fBllvm\-mca\fP +utilizes a simulated load/store unit (LSUnit) to simulate the speculative +execution of loads and stores. +.sp +Each load (or store) consumes an entry in the load (or store) queue. Users can +specify flags \fB\-lqueue\fP and \fB\-squeue\fP to limit the number of entries in the +load and store queues respectively. The queues are unbounded by default. +.sp +The LSUnit implements a relaxed consistency model for memory loads and stores. +The rules are: +.INDENT 0.0 +.IP 1. 3 +A younger load is allowed to pass an older load only if there are no +intervening stores or barriers between the two loads. +.IP 2. 3 +A younger load is allowed to pass an older store provided that the load does +not alias with the store. +.IP 3. 3 +A younger store is not allowed to pass an older store. +.IP 4. 3 +A younger store is not allowed to pass an older load. +.UNINDENT +.sp +By default, the LSUnit optimistically assumes that loads do not alias +(\fI\-noalias=true\fP) store operations. Under this assumption, younger loads are +always allowed to pass older stores. Essentially, the LSUnit does not attempt +to run any alias analysis to predict when loads and stores do not alias with +each other. +.sp +Note that, in the case of write\-combining memory, rule 3 could be relaxed to +allow reordering of non\-aliasing store operations. That being said, at the +moment, there is no way to further relax the memory model (\fB\-noalias\fP is the +only option). 
Essentially, there is no option to specify a different memory +type (e.g., write\-back, write\-combining, write\-through; etc.) and consequently +to weaken, or strengthen, the memory model. +.sp +Other limitations are: +.INDENT 0.0 +.IP \(bu 2 +The LSUnit does not know when store\-to\-load forwarding may occur. +.IP \(bu 2 +The LSUnit does not know anything about cache hierarchy and memory types. +.IP \(bu 2 +The LSUnit does not know how to identify serializing operations and memory +fences. +.UNINDENT +.sp +The LSUnit does not attempt to predict if a load or store hits or misses the L1 +cache. It only knows if an instruction “MayLoad” and/or “MayStore.” For +loads, the scheduling model provides an “optimistic” load\-to\-use latency (which +usually matches the load\-to\-use latency for when there is a hit in the L1D). +.sp +\fBllvm\-mca\fP does not know about serializing operations or memory\-barrier +like instructions. The LSUnit conservatively assumes that an instruction which +has both “MayLoad” and unmodeled side effects behaves like a “soft” +load\-barrier. That means, it serializes loads without forcing a flush of the +load queue. Similarly, instructions that “MayStore” and have unmodeled side +effects are treated like store barriers. A full memory barrier is a “MayLoad” +and “MayStore” instruction with unmodeled side effects. This is inaccurate, but +it is the best that we can do at the moment with the current information +available in LLVM. +.sp +A load/store barrier consumes one entry of the load/store queue. A load/store +barrier enforces ordering of loads/stores. A younger load cannot pass a load +barrier. Also, a younger store cannot pass a store barrier. A younger load +has to wait for the memory/load barrier to execute. A load/store barrier is +“executed” when it becomes the oldest entry in the load/store queue(s). That +also means, by construction, all of the older loads/stores have been executed. 
+.sp +In conclusion, the full set of load/store consistency rules are: +.INDENT 0.0 +.IP 1. 3 +A store may not pass a previous store. +.IP 2. 3 +A store may not pass a previous load (regardless of \fB\-noalias\fP). +.IP 3. 3 +A store has to wait until an older store barrier is fully executed. +.IP 4. 3 +A load may pass a previous load. +.IP 5. 3 +A load may not pass a previous store unless \fB\-noalias\fP is set. +.IP 6. 3 +A load has to wait until an older load barrier is fully executed. +.UNINDENT +.SS In\-order Issue and Execute +.sp +In\-order processors are modelled as a single \fBInOrderIssueStage\fP stage. It +bypasses Dispatch, Scheduler and Load/Store unit. Instructions are issued as +soon as their operand registers are available and resource requirements are +met. Multiple instructions can be issued in one cycle according to the value of +the \fBIssueWidth\fP parameter in LLVM’s scheduling model. +.sp +Once issued, an instruction is moved to \fBIssuedInst\fP set until it is ready to +retire. \fBllvm\-mca\fP ensures that writes are committed in\-order. However, +an instruction is allowed to commit writes and retire out\-of\-order if +\fBRetireOOO\fP property is true for at least one of its writes. +.SS Custom Behaviour +.sp +Due to certain instructions not being expressed perfectly within their +scheduling model, \fBllvm\-mca\fP isn’t always able to simulate them +perfectly. Modifying the scheduling model isn’t always a viable +option though (maybe because the instruction is modeled incorrectly on +purpose or the instruction’s behaviour is quite complex). The +CustomBehaviour class can be used in these cases to enforce proper +instruction modeling (often by customizing data dependencies and detecting +hazards that \fBllvm\-mca\fP has no way of knowing about). +.sp +\fBllvm\-mca\fP comes with one generic and multiple target specific +CustomBehaviour classes. 
The generic class will be used if the \fB\-disable\-cb\fP +flag is used or if a target specific CustomBehaviour class doesn’t exist for +that target. (The generic class does nothing.) Currently, the CustomBehaviour +class is only a part of the in\-order pipeline, but there are plans to add it +to the out\-of\-order pipeline in the future. +.sp +CustomBehaviour’s main method is \fIcheckCustomHazard()\fP which uses the +current instruction and a list of all instructions still executing within +the pipeline to determine if the current instruction should be dispatched. +As output, the method returns an integer representing the number of cycles +that the current instruction must stall for (this can be an underestimate +if you don’t know the exact number and a value of 0 represents no stall). +.sp +If you’d like to add a CustomBehaviour class for a target that doesn’t +already have one, refer to an existing implementation to see how to set it +up. The classes are implemented within the target specific backend (for +example \fI/llvm/lib/Target/AMDGPU/MCA/\fP) so that they can access backend symbols. +.SS Custom Views +.sp +\fBllvm\-mca\fP comes with several Views such as the Timeline View and +Summary View. These Views are generic and can work with most (if not all) +targets. If you wish to add a new View to \fBllvm\-mca\fP and it does not +require any backend functionality that is not already exposed through MC layer +classes (MCSubtargetInfo, MCInstrInfo, etc.), please add it to the +\fI/tools/llvm\-mca/View/\fP directory. However, if your new View is target specific +AND requires unexposed backend symbols or functionality, you can define it in +the \fI/lib/Target//MCA/\fP directory. +.sp +To enable this target specific View, you will have to use this target’s +CustomBehaviour class to override the \fICustomBehaviour::getViews()\fP methods. 
+There are 3 variations of these methods based on where you want your View to +appear in the output: \fIgetStartViews()\fP, \fIgetPostInstrInfoViews()\fP, and +\fIgetEndViews()\fP\&. These methods return a vector of Views so you will want to +return a vector containing all of the target specific Views for the target in +question. +.sp +Because these target specific (and backend dependent) Views require the +\fICustomBehaviour::getViews()\fP variants, these Views will not be enabled if +the \fI\-disable\-cb\fP flag is used. +.sp +Enabling these custom Views does not affect the non\-custom (generic) Views. +Continue to use the usual command line arguments to enable / disable those +Views. +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-nm.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-nm.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-nm.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-nm.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,397 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-NM" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-nm \- list LLVM bitcode and object file's symbol table +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +.
RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-nm\fP [\fIoptions\fP] [\fIfilenames…\fP] +.SH DESCRIPTION +.sp +The \fBllvm\-nm\fP utility lists the names of symbols from LLVM bitcode +files, object files, and archives. Each symbol is listed along with some simple +information about its provenance. If no filename is specified, \fIa.out\fP is used +as the input. If \fI\-\fP is used as a filename, \fBllvm\-nm\fP will read a file +from its standard input stream. +.sp +\fBllvm\-nm\fP’s default output format is the traditional BSD \fBnm\fP +output format. Each such output record consists of an (optional) 8\-digit +hexadecimal address, followed by a type code character, followed by a name, for +each symbol. One record is printed per line; fields are separated by spaces. +When the address is omitted, it is replaced by 8 spaces. +.sp +The supported type code characters are as follows. Where both lower and +upper\-case characters are listed for the same meaning, a lower\-case character +represents a local symbol, whilst an upper\-case character represents a global +(external) symbol: +.sp +a, A +.INDENT 0.0 +.INDENT 3.5 +Absolute symbol. +.UNINDENT +.UNINDENT +.sp +b, B +.INDENT 0.0 +.INDENT 3.5 +Uninitialized data (bss) object. +.UNINDENT +.UNINDENT +.sp +C +.INDENT 0.0 +.INDENT 3.5 +Common symbol. Multiple definitions link together into one definition. +.UNINDENT +.UNINDENT +.sp +d, D +.INDENT 0.0 +.INDENT 3.5 +Writable data object. +.UNINDENT +.UNINDENT +.sp +i, I +.INDENT 0.0 +.INDENT 3.5 +COFF: .idata symbol or symbol in a section with IMAGE_SCN_LNK_INFO set. +.UNINDENT +.UNINDENT +.sp +n +.INDENT 0.0 +.INDENT 3.5 +ELF: local symbol from non\-alloc section. +.sp +COFF: debug symbol. 
+.UNINDENT +.UNINDENT +.sp +N +.INDENT 0.0 +.INDENT 3.5 +ELF: debug section symbol, or global symbol from non\-alloc section. +.UNINDENT +.UNINDENT +.sp +s, S +.INDENT 0.0 +.INDENT 3.5 +COFF: section symbol. +.sp +Mach\-O: absolute symbol or symbol from a section other than __TEXT_EXEC __text, +__TEXT __text, __DATA __data, or __DATA __bss. +.UNINDENT +.UNINDENT +.sp +r, R +.INDENT 0.0 +.INDENT 3.5 +Read\-only data object. +.UNINDENT +.UNINDENT +.sp +t, T +.INDENT 0.0 +.INDENT 3.5 +Code (text) object. +.UNINDENT +.UNINDENT +.sp +u +.INDENT 0.0 +.INDENT 3.5 +ELF: GNU unique symbol. +.UNINDENT +.UNINDENT +.sp +U +.INDENT 0.0 +.INDENT 3.5 +Named object is undefined in this file. +.UNINDENT +.UNINDENT +.sp +v +.INDENT 0.0 +.INDENT 3.5 +ELF: Undefined weak object. It is not a link failure if the object is not +defined. +.UNINDENT +.UNINDENT +.sp +V +.INDENT 0.0 +.INDENT 3.5 +ELF: Defined weak object symbol. This definition will only be used if no +regular definitions exist in a link. If multiple weak definitions and no +regular definitions exist, one of the weak definitions will be used. +.UNINDENT +.UNINDENT +.sp +w +.INDENT 0.0 +.INDENT 3.5 +Undefined weak symbol other than an ELF object symbol. It is not a link failure +if the symbol is not defined. +.UNINDENT +.UNINDENT +.sp +W +.INDENT 0.0 +.INDENT 3.5 +Defined weak symbol other than an ELF object symbol. This definition will only +be used if no regular definitions exist in a link. If multiple weak definitions +and no regular definitions exist, one of the weak definitions will be used. +.UNINDENT +.UNINDENT +.sp +\- +.INDENT 0.0 +.INDENT 3.5 +Mach\-O: N_STAB symbol. +.UNINDENT +.UNINDENT +.sp +? +.INDENT 0.0 +.INDENT 3.5 +Something unrecognizable. 
+.UNINDENT +.UNINDENT +.sp +Because LLVM bitcode files typically contain objects that are not considered to +have addresses until they are linked into an executable image or dynamically +compiled “just\-in\-time”, \fBllvm\-nm\fP does not print an address for any +symbol in an LLVM bitcode file, even symbols which are defined in the bitcode +file. +.SH OPTIONS +.INDENT 0.0 +.TP +.B \-B +Use BSD output format. Alias for \fB\-\-format=bsd\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-debug\-syms, \-a +Show all symbols, even those usually suppressed. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-defined\-only +Print only symbols defined in this file. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-demangle, \-C +Demangle symbol names. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dynamic, \-D +Display dynamic symbols instead of normal symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-extern\-only, \-g +Print only symbols whose definitions are external; that is, accessible from +other files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-format=<format>, \-f +Select an output format; \fIformat\fP may be \fIsysv\fP, \fIposix\fP, \fIdarwin\fP, \fIbsd\fP or +\fIjust\-symbols\fP\&. +The default is \fIbsd\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-help, \-h +Print a summary of command\-line options and their meanings. +.UNINDENT +.INDENT 0.0 +.TP +.B \-j +Print just the symbol names. Alias for \fB\-\-format=just\-symbols\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-m +Use Darwin format. Alias for \fB\-\-format=darwin\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-demangle +Don’t demangle symbol names. This is the default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-llvm\-bc +Disable the LLVM bitcode reader. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-sort, \-p +Show symbols in the order encountered. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-weak +Don’t print weak symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-numeric\-sort, \-n, \-v +Sort symbols by address. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-portability, \-P +Use POSIX.2 output format.
Alias for \fB\-\-format=posix\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-print\-armap +Print the archive symbol table, in addition to the symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-print\-file\-name, \-A, \-o +Precede each symbol with the file it came from. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-print\-size, \-S +Show symbol size as well as address (not applicable for Mach\-O). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-quiet +Suppress ‘no symbols’ diagnostic. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-radix=<RADIX>, \-t +Specify the radix of the symbol address(es). Values accepted are \fId\fP (decimal), +\fIx\fP (hexadecimal) and \fIo\fP (octal). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-reverse\-sort, \-r +Sort symbols in reverse order. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-size\-sort +Sort symbols by size. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-special\-syms +Do not filter special symbols from the output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-undefined\-only, \-u +Print only undefined symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-version, \-V +Display the version of the \fBllvm\-nm\fP executable, then exit. Does not +stack with other commands. +.UNINDENT +.INDENT 0.0 +.TP +.B @<FILE> +Read command\-line options from response file \fI<FILE>\fP\&. +.UNINDENT +.SH MACH-O SPECIFIC OPTIONS +.INDENT 0.0 +.TP +.B \-\-add\-dyldinfo +Add symbols from the dyldinfo, if they are not already in the symbol table. +This is the default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-add\-inlinedinfo +Add symbols from the inlined libraries, TBD file inputs only. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-arch=<arch1[,arch2,...]> +Dump the symbols from the specified architecture(s). +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dyldinfo\-only +Dump only symbols from the dyldinfo. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-no\-dyldinfo +Do not add any symbols from the dyldinfo. +.UNINDENT +.INDENT 0.0 +.TP +.B \-s <segment> <section>
+Dump only symbols from this segment and section name. +.UNINDENT +.INDENT 0.0 +.TP +.B \-x +Print symbol entry in hex. +.UNINDENT +.SH BUGS +.INDENT 0.0 +.INDENT 3.5 +.INDENT 0.0 +.IP \(bu 2 +\fBllvm\-nm\fP does not support the full set of arguments that GNU +\fBnm\fP does. +.UNINDENT +.UNINDENT +.UNINDENT +.SH EXIT STATUS +.sp +\fBllvm\-nm\fP exits with an exit code of zero. +.SH SEE ALSO +.sp +\fBllvm\-ar(1)\fP, \fBllvm\-objdump(1)\fP, \fBllvm\-readelf(1)\fP, +\fBllvm\-readobj(1)\fP +.SH AUTHOR +Maintained by the LLVM Team (https://llvm.org/). +.SH COPYRIGHT +2003-2021, LLVM Project +.\" Generated by docutils manpage writer. +. diff -Nru llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-objcopy.1 llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-objcopy.1 --- llvm-toolchain-13-13.0.0~+rc4/llvm/docs/_build/man/llvm-objcopy.1 1970-01-01 00:00:00.000000000 +0000 +++ llvm-toolchain-13-13.0.0/llvm/docs/_build/man/llvm-objcopy.1 2021-09-19 16:28:21.000000000 +0000 @@ -0,0 +1,730 @@ +.\" Man page generated from reStructuredText. +. +.TH "LLVM-OBJCOPY" "1" "2021-09-18" "13" "LLVM" +.SH NAME +llvm-objcopy \- object copying and editing tool +. +.nr rst2man-indent-level 0 +. +.de1 rstReportMargin +\\$1 \\n[an-margin] +level \\n[rst2man-indent-level] +level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] +- +\\n[rst2man-indent0] +\\n[rst2man-indent1] +\\n[rst2man-indent2] +.. +.de1 INDENT +.\" .rstReportMargin pre: +. RS \\$1 +. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] +. nr rst2man-indent-level +1 +.\" .rstReportMargin post: +.. +.de UNINDENT +. RE +.\" indent \\n[an-margin] +.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] +.nr rst2man-indent-level -1 +.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] +.in \\n[rst2man-indent\\n[rst2man-indent-level]]u +.. +.SH SYNOPSIS +.sp +\fBllvm\-objcopy\fP [\fIoptions\fP] \fIinput\fP [\fIoutput\fP] +.SH DESCRIPTION +.sp +\fBllvm\-objcopy\fP is a tool to copy and manipulate objects. 
In basic +usage, it makes a semantic copy of the input to the output. If any options are +specified, the output may be modified along the way, e.g. by removing sections. +.sp +If no output file is specified, the input file is modified in\-place. If “\-” is +specified for the input file, the input is read from the program’s standard +input stream. If “\-” is specified for the output file, the output is written to +the standard output stream of the program. +.sp +If the input is an archive, any requested operations will be applied to each +archive member individually. +.sp +The tool is still in active development, but in most scenarios it works as a +drop\-in replacement for GNU’s \fBobjcopy\fP\&. +.SH GENERIC AND CROSS-PLATFORM OPTIONS +.sp +The following options are either agnostic of the file format, or apply to +multiple file formats. +.INDENT 0.0 +.TP +.B \-\-add\-gnu\-debuglink <debug\-file> +Add a .gnu_debuglink section for \fB<debug\-file>\fP to the output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-add\-section <section=file> +Add a section named \fB<section>
\fP with the contents of \fB<file>\fP to the +output. For ELF objects the section will be of type \fISHT_NOTE\fP, if the name +starts with “.note”. Otherwise, it will have type \fISHT_PROGBITS\fP\&. Can be +specified multiple times to add multiple sections. +.sp +For MachO objects, \fB<section>
\fP must be formatted as +\fB<segment name>,<section name>
\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-binary\-architecture <arch>, \-B +Ignored for compatibility. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-disable\-deterministic\-archives, \-U +Use real values for UIDs, GIDs and timestamps when updating archive member +headers. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-discard\-all, \-x +Remove most local symbols from the output. Different file formats may limit +this to a subset of the local symbols. For example, file and section symbols in +ELF objects will not be discarded. Additionally, remove all debug sections. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-dump\-section <section>
=<file> +Dump the contents of section \fB<section>
\fP into the file \fB<file>\fP\&. Can be +specified multiple times to dump multiple sections to different files. +\fB<file>\fP is unrelated to the input and output files provided to +\fBllvm\-objcopy\fP and as such the normal copying and editing +operations will still be performed. No operations are performed on the sections +prior to dumping them. +.sp +For MachO objects, \fB<section>
\fP must be formatted as +\fB<segment name>,<section name>
\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-enable\-deterministic\-archives, \-D +Enable deterministic mode when copying archives, i.e. use 0 for archive member +header UIDs, GIDs and timestamp fields. On by default. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-help, \-h +Print a summary of command line options. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-only\-keep\-debug +Produce a debug file as the output that only preserves contents of sections +useful for debugging purposes. +.sp +For ELF objects, this removes the contents of \fISHF_ALLOC\fP sections that are not +\fISHT_NOTE\fP by making them \fISHT_NOBITS\fP and shrinking the program headers where +possible. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-only\-section <section>
, \-j +Remove all sections from the output, except for sections named \fB<section>
\fP\&. +Can be specified multiple times to keep multiple sections. +.sp +For MachO objects, \fB<section>
\fP must be formatted as +\fB<segment name>,<section name>
\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-redefine\-sym <old>=<new> +Rename symbols called \fB<old>\fP to \fB<new>\fP in the output. Can be specified +multiple times to rename multiple symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-redefine\-syms <filename> +Rename symbols in the output as described in the file \fB<filename>\fP\&. In the +file, each line represents a single symbol to rename, with the old name and new +name separated by whitespace. Leading and trailing whitespace is ignored, as is +anything following a ‘#’. Can be specified multiple times to read names from +multiple files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-regex +If specified, symbol and section names specified by other switches are treated +as extended POSIX regular expression patterns. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-remove\-section <section>
, \-R +Remove the specified section from the output. Can be specified multiple times +to remove multiple sections simultaneously. +.sp +For MachO objects, \fB<section>
\fP must be formatted as +\fB<segment name>,<section name>
\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-set\-section\-alignment <section>
=<align> +Set the alignment of section \fB<section>
\fP to \fB<align>\fP\&. Can be specified +multiple times to update multiple sections. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-set\-section\-flags <section>
=<flag>[,<flag>,...] +Set section properties in the output of section \fB<section>
\fP based on the +specified \fB\fP values. Can be specified multiple times to update multiple +sections. +.sp +Supported flag names are \fIalloc\fP, \fIload\fP, \fInoload\fP, \fIreadonly\fP, \fIexclude\fP, +\fIdebug\fP, \fIcode\fP, \fIdata\fP, \fIrom\fP, \fIshare\fP, \fIcontents\fP, \fImerge\fP and \fIstrings\fP\&. Not +all flags are meaningful for all object file formats. +.sp +For ELF objects, the flags have the following effects: +.INDENT 7.0 +.IP \(bu 2 +\fIalloc\fP = add the \fISHF_ALLOC\fP flag. +.IP \(bu 2 +\fIload\fP = if the section has \fISHT_NOBITS\fP type, mark it as a \fISHT_PROGBITS\fP +section. +.IP \(bu 2 +\fIreadonly\fP = if this flag is not specified, add the \fISHF_WRITE\fP flag. +.IP \(bu 2 +\fIexclude\fP = add the \fISHF_EXCLUDE\fP flag. +.IP \(bu 2 +\fIcode\fP = add the \fISHF_EXECINSTR\fP flag. +.IP \(bu 2 +\fImerge\fP = add the \fISHF_MERGE\fP flag. +.IP \(bu 2 +\fIstrings\fP = add the \fISHF_STRINGS\fP flag. +.IP \(bu 2 +\fIcontents\fP = if the section has \fISHT_NOBITS\fP type, mark it as a \fISHT_PROGBITS\fP +section. +.UNINDENT +.sp +For COFF objects, the flags have the following effects: +.INDENT 7.0 +.IP \(bu 2 +\fIalloc\fP = add the \fIIMAGE_SCN_CNT_UNINITIALIZED_DATA\fP and \fIIMAGE_SCN_MEM_READ\fP +flags, unless the \fIload\fP flag is specified. +.IP \(bu 2 +\fInoload\fP = add the \fIIMAGE_SCN_LNK_REMOVE\fP and \fIIMAGE_SCN_MEM_READ\fP flags. +.IP \(bu 2 +\fIreadonly\fP = if this flag is not specified, add the \fIIMAGE_SCN_MEM_WRITE\fP +flag. +.IP \(bu 2 +\fIexclude\fP = add the \fIIMAGE_SCN_LNK_REMOVE\fP and \fIIMAGE_SCN_MEM_READ\fP flags. +.IP \(bu 2 +\fIdebug\fP = add the \fIIMAGE_SCN_CNT_INITIALIZED_DATA\fP, +\fIIMAGE_SCN_MEM_DISCARDABLE\fP and \fIIMAGE_SCN_MEM_READ\fP flags. +.IP \(bu 2 +\fIcode\fP = add the \fIIMAGE_SCN_CNT_CODE\fP, \fIIMAGE_SCN_MEM_EXECUTE\fP and +\fIIMAGE_SCN_MEM_READ\fP flags. +.IP \(bu 2 +\fIdata\fP = add the \fIIMAGE_SCN_CNT_INITIALIZED_DATA\fP and \fIIMAGE_SCN_MEM_READ\fP +flags. 
+.IP \(bu 2 +\fIshare\fP = add the \fIIMAGE_SCN_MEM_SHARED\fP and \fIIMAGE_SCN_MEM_READ\fP flags. +.UNINDENT +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-all\-gnu +Remove all symbols, debug sections and relocations from the output. This option +is equivalent to GNU \fBobjcopy\fP’s \fB\-\-strip\-all\fP switch. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-all, \-S +For ELF objects, remove from the output all symbols and non\-alloc sections not +within segments, except for .gnu.warning, .ARM.attribute sections and the +section name table. +.sp +For COFF and Mach\-O objects, remove all symbols, debug sections, and +relocations from the output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-debug, \-g +Remove all debug sections from the output. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-symbol , \-N +Remove all symbols named \fB\fP from the output. Can be specified +multiple times to remove multiple symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-symbols +Remove all symbols whose names appear in the file \fB\fP, from the +output. In the file, each line represents a single symbol name, with leading +and trailing whitespace ignored, as is anything following a ‘#’. Can be +specified multiple times to read names from multiple files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-unneeded\-symbol +Remove from the output all symbols named \fB\fP that are local or +undefined and are not required by any relocation. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-unneeded\-symbols +Remove all symbols whose names appear in the file \fB\fP, from the +output, if they are local or undefined and are not required by any relocation. +In the file, each line represents a single symbol name, with leading and +trailing whitespace ignored, as is anything following a ‘#’. Can be specified +multiple times to read names from multiple files. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-strip\-unneeded +Remove from the output all local or undefined symbols that are not required by +relocations. 
Also remove all debug sections. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-version, \-V +Display the version of the \fBllvm\-objcopy\fP executable. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-wildcard, \-w +Allow wildcard syntax for symbol\-related flags. On by default for +section\-related flags. Incompatible with –regex. +.sp +Wildcard syntax allows the following special symbols: +.TS +center; +|l|l|l|. +_ +T{ +Character +T} T{ +Meaning +T} T{ +Equivalent +T} +_ +T{ +\fB*\fP +T} T{ +Any number of characters +T} T{ +\fB\&.*\fP +T} +_ +T{ +\fB?\fP +T} T{ +Any single character +T} T{ +\fB\&.\fP +T} +_ +T{ +\fB\e\fP +T} T{ +Escape the next character +T} T{ +\fB\e\fP +T} +_ +T{ +\fB[a\-z]\fP +T} T{ +Character class +T} T{ +\fB[a\-z]\fP +T} +_ +T{ +\fB[!a\-z]\fP, \fB[^a\-z]\fP +T} T{ +Negated character class +T} T{ +\fB[^a\-z]\fP +T} +_ +.TE +.sp +Additionally, starting a wildcard with ‘!’ will prevent a match, even if +another flag matches. For example \fB\-w \-N \(aq*\(aq \-N \(aq!x\(aq\fP will strip all symbols +except for \fBx\fP\&. +.sp +The order of wildcards does not matter. For example, \fB\-w \-N \(aq*\(aq \-N \(aq!x\(aq\fP is +the same as \fB\-w \-N \(aq!x\(aq \-N \(aq*\(aq\fP\&. +.UNINDENT +.INDENT 0.0 +.TP +.B @ +Read command\-line options and commands from response file \fI\fP\&. +.UNINDENT +.SH ELF-SPECIFIC OPTIONS +.sp +The following options are implemented only for ELF objects. If used with other +objects, \fBllvm\-objcopy\fP will either emit an error or silently ignore +them. +.INDENT 0.0 +.TP +.B \-\-add\-symbol =[
:]<value>[,<flags>] +Add a new symbol called \fB<name>\fP to the output symbol table, in the section +named \fB<section>
\fP, with value \fB<value>\fP\&. If \fB<section>
\fP is not specified, +the symbol is added as an absolute symbol. The \fB\fP affect the symbol +properties. Accepted values are: +.INDENT 7.0 +.IP \(bu 2 +\fIglobal\fP = the symbol will have global binding. +.IP \(bu 2 +\fIlocal\fP = the symbol will have local binding. +.IP \(bu 2 +\fIweak\fP = the symbol will have weak binding. +.IP \(bu 2 +\fIdefault\fP = the symbol will have default visibility. +.IP \(bu 2 +\fIhidden\fP = the symbol will have hidden visibility. +.IP \(bu 2 +\fIprotected\fP = the symbol will have protected visibility. +.IP \(bu 2 +\fIfile\fP = the symbol will be an \fISTT_FILE\fP symbol. +.IP \(bu 2 +\fIsection\fP = the symbol will be an \fISTT_SECTION\fP symbol. +.IP \(bu 2 +\fIobject\fP = the symbol will be an \fISTT_OBJECT\fP symbol. +.IP \(bu 2 +\fIfunction\fP = the symbol will be an \fISTT_FUNC\fP symbol. +.IP \(bu 2 +\fIindirect\-function\fP = the symbol will be an \fISTT_GNU_IFUNC\fP symbol. +.UNINDENT +.sp +Additionally, the following flags are accepted but ignored: \fIdebug\fP, +\fIconstructor\fP, \fIwarning\fP, \fIindirect\fP, \fIsynthetic\fP, \fIunique\-object\fP, \fIbefore\fP\&. +.sp +Can be specified multiple times to add multiple symbols. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-allow\-broken\-links +Allow \fBllvm\-objcopy\fP to remove sections even if it would leave invalid +section references. Any invalid sh_link fields will be set to zero. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-change\-start , \-\-adjust\-start +Add \fB\fP to the program’s start address. Can be specified multiple +times, in which case the values will be applied cumulatively. +.UNINDENT +.INDENT 0.0 +.TP +.B \-\-compress\-debug\-sections [